All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amd/pm: fix the deadlock issue observed on SI
@ 2022-04-11  7:47 Evan Quan
  2022-04-11  7:59   ` Paul Menzel
  2022-04-11  9:05 ` Lazar, Lijo
  0 siblings, 2 replies; 10+ messages in thread
From: Evan Quan @ 2022-04-11  7:47 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alexander.Deucher, Evan Quan, arthur.marsh

By placing those unrelated code outside of adev->pm.mutex
protections or restructuring the call flow, we can eliminate
the deadlock issue. This comes with no real logics change.

Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in amdgpu_dpm.c")

Signed-off-by: Evan Quan <evan.quan@amd.com>
Change-Id: I75de4350d9c2517aba0d6adc12e1bc69430fd800
---
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c           | 39 +++++++++++++++++++
 .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c    | 10 -----
 drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c    | 35 -----------------
 .../gpu/drm/amd/pm/powerplay/amd_powerplay.c  | 10 -----
 4 files changed, 39 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 5504d81c77b7..72e7b5d40af6 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -427,6 +427,7 @@ int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors senso
 void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev)
 {
 	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+	int i;
 
 	if (!adev->pm.dpm_enabled)
 		return;
@@ -434,6 +435,15 @@ void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev)
 	if (!pp_funcs->pm_compute_clocks)
 		return;
 
+	if (adev->mode_info.num_crtc)
+		amdgpu_display_bandwidth_update(adev);
+
+	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+		struct amdgpu_ring *ring = adev->rings[i];
+		if (ring && ring->sched.ready)
+			amdgpu_fence_wait_empty(ring);
+	}
+
 	mutex_lock(&adev->pm.mutex);
 	pp_funcs->pm_compute_clocks(adev->powerplay.pp_handle);
 	mutex_unlock(&adev->pm.mutex);
@@ -443,6 +453,20 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
 {
 	int ret = 0;
 
+	if (adev->family == AMDGPU_FAMILY_SI) {
+		mutex_lock(&adev->pm.mutex);
+		if (enable) {
+			adev->pm.dpm.uvd_active = true;
+			adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD;
+		} else {
+			adev->pm.dpm.uvd_active = false;
+		}
+		mutex_unlock(&adev->pm.mutex);
+
+		amdgpu_dpm_compute_clocks(adev);
+		return;
+	}
+
 	ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
 	if (ret)
 		DRM_ERROR("Dpm %s uvd failed, ret = %d. \n",
@@ -453,6 +477,21 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
 {
 	int ret = 0;
 
+	if (adev->family == AMDGPU_FAMILY_SI) {
+		mutex_lock(&adev->pm.mutex);
+		if (enable) {
+			adev->pm.dpm.vce_active = true;
+			/* XXX select vce level based on ring/task */
+			adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
+		} else {
+			adev->pm.dpm.vce_active = false;
+		}
+		mutex_unlock(&adev->pm.mutex);
+
+		amdgpu_dpm_compute_clocks(adev);
+		return;
+	}
+
 	ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
 	if (ret)
 		DRM_ERROR("Dpm %s vce failed, ret = %d. \n",
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
index 9613c6181c17..d3fe149d8476 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
@@ -1028,16 +1028,6 @@ static int amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
 void amdgpu_legacy_dpm_compute_clocks(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	int i = 0;
-
-	if (adev->mode_info.num_crtc)
-		amdgpu_display_bandwidth_update(adev);
-
-	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
-		struct amdgpu_ring *ring = adev->rings[i];
-		if (ring && ring->sched.ready)
-			amdgpu_fence_wait_empty(ring);
-	}
 
 	amdgpu_dpm_get_active_displays(adev);
 
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
index caae54487f9c..633dab14f51c 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
@@ -3892,40 +3892,6 @@ static int si_set_boot_state(struct amdgpu_device *adev)
 }
 #endif
 
-static int si_set_powergating_by_smu(void *handle,
-				     uint32_t block_type,
-				     bool gate)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	switch (block_type) {
-	case AMD_IP_BLOCK_TYPE_UVD:
-		if (!gate) {
-			adev->pm.dpm.uvd_active = true;
-			adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD;
-		} else {
-			adev->pm.dpm.uvd_active = false;
-		}
-
-		amdgpu_legacy_dpm_compute_clocks(handle);
-		break;
-	case AMD_IP_BLOCK_TYPE_VCE:
-		if (!gate) {
-			adev->pm.dpm.vce_active = true;
-			/* XXX select vce level based on ring/task */
-			adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
-		} else {
-			adev->pm.dpm.vce_active = false;
-		}
-
-		amdgpu_legacy_dpm_compute_clocks(handle);
-		break;
-	default:
-		break;
-	}
-	return 0;
-}
-
 static int si_set_sw_state(struct amdgpu_device *adev)
 {
 	return (amdgpu_si_send_msg_to_smc(adev, PPSMC_MSG_SwitchToSwState) == PPSMC_Result_OK) ?
@@ -8125,7 +8091,6 @@ static const struct amd_pm_funcs si_dpm_funcs = {
 	.print_power_state = &si_dpm_print_power_state,
 	.debugfs_print_current_performance_level = &si_dpm_debugfs_print_current_performance_level,
 	.force_performance_level = &si_dpm_force_performance_level,
-	.set_powergating_by_smu = &si_set_powergating_by_smu,
 	.vblank_too_short = &si_dpm_vblank_too_short,
 	.set_fan_control_mode = &si_dpm_set_fan_control_mode,
 	.get_fan_control_mode = &si_dpm_get_fan_control_mode,
diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
index dbed72c1e0c6..1eb4e613b27a 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
@@ -1503,16 +1503,6 @@ static void pp_pm_compute_clocks(void *handle)
 {
 	struct pp_hwmgr *hwmgr = handle;
 	struct amdgpu_device *adev = hwmgr->adev;
-	int i = 0;
-
-	if (adev->mode_info.num_crtc)
-		amdgpu_display_bandwidth_update(adev);
-
-	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
-		struct amdgpu_ring *ring = adev->rings[i];
-		if (ring && ring->sched.ready)
-			amdgpu_fence_wait_empty(ring);
-	}
 
 	if (!amdgpu_device_has_dc_support(adev)) {
 		amdgpu_dpm_get_active_displays(adev);
-- 
2.29.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] drm/amd/pm: fix the deadlock issue observed on SI
  2022-04-11  7:47 [PATCH] drm/amd/pm: fix the deadlock issue observed on SI Evan Quan
@ 2022-04-11  7:59   ` Paul Menzel
  2022-04-11  9:05 ` Lazar, Lijo
  1 sibling, 0 replies; 10+ messages in thread
From: Paul Menzel @ 2022-04-11  7:59 UTC (permalink / raw)
  To: Evan Quan; +Cc: Alexander.Deucher, arthur.marsh, regressions, amd-gfx

Dear Evan,


It would have been nice, if you put me in Cc as I also reported the 
regression.

Am 11.04.22 um 09:47 schrieb Evan Quan:
> By placing those unrelated code outside of adev->pm.mutex
> protections or restructuring the call flow, we can eliminate
> the deadlock issue. This comes with no real logics change.

Please describe the deadlock issue and the fix in more detail. What code 
is unrelated, and why was it put into the mutex protections in the first 
place?

> Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in amdgpu_dpm.c")

No blank line needed.

> Signed-off-by: Evan Quan <evan.quan@amd.com>
> Change-Id: I75de4350d9c2517aba0d6adc12e1bc69430fd800

Without the Gerrit instance URL, the Change-Id is useless.

As it’s a regression, please follow the documentation, and add the 
related tags.

Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in amdgpu_dpm.c")
Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
Reported-by: Arthur Marsh <arthur.marsh@internode.on.net>
Link: 
https://lore.kernel.org/r/9e689fea-6c69-f4b0-8dee-32c4cf7d8f9c@molgen.mpg.de
BugLink: https://gitlab.freedesktop.org/drm/amd/-/issues/1957

Also add the link from Arthur.


Kind regards,

Paul


[1]: https://linux-regtracking.leemhuis.info/regzbot/mainline/

> ---
>   drivers/gpu/drm/amd/pm/amdgpu_dpm.c           | 39 +++++++++++++++++++
>   .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c    | 10 -----
>   drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c    | 35 -----------------
>   .../gpu/drm/amd/pm/powerplay/amd_powerplay.c  | 10 -----
>   4 files changed, 39 insertions(+), 55 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> index 5504d81c77b7..72e7b5d40af6 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> @@ -427,6 +427,7 @@ int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors senso
>   void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev)
>   {
>   	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
> +	int i;

You could make it unsigned, as the variable never should be assigned 
negaitve values.

>   
>   	if (!adev->pm.dpm_enabled)
>   		return;
> @@ -434,6 +435,15 @@ void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev)
>   	if (!pp_funcs->pm_compute_clocks)
>   		return;
>   
> +	if (adev->mode_info.num_crtc)
> +		amdgpu_display_bandwidth_update(adev);
> +
> +	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
> +		struct amdgpu_ring *ring = adev->rings[i];
> +		if (ring && ring->sched.ready)
> +			amdgpu_fence_wait_empty(ring);
> +	}
> +
>   	mutex_lock(&adev->pm.mutex);
>   	pp_funcs->pm_compute_clocks(adev->powerplay.pp_handle);
>   	mutex_unlock(&adev->pm.mutex);
> @@ -443,6 +453,20 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
>   {
>   	int ret = 0;
>   
> +	if (adev->family == AMDGPU_FAMILY_SI) {
> +		mutex_lock(&adev->pm.mutex);
> +		if (enable) {
> +			adev->pm.dpm.uvd_active = true;
> +			adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD;
> +		} else {
> +			adev->pm.dpm.uvd_active = false;
> +		}
> +		mutex_unlock(&adev->pm.mutex);
> +
> +		amdgpu_dpm_compute_clocks(adev);
> +		return;
> +	}
> +
>   	ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
>   	if (ret)
>   		DRM_ERROR("Dpm %s uvd failed, ret = %d. \n",
> @@ -453,6 +477,21 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
>   {
>   	int ret = 0;
>   
> +	if (adev->family == AMDGPU_FAMILY_SI) {
> +		mutex_lock(&adev->pm.mutex);
> +		if (enable) {
> +			adev->pm.dpm.vce_active = true;
> +			/* XXX select vce level based on ring/task */
> +			adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
> +		} else {
> +			adev->pm.dpm.vce_active = false;
> +		}
> +		mutex_unlock(&adev->pm.mutex);
> +
> +		amdgpu_dpm_compute_clocks(adev);
> +		return;
> +	}
> +
>   	ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
>   	if (ret)
>   		DRM_ERROR("Dpm %s vce failed, ret = %d. \n",
> diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> index 9613c6181c17..d3fe149d8476 100644
> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> @@ -1028,16 +1028,6 @@ static int amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
>   void amdgpu_legacy_dpm_compute_clocks(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> -	int i = 0;
> -
> -	if (adev->mode_info.num_crtc)
> -		amdgpu_display_bandwidth_update(adev);
> -
> -	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
> -		struct amdgpu_ring *ring = adev->rings[i];
> -		if (ring && ring->sched.ready)
> -			amdgpu_fence_wait_empty(ring);
> -	}
>   
>   	amdgpu_dpm_get_active_displays(adev);
>   
> diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> index caae54487f9c..633dab14f51c 100644
> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> @@ -3892,40 +3892,6 @@ static int si_set_boot_state(struct amdgpu_device *adev)
>   }
>   #endif
>   
> -static int si_set_powergating_by_smu(void *handle,
> -				     uint32_t block_type,
> -				     bool gate)
> -{
> -	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> -
> -	switch (block_type) {
> -	case AMD_IP_BLOCK_TYPE_UVD:
> -		if (!gate) {
> -			adev->pm.dpm.uvd_active = true;
> -			adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD;
> -		} else {
> -			adev->pm.dpm.uvd_active = false;
> -		}
> -
> -		amdgpu_legacy_dpm_compute_clocks(handle);
> -		break;
> -	case AMD_IP_BLOCK_TYPE_VCE:
> -		if (!gate) {
> -			adev->pm.dpm.vce_active = true;
> -			/* XXX select vce level based on ring/task */
> -			adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
> -		} else {
> -			adev->pm.dpm.vce_active = false;
> -		}
> -
> -		amdgpu_legacy_dpm_compute_clocks(handle);
> -		break;
> -	default:
> -		break;
> -	}
> -	return 0;
> -}
> -
>   static int si_set_sw_state(struct amdgpu_device *adev)
>   {
>   	return (amdgpu_si_send_msg_to_smc(adev, PPSMC_MSG_SwitchToSwState) == PPSMC_Result_OK) ?
> @@ -8125,7 +8091,6 @@ static const struct amd_pm_funcs si_dpm_funcs = {
>   	.print_power_state = &si_dpm_print_power_state,
>   	.debugfs_print_current_performance_level = &si_dpm_debugfs_print_current_performance_level,
>   	.force_performance_level = &si_dpm_force_performance_level,
> -	.set_powergating_by_smu = &si_set_powergating_by_smu,
>   	.vblank_too_short = &si_dpm_vblank_too_short,
>   	.set_fan_control_mode = &si_dpm_set_fan_control_mode,
>   	.get_fan_control_mode = &si_dpm_get_fan_control_mode,
> diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> index dbed72c1e0c6..1eb4e613b27a 100644
> --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> @@ -1503,16 +1503,6 @@ static void pp_pm_compute_clocks(void *handle)
>   {
>   	struct pp_hwmgr *hwmgr = handle;
>   	struct amdgpu_device *adev = hwmgr->adev;
> -	int i = 0;
> -
> -	if (adev->mode_info.num_crtc)
> -		amdgpu_display_bandwidth_update(adev);
> -
> -	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
> -		struct amdgpu_ring *ring = adev->rings[i];
> -		if (ring && ring->sched.ready)
> -			amdgpu_fence_wait_empty(ring);
> -	}
>   
>   	if (!amdgpu_device_has_dc_support(adev)) {
>   		amdgpu_dpm_get_active_displays(adev);

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] drm/amd/pm: fix the deadlock issue observed on SI
@ 2022-04-11  7:59   ` Paul Menzel
  0 siblings, 0 replies; 10+ messages in thread
From: Paul Menzel @ 2022-04-11  7:59 UTC (permalink / raw)
  To: Evan Quan; +Cc: amd-gfx, Alexander.Deucher, arthur.marsh, regressions

Dear Evan,


It would have been nice, if you put me in Cc as I also reported the 
regression.

Am 11.04.22 um 09:47 schrieb Evan Quan:
> By placing those unrelated code outside of adev->pm.mutex
> protections or restructuring the call flow, we can eliminate
> the deadlock issue. This comes with no real logics change.

Please describe the deadlock issue and the fix in more detail. What code 
is unrelated, and why was it put into the mutex protections in the first 
place?

> Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in amdgpu_dpm.c")

No blank line needed.

> Signed-off-by: Evan Quan <evan.quan@amd.com>
> Change-Id: I75de4350d9c2517aba0d6adc12e1bc69430fd800

Without the Gerrit instance URL, the Change-Id is useless.

As it’s a regression, please follow the documentation, and add the 
related tags.

Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in amdgpu_dpm.c")
Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
Reported-by: Arthur Marsh <arthur.marsh@internode.on.net>
Link: 
https://lore.kernel.org/r/9e689fea-6c69-f4b0-8dee-32c4cf7d8f9c@molgen.mpg.de
BugLink: https://gitlab.freedesktop.org/drm/amd/-/issues/1957

Also add the link from Arthur.


Kind regards,

Paul


[1]: https://linux-regtracking.leemhuis.info/regzbot/mainline/

> ---
>   drivers/gpu/drm/amd/pm/amdgpu_dpm.c           | 39 +++++++++++++++++++
>   .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c    | 10 -----
>   drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c    | 35 -----------------
>   .../gpu/drm/amd/pm/powerplay/amd_powerplay.c  | 10 -----
>   4 files changed, 39 insertions(+), 55 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> index 5504d81c77b7..72e7b5d40af6 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> @@ -427,6 +427,7 @@ int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors senso
>   void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev)
>   {
>   	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
> +	int i;

You could make it unsigned, as the variable never should be assigned 
negaitve values.

>   
>   	if (!adev->pm.dpm_enabled)
>   		return;
> @@ -434,6 +435,15 @@ void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev)
>   	if (!pp_funcs->pm_compute_clocks)
>   		return;
>   
> +	if (adev->mode_info.num_crtc)
> +		amdgpu_display_bandwidth_update(adev);
> +
> +	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
> +		struct amdgpu_ring *ring = adev->rings[i];
> +		if (ring && ring->sched.ready)
> +			amdgpu_fence_wait_empty(ring);
> +	}
> +
>   	mutex_lock(&adev->pm.mutex);
>   	pp_funcs->pm_compute_clocks(adev->powerplay.pp_handle);
>   	mutex_unlock(&adev->pm.mutex);
> @@ -443,6 +453,20 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
>   {
>   	int ret = 0;
>   
> +	if (adev->family == AMDGPU_FAMILY_SI) {
> +		mutex_lock(&adev->pm.mutex);
> +		if (enable) {
> +			adev->pm.dpm.uvd_active = true;
> +			adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD;
> +		} else {
> +			adev->pm.dpm.uvd_active = false;
> +		}
> +		mutex_unlock(&adev->pm.mutex);
> +
> +		amdgpu_dpm_compute_clocks(adev);
> +		return;
> +	}
> +
>   	ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
>   	if (ret)
>   		DRM_ERROR("Dpm %s uvd failed, ret = %d. \n",
> @@ -453,6 +477,21 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
>   {
>   	int ret = 0;
>   
> +	if (adev->family == AMDGPU_FAMILY_SI) {
> +		mutex_lock(&adev->pm.mutex);
> +		if (enable) {
> +			adev->pm.dpm.vce_active = true;
> +			/* XXX select vce level based on ring/task */
> +			adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
> +		} else {
> +			adev->pm.dpm.vce_active = false;
> +		}
> +		mutex_unlock(&adev->pm.mutex);
> +
> +		amdgpu_dpm_compute_clocks(adev);
> +		return;
> +	}
> +
>   	ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
>   	if (ret)
>   		DRM_ERROR("Dpm %s vce failed, ret = %d. \n",
> diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> index 9613c6181c17..d3fe149d8476 100644
> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> @@ -1028,16 +1028,6 @@ static int amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
>   void amdgpu_legacy_dpm_compute_clocks(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> -	int i = 0;
> -
> -	if (adev->mode_info.num_crtc)
> -		amdgpu_display_bandwidth_update(adev);
> -
> -	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
> -		struct amdgpu_ring *ring = adev->rings[i];
> -		if (ring && ring->sched.ready)
> -			amdgpu_fence_wait_empty(ring);
> -	}
>   
>   	amdgpu_dpm_get_active_displays(adev);
>   
> diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> index caae54487f9c..633dab14f51c 100644
> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> @@ -3892,40 +3892,6 @@ static int si_set_boot_state(struct amdgpu_device *adev)
>   }
>   #endif
>   
> -static int si_set_powergating_by_smu(void *handle,
> -				     uint32_t block_type,
> -				     bool gate)
> -{
> -	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> -
> -	switch (block_type) {
> -	case AMD_IP_BLOCK_TYPE_UVD:
> -		if (!gate) {
> -			adev->pm.dpm.uvd_active = true;
> -			adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD;
> -		} else {
> -			adev->pm.dpm.uvd_active = false;
> -		}
> -
> -		amdgpu_legacy_dpm_compute_clocks(handle);
> -		break;
> -	case AMD_IP_BLOCK_TYPE_VCE:
> -		if (!gate) {
> -			adev->pm.dpm.vce_active = true;
> -			/* XXX select vce level based on ring/task */
> -			adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
> -		} else {
> -			adev->pm.dpm.vce_active = false;
> -		}
> -
> -		amdgpu_legacy_dpm_compute_clocks(handle);
> -		break;
> -	default:
> -		break;
> -	}
> -	return 0;
> -}
> -
>   static int si_set_sw_state(struct amdgpu_device *adev)
>   {
>   	return (amdgpu_si_send_msg_to_smc(adev, PPSMC_MSG_SwitchToSwState) == PPSMC_Result_OK) ?
> @@ -8125,7 +8091,6 @@ static const struct amd_pm_funcs si_dpm_funcs = {
>   	.print_power_state = &si_dpm_print_power_state,
>   	.debugfs_print_current_performance_level = &si_dpm_debugfs_print_current_performance_level,
>   	.force_performance_level = &si_dpm_force_performance_level,
> -	.set_powergating_by_smu = &si_set_powergating_by_smu,
>   	.vblank_too_short = &si_dpm_vblank_too_short,
>   	.set_fan_control_mode = &si_dpm_set_fan_control_mode,
>   	.get_fan_control_mode = &si_dpm_get_fan_control_mode,
> diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> index dbed72c1e0c6..1eb4e613b27a 100644
> --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> @@ -1503,16 +1503,6 @@ static void pp_pm_compute_clocks(void *handle)
>   {
>   	struct pp_hwmgr *hwmgr = handle;
>   	struct amdgpu_device *adev = hwmgr->adev;
> -	int i = 0;
> -
> -	if (adev->mode_info.num_crtc)
> -		amdgpu_display_bandwidth_update(adev);
> -
> -	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
> -		struct amdgpu_ring *ring = adev->rings[i];
> -		if (ring && ring->sched.ready)
> -			amdgpu_fence_wait_empty(ring);
> -	}
>   
>   	if (!amdgpu_device_has_dc_support(adev)) {
>   		amdgpu_dpm_get_active_displays(adev);

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] drm/amd/pm: fix the deadlock issue observed on SI
  2022-04-11  7:59   ` Paul Menzel
@ 2022-04-11  8:30     ` Thorsten Leemhuis
  -1 siblings, 0 replies; 10+ messages in thread
From: Thorsten Leemhuis @ 2022-04-11  8:30 UTC (permalink / raw)
  To: Paul Menzel, Evan Quan
  Cc: amd-gfx, Alexander.Deucher, arthur.marsh, regressions

On 11.04.22 09:59, Paul Menzel wrote:
> Am 11.04.22 um 09:47 schrieb Evan Quan:
>
> As it’s a regression, please follow the documentation, and add the
> related tags.

Yes please, otherwise you break tools that reply on this, like my
regression tracking efforts.

> Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in
> amdgpu_dpm.c")
> Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
> Reported-by: Arthur Marsh <arthur.marsh@internode.on.net>
> Link:
> https://lore.kernel.org/r/9e689fea-6c69-f4b0-8dee-32c4cf7d8f9c@molgen.mpg.de
> BugLink: https://gitlab.freedesktop.org/drm/amd/-/issues/1957
> 
> Also add the link from Arthur.

What is BugLink? That is not in the kernel's documentation afaics (BTW,
as other people use it as wll: where does that actually come from?
GitLab?) . It should just be "Link:". Or am I missing something?

Ciao, Thorsten


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] drm/amd/pm: fix the deadlock issue observed on SI
@ 2022-04-11  8:30     ` Thorsten Leemhuis
  0 siblings, 0 replies; 10+ messages in thread
From: Thorsten Leemhuis @ 2022-04-11  8:30 UTC (permalink / raw)
  To: Paul Menzel, Evan Quan
  Cc: Alexander.Deucher, arthur.marsh, regressions, amd-gfx

On 11.04.22 09:59, Paul Menzel wrote:
> Am 11.04.22 um 09:47 schrieb Evan Quan:
>
> As it’s a regression, please follow the documentation, and add the
> related tags.

Yes please, otherwise you break tools that reply on this, like my
regression tracking efforts.

> Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in
> amdgpu_dpm.c")
> Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
> Reported-by: Arthur Marsh <arthur.marsh@internode.on.net>
> Link:
> https://lore.kernel.org/r/9e689fea-6c69-f4b0-8dee-32c4cf7d8f9c@molgen.mpg.de
> BugLink: https://gitlab.freedesktop.org/drm/amd/-/issues/1957
> 
> Also add the link from Arthur.

What is BugLink? That is not in the kernel's documentation afaics (BTW,
as other people use it as wll: where does that actually come from?
GitLab?) . It should just be "Link:". Or am I missing something?

Ciao, Thorsten


^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH] drm/amd/pm: fix the deadlock issue observed on SI
  2022-04-11  7:59   ` Paul Menzel
@ 2022-04-11  8:55     ` Quan, Evan
  -1 siblings, 0 replies; 10+ messages in thread
From: Quan, Evan @ 2022-04-11  8:55 UTC (permalink / raw)
  To: Paul Menzel; +Cc: amd-gfx, Deucher, Alexander, arthur.marsh, regressions

[AMD Official Use Only]

Please check the V2 version.

Thanks,
Evan
> -----Original Message-----
> From: Paul Menzel <pmenzel@molgen.mpg.de>
> Sent: Monday, April 11, 2022 3:59 PM
> To: Quan, Evan <Evan.Quan@amd.com>
> Cc: amd-gfx@lists.freedesktop.org; Deucher, Alexander
> <Alexander.Deucher@amd.com>; arthur.marsh@internode.on.net;
> regressions@lists.linux.dev
> Subject: Re: [PATCH] drm/amd/pm: fix the deadlock issue observed on SI
> 
> Dear Evan,
> 
> 
> It would have been nice, if you put me in Cc as I also reported the regression.
> 
> Am 11.04.22 um 09:47 schrieb Evan Quan:
> > By placing those unrelated code outside of adev->pm.mutex protections
> > or restructuring the call flow, we can eliminate the deadlock issue.
> > This comes with no real logics change.
> 
> Please describe the deadlock issue and the fix in more detail. What code is
> unrelated, and why was it put into the mutex protections in the first place?
> 
> > Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in
> > amdgpu_dpm.c")
> 
> No blank line needed.
> 
> > Signed-off-by: Evan Quan <evan.quan@amd.com>
> > Change-Id: I75de4350d9c2517aba0d6adc12e1bc69430fd800
> 
> Without the Gerrit instance URL, the Change-Id is useless.
> 
> As it's a regression, please follow the documentation, and add the related
> tags.
> 
> Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in
> amdgpu_dpm.c")
> Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
> Reported-by: Arthur Marsh <arthur.marsh@internode.on.net>
> Link:
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flore.
> kernel.org%2Fr%2F9e689fea-6c69-f4b0-8dee-
> 32c4cf7d8f9c%40molgen.mpg.de&amp;data=04%7C01%7Cevan.quan%40am
> d.com%7C27d20f9b6ec445d34d1508da1b912981%7C3dd8961fe4884e608e11a
> 82d994e183d%7C0%7C0%7C637852608491368980%7CUnknown%7CTWFpbGZ
> sb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6M
> n0%3D%7C3000&amp;sdata=wtBav3WwrD17U53AArdnONjI0GBJjHI4OhxU0Z
> 4ymfA%3D&amp;reserved=0
> BugLink:
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgitla
> b.freedesktop.org%2Fdrm%2Famd%2F-
> %2Fissues%2F1957&amp;data=04%7C01%7Cevan.quan%40amd.com%7C27d
> 20f9b6ec445d34d1508da1b912981%7C3dd8961fe4884e608e11a82d994e183d
> %7C0%7C0%7C637852608491368980%7CUnknown%7CTWFpbGZsb3d8eyJWIj
> oiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3
> 000&amp;sdata=bbrWm5C03R1PcDAY%2FDWeEBnirLsXVgo%2BTTl2eIsJJdE%
> 3D&amp;reserved=0
> 
> Also add the link from Arthur.
> 
> 
> Kind regards,
> 
> Paul
> 
> 
> [1]:
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flinux
> -
> regtracking.leemhuis.info%2Fregzbot%2Fmainline%2F&amp;data=04%7C01
> %7Cevan.quan%40amd.com%7C27d20f9b6ec445d34d1508da1b912981%7C3d
> d8961fe4884e608e11a82d994e183d%7C0%7C0%7C637852608491368980%7CU
> nknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI
> 6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=RAjnkKPFbzmUp2w2aKz
> UlgBNtbxblqpTqMMZ86Fpx2A%3D&amp;reserved=0
> 
> > ---
> >   drivers/gpu/drm/amd/pm/amdgpu_dpm.c           | 39
> +++++++++++++++++++
> >   .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c    | 10 -----
> >   drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c    | 35 -----------------
> >   .../gpu/drm/amd/pm/powerplay/amd_powerplay.c  | 10 -----
> >   4 files changed, 39 insertions(+), 55 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> > b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> > index 5504d81c77b7..72e7b5d40af6 100644
> > --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> > +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> > @@ -427,6 +427,7 @@ int amdgpu_dpm_read_sensor(struct
> amdgpu_device *adev, enum amd_pp_sensors senso
> >   void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev)
> >   {
> >   	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
> > +	int i;
> 
> You could make it unsigned, as the variable never should be assigned
> negaitve values.
> 
> >
> >   	if (!adev->pm.dpm_enabled)
> >   		return;
> > @@ -434,6 +435,15 @@ void amdgpu_dpm_compute_clocks(struct
> amdgpu_device *adev)
> >   	if (!pp_funcs->pm_compute_clocks)
> >   		return;
> >
> > +	if (adev->mode_info.num_crtc)
> > +		amdgpu_display_bandwidth_update(adev);
> > +
> > +	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
> > +		struct amdgpu_ring *ring = adev->rings[i];
> > +		if (ring && ring->sched.ready)
> > +			amdgpu_fence_wait_empty(ring);
> > +	}
> > +
> >   	mutex_lock(&adev->pm.mutex);
> >   	pp_funcs->pm_compute_clocks(adev->powerplay.pp_handle);
> >   	mutex_unlock(&adev->pm.mutex);
> > @@ -443,6 +453,20 @@ void amdgpu_dpm_enable_uvd(struct
> amdgpu_device *adev, bool enable)
> >   {
> >   	int ret = 0;
> >
> > +	if (adev->family == AMDGPU_FAMILY_SI) {
> > +		mutex_lock(&adev->pm.mutex);
> > +		if (enable) {
> > +			adev->pm.dpm.uvd_active = true;
> > +			adev->pm.dpm.state =
> POWER_STATE_TYPE_INTERNAL_UVD;
> > +		} else {
> > +			adev->pm.dpm.uvd_active = false;
> > +		}
> > +		mutex_unlock(&adev->pm.mutex);
> > +
> > +		amdgpu_dpm_compute_clocks(adev);
> > +		return;
> > +	}
> > +
> >   	ret = amdgpu_dpm_set_powergating_by_smu(adev,
> AMD_IP_BLOCK_TYPE_UVD, !enable);
> >   	if (ret)
> >   		DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", @@ -453,6
> +477,21 @@
> > void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
> >   {
> >   	int ret = 0;
> >
> > +	if (adev->family == AMDGPU_FAMILY_SI) {
> > +		mutex_lock(&adev->pm.mutex);
> > +		if (enable) {
> > +			adev->pm.dpm.vce_active = true;
> > +			/* XXX select vce level based on ring/task */
> > +			adev->pm.dpm.vce_level =
> AMD_VCE_LEVEL_AC_ALL;
> > +		} else {
> > +			adev->pm.dpm.vce_active = false;
> > +		}
> > +		mutex_unlock(&adev->pm.mutex);
> > +
> > +		amdgpu_dpm_compute_clocks(adev);
> > +		return;
> > +	}
> > +
> >   	ret = amdgpu_dpm_set_powergating_by_smu(adev,
> AMD_IP_BLOCK_TYPE_VCE, !enable);
> >   	if (ret)
> >   		DRM_ERROR("Dpm %s vce failed, ret = %d. \n", diff --git
> > a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> > b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> > index 9613c6181c17..d3fe149d8476 100644
> > --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> > +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> > @@ -1028,16 +1028,6 @@ static int
> amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
> >   void amdgpu_legacy_dpm_compute_clocks(void *handle)
> >   {
> >   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> > -	int i = 0;
> > -
> > -	if (adev->mode_info.num_crtc)
> > -		amdgpu_display_bandwidth_update(adev);
> > -
> > -	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
> > -		struct amdgpu_ring *ring = adev->rings[i];
> > -		if (ring && ring->sched.ready)
> > -			amdgpu_fence_wait_empty(ring);
> > -	}
> >
> >   	amdgpu_dpm_get_active_displays(adev);
> >
> > diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> > b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> > index caae54487f9c..633dab14f51c 100644
> > --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> > +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> > @@ -3892,40 +3892,6 @@ static int si_set_boot_state(struct
> amdgpu_device *adev)
> >   }
> >   #endif
> >
> > -static int si_set_powergating_by_smu(void *handle,
> > -				     uint32_t block_type,
> > -				     bool gate)
> > -{
> > -	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> > -
> > -	switch (block_type) {
> > -	case AMD_IP_BLOCK_TYPE_UVD:
> > -		if (!gate) {
> > -			adev->pm.dpm.uvd_active = true;
> > -			adev->pm.dpm.state =
> POWER_STATE_TYPE_INTERNAL_UVD;
> > -		} else {
> > -			adev->pm.dpm.uvd_active = false;
> > -		}
> > -
> > -		amdgpu_legacy_dpm_compute_clocks(handle);
> > -		break;
> > -	case AMD_IP_BLOCK_TYPE_VCE:
> > -		if (!gate) {
> > -			adev->pm.dpm.vce_active = true;
> > -			/* XXX select vce level based on ring/task */
> > -			adev->pm.dpm.vce_level =
> AMD_VCE_LEVEL_AC_ALL;
> > -		} else {
> > -			adev->pm.dpm.vce_active = false;
> > -		}
> > -
> > -		amdgpu_legacy_dpm_compute_clocks(handle);
> > -		break;
> > -	default:
> > -		break;
> > -	}
> > -	return 0;
> > -}
> > -
> >   static int si_set_sw_state(struct amdgpu_device *adev)
> >   {
> >   	return (amdgpu_si_send_msg_to_smc(adev,
> PPSMC_MSG_SwitchToSwState) == PPSMC_Result_OK) ?
> > @@ -8125,7 +8091,6 @@ static const struct amd_pm_funcs si_dpm_funcs
> = {
> >   	.print_power_state = &si_dpm_print_power_state,
> >   	.debugfs_print_current_performance_level =
> &si_dpm_debugfs_print_current_performance_level,
> >   	.force_performance_level = &si_dpm_force_performance_level,
> > -	.set_powergating_by_smu = &si_set_powergating_by_smu,
> >   	.vblank_too_short = &si_dpm_vblank_too_short,
> >   	.set_fan_control_mode = &si_dpm_set_fan_control_mode,
> >   	.get_fan_control_mode = &si_dpm_get_fan_control_mode, diff --
> git
> > a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> > b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> > index dbed72c1e0c6..1eb4e613b27a 100644
> > --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> > +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> > @@ -1503,16 +1503,6 @@ static void pp_pm_compute_clocks(void
> *handle)
> >   {
> >   	struct pp_hwmgr *hwmgr = handle;
> >   	struct amdgpu_device *adev = hwmgr->adev;
> > -	int i = 0;
> > -
> > -	if (adev->mode_info.num_crtc)
> > -		amdgpu_display_bandwidth_update(adev);
> > -
> > -	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
> > -		struct amdgpu_ring *ring = adev->rings[i];
> > -		if (ring && ring->sched.ready)
> > -			amdgpu_fence_wait_empty(ring);
> > -	}
> >
> >   	if (!amdgpu_device_has_dc_support(adev)) {
> >   		amdgpu_dpm_get_active_displays(adev);

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH] drm/amd/pm: fix the deadlock issue observed on SI
@ 2022-04-11  8:55     ` Quan, Evan
  0 siblings, 0 replies; 10+ messages in thread
From: Quan, Evan @ 2022-04-11  8:55 UTC (permalink / raw)
  To: Paul Menzel; +Cc: Deucher, Alexander, arthur.marsh, regressions, amd-gfx

[AMD Official Use Only]

Please check the V2 version.

Thanks,
Evan
> -----Original Message-----
> From: Paul Menzel <pmenzel@molgen.mpg.de>
> Sent: Monday, April 11, 2022 3:59 PM
> To: Quan, Evan <Evan.Quan@amd.com>
> Cc: amd-gfx@lists.freedesktop.org; Deucher, Alexander
> <Alexander.Deucher@amd.com>; arthur.marsh@internode.on.net;
> regressions@lists.linux.dev
> Subject: Re: [PATCH] drm/amd/pm: fix the deadlock issue observed on SI
> 
> Dear Evan,
> 
> 
> It would have been nice, if you put me in Cc as I also reported the regression.
> 
> Am 11.04.22 um 09:47 schrieb Evan Quan:
> > By placing those unrelated code outside of adev->pm.mutex protections
> > or restructuring the call flow, we can eliminate the deadlock issue.
> > This comes with no real logics change.
> 
> Please describe the deadlock issue and the fix in more detail. What code is
> unrelated, and why was it put into the mutex protections in the first place?
> 
> > Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in
> > amdgpu_dpm.c")
> 
> No blank line needed.
> 
> > Signed-off-by: Evan Quan <evan.quan@amd.com>
> > Change-Id: I75de4350d9c2517aba0d6adc12e1bc69430fd800
> 
> Without the Gerrit instance URL, the Change-Id is useless.
> 
> As it's a regression, please follow the documentation, and add the related
> tags.
> 
> Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in
> amdgpu_dpm.c")
> Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
> Reported-by: Arthur Marsh <arthur.marsh@internode.on.net>
> Link:
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flore.
> kernel.org%2Fr%2F9e689fea-6c69-f4b0-8dee-
> 32c4cf7d8f9c%40molgen.mpg.de&amp;data=04%7C01%7Cevan.quan%40am
> d.com%7C27d20f9b6ec445d34d1508da1b912981%7C3dd8961fe4884e608e11a
> 82d994e183d%7C0%7C0%7C637852608491368980%7CUnknown%7CTWFpbGZ
> sb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6M
> n0%3D%7C3000&amp;sdata=wtBav3WwrD17U53AArdnONjI0GBJjHI4OhxU0Z
> 4ymfA%3D&amp;reserved=0
> BugLink:
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgitla
> b.freedesktop.org%2Fdrm%2Famd%2F-
> %2Fissues%2F1957&amp;data=04%7C01%7Cevan.quan%40amd.com%7C27d
> 20f9b6ec445d34d1508da1b912981%7C3dd8961fe4884e608e11a82d994e183d
> %7C0%7C0%7C637852608491368980%7CUnknown%7CTWFpbGZsb3d8eyJWIj
> oiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3
> 000&amp;sdata=bbrWm5C03R1PcDAY%2FDWeEBnirLsXVgo%2BTTl2eIsJJdE%
> 3D&amp;reserved=0
> 
> Also add the link from Arthur.
> 
> 
> Kind regards,
> 
> Paul
> 
> 
> [1]:
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flinux
> -
> regtracking.leemhuis.info%2Fregzbot%2Fmainline%2F&amp;data=04%7C01
> %7Cevan.quan%40amd.com%7C27d20f9b6ec445d34d1508da1b912981%7C3d
> d8961fe4884e608e11a82d994e183d%7C0%7C0%7C637852608491368980%7CU
> nknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI
> 6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=RAjnkKPFbzmUp2w2aKz
> UlgBNtbxblqpTqMMZ86Fpx2A%3D&amp;reserved=0
> 
> > ---
> >   drivers/gpu/drm/amd/pm/amdgpu_dpm.c           | 39
> +++++++++++++++++++
> >   .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c    | 10 -----
> >   drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c    | 35 -----------------
> >   .../gpu/drm/amd/pm/powerplay/amd_powerplay.c  | 10 -----
> >   4 files changed, 39 insertions(+), 55 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> > b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> > index 5504d81c77b7..72e7b5d40af6 100644
> > --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> > +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> > @@ -427,6 +427,7 @@ int amdgpu_dpm_read_sensor(struct
> amdgpu_device *adev, enum amd_pp_sensors senso
> >   void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev)
> >   {
> >   	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
> > +	int i;
> 
> You could make it unsigned, as the variable never should be assigned
> negaitve values.
> 
> >
> >   	if (!adev->pm.dpm_enabled)
> >   		return;
> > @@ -434,6 +435,15 @@ void amdgpu_dpm_compute_clocks(struct
> amdgpu_device *adev)
> >   	if (!pp_funcs->pm_compute_clocks)
> >   		return;
> >
> > +	if (adev->mode_info.num_crtc)
> > +		amdgpu_display_bandwidth_update(adev);
> > +
> > +	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
> > +		struct amdgpu_ring *ring = adev->rings[i];
> > +		if (ring && ring->sched.ready)
> > +			amdgpu_fence_wait_empty(ring);
> > +	}
> > +
> >   	mutex_lock(&adev->pm.mutex);
> >   	pp_funcs->pm_compute_clocks(adev->powerplay.pp_handle);
> >   	mutex_unlock(&adev->pm.mutex);
> > @@ -443,6 +453,20 @@ void amdgpu_dpm_enable_uvd(struct
> amdgpu_device *adev, bool enable)
> >   {
> >   	int ret = 0;
> >
> > +	if (adev->family == AMDGPU_FAMILY_SI) {
> > +		mutex_lock(&adev->pm.mutex);
> > +		if (enable) {
> > +			adev->pm.dpm.uvd_active = true;
> > +			adev->pm.dpm.state =
> POWER_STATE_TYPE_INTERNAL_UVD;
> > +		} else {
> > +			adev->pm.dpm.uvd_active = false;
> > +		}
> > +		mutex_unlock(&adev->pm.mutex);
> > +
> > +		amdgpu_dpm_compute_clocks(adev);
> > +		return;
> > +	}
> > +
> >   	ret = amdgpu_dpm_set_powergating_by_smu(adev,
> AMD_IP_BLOCK_TYPE_UVD, !enable);
> >   	if (ret)
> >   		DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", @@ -453,6
> +477,21 @@
> > void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
> >   {
> >   	int ret = 0;
> >
> > +	if (adev->family == AMDGPU_FAMILY_SI) {
> > +		mutex_lock(&adev->pm.mutex);
> > +		if (enable) {
> > +			adev->pm.dpm.vce_active = true;
> > +			/* XXX select vce level based on ring/task */
> > +			adev->pm.dpm.vce_level =
> AMD_VCE_LEVEL_AC_ALL;
> > +		} else {
> > +			adev->pm.dpm.vce_active = false;
> > +		}
> > +		mutex_unlock(&adev->pm.mutex);
> > +
> > +		amdgpu_dpm_compute_clocks(adev);
> > +		return;
> > +	}
> > +
> >   	ret = amdgpu_dpm_set_powergating_by_smu(adev,
> AMD_IP_BLOCK_TYPE_VCE, !enable);
> >   	if (ret)
> >   		DRM_ERROR("Dpm %s vce failed, ret = %d. \n", diff --git
> > a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> > b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> > index 9613c6181c17..d3fe149d8476 100644
> > --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> > +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> > @@ -1028,16 +1028,6 @@ static int
> amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
> >   void amdgpu_legacy_dpm_compute_clocks(void *handle)
> >   {
> >   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> > -	int i = 0;
> > -
> > -	if (adev->mode_info.num_crtc)
> > -		amdgpu_display_bandwidth_update(adev);
> > -
> > -	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
> > -		struct amdgpu_ring *ring = adev->rings[i];
> > -		if (ring && ring->sched.ready)
> > -			amdgpu_fence_wait_empty(ring);
> > -	}
> >
> >   	amdgpu_dpm_get_active_displays(adev);
> >
> > diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> > b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> > index caae54487f9c..633dab14f51c 100644
> > --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> > +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> > @@ -3892,40 +3892,6 @@ static int si_set_boot_state(struct
> amdgpu_device *adev)
> >   }
> >   #endif
> >
> > -static int si_set_powergating_by_smu(void *handle,
> > -				     uint32_t block_type,
> > -				     bool gate)
> > -{
> > -	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> > -
> > -	switch (block_type) {
> > -	case AMD_IP_BLOCK_TYPE_UVD:
> > -		if (!gate) {
> > -			adev->pm.dpm.uvd_active = true;
> > -			adev->pm.dpm.state =
> POWER_STATE_TYPE_INTERNAL_UVD;
> > -		} else {
> > -			adev->pm.dpm.uvd_active = false;
> > -		}
> > -
> > -		amdgpu_legacy_dpm_compute_clocks(handle);
> > -		break;
> > -	case AMD_IP_BLOCK_TYPE_VCE:
> > -		if (!gate) {
> > -			adev->pm.dpm.vce_active = true;
> > -			/* XXX select vce level based on ring/task */
> > -			adev->pm.dpm.vce_level =
> AMD_VCE_LEVEL_AC_ALL;
> > -		} else {
> > -			adev->pm.dpm.vce_active = false;
> > -		}
> > -
> > -		amdgpu_legacy_dpm_compute_clocks(handle);
> > -		break;
> > -	default:
> > -		break;
> > -	}
> > -	return 0;
> > -}
> > -
> >   static int si_set_sw_state(struct amdgpu_device *adev)
> >   {
> >   	return (amdgpu_si_send_msg_to_smc(adev,
> PPSMC_MSG_SwitchToSwState) == PPSMC_Result_OK) ?
> > @@ -8125,7 +8091,6 @@ static const struct amd_pm_funcs si_dpm_funcs
> = {
> >   	.print_power_state = &si_dpm_print_power_state,
> >   	.debugfs_print_current_performance_level =
> &si_dpm_debugfs_print_current_performance_level,
> >   	.force_performance_level = &si_dpm_force_performance_level,
> > -	.set_powergating_by_smu = &si_set_powergating_by_smu,
> >   	.vblank_too_short = &si_dpm_vblank_too_short,
> >   	.set_fan_control_mode = &si_dpm_set_fan_control_mode,
> >   	.get_fan_control_mode = &si_dpm_get_fan_control_mode, diff --
> git
> > a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> > b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> > index dbed72c1e0c6..1eb4e613b27a 100644
> > --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> > +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> > @@ -1503,16 +1503,6 @@ static void pp_pm_compute_clocks(void
> *handle)
> >   {
> >   	struct pp_hwmgr *hwmgr = handle;
> >   	struct amdgpu_device *adev = hwmgr->adev;
> > -	int i = 0;
> > -
> > -	if (adev->mode_info.num_crtc)
> > -		amdgpu_display_bandwidth_update(adev);
> > -
> > -	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
> > -		struct amdgpu_ring *ring = adev->rings[i];
> > -		if (ring && ring->sched.ready)
> > -			amdgpu_fence_wait_empty(ring);
> > -	}
> >
> >   	if (!amdgpu_device_has_dc_support(adev)) {
> >   		amdgpu_dpm_get_active_displays(adev);

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] drm/amd/pm: fix the deadlock issue observed on SI
  2022-04-11  7:47 [PATCH] drm/amd/pm: fix the deadlock issue observed on SI Evan Quan
  2022-04-11  7:59   ` Paul Menzel
@ 2022-04-11  9:05 ` Lazar, Lijo
  2022-04-11  9:25   ` Quan, Evan
  1 sibling, 1 reply; 10+ messages in thread
From: Lazar, Lijo @ 2022-04-11  9:05 UTC (permalink / raw)
  To: Evan Quan, amd-gfx; +Cc: Alexander.Deucher, arthur.marsh



On 4/11/2022 1:17 PM, Evan Quan wrote:
> By placing those unrelated code outside of adev->pm.mutex
> protections or restructuring the call flow, we can eliminate
> the deadlock issue. This comes with no real logics change.
> 
> Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in amdgpu_dpm.c")
> 
> Signed-off-by: Evan Quan <evan.quan@amd.com>
> Change-Id: I75de4350d9c2517aba0d6adc12e1bc69430fd800
> ---
>   drivers/gpu/drm/amd/pm/amdgpu_dpm.c           | 39 +++++++++++++++++++
>   .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c    | 10 -----
>   drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c    | 35 -----------------
>   .../gpu/drm/amd/pm/powerplay/amd_powerplay.c  | 10 -----
>   4 files changed, 39 insertions(+), 55 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> index 5504d81c77b7..72e7b5d40af6 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> @@ -427,6 +427,7 @@ int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors senso
>   void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev)
>   {
>   	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
> +	int i;
>   
>   	if (!adev->pm.dpm_enabled)
>   		return;
> @@ -434,6 +435,15 @@ void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev)
>   	if (!pp_funcs->pm_compute_clocks)
>   		return;
>   
> +	if (adev->mode_info.num_crtc)
> +		amdgpu_display_bandwidth_update(adev);
> +
> +	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
> +		struct amdgpu_ring *ring = adev->rings[i];
> +		if (ring && ring->sched.ready)
> +			amdgpu_fence_wait_empty(ring);
> +	}
> +
>   	mutex_lock(&adev->pm.mutex);
>   	pp_funcs->pm_compute_clocks(adev->powerplay.pp_handle);
>   	mutex_unlock(&adev->pm.mutex);
> @@ -443,6 +453,20 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
>   {
>   	int ret = 0;
>   
> +	if (adev->family == AMDGPU_FAMILY_SI) {

Bringing family specific code to this layer is not the proper fix. Need 
to specify what is the deadlock and why can't it be handled in family 
specific implementation.

Thanks,
Lijo

> +		mutex_lock(&adev->pm.mutex);
> +		if (enable) {
> +			adev->pm.dpm.uvd_active = true;
> +			adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD;
> +		} else {
> +			adev->pm.dpm.uvd_active = false;
> +		}
> +		mutex_unlock(&adev->pm.mutex);
> +
> +		amdgpu_dpm_compute_clocks(adev);
> +		return;
> +	}
> +
>   	ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
>   	if (ret)
>   		DRM_ERROR("Dpm %s uvd failed, ret = %d. \n",
> @@ -453,6 +477,21 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
>   {
>   	int ret = 0;
>   
> +	if (adev->family == AMDGPU_FAMILY_SI) {
> +		mutex_lock(&adev->pm.mutex);
> +		if (enable) {
> +			adev->pm.dpm.vce_active = true;
> +			/* XXX select vce level based on ring/task */
> +			adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
> +		} else {
> +			adev->pm.dpm.vce_active = false;
> +		}
> +		mutex_unlock(&adev->pm.mutex);
> +
> +		amdgpu_dpm_compute_clocks(adev);
> +		return;
> +	}
> +
>   	ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
>   	if (ret)
>   		DRM_ERROR("Dpm %s vce failed, ret = %d. \n",
> diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> index 9613c6181c17..d3fe149d8476 100644
> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> @@ -1028,16 +1028,6 @@ static int amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
>   void amdgpu_legacy_dpm_compute_clocks(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> -	int i = 0;
> -
> -	if (adev->mode_info.num_crtc)
> -		amdgpu_display_bandwidth_update(adev);
> -
> -	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
> -		struct amdgpu_ring *ring = adev->rings[i];
> -		if (ring && ring->sched.ready)
> -			amdgpu_fence_wait_empty(ring);
> -	}
>   
>   	amdgpu_dpm_get_active_displays(adev);
>   
> diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> index caae54487f9c..633dab14f51c 100644
> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> @@ -3892,40 +3892,6 @@ static int si_set_boot_state(struct amdgpu_device *adev)
>   }
>   #endif
>   
> -static int si_set_powergating_by_smu(void *handle,
> -				     uint32_t block_type,
> -				     bool gate)
> -{
> -	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> -
> -	switch (block_type) {
> -	case AMD_IP_BLOCK_TYPE_UVD:
> -		if (!gate) {
> -			adev->pm.dpm.uvd_active = true;
> -			adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD;
> -		} else {
> -			adev->pm.dpm.uvd_active = false;
> -		}
> -
> -		amdgpu_legacy_dpm_compute_clocks(handle);
> -		break;
> -	case AMD_IP_BLOCK_TYPE_VCE:
> -		if (!gate) {
> -			adev->pm.dpm.vce_active = true;
> -			/* XXX select vce level based on ring/task */
> -			adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
> -		} else {
> -			adev->pm.dpm.vce_active = false;
> -		}
> -
> -		amdgpu_legacy_dpm_compute_clocks(handle);
> -		break;
> -	default:
> -		break;
> -	}
> -	return 0;
> -}
> -
>   static int si_set_sw_state(struct amdgpu_device *adev)
>   {
>   	return (amdgpu_si_send_msg_to_smc(adev, PPSMC_MSG_SwitchToSwState) == PPSMC_Result_OK) ?
> @@ -8125,7 +8091,6 @@ static const struct amd_pm_funcs si_dpm_funcs = {
>   	.print_power_state = &si_dpm_print_power_state,
>   	.debugfs_print_current_performance_level = &si_dpm_debugfs_print_current_performance_level,
>   	.force_performance_level = &si_dpm_force_performance_level,
> -	.set_powergating_by_smu = &si_set_powergating_by_smu,
>   	.vblank_too_short = &si_dpm_vblank_too_short,
>   	.set_fan_control_mode = &si_dpm_set_fan_control_mode,
>   	.get_fan_control_mode = &si_dpm_get_fan_control_mode,
> diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> index dbed72c1e0c6..1eb4e613b27a 100644
> --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> @@ -1503,16 +1503,6 @@ static void pp_pm_compute_clocks(void *handle)
>   {
>   	struct pp_hwmgr *hwmgr = handle;
>   	struct amdgpu_device *adev = hwmgr->adev;
> -	int i = 0;
> -
> -	if (adev->mode_info.num_crtc)
> -		amdgpu_display_bandwidth_update(adev);
> -
> -	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
> -		struct amdgpu_ring *ring = adev->rings[i];
> -		if (ring && ring->sched.ready)
> -			amdgpu_fence_wait_empty(ring);
> -	}
>   
>   	if (!amdgpu_device_has_dc_support(adev)) {
>   		amdgpu_dpm_get_active_displays(adev);
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH] drm/amd/pm: fix the deadlock issue observed on SI
  2022-04-11  9:05 ` Lazar, Lijo
@ 2022-04-11  9:25   ` Quan, Evan
  2022-04-11  9:47     ` Lazar, Lijo
  0 siblings, 1 reply; 10+ messages in thread
From: Quan, Evan @ 2022-04-11  9:25 UTC (permalink / raw)
  To: Lazar, Lijo, amd-gfx; +Cc: Deucher, Alexander, arthur.marsh

[AMD Official Use Only]



> -----Original Message-----
> From: Lazar, Lijo <Lijo.Lazar@amd.com>
> Sent: Monday, April 11, 2022 5:05 PM
> To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>;
> arthur.marsh@internode.on.net
> Subject: Re: [PATCH] drm/amd/pm: fix the deadlock issue observed on SI
> 
> 
> 
> On 4/11/2022 1:17 PM, Evan Quan wrote:
> > By placing those unrelated code outside of adev->pm.mutex protections
> > or restructuring the call flow, we can eliminate the deadlock issue.
> > This comes with no real logics change.
> >
> > Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in
> > amdgpu_dpm.c")
> >
> > Signed-off-by: Evan Quan <evan.quan@amd.com>
> > Change-Id: I75de4350d9c2517aba0d6adc12e1bc69430fd800
> > ---
> >   drivers/gpu/drm/amd/pm/amdgpu_dpm.c           | 39
> +++++++++++++++++++
> >   .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c    | 10 -----
> >   drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c    | 35 -----------------
> >   .../gpu/drm/amd/pm/powerplay/amd_powerplay.c  | 10 -----
> >   4 files changed, 39 insertions(+), 55 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> > b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> > index 5504d81c77b7..72e7b5d40af6 100644
> > --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> > +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> > @@ -427,6 +427,7 @@ int amdgpu_dpm_read_sensor(struct
> amdgpu_device *adev, enum amd_pp_sensors senso
> >   void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev)
> >   {
> >   	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
> > +	int i;
> >
> >   	if (!adev->pm.dpm_enabled)
> >   		return;
> > @@ -434,6 +435,15 @@ void amdgpu_dpm_compute_clocks(struct
> amdgpu_device *adev)
> >   	if (!pp_funcs->pm_compute_clocks)
> >   		return;
> >
> > +	if (adev->mode_info.num_crtc)
> > +		amdgpu_display_bandwidth_update(adev);
> > +
> > +	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
> > +		struct amdgpu_ring *ring = adev->rings[i];
> > +		if (ring && ring->sched.ready)
> > +			amdgpu_fence_wait_empty(ring);
> > +	}
> > +
> >   	mutex_lock(&adev->pm.mutex);
> >   	pp_funcs->pm_compute_clocks(adev->powerplay.pp_handle);
> >   	mutex_unlock(&adev->pm.mutex);
> > @@ -443,6 +453,20 @@ void amdgpu_dpm_enable_uvd(struct
> amdgpu_device *adev, bool enable)
> >   {
> >   	int ret = 0;
> >
> > +	if (adev->family == AMDGPU_FAMILY_SI) {
> 
> Bringing family specific code to this layer is not the proper fix. Need to specify
> what is the deadlock and why can't it be handled in family specific
> implementation.
[Quan, Evan] Please check V2. I added some details about this deadlock there.
For those legacy ASICs, let's make the solution simple and straight forward.

Thanks,
Evan
> 
> Thanks,
> Lijo
> 
> > +		mutex_lock(&adev->pm.mutex);
> > +		if (enable) {
> > +			adev->pm.dpm.uvd_active = true;
> > +			adev->pm.dpm.state =
> POWER_STATE_TYPE_INTERNAL_UVD;
> > +		} else {
> > +			adev->pm.dpm.uvd_active = false;
> > +		}
> > +		mutex_unlock(&adev->pm.mutex);
> > +
> > +		amdgpu_dpm_compute_clocks(adev);
> > +		return;
> > +	}
> > +
> >   	ret = amdgpu_dpm_set_powergating_by_smu(adev,
> AMD_IP_BLOCK_TYPE_UVD, !enable);
> >   	if (ret)
> >   		DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", @@ -453,6
> +477,21 @@
> > void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
> >   {
> >   	int ret = 0;
> >
> > +	if (adev->family == AMDGPU_FAMILY_SI) {
> > +		mutex_lock(&adev->pm.mutex);
> > +		if (enable) {
> > +			adev->pm.dpm.vce_active = true;
> > +			/* XXX select vce level based on ring/task */
> > +			adev->pm.dpm.vce_level =
> AMD_VCE_LEVEL_AC_ALL;
> > +		} else {
> > +			adev->pm.dpm.vce_active = false;
> > +		}
> > +		mutex_unlock(&adev->pm.mutex);
> > +
> > +		amdgpu_dpm_compute_clocks(adev);
> > +		return;
> > +	}
> > +
> >   	ret = amdgpu_dpm_set_powergating_by_smu(adev,
> AMD_IP_BLOCK_TYPE_VCE, !enable);
> >   	if (ret)
> >   		DRM_ERROR("Dpm %s vce failed, ret = %d. \n", diff --git
> > a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> > b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> > index 9613c6181c17..d3fe149d8476 100644
> > --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> > +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> > @@ -1028,16 +1028,6 @@ static int
> amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
> >   void amdgpu_legacy_dpm_compute_clocks(void *handle)
> >   {
> >   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> > -	int i = 0;
> > -
> > -	if (adev->mode_info.num_crtc)
> > -		amdgpu_display_bandwidth_update(adev);
> > -
> > -	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
> > -		struct amdgpu_ring *ring = adev->rings[i];
> > -		if (ring && ring->sched.ready)
> > -			amdgpu_fence_wait_empty(ring);
> > -	}
> >
> >   	amdgpu_dpm_get_active_displays(adev);
> >
> > diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> > b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> > index caae54487f9c..633dab14f51c 100644
> > --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> > +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> > @@ -3892,40 +3892,6 @@ static int si_set_boot_state(struct
> amdgpu_device *adev)
> >   }
> >   #endif
> >
> > -static int si_set_powergating_by_smu(void *handle,
> > -				     uint32_t block_type,
> > -				     bool gate)
> > -{
> > -	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> > -
> > -	switch (block_type) {
> > -	case AMD_IP_BLOCK_TYPE_UVD:
> > -		if (!gate) {
> > -			adev->pm.dpm.uvd_active = true;
> > -			adev->pm.dpm.state =
> POWER_STATE_TYPE_INTERNAL_UVD;
> > -		} else {
> > -			adev->pm.dpm.uvd_active = false;
> > -		}
> > -
> > -		amdgpu_legacy_dpm_compute_clocks(handle);
> > -		break;
> > -	case AMD_IP_BLOCK_TYPE_VCE:
> > -		if (!gate) {
> > -			adev->pm.dpm.vce_active = true;
> > -			/* XXX select vce level based on ring/task */
> > -			adev->pm.dpm.vce_level =
> AMD_VCE_LEVEL_AC_ALL;
> > -		} else {
> > -			adev->pm.dpm.vce_active = false;
> > -		}
> > -
> > -		amdgpu_legacy_dpm_compute_clocks(handle);
> > -		break;
> > -	default:
> > -		break;
> > -	}
> > -	return 0;
> > -}
> > -
> >   static int si_set_sw_state(struct amdgpu_device *adev)
> >   {
> >   	return (amdgpu_si_send_msg_to_smc(adev,
> PPSMC_MSG_SwitchToSwState) == PPSMC_Result_OK) ?
> > @@ -8125,7 +8091,6 @@ static const struct amd_pm_funcs si_dpm_funcs
> = {
> >   	.print_power_state = &si_dpm_print_power_state,
> >   	.debugfs_print_current_performance_level =
> &si_dpm_debugfs_print_current_performance_level,
> >   	.force_performance_level = &si_dpm_force_performance_level,
> > -	.set_powergating_by_smu = &si_set_powergating_by_smu,
> >   	.vblank_too_short = &si_dpm_vblank_too_short,
> >   	.set_fan_control_mode = &si_dpm_set_fan_control_mode,
> >   	.get_fan_control_mode = &si_dpm_get_fan_control_mode, diff --
> git
> > a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> > b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> > index dbed72c1e0c6..1eb4e613b27a 100644
> > --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> > +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
> > @@ -1503,16 +1503,6 @@ static void pp_pm_compute_clocks(void
> *handle)
> >   {
> >   	struct pp_hwmgr *hwmgr = handle;
> >   	struct amdgpu_device *adev = hwmgr->adev;
> > -	int i = 0;
> > -
> > -	if (adev->mode_info.num_crtc)
> > -		amdgpu_display_bandwidth_update(adev);
> > -
> > -	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
> > -		struct amdgpu_ring *ring = adev->rings[i];
> > -		if (ring && ring->sched.ready)
> > -			amdgpu_fence_wait_empty(ring);
> > -	}
> >
> >   	if (!amdgpu_device_has_dc_support(adev)) {
> >   		amdgpu_dpm_get_active_displays(adev);
> >

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] drm/amd/pm: fix the deadlock issue observed on SI
  2022-04-11  9:25   ` Quan, Evan
@ 2022-04-11  9:47     ` Lazar, Lijo
  0 siblings, 0 replies; 10+ messages in thread
From: Lazar, Lijo @ 2022-04-11  9:47 UTC (permalink / raw)
  To: Quan, Evan, amd-gfx; +Cc: Deucher, Alexander, arthur.marsh



On 4/11/2022 2:55 PM, Quan, Evan wrote:
> [AMD Official Use Only]
> 
> 
> 
>> -----Original Message-----
>> From: Lazar, Lijo <Lijo.Lazar@amd.com>
>> Sent: Monday, April 11, 2022 5:05 PM
>> To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org
>> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>;
>> arthur.marsh@internode.on.net
>> Subject: Re: [PATCH] drm/amd/pm: fix the deadlock issue observed on SI
>>
>>
>>
>> On 4/11/2022 1:17 PM, Evan Quan wrote:
>>> By placing those unrelated code outside of adev->pm.mutex protections
>>> or restructuring the call flow, we can eliminate the deadlock issue.
>>> This comes with no real logics change.
>>>
>>> Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in
>>> amdgpu_dpm.c")
>>>
>>> Signed-off-by: Evan Quan <evan.quan@amd.com>
>>> Change-Id: I75de4350d9c2517aba0d6adc12e1bc69430fd800
>>> ---
>>>    drivers/gpu/drm/amd/pm/amdgpu_dpm.c           | 39
>> +++++++++++++++++++
>>>    .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c    | 10 -----
>>>    drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c    | 35 -----------------
>>>    .../gpu/drm/amd/pm/powerplay/amd_powerplay.c  | 10 -----
>>>    4 files changed, 39 insertions(+), 55 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>>> b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>>> index 5504d81c77b7..72e7b5d40af6 100644
>>> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>>> @@ -427,6 +427,7 @@ int amdgpu_dpm_read_sensor(struct
>> amdgpu_device *adev, enum amd_pp_sensors senso
>>>    void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev)
>>>    {
>>>    	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
>>> +	int i;
>>>
>>>    	if (!adev->pm.dpm_enabled)
>>>    		return;
>>> @@ -434,6 +435,15 @@ void amdgpu_dpm_compute_clocks(struct
>> amdgpu_device *adev)
>>>    	if (!pp_funcs->pm_compute_clocks)
>>>    		return;
>>>
>>> +	if (adev->mode_info.num_crtc)
>>> +		amdgpu_display_bandwidth_update(adev);
>>> +
>>> +	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
>>> +		struct amdgpu_ring *ring = adev->rings[i];
>>> +		if (ring && ring->sched.ready)
>>> +			amdgpu_fence_wait_empty(ring);
>>> +	}
>>> +
>>>    	mutex_lock(&adev->pm.mutex);
>>>    	pp_funcs->pm_compute_clocks(adev->powerplay.pp_handle);
>>>    	mutex_unlock(&adev->pm.mutex);
>>> @@ -443,6 +453,20 @@ void amdgpu_dpm_enable_uvd(struct
>> amdgpu_device *adev, bool enable)
>>>    {
>>>    	int ret = 0;
>>>
>>> +	if (adev->family == AMDGPU_FAMILY_SI) {
>>
>> Bringing family specific code to this layer is not the proper fix. Need to specify
>> what is the deadlock and why can't it be handled in family specific
>> implementation.
> [Quan, Evan] Please check V2. I added some details about this deadlock there.
> For those legacy ASICs, let's make the solution simple and straight forward.
> 

Now it does watermark programming outside of lock. I don't think that is 
the right assumption.

Thanks,
Lijo

> Thanks,
> Evan
>>
>> Thanks,
>> Lijo
>>
>>> +		mutex_lock(&adev->pm.mutex);
>>> +		if (enable) {
>>> +			adev->pm.dpm.uvd_active = true;
>>> +			adev->pm.dpm.state =
>> POWER_STATE_TYPE_INTERNAL_UVD;
>>> +		} else {
>>> +			adev->pm.dpm.uvd_active = false;
>>> +		}
>>> +		mutex_unlock(&adev->pm.mutex);
>>> +
>>> +		amdgpu_dpm_compute_clocks(adev);
>>> +		return;
>>> +	}
>>> +
>>>    	ret = amdgpu_dpm_set_powergating_by_smu(adev,
>> AMD_IP_BLOCK_TYPE_UVD, !enable);
>>>    	if (ret)
>>>    		DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", @@ -453,6
>> +477,21 @@
>>> void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
>>>    {
>>>    	int ret = 0;
>>>
>>> +	if (adev->family == AMDGPU_FAMILY_SI) {
>>> +		mutex_lock(&adev->pm.mutex);
>>> +		if (enable) {
>>> +			adev->pm.dpm.vce_active = true;
>>> +			/* XXX select vce level based on ring/task */
>>> +			adev->pm.dpm.vce_level =
>> AMD_VCE_LEVEL_AC_ALL;
>>> +		} else {
>>> +			adev->pm.dpm.vce_active = false;
>>> +		}
>>> +		mutex_unlock(&adev->pm.mutex);
>>> +
>>> +		amdgpu_dpm_compute_clocks(adev);
>>> +		return;
>>> +	}
>>> +
>>>    	ret = amdgpu_dpm_set_powergating_by_smu(adev,
>> AMD_IP_BLOCK_TYPE_VCE, !enable);
>>>    	if (ret)
>>>    		DRM_ERROR("Dpm %s vce failed, ret = %d. \n", diff --git
>>> a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
>>> b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
>>> index 9613c6181c17..d3fe149d8476 100644
>>> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
>>> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
>>> @@ -1028,16 +1028,6 @@ static int
>> amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
>>>    void amdgpu_legacy_dpm_compute_clocks(void *handle)
>>>    {
>>>    	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>>> -	int i = 0;
>>> -
>>> -	if (adev->mode_info.num_crtc)
>>> -		amdgpu_display_bandwidth_update(adev);
>>> -
>>> -	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
>>> -		struct amdgpu_ring *ring = adev->rings[i];
>>> -		if (ring && ring->sched.ready)
>>> -			amdgpu_fence_wait_empty(ring);
>>> -	}
>>>
>>>    	amdgpu_dpm_get_active_displays(adev);
>>>
>>> diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
>>> b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
>>> index caae54487f9c..633dab14f51c 100644
>>> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
>>> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
>>> @@ -3892,40 +3892,6 @@ static int si_set_boot_state(struct
>> amdgpu_device *adev)
>>>    }
>>>    #endif
>>>
>>> -static int si_set_powergating_by_smu(void *handle,
>>> -				     uint32_t block_type,
>>> -				     bool gate)
>>> -{
>>> -	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>>> -
>>> -	switch (block_type) {
>>> -	case AMD_IP_BLOCK_TYPE_UVD:
>>> -		if (!gate) {
>>> -			adev->pm.dpm.uvd_active = true;
>>> -			adev->pm.dpm.state =
>> POWER_STATE_TYPE_INTERNAL_UVD;
>>> -		} else {
>>> -			adev->pm.dpm.uvd_active = false;
>>> -		}
>>> -
>>> -		amdgpu_legacy_dpm_compute_clocks(handle);
>>> -		break;
>>> -	case AMD_IP_BLOCK_TYPE_VCE:
>>> -		if (!gate) {
>>> -			adev->pm.dpm.vce_active = true;
>>> -			/* XXX select vce level based on ring/task */
>>> -			adev->pm.dpm.vce_level =
>> AMD_VCE_LEVEL_AC_ALL;
>>> -		} else {
>>> -			adev->pm.dpm.vce_active = false;
>>> -		}
>>> -
>>> -		amdgpu_legacy_dpm_compute_clocks(handle);
>>> -		break;
>>> -	default:
>>> -		break;
>>> -	}
>>> -	return 0;
>>> -}
>>> -
>>>    static int si_set_sw_state(struct amdgpu_device *adev)
>>>    {
>>>    	return (amdgpu_si_send_msg_to_smc(adev,
>> PPSMC_MSG_SwitchToSwState) == PPSMC_Result_OK) ?
>>> @@ -8125,7 +8091,6 @@ static const struct amd_pm_funcs si_dpm_funcs
>> = {
>>>    	.print_power_state = &si_dpm_print_power_state,
>>>    	.debugfs_print_current_performance_level =
>> &si_dpm_debugfs_print_current_performance_level,
>>>    	.force_performance_level = &si_dpm_force_performance_level,
>>> -	.set_powergating_by_smu = &si_set_powergating_by_smu,
>>>    	.vblank_too_short = &si_dpm_vblank_too_short,
>>>    	.set_fan_control_mode = &si_dpm_set_fan_control_mode,
>>>    	.get_fan_control_mode = &si_dpm_get_fan_control_mode, diff --
>> git
>>> a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
>>> b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
>>> index dbed72c1e0c6..1eb4e613b27a 100644
>>> --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
>>> +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
>>> @@ -1503,16 +1503,6 @@ static void pp_pm_compute_clocks(void
>> *handle)
>>>    {
>>>    	struct pp_hwmgr *hwmgr = handle;
>>>    	struct amdgpu_device *adev = hwmgr->adev;
>>> -	int i = 0;
>>> -
>>> -	if (adev->mode_info.num_crtc)
>>> -		amdgpu_display_bandwidth_update(adev);
>>> -
>>> -	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
>>> -		struct amdgpu_ring *ring = adev->rings[i];
>>> -		if (ring && ring->sched.ready)
>>> -			amdgpu_fence_wait_empty(ring);
>>> -	}
>>>
>>>    	if (!amdgpu_device_has_dc_support(adev)) {
>>>    		amdgpu_dpm_get_active_displays(adev);
>>>

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2022-04-11 13:08 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-11  7:47 [PATCH] drm/amd/pm: fix the deadlock issue observed on SI Evan Quan
2022-04-11  7:59 ` Paul Menzel
2022-04-11  7:59   ` Paul Menzel
2022-04-11  8:30   ` Thorsten Leemhuis
2022-04-11  8:30     ` Thorsten Leemhuis
2022-04-11  8:55   ` Quan, Evan
2022-04-11  8:55     ` Quan, Evan
2022-04-11  9:05 ` Lazar, Lijo
2022-04-11  9:25   ` Quan, Evan
2022-04-11  9:47     ` Lazar, Lijo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.