All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
@ 2019-09-03  3:44 Quan, Evan
       [not found] ` <20190903034336.16005-1-evan.quan-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Quan, Evan @ 2019-09-03  3:44 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Li, Candice, Gui, Jack, Quan, Evan

These are needed for smu_reset support.

Change-Id: If29ede4b99758adb08fd4e16665f44fd893ec99b
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c     | 17 +++++++++++++++++
 drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h |  3 +++
 drivers/gpu/drm/amd/powerplay/smu_v11_0.c      | 10 ++++++++++
 3 files changed, 30 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index d5ee13a78eb7..3cf8d944f890 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -1286,6 +1286,11 @@ static int smu_hw_init(void *handle)
 	return ret;
 }
 
+static int smu_stop_dpms(struct smu_context *smu)
+{
+	return smu_send_smc_msg(smu, SMU_MSG_DisableAllSmuFeatures);
+}
+
 static int smu_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1298,6 +1303,18 @@ static int smu_hw_fini(void *handle)
 		smu_powergate_vcn(&adev->smu, true);
 	}
 
+	ret = smu_stop_thermal_control(smu);
+	if (ret) {
+		pr_warn("Fail to stop thermal control!\n");
+		return ret;
+	}
+
+	ret = smu_stop_dpms(smu);
+	if (ret) {
+		pr_warn("Fail to stop Dpms!\n");
+		return ret;
+	}
+
 	kfree(table_context->driver_pptable);
 	table_context->driver_pptable = NULL;
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
index b19224cb6d6d..8e4b0ad24712 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
@@ -498,6 +498,7 @@ struct smu_funcs
 	int (*get_current_clk_freq)(struct smu_context *smu, enum smu_clk_type clk_id, uint32_t *value);
 	int (*init_max_sustainable_clocks)(struct smu_context *smu);
 	int (*start_thermal_control)(struct smu_context *smu);
+	int (*stop_thermal_control)(struct smu_context *smu);
 	int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors sensor,
 			   void *data, uint32_t *size);
 	int (*set_deep_sleep_dcefclk)(struct smu_context *smu, uint32_t clk);
@@ -647,6 +648,8 @@ struct smu_funcs
 	((smu)->ppt_funcs->set_thermal_fan_table ? (smu)->ppt_funcs->set_thermal_fan_table((smu)) : 0)
 #define smu_start_thermal_control(smu) \
 	((smu)->funcs->start_thermal_control? (smu)->funcs->start_thermal_control((smu)) : 0)
+#define smu_stop_thermal_control(smu) \
+	((smu)->funcs->stop_thermal_control? (smu)->funcs->stop_thermal_control((smu)) : 0)
 #define smu_read_sensor(smu, sensor, data, size) \
 	((smu)->ppt_funcs->read_sensor? (smu)->ppt_funcs->read_sensor((smu), (sensor), (data), (size)) : 0)
 #define smu_smc_read_sensor(smu, sensor, data, size) \
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index db5e94ce54af..1a38af84394e 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -1209,6 +1209,15 @@ static int smu_v11_0_start_thermal_control(struct smu_context *smu)
 	return ret;
 }
 
+static int smu_v11_0_stop_thermal_control(struct smu_context *smu)
+{
+	struct amdgpu_device *adev = smu->adev;
+
+	WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
+
+	return 0;
+}
+
 static uint16_t convert_to_vddc(uint8_t vid)
 {
 	return (uint16_t) ((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE);
@@ -1783,6 +1792,7 @@ static const struct smu_funcs smu_v11_0_funcs = {
 	.get_current_clk_freq = smu_v11_0_get_current_clk_freq,
 	.init_max_sustainable_clocks = smu_v11_0_init_max_sustainable_clocks,
 	.start_thermal_control = smu_v11_0_start_thermal_control,
+	.stop_thermal_control = smu_v11_0_stop_thermal_control,
 	.read_sensor = smu_v11_0_read_sensor,
 	.set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
 	.display_clock_voltage_request = smu_v11_0_display_clock_voltage_request,
-- 
2.23.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* RE: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
       [not found] ` <20190903034336.16005-1-evan.quan-5C7GfCeVMHo@public.gmane.org>
@ 2019-09-03  5:16   ` Gui, Jack
  2019-09-08 23:05   ` Deucher, Alexander
  2019-11-08 20:50     ` Matt Coffin
  2 siblings, 0 replies; 9+ messages in thread
From: Gui, Jack @ 2019-09-03  5:16 UTC (permalink / raw)
  To: Quan, Evan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Li, Candice

Reviewed-by: Jack Gui <Jack.Gui@amd.com>

-----Original Message-----
From: Quan, Evan <Evan.Quan@amd.com> 
Sent: Tuesday, September 3, 2019 11:44 AM
To: amd-gfx@lists.freedesktop.org
Cc: Li, Candice <Candice.Li@amd.com>; Gui, Jack <Jack.Gui@amd.com>; Quan, Evan <Evan.Quan@amd.com>
Subject: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini

These are needed for smu_reset support.

Change-Id: If29ede4b99758adb08fd4e16665f44fd893ec99b
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c     | 17 +++++++++++++++++
 drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h |  3 +++
 drivers/gpu/drm/amd/powerplay/smu_v11_0.c      | 10 ++++++++++
 3 files changed, 30 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index d5ee13a78eb7..3cf8d944f890 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -1286,6 +1286,11 @@ static int smu_hw_init(void *handle)
 	return ret;
 }
 
+static int smu_stop_dpms(struct smu_context *smu) {
+	return smu_send_smc_msg(smu, SMU_MSG_DisableAllSmuFeatures); }
+
 static int smu_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1298,6 +1303,18 @@ static int smu_hw_fini(void *handle)
 		smu_powergate_vcn(&adev->smu, true);
 	}
 
+	ret = smu_stop_thermal_control(smu);
+	if (ret) {
+		pr_warn("Fail to stop thermal control!\n");
+		return ret;
+	}
+
+	ret = smu_stop_dpms(smu);
+	if (ret) {
+		pr_warn("Fail to stop Dpms!\n");
+		return ret;
+	}
+
 	kfree(table_context->driver_pptable);
 	table_context->driver_pptable = NULL;
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
index b19224cb6d6d..8e4b0ad24712 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
@@ -498,6 +498,7 @@ struct smu_funcs
 	int (*get_current_clk_freq)(struct smu_context *smu, enum smu_clk_type clk_id, uint32_t *value);
 	int (*init_max_sustainable_clocks)(struct smu_context *smu);
 	int (*start_thermal_control)(struct smu_context *smu);
+	int (*stop_thermal_control)(struct smu_context *smu);
 	int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors sensor,
 			   void *data, uint32_t *size);
 	int (*set_deep_sleep_dcefclk)(struct smu_context *smu, uint32_t clk); @@ -647,6 +648,8 @@ struct smu_funcs
 	((smu)->ppt_funcs->set_thermal_fan_table ? (smu)->ppt_funcs->set_thermal_fan_table((smu)) : 0)  #define smu_start_thermal_control(smu) \
 	((smu)->funcs->start_thermal_control? (smu)->funcs->start_thermal_control((smu)) : 0)
+#define smu_stop_thermal_control(smu) \
+	((smu)->funcs->stop_thermal_control? 
+(smu)->funcs->stop_thermal_control((smu)) : 0)
 #define smu_read_sensor(smu, sensor, data, size) \
 	((smu)->ppt_funcs->read_sensor? (smu)->ppt_funcs->read_sensor((smu), (sensor), (data), (size)) : 0)  #define smu_smc_read_sensor(smu, sensor, data, size) \ diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index db5e94ce54af..1a38af84394e 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -1209,6 +1209,15 @@ static int smu_v11_0_start_thermal_control(struct smu_context *smu)
 	return ret;
 }
 
+static int smu_v11_0_stop_thermal_control(struct smu_context *smu) {
+	struct amdgpu_device *adev = smu->adev;
+
+	WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
+
+	return 0;
+}
+
 static uint16_t convert_to_vddc(uint8_t vid)  {
 	return (uint16_t) ((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE); @@ -1783,6 +1792,7 @@ static const struct smu_funcs smu_v11_0_funcs = {
 	.get_current_clk_freq = smu_v11_0_get_current_clk_freq,
 	.init_max_sustainable_clocks = smu_v11_0_init_max_sustainable_clocks,
 	.start_thermal_control = smu_v11_0_start_thermal_control,
+	.stop_thermal_control = smu_v11_0_stop_thermal_control,
 	.read_sensor = smu_v11_0_read_sensor,
 	.set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
 	.display_clock_voltage_request = smu_v11_0_display_clock_voltage_request,
--
2.23.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
       [not found] ` <20190903034336.16005-1-evan.quan-5C7GfCeVMHo@public.gmane.org>
  2019-09-03  5:16   ` Gui, Jack
@ 2019-09-08 23:05   ` Deucher, Alexander
  2019-11-08 20:50     ` Matt Coffin
  2 siblings, 0 replies; 9+ messages in thread
From: Deucher, Alexander @ 2019-09-08 23:05 UTC (permalink / raw)
  To: Quan, Evan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Li, Candice, Gui, Jack


[-- Attachment #1.1: Type: text/plain, Size: 5213 bytes --]

We should probably also add the callbacks for set_mp1_state like we have for the older powerplay code so we properly send the SMU shutdown or reset message on GPU reset and driver unload.

Alex

________________________________
From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> on behalf of Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org>
Sent: Monday, September 2, 2019 11:44 PM
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org <amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Cc: Li, Candice <Candice.Li-5C7GfCeVMHo@public.gmane.org>; Gui, Jack <Jack.Gui-5C7GfCeVMHo@public.gmane.org>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org>
Subject: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini

These are needed for smu_reset support.

Change-Id: If29ede4b99758adb08fd4e16665f44fd893ec99b
Signed-off-by: Evan Quan <evan.quan-5C7GfCeVMHo@public.gmane.org>
---
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c     | 17 +++++++++++++++++
 drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h |  3 +++
 drivers/gpu/drm/amd/powerplay/smu_v11_0.c      | 10 ++++++++++
 3 files changed, 30 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index d5ee13a78eb7..3cf8d944f890 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -1286,6 +1286,11 @@ static int smu_hw_init(void *handle)
         return ret;
 }

+static int smu_stop_dpms(struct smu_context *smu)
+{
+       return smu_send_smc_msg(smu, SMU_MSG_DisableAllSmuFeatures);
+}
+
 static int smu_hw_fini(void *handle)
 {
         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1298,6 +1303,18 @@ static int smu_hw_fini(void *handle)
                 smu_powergate_vcn(&adev->smu, true);
         }

+       ret = smu_stop_thermal_control(smu);
+       if (ret) {
+               pr_warn("Fail to stop thermal control!\n");
+               return ret;
+       }
+
+       ret = smu_stop_dpms(smu);
+       if (ret) {
+               pr_warn("Fail to stop Dpms!\n");
+               return ret;
+       }
+
         kfree(table_context->driver_pptable);
         table_context->driver_pptable = NULL;

diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
index b19224cb6d6d..8e4b0ad24712 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
@@ -498,6 +498,7 @@ struct smu_funcs
         int (*get_current_clk_freq)(struct smu_context *smu, enum smu_clk_type clk_id, uint32_t *value);
         int (*init_max_sustainable_clocks)(struct smu_context *smu);
         int (*start_thermal_control)(struct smu_context *smu);
+       int (*stop_thermal_control)(struct smu_context *smu);
         int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors sensor,
                            void *data, uint32_t *size);
         int (*set_deep_sleep_dcefclk)(struct smu_context *smu, uint32_t clk);
@@ -647,6 +648,8 @@ struct smu_funcs
         ((smu)->ppt_funcs->set_thermal_fan_table ? (smu)->ppt_funcs->set_thermal_fan_table((smu)) : 0)
 #define smu_start_thermal_control(smu) \
         ((smu)->funcs->start_thermal_control? (smu)->funcs->start_thermal_control((smu)) : 0)
+#define smu_stop_thermal_control(smu) \
+       ((smu)->funcs->stop_thermal_control? (smu)->funcs->stop_thermal_control((smu)) : 0)
 #define smu_read_sensor(smu, sensor, data, size) \
         ((smu)->ppt_funcs->read_sensor? (smu)->ppt_funcs->read_sensor((smu), (sensor), (data), (size)) : 0)
 #define smu_smc_read_sensor(smu, sensor, data, size) \
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index db5e94ce54af..1a38af84394e 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -1209,6 +1209,15 @@ static int smu_v11_0_start_thermal_control(struct smu_context *smu)
         return ret;
 }

+static int smu_v11_0_stop_thermal_control(struct smu_context *smu)
+{
+       struct amdgpu_device *adev = smu->adev;
+
+       WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
+
+       return 0;
+}
+
 static uint16_t convert_to_vddc(uint8_t vid)
 {
         return (uint16_t) ((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE);
@@ -1783,6 +1792,7 @@ static const struct smu_funcs smu_v11_0_funcs = {
         .get_current_clk_freq = smu_v11_0_get_current_clk_freq,
         .init_max_sustainable_clocks = smu_v11_0_init_max_sustainable_clocks,
         .start_thermal_control = smu_v11_0_start_thermal_control,
+       .stop_thermal_control = smu_v11_0_stop_thermal_control,
         .read_sensor = smu_v11_0_read_sensor,
         .set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
         .display_clock_voltage_request = smu_v11_0_display_clock_voltage_request,
--
2.23.0

_______________________________________________
amd-gfx mailing list
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[-- Attachment #1.2: Type: text/html, Size: 8983 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
@ 2019-11-08 20:50     ` Matt Coffin
  0 siblings, 0 replies; 9+ messages in thread
From: Matt Coffin @ 2019-11-08 20:50 UTC (permalink / raw)
  To: Quan, Evan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alex Deucher, Li, Candice, Gui, Jack

Hey guys,



This patch caused some kind of reversion with smu_reset on Navi10. I'm
no expert since everything I know comes from just reading through the
code, so this could be some kind of intended behavior, but after this
patch, if you write a pptable to the sysfs pp_table interface on navi10,
then the SMU will fail to reset successfully, and the result is
seemingly an unrecoverable situation.



I put in a report on bugzilla with dmesg logs
:
https://bugs.freedesktop.org/show_bug.cgi?id=112234


Finding this change was the result of a bisect to find where the issue
started, and reverting the changes to smu_hw_fini resolved the issue.
Any advice on possible proper fixes?

Thanks in advance,

Matt

On 9/2/19 9:44 PM, Quan, Evan wrote:
> These are needed for smu_reset support.
> 
> Change-Id: If29ede4b99758adb08fd4e16665f44fd893ec99b
> Signed-off-by: Evan Quan <evan.quan@amd.com>
> ---
>  drivers/gpu/drm/amd/powerplay/amdgpu_smu.c     | 17 +++++++++++++++++
>  drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h |  3 +++
>  drivers/gpu/drm/amd/powerplay/smu_v11_0.c      | 10 ++++++++++
>  3 files changed, 30 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> index d5ee13a78eb7..3cf8d944f890 100644
> --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> @@ -1286,6 +1286,11 @@ static int smu_hw_init(void *handle)
>  	return ret;
>  }
>  
> +static int smu_stop_dpms(struct smu_context *smu)
> +{
> +	return smu_send_smc_msg(smu, SMU_MSG_DisableAllSmuFeatures);
> +}
> +
>  static int smu_hw_fini(void *handle)
>  {
>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> @@ -1298,6 +1303,18 @@ static int smu_hw_fini(void *handle)
>  		smu_powergate_vcn(&adev->smu, true);
>  	}
>  
> +	ret = smu_stop_thermal_control(smu);
> +	if (ret) {
> +		pr_warn("Fail to stop thermal control!\n");
> +		return ret;
> +	}
> +
> +	ret = smu_stop_dpms(smu);
> +	if (ret) {
> +		pr_warn("Fail to stop Dpms!\n");
> +		return ret;
> +	}
> +
>  	kfree(table_context->driver_pptable);
>  	table_context->driver_pptable = NULL;
>  
> diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> index b19224cb6d6d..8e4b0ad24712 100644
> --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> @@ -498,6 +498,7 @@ struct smu_funcs
>  	int (*get_current_clk_freq)(struct smu_context *smu, enum smu_clk_type clk_id, uint32_t *value);
>  	int (*init_max_sustainable_clocks)(struct smu_context *smu);
>  	int (*start_thermal_control)(struct smu_context *smu);
> +	int (*stop_thermal_control)(struct smu_context *smu);
>  	int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors sensor,
>  			   void *data, uint32_t *size);
>  	int (*set_deep_sleep_dcefclk)(struct smu_context *smu, uint32_t clk);
> @@ -647,6 +648,8 @@ struct smu_funcs
>  	((smu)->ppt_funcs->set_thermal_fan_table ? (smu)->ppt_funcs->set_thermal_fan_table((smu)) : 0)
>  #define smu_start_thermal_control(smu) \
>  	((smu)->funcs->start_thermal_control? (smu)->funcs->start_thermal_control((smu)) : 0)
> +#define smu_stop_thermal_control(smu) \
> +	((smu)->funcs->stop_thermal_control? (smu)->funcs->stop_thermal_control((smu)) : 0)
>  #define smu_read_sensor(smu, sensor, data, size) \
>  	((smu)->ppt_funcs->read_sensor? (smu)->ppt_funcs->read_sensor((smu), (sensor), (data), (size)) : 0)
>  #define smu_smc_read_sensor(smu, sensor, data, size) \
> diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> index db5e94ce54af..1a38af84394e 100644
> --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> @@ -1209,6 +1209,15 @@ static int smu_v11_0_start_thermal_control(struct smu_context *smu)
>  	return ret;
>  }
>  
> +static int smu_v11_0_stop_thermal_control(struct smu_context *smu)
> +{
> +	struct amdgpu_device *adev = smu->adev;
> +
> +	WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
> +
> +	return 0;
> +}
> +
>  static uint16_t convert_to_vddc(uint8_t vid)
>  {
>  	return (uint16_t) ((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE);
> @@ -1783,6 +1792,7 @@ static const struct smu_funcs smu_v11_0_funcs = {
>  	.get_current_clk_freq = smu_v11_0_get_current_clk_freq,
>  	.init_max_sustainable_clocks = smu_v11_0_init_max_sustainable_clocks,
>  	.start_thermal_control = smu_v11_0_start_thermal_control,
> +	.stop_thermal_control = smu_v11_0_stop_thermal_control,
>  	.read_sensor = smu_v11_0_read_sensor,
>  	.set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
>  	.display_clock_voltage_request = smu_v11_0_display_clock_voltage_request,
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
@ 2019-11-08 20:50     ` Matt Coffin
  0 siblings, 0 replies; 9+ messages in thread
From: Matt Coffin @ 2019-11-08 20:50 UTC (permalink / raw)
  To: Quan, Evan, amd-gfx; +Cc: Alex Deucher, Li, Candice, Gui, Jack

Hey guys,



This patch caused some kind of reversion with smu_reset on Navi10. I'm
no expert since everything I know comes from just reading through the
code, so this could be some kind of intended behavior, but after this
patch, if you write a pptable to the sysfs pp_table interface on navi10,
then the SMU will fail to reset successfully, and the result is
seemingly an unrecoverable situation.



I put in a report on bugzilla with dmesg logs
:
https://bugs.freedesktop.org/show_bug.cgi?id=112234


Finding this change was the result of a bisect to find where the issue
started, and reverting the changes to smu_hw_fini resolved the issue.
Any advice on possible proper fixes?

Thanks in advance,

Matt

On 9/2/19 9:44 PM, Quan, Evan wrote:
> These are needed for smu_reset support.
> 
> Change-Id: If29ede4b99758adb08fd4e16665f44fd893ec99b
> Signed-off-by: Evan Quan <evan.quan@amd.com>
> ---
>  drivers/gpu/drm/amd/powerplay/amdgpu_smu.c     | 17 +++++++++++++++++
>  drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h |  3 +++
>  drivers/gpu/drm/amd/powerplay/smu_v11_0.c      | 10 ++++++++++
>  3 files changed, 30 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> index d5ee13a78eb7..3cf8d944f890 100644
> --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> @@ -1286,6 +1286,11 @@ static int smu_hw_init(void *handle)
>  	return ret;
>  }
>  
> +static int smu_stop_dpms(struct smu_context *smu)
> +{
> +	return smu_send_smc_msg(smu, SMU_MSG_DisableAllSmuFeatures);
> +}
> +
>  static int smu_hw_fini(void *handle)
>  {
>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> @@ -1298,6 +1303,18 @@ static int smu_hw_fini(void *handle)
>  		smu_powergate_vcn(&adev->smu, true);
>  	}
>  
> +	ret = smu_stop_thermal_control(smu);
> +	if (ret) {
> +		pr_warn("Fail to stop thermal control!\n");
> +		return ret;
> +	}
> +
> +	ret = smu_stop_dpms(smu);
> +	if (ret) {
> +		pr_warn("Fail to stop Dpms!\n");
> +		return ret;
> +	}
> +
>  	kfree(table_context->driver_pptable);
>  	table_context->driver_pptable = NULL;
>  
> diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> index b19224cb6d6d..8e4b0ad24712 100644
> --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> @@ -498,6 +498,7 @@ struct smu_funcs
>  	int (*get_current_clk_freq)(struct smu_context *smu, enum smu_clk_type clk_id, uint32_t *value);
>  	int (*init_max_sustainable_clocks)(struct smu_context *smu);
>  	int (*start_thermal_control)(struct smu_context *smu);
> +	int (*stop_thermal_control)(struct smu_context *smu);
>  	int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors sensor,
>  			   void *data, uint32_t *size);
>  	int (*set_deep_sleep_dcefclk)(struct smu_context *smu, uint32_t clk);
> @@ -647,6 +648,8 @@ struct smu_funcs
>  	((smu)->ppt_funcs->set_thermal_fan_table ? (smu)->ppt_funcs->set_thermal_fan_table((smu)) : 0)
>  #define smu_start_thermal_control(smu) \
>  	((smu)->funcs->start_thermal_control? (smu)->funcs->start_thermal_control((smu)) : 0)
> +#define smu_stop_thermal_control(smu) \
> +	((smu)->funcs->stop_thermal_control? (smu)->funcs->stop_thermal_control((smu)) : 0)
>  #define smu_read_sensor(smu, sensor, data, size) \
>  	((smu)->ppt_funcs->read_sensor? (smu)->ppt_funcs->read_sensor((smu), (sensor), (data), (size)) : 0)
>  #define smu_smc_read_sensor(smu, sensor, data, size) \
> diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> index db5e94ce54af..1a38af84394e 100644
> --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> @@ -1209,6 +1209,15 @@ static int smu_v11_0_start_thermal_control(struct smu_context *smu)
>  	return ret;
>  }
>  
> +static int smu_v11_0_stop_thermal_control(struct smu_context *smu)
> +{
> +	struct amdgpu_device *adev = smu->adev;
> +
> +	WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
> +
> +	return 0;
> +}
> +
>  static uint16_t convert_to_vddc(uint8_t vid)
>  {
>  	return (uint16_t) ((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE);
> @@ -1783,6 +1792,7 @@ static const struct smu_funcs smu_v11_0_funcs = {
>  	.get_current_clk_freq = smu_v11_0_get_current_clk_freq,
>  	.init_max_sustainable_clocks = smu_v11_0_init_max_sustainable_clocks,
>  	.start_thermal_control = smu_v11_0_start_thermal_control,
> +	.stop_thermal_control = smu_v11_0_stop_thermal_control,
>  	.read_sensor = smu_v11_0_read_sensor,
>  	.set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
>  	.display_clock_voltage_request = smu_v11_0_display_clock_voltage_request,
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
@ 2019-11-11  9:28         ` Quan, Evan
  0 siblings, 0 replies; 9+ messages in thread
From: Quan, Evan @ 2019-11-11  9:28 UTC (permalink / raw)
  To: Matt Coffin, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alex Deucher, Li, Candice, Gui, Jack

Just sent out a patch which should be able to address this issue.
https://lists.freedesktop.org/archives/amd-gfx/2019-November/042458.html

Regards,
Evan
> -----Original Message-----
> From: Matt Coffin <mcoffin13@gmail.com>
> Sent: Saturday, November 9, 2019 4:50 AM
> To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Li, Candice <Candice.Li@amd.com>; Gui, Jack <Jack.Gui@amd.com>; Alex
> Deucher <alexdeucher@gmail.com>
> Subject: Re: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
> 
> Hey guys,
> 
> 
> 
> This patch caused some kind of reversion with smu_reset on Navi10. I'm no
> expert since everything I know comes from just reading through the code, so
> this could be some kind of intended behavior, but after this patch, if you write a
> pptable to the sysfs pp_table interface on navi10, then the SMU will fail to reset
> successfully, and the result is seemingly an unrecoverable situation.
> 
> 
> 
> I put in a report on bugzilla with dmesg logs
> :
> https://bugs.freedesktop.org/show_bug.cgi?id=112234
> 
> 
> Finding this change was the result of a bisect to find where the issue started,
> and reverting the changes to smu_hw_fini resolved the issue.
> Any advice on possible proper fixes?
> 
> Thanks in advance,
> 
> Matt
> 
> On 9/2/19 9:44 PM, Quan, Evan wrote:
> > These are needed for smu_reset support.
> >
> > Change-Id: If29ede4b99758adb08fd4e16665f44fd893ec99b
> > Signed-off-by: Evan Quan <evan.quan@amd.com>
> > ---
> >  drivers/gpu/drm/amd/powerplay/amdgpu_smu.c     | 17
> +++++++++++++++++
> >  drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h |  3 +++
> >  drivers/gpu/drm/amd/powerplay/smu_v11_0.c      | 10 ++++++++++
> >  3 files changed, 30 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > index d5ee13a78eb7..3cf8d944f890 100644
> > --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > @@ -1286,6 +1286,11 @@ static int smu_hw_init(void *handle)
> >  	return ret;
> >  }
> >
> > +static int smu_stop_dpms(struct smu_context *smu) {
> > +	return smu_send_smc_msg(smu, SMU_MSG_DisableAllSmuFeatures); }
> > +
> >  static int smu_hw_fini(void *handle)
> >  {
> >  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@
> > -1298,6 +1303,18 @@ static int smu_hw_fini(void *handle)
> >  		smu_powergate_vcn(&adev->smu, true);
> >  	}
> >
> > +	ret = smu_stop_thermal_control(smu);
> > +	if (ret) {
> > +		pr_warn("Fail to stop thermal control!\n");
> > +		return ret;
> > +	}
> > +
> > +	ret = smu_stop_dpms(smu);
> > +	if (ret) {
> > +		pr_warn("Fail to stop Dpms!\n");
> > +		return ret;
> > +	}
> > +
> >  	kfree(table_context->driver_pptable);
> >  	table_context->driver_pptable = NULL;
> >
> > diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> > b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> > index b19224cb6d6d..8e4b0ad24712 100644
> > --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> > +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> > @@ -498,6 +498,7 @@ struct smu_funcs
> >  	int (*get_current_clk_freq)(struct smu_context *smu, enum
> smu_clk_type clk_id, uint32_t *value);
> >  	int (*init_max_sustainable_clocks)(struct smu_context *smu);
> >  	int (*start_thermal_control)(struct smu_context *smu);
> > +	int (*stop_thermal_control)(struct smu_context *smu);
> >  	int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors
> sensor,
> >  			   void *data, uint32_t *size);
> >  	int (*set_deep_sleep_dcefclk)(struct smu_context *smu, uint32_t
> > clk); @@ -647,6 +648,8 @@ struct smu_funcs
> >  	((smu)->ppt_funcs->set_thermal_fan_table ?
> > (smu)->ppt_funcs->set_thermal_fan_table((smu)) : 0)  #define
> smu_start_thermal_control(smu) \
> >  	((smu)->funcs->start_thermal_control?
> > (smu)->funcs->start_thermal_control((smu)) : 0)
> > +#define smu_stop_thermal_control(smu) \
> > +	((smu)->funcs->stop_thermal_control?
> > +(smu)->funcs->stop_thermal_control((smu)) : 0)
> >  #define smu_read_sensor(smu, sensor, data, size) \
> >  	((smu)->ppt_funcs->read_sensor? (smu)->ppt_funcs-
> >read_sensor((smu),
> > (sensor), (data), (size)) : 0)  #define smu_smc_read_sensor(smu,
> > sensor, data, size) \ diff --git
> > a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> > b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> > index db5e94ce54af..1a38af84394e 100644
> > --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> > +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> > @@ -1209,6 +1209,15 @@ static int
> smu_v11_0_start_thermal_control(struct smu_context *smu)
> >  	return ret;
> >  }
> >
> > +static int smu_v11_0_stop_thermal_control(struct smu_context *smu) {
> > +	struct amdgpu_device *adev = smu->adev;
> > +
> > +	WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
> > +
> > +	return 0;
> > +}
> > +
> >  static uint16_t convert_to_vddc(uint8_t vid)  {
> >  	return (uint16_t) ((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE); @@
> > -1783,6 +1792,7 @@ static const struct smu_funcs smu_v11_0_funcs = {
> >  	.get_current_clk_freq = smu_v11_0_get_current_clk_freq,
> >  	.init_max_sustainable_clocks =
> smu_v11_0_init_max_sustainable_clocks,
> >  	.start_thermal_control = smu_v11_0_start_thermal_control,
> > +	.stop_thermal_control = smu_v11_0_stop_thermal_control,
> >  	.read_sensor = smu_v11_0_read_sensor,
> >  	.set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
> >  	.display_clock_voltage_request =
> > smu_v11_0_display_clock_voltage_request,
> >
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
@ 2019-11-11  9:28         ` Quan, Evan
  0 siblings, 0 replies; 9+ messages in thread
From: Quan, Evan @ 2019-11-11  9:28 UTC (permalink / raw)
  To: Matt Coffin, amd-gfx; +Cc: Alex Deucher, Li, Candice, Gui, Jack

Just sent out a patch which should be able to address this issue.
https://lists.freedesktop.org/archives/amd-gfx/2019-November/042458.html

Regards,
Evan
> -----Original Message-----
> From: Matt Coffin <mcoffin13@gmail.com>
> Sent: Saturday, November 9, 2019 4:50 AM
> To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Li, Candice <Candice.Li@amd.com>; Gui, Jack <Jack.Gui@amd.com>; Alex
> Deucher <alexdeucher@gmail.com>
> Subject: Re: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
> 
> Hey guys,
> 
> 
> 
> This patch caused some kind of reversion with smu_reset on Navi10. I'm no
> expert since everything I know comes from just reading through the code, so
> this could be some kind of intended behavior, but after this patch, if you write a
> pptable to the sysfs pp_table interface on navi10, then the SMU will fail to reset
> successfully, and the result is seemingly an unrecoverable situation.
> 
> 
> 
> I put in a report on bugzilla with dmesg logs
> :
> https://bugs.freedesktop.org/show_bug.cgi?id=112234
> 
> 
> Finding this change was the result of a bisect to find where the issue started,
> and reverting the changes to smu_hw_fini resolved the issue.
> Any advice on possible proper fixes?
> 
> Thanks in advance,
> 
> Matt
> 
> On 9/2/19 9:44 PM, Quan, Evan wrote:
> > These are needed for smu_reset support.
> >
> > Change-Id: If29ede4b99758adb08fd4e16665f44fd893ec99b
> > Signed-off-by: Evan Quan <evan.quan@amd.com>
> > ---
> >  drivers/gpu/drm/amd/powerplay/amdgpu_smu.c     | 17
> +++++++++++++++++
> >  drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h |  3 +++
> >  drivers/gpu/drm/amd/powerplay/smu_v11_0.c      | 10 ++++++++++
> >  3 files changed, 30 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > index d5ee13a78eb7..3cf8d944f890 100644
> > --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > @@ -1286,6 +1286,11 @@ static int smu_hw_init(void *handle)
> >  	return ret;
> >  }
> >
> > +static int smu_stop_dpms(struct smu_context *smu) {
> > +	return smu_send_smc_msg(smu, SMU_MSG_DisableAllSmuFeatures); }
> > +
> >  static int smu_hw_fini(void *handle)
> >  {
> >  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@
> > -1298,6 +1303,18 @@ static int smu_hw_fini(void *handle)
> >  		smu_powergate_vcn(&adev->smu, true);
> >  	}
> >
> > +	ret = smu_stop_thermal_control(smu);
> > +	if (ret) {
> > +		pr_warn("Fail to stop thermal control!\n");
> > +		return ret;
> > +	}
> > +
> > +	ret = smu_stop_dpms(smu);
> > +	if (ret) {
> > +		pr_warn("Fail to stop Dpms!\n");
> > +		return ret;
> > +	}
> > +
> >  	kfree(table_context->driver_pptable);
> >  	table_context->driver_pptable = NULL;
> >
> > diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> > b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> > index b19224cb6d6d..8e4b0ad24712 100644
> > --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> > +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> > @@ -498,6 +498,7 @@ struct smu_funcs
> >  	int (*get_current_clk_freq)(struct smu_context *smu, enum
> smu_clk_type clk_id, uint32_t *value);
> >  	int (*init_max_sustainable_clocks)(struct smu_context *smu);
> >  	int (*start_thermal_control)(struct smu_context *smu);
> > +	int (*stop_thermal_control)(struct smu_context *smu);
> >  	int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors
> sensor,
> >  			   void *data, uint32_t *size);
> >  	int (*set_deep_sleep_dcefclk)(struct smu_context *smu, uint32_t
> > clk); @@ -647,6 +648,8 @@ struct smu_funcs
> >  	((smu)->ppt_funcs->set_thermal_fan_table ?
> > (smu)->ppt_funcs->set_thermal_fan_table((smu)) : 0)  #define
> smu_start_thermal_control(smu) \
> >  	((smu)->funcs->start_thermal_control?
> > (smu)->funcs->start_thermal_control((smu)) : 0)
> > +#define smu_stop_thermal_control(smu) \
> > +	((smu)->funcs->stop_thermal_control?
> > +(smu)->funcs->stop_thermal_control((smu)) : 0)
> >  #define smu_read_sensor(smu, sensor, data, size) \
> >  	((smu)->ppt_funcs->read_sensor? (smu)->ppt_funcs-
> >read_sensor((smu),
> > (sensor), (data), (size)) : 0)  #define smu_smc_read_sensor(smu,
> > sensor, data, size) \ diff --git
> > a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> > b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> > index db5e94ce54af..1a38af84394e 100644
> > --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> > +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> > @@ -1209,6 +1209,15 @@ static int
> smu_v11_0_start_thermal_control(struct smu_context *smu)
> >  	return ret;
> >  }
> >
> > +static int smu_v11_0_stop_thermal_control(struct smu_context *smu) {
> > +	struct amdgpu_device *adev = smu->adev;
> > +
> > +	WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
> > +
> > +	return 0;
> > +}
> > +
> >  static uint16_t convert_to_vddc(uint8_t vid)  {
> >  	return (uint16_t) ((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE); @@
> > -1783,6 +1792,7 @@ static const struct smu_funcs smu_v11_0_funcs = {
> >  	.get_current_clk_freq = smu_v11_0_get_current_clk_freq,
> >  	.init_max_sustainable_clocks =
> smu_v11_0_init_max_sustainable_clocks,
> >  	.start_thermal_control = smu_v11_0_start_thermal_control,
> > +	.stop_thermal_control = smu_v11_0_stop_thermal_control,
> >  	.read_sensor = smu_v11_0_read_sensor,
> >  	.set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
> >  	.display_clock_voltage_request =
> > smu_v11_0_display_clock_voltage_request,
> >
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
@ 2019-11-11 23:28             ` Matt Coffin
  0 siblings, 0 replies; 9+ messages in thread
From: Matt Coffin @ 2019-11-11 23:28 UTC (permalink / raw)
  To: Quan, Evan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alex Deucher, Li, Candice, Gui, Jack

Thanks Evan. I can confirm that the linked patch resolves the issue for me.

I commented and resolved the bug as well in case other people find it.

Cheers,
Matt

On 11/11/19 2:28 AM, Quan, Evan wrote:
> Just sent out a patch which should be able to address this issue.
> https://lists.freedesktop.org/archives/amd-gfx/2019-November/042458.html
> 
> Regards,
> Evan
>> -----Original Message-----
>> From: Matt Coffin <mcoffin13@gmail.com>
>> Sent: Saturday, November 9, 2019 4:50 AM
>> To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org
>> Cc: Li, Candice <Candice.Li@amd.com>; Gui, Jack <Jack.Gui@amd.com>; Alex
>> Deucher <alexdeucher@gmail.com>
>> Subject: Re: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
>>
>> Hey guys,
>>
>>
>>
>> This patch caused some kind of reversion with smu_reset on Navi10. I'm no
>> expert since everything I know comes from just reading through the code, so
>> this could be some kind of intended behavior, but after this patch, if you write a
>> pptable to the sysfs pp_table interface on navi10, then the SMU will fail to reset
>> successfully, and the result is seemingly an unrecoverable situation.
>>
>>
>>
>> I put in a report on bugzilla with dmesg logs
>> :
>> https://bugs.freedesktop.org/show_bug.cgi?id=112234
>>
>>
>> Finding this change was the result of a bisect to find where the issue started,
>> and reverting the changes to smu_hw_fini resolved the issue.
>> Any advice on possible proper fixes?
>>
>> Thanks in advance,
>>
>> Matt
>>
>> On 9/2/19 9:44 PM, Quan, Evan wrote:
>>> These are needed for smu_reset support.
>>>
>>> Change-Id: If29ede4b99758adb08fd4e16665f44fd893ec99b
>>> Signed-off-by: Evan Quan <evan.quan@amd.com>
>>> ---
>>>  drivers/gpu/drm/amd/powerplay/amdgpu_smu.c     | 17
>> +++++++++++++++++
>>>  drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h |  3 +++
>>>  drivers/gpu/drm/amd/powerplay/smu_v11_0.c      | 10 ++++++++++
>>>  3 files changed, 30 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
>>> b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
>>> index d5ee13a78eb7..3cf8d944f890 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
>>> +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
>>> @@ -1286,6 +1286,11 @@ static int smu_hw_init(void *handle)
>>>  	return ret;
>>>  }
>>>
>>> +static int smu_stop_dpms(struct smu_context *smu) {
>>> +	return smu_send_smc_msg(smu, SMU_MSG_DisableAllSmuFeatures); }
>>> +
>>>  static int smu_hw_fini(void *handle)
>>>  {
>>>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@
>>> -1298,6 +1303,18 @@ static int smu_hw_fini(void *handle)
>>>  		smu_powergate_vcn(&adev->smu, true);
>>>  	}
>>>
>>> +	ret = smu_stop_thermal_control(smu);
>>> +	if (ret) {
>>> +		pr_warn("Fail to stop thermal control!\n");
>>> +		return ret;
>>> +	}
>>> +
>>> +	ret = smu_stop_dpms(smu);
>>> +	if (ret) {
>>> +		pr_warn("Fail to stop Dpms!\n");
>>> +		return ret;
>>> +	}
>>> +
>>>  	kfree(table_context->driver_pptable);
>>>  	table_context->driver_pptable = NULL;
>>>
>>> diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
>>> b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
>>> index b19224cb6d6d..8e4b0ad24712 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
>>> +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
>>> @@ -498,6 +498,7 @@ struct smu_funcs
>>>  	int (*get_current_clk_freq)(struct smu_context *smu, enum
>> smu_clk_type clk_id, uint32_t *value);
>>>  	int (*init_max_sustainable_clocks)(struct smu_context *smu);
>>>  	int (*start_thermal_control)(struct smu_context *smu);
>>> +	int (*stop_thermal_control)(struct smu_context *smu);
>>>  	int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors
>> sensor,
>>>  			   void *data, uint32_t *size);
>>>  	int (*set_deep_sleep_dcefclk)(struct smu_context *smu, uint32_t
>>> clk); @@ -647,6 +648,8 @@ struct smu_funcs
>>>  	((smu)->ppt_funcs->set_thermal_fan_table ?
>>> (smu)->ppt_funcs->set_thermal_fan_table((smu)) : 0)  #define
>> smu_start_thermal_control(smu) \
>>>  	((smu)->funcs->start_thermal_control?
>>> (smu)->funcs->start_thermal_control((smu)) : 0)
>>> +#define smu_stop_thermal_control(smu) \
>>> +	((smu)->funcs->stop_thermal_control?
>>> +(smu)->funcs->stop_thermal_control((smu)) : 0)
>>>  #define smu_read_sensor(smu, sensor, data, size) \
>>>  	((smu)->ppt_funcs->read_sensor? (smu)->ppt_funcs-
>>> read_sensor((smu),
>>> (sensor), (data), (size)) : 0)  #define smu_smc_read_sensor(smu,
>>> sensor, data, size) \ diff --git
>>> a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
>>> b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
>>> index db5e94ce54af..1a38af84394e 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
>>> +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
>>> @@ -1209,6 +1209,15 @@ static int
>> smu_v11_0_start_thermal_control(struct smu_context *smu)
>>>  	return ret;
>>>  }
>>>
>>> +static int smu_v11_0_stop_thermal_control(struct smu_context *smu) {
>>> +	struct amdgpu_device *adev = smu->adev;
>>> +
>>> +	WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>>  static uint16_t convert_to_vddc(uint8_t vid)  {
>>>  	return (uint16_t) ((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE); @@
>>> -1783,6 +1792,7 @@ static const struct smu_funcs smu_v11_0_funcs = {
>>>  	.get_current_clk_freq = smu_v11_0_get_current_clk_freq,
>>>  	.init_max_sustainable_clocks =
>> smu_v11_0_init_max_sustainable_clocks,
>>>  	.start_thermal_control = smu_v11_0_start_thermal_control,
>>> +	.stop_thermal_control = smu_v11_0_stop_thermal_control,
>>>  	.read_sensor = smu_v11_0_read_sensor,
>>>  	.set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
>>>  	.display_clock_voltage_request =
>>> smu_v11_0_display_clock_voltage_request,
>>>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
@ 2019-11-11 23:28             ` Matt Coffin
  0 siblings, 0 replies; 9+ messages in thread
From: Matt Coffin @ 2019-11-11 23:28 UTC (permalink / raw)
  To: Quan, Evan, amd-gfx; +Cc: Alex Deucher, Li, Candice, Gui, Jack

Thanks Evan. I can confirm that the linked patch resolves the issue for me.

I commented and resolved the bug as well in case other people find it.

Cheers,
Matt

On 11/11/19 2:28 AM, Quan, Evan wrote:
> Just sent out a patch which should be able to address this issue.
> https://lists.freedesktop.org/archives/amd-gfx/2019-November/042458.html
> 
> Regards,
> Evan
>> -----Original Message-----
>> From: Matt Coffin <mcoffin13@gmail.com>
>> Sent: Saturday, November 9, 2019 4:50 AM
>> To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org
>> Cc: Li, Candice <Candice.Li@amd.com>; Gui, Jack <Jack.Gui@amd.com>; Alex
>> Deucher <alexdeucher@gmail.com>
>> Subject: Re: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
>>
>> Hey guys,
>>
>>
>>
>> This patch caused some kind of reversion with smu_reset on Navi10. I'm no
>> expert since everything I know comes from just reading through the code, so
>> this could be some kind of intended behavior, but after this patch, if you write a
>> pptable to the sysfs pp_table interface on navi10, then the SMU will fail to reset
>> successfully, and the result is seemingly an unrecoverable situation.
>>
>>
>>
>> I put in a report on bugzilla with dmesg logs
>> :
>> https://bugs.freedesktop.org/show_bug.cgi?id=112234
>>
>>
>> Finding this change was the result of a bisect to find where the issue started,
>> and reverting the changes to smu_hw_fini resolved the issue.
>> Any advice on possible proper fixes?
>>
>> Thanks in advance,
>>
>> Matt
>>
>> On 9/2/19 9:44 PM, Quan, Evan wrote:
>>> These are needed for smu_reset support.
>>>
>>> Change-Id: If29ede4b99758adb08fd4e16665f44fd893ec99b
>>> Signed-off-by: Evan Quan <evan.quan@amd.com>
>>> ---
>>>  drivers/gpu/drm/amd/powerplay/amdgpu_smu.c     | 17
>> +++++++++++++++++
>>>  drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h |  3 +++
>>>  drivers/gpu/drm/amd/powerplay/smu_v11_0.c      | 10 ++++++++++
>>>  3 files changed, 30 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
>>> b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
>>> index d5ee13a78eb7..3cf8d944f890 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
>>> +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
>>> @@ -1286,6 +1286,11 @@ static int smu_hw_init(void *handle)
>>>  	return ret;
>>>  }
>>>
>>> +static int smu_stop_dpms(struct smu_context *smu) {
>>> +	return smu_send_smc_msg(smu, SMU_MSG_DisableAllSmuFeatures); }
>>> +
>>>  static int smu_hw_fini(void *handle)
>>>  {
>>>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@
>>> -1298,6 +1303,18 @@ static int smu_hw_fini(void *handle)
>>>  		smu_powergate_vcn(&adev->smu, true);
>>>  	}
>>>
>>> +	ret = smu_stop_thermal_control(smu);
>>> +	if (ret) {
>>> +		pr_warn("Fail to stop thermal control!\n");
>>> +		return ret;
>>> +	}
>>> +
>>> +	ret = smu_stop_dpms(smu);
>>> +	if (ret) {
>>> +		pr_warn("Fail to stop Dpms!\n");
>>> +		return ret;
>>> +	}
>>> +
>>>  	kfree(table_context->driver_pptable);
>>>  	table_context->driver_pptable = NULL;
>>>
>>> diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
>>> b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
>>> index b19224cb6d6d..8e4b0ad24712 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
>>> +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
>>> @@ -498,6 +498,7 @@ struct smu_funcs
>>>  	int (*get_current_clk_freq)(struct smu_context *smu, enum
>> smu_clk_type clk_id, uint32_t *value);
>>>  	int (*init_max_sustainable_clocks)(struct smu_context *smu);
>>>  	int (*start_thermal_control)(struct smu_context *smu);
>>> +	int (*stop_thermal_control)(struct smu_context *smu);
>>>  	int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors
>> sensor,
>>>  			   void *data, uint32_t *size);
>>>  	int (*set_deep_sleep_dcefclk)(struct smu_context *smu, uint32_t
>>> clk); @@ -647,6 +648,8 @@ struct smu_funcs
>>>  	((smu)->ppt_funcs->set_thermal_fan_table ?
>>> (smu)->ppt_funcs->set_thermal_fan_table((smu)) : 0)  #define
>> smu_start_thermal_control(smu) \
>>>  	((smu)->funcs->start_thermal_control?
>>> (smu)->funcs->start_thermal_control((smu)) : 0)
>>> +#define smu_stop_thermal_control(smu) \
>>> +	((smu)->funcs->stop_thermal_control?
>>> +(smu)->funcs->stop_thermal_control((smu)) : 0)
>>>  #define smu_read_sensor(smu, sensor, data, size) \
>>>  	((smu)->ppt_funcs->read_sensor? (smu)->ppt_funcs-
>>> read_sensor((smu),
>>> (sensor), (data), (size)) : 0)  #define smu_smc_read_sensor(smu,
>>> sensor, data, size) \ diff --git
>>> a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
>>> b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
>>> index db5e94ce54af..1a38af84394e 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
>>> +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
>>> @@ -1209,6 +1209,15 @@ static int
>> smu_v11_0_start_thermal_control(struct smu_context *smu)
>>>  	return ret;
>>>  }
>>>
>>> +static int smu_v11_0_stop_thermal_control(struct smu_context *smu) {
>>> +	struct amdgpu_device *adev = smu->adev;
>>> +
>>> +	WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>>  static uint16_t convert_to_vddc(uint8_t vid)  {
>>>  	return (uint16_t) ((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE); @@
>>> -1783,6 +1792,7 @@ static const struct smu_funcs smu_v11_0_funcs = {
>>>  	.get_current_clk_freq = smu_v11_0_get_current_clk_freq,
>>>  	.init_max_sustainable_clocks =
>> smu_v11_0_init_max_sustainable_clocks,
>>>  	.start_thermal_control = smu_v11_0_start_thermal_control,
>>> +	.stop_thermal_control = smu_v11_0_stop_thermal_control,
>>>  	.read_sensor = smu_v11_0_read_sensor,
>>>  	.set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
>>>  	.display_clock_voltage_request =
>>> smu_v11_0_display_clock_voltage_request,
>>>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2019-11-11 23:28 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-03  3:44 [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini Quan, Evan
     [not found] ` <20190903034336.16005-1-evan.quan-5C7GfCeVMHo@public.gmane.org>
2019-09-03  5:16   ` Gui, Jack
2019-09-08 23:05   ` Deucher, Alexander
2019-11-08 20:50   ` Matt Coffin
2019-11-08 20:50     ` Matt Coffin
     [not found]     ` <50104e7b-e38f-6750-7aae-dfef399790f3-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-11-11  9:28       ` Quan, Evan
2019-11-11  9:28         ` Quan, Evan
     [not found]         ` <MN2PR12MB3344AE3F4B75313B425FAB9EE4740-rweVpJHSKToDMgCC8P//OwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-11-11 23:28           ` Matt Coffin
2019-11-11 23:28             ` Matt Coffin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.