* [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
@ 2019-09-03 3:44 Quan, Evan
[not found] ` <20190903034336.16005-1-evan.quan-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 9+ messages in thread
From: Quan, Evan @ 2019-09-03 3:44 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Li, Candice, Gui, Jack, Quan, Evan
These are needed for smu_reset support.
Change-Id: If29ede4b99758adb08fd4e16665f44fd893ec99b
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 17 +++++++++++++++++
drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 3 +++
drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 10 ++++++++++
3 files changed, 30 insertions(+)
diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index d5ee13a78eb7..3cf8d944f890 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -1286,6 +1286,11 @@ static int smu_hw_init(void *handle)
return ret;
}
+static int smu_stop_dpms(struct smu_context *smu)
+{
+ return smu_send_smc_msg(smu, SMU_MSG_DisableAllSmuFeatures);
+}
+
static int smu_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1298,6 +1303,18 @@ static int smu_hw_fini(void *handle)
smu_powergate_vcn(&adev->smu, true);
}
+ ret = smu_stop_thermal_control(smu);
+ if (ret) {
+ pr_warn("Fail to stop thermal control!\n");
+ return ret;
+ }
+
+ ret = smu_stop_dpms(smu);
+ if (ret) {
+ pr_warn("Fail to stop Dpms!\n");
+ return ret;
+ }
+
kfree(table_context->driver_pptable);
table_context->driver_pptable = NULL;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
index b19224cb6d6d..8e4b0ad24712 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
@@ -498,6 +498,7 @@ struct smu_funcs
int (*get_current_clk_freq)(struct smu_context *smu, enum smu_clk_type clk_id, uint32_t *value);
int (*init_max_sustainable_clocks)(struct smu_context *smu);
int (*start_thermal_control)(struct smu_context *smu);
+ int (*stop_thermal_control)(struct smu_context *smu);
int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors sensor,
void *data, uint32_t *size);
int (*set_deep_sleep_dcefclk)(struct smu_context *smu, uint32_t clk);
@@ -647,6 +648,8 @@ struct smu_funcs
((smu)->ppt_funcs->set_thermal_fan_table ? (smu)->ppt_funcs->set_thermal_fan_table((smu)) : 0)
#define smu_start_thermal_control(smu) \
((smu)->funcs->start_thermal_control? (smu)->funcs->start_thermal_control((smu)) : 0)
+#define smu_stop_thermal_control(smu) \
+ ((smu)->funcs->stop_thermal_control? (smu)->funcs->stop_thermal_control((smu)) : 0)
#define smu_read_sensor(smu, sensor, data, size) \
((smu)->ppt_funcs->read_sensor? (smu)->ppt_funcs->read_sensor((smu), (sensor), (data), (size)) : 0)
#define smu_smc_read_sensor(smu, sensor, data, size) \
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index db5e94ce54af..1a38af84394e 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -1209,6 +1209,15 @@ static int smu_v11_0_start_thermal_control(struct smu_context *smu)
return ret;
}
+static int smu_v11_0_stop_thermal_control(struct smu_context *smu)
+{
+ struct amdgpu_device *adev = smu->adev;
+
+ WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
+
+ return 0;
+}
+
static uint16_t convert_to_vddc(uint8_t vid)
{
return (uint16_t) ((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE);
@@ -1783,6 +1792,7 @@ static const struct smu_funcs smu_v11_0_funcs = {
.get_current_clk_freq = smu_v11_0_get_current_clk_freq,
.init_max_sustainable_clocks = smu_v11_0_init_max_sustainable_clocks,
.start_thermal_control = smu_v11_0_start_thermal_control,
+ .stop_thermal_control = smu_v11_0_stop_thermal_control,
.read_sensor = smu_v11_0_read_sensor,
.set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
.display_clock_voltage_request = smu_v11_0_display_clock_voltage_request,
--
2.23.0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 9+ messages in thread
* RE: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
[not found] ` <20190903034336.16005-1-evan.quan-5C7GfCeVMHo@public.gmane.org>
@ 2019-09-03 5:16 ` Gui, Jack
2019-09-08 23:05 ` Deucher, Alexander
2019-11-08 20:50 ` Matt Coffin
2 siblings, 0 replies; 9+ messages in thread
From: Gui, Jack @ 2019-09-03 5:16 UTC (permalink / raw)
To: Quan, Evan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Li, Candice
Reviewed-by: Jack Gui <Jack.Gui@amd.com>
-----Original Message-----
From: Quan, Evan <Evan.Quan@amd.com>
Sent: Tuesday, September 3, 2019 11:44 AM
To: amd-gfx@lists.freedesktop.org
Cc: Li, Candice <Candice.Li@amd.com>; Gui, Jack <Jack.Gui@amd.com>; Quan, Evan <Evan.Quan@amd.com>
Subject: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
These are needed for smu_reset support.
Change-Id: If29ede4b99758adb08fd4e16665f44fd893ec99b
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 17 +++++++++++++++++
drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 3 +++
drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 10 ++++++++++
3 files changed, 30 insertions(+)
diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index d5ee13a78eb7..3cf8d944f890 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -1286,6 +1286,11 @@ static int smu_hw_init(void *handle)
return ret;
}
+static int smu_stop_dpms(struct smu_context *smu) {
+ return smu_send_smc_msg(smu, SMU_MSG_DisableAllSmuFeatures); }
+
static int smu_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1298,6 +1303,18 @@ static int smu_hw_fini(void *handle)
smu_powergate_vcn(&adev->smu, true);
}
+ ret = smu_stop_thermal_control(smu);
+ if (ret) {
+ pr_warn("Fail to stop thermal control!\n");
+ return ret;
+ }
+
+ ret = smu_stop_dpms(smu);
+ if (ret) {
+ pr_warn("Fail to stop Dpms!\n");
+ return ret;
+ }
+
kfree(table_context->driver_pptable);
table_context->driver_pptable = NULL;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
index b19224cb6d6d..8e4b0ad24712 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
@@ -498,6 +498,7 @@ struct smu_funcs
int (*get_current_clk_freq)(struct smu_context *smu, enum smu_clk_type clk_id, uint32_t *value);
int (*init_max_sustainable_clocks)(struct smu_context *smu);
int (*start_thermal_control)(struct smu_context *smu);
+ int (*stop_thermal_control)(struct smu_context *smu);
int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors sensor,
void *data, uint32_t *size);
int (*set_deep_sleep_dcefclk)(struct smu_context *smu, uint32_t clk); @@ -647,6 +648,8 @@ struct smu_funcs
((smu)->ppt_funcs->set_thermal_fan_table ? (smu)->ppt_funcs->set_thermal_fan_table((smu)) : 0) #define smu_start_thermal_control(smu) \
((smu)->funcs->start_thermal_control? (smu)->funcs->start_thermal_control((smu)) : 0)
+#define smu_stop_thermal_control(smu) \
+ ((smu)->funcs->stop_thermal_control?
+(smu)->funcs->stop_thermal_control((smu)) : 0)
#define smu_read_sensor(smu, sensor, data, size) \
((smu)->ppt_funcs->read_sensor? (smu)->ppt_funcs->read_sensor((smu), (sensor), (data), (size)) : 0) #define smu_smc_read_sensor(smu, sensor, data, size) \ diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index db5e94ce54af..1a38af84394e 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -1209,6 +1209,15 @@ static int smu_v11_0_start_thermal_control(struct smu_context *smu)
return ret;
}
+static int smu_v11_0_stop_thermal_control(struct smu_context *smu) {
+ struct amdgpu_device *adev = smu->adev;
+
+ WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
+
+ return 0;
+}
+
static uint16_t convert_to_vddc(uint8_t vid) {
return (uint16_t) ((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE); @@ -1783,6 +1792,7 @@ static const struct smu_funcs smu_v11_0_funcs = {
.get_current_clk_freq = smu_v11_0_get_current_clk_freq,
.init_max_sustainable_clocks = smu_v11_0_init_max_sustainable_clocks,
.start_thermal_control = smu_v11_0_start_thermal_control,
+ .stop_thermal_control = smu_v11_0_stop_thermal_control,
.read_sensor = smu_v11_0_read_sensor,
.set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
.display_clock_voltage_request = smu_v11_0_display_clock_voltage_request,
--
2.23.0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
[not found] ` <20190903034336.16005-1-evan.quan-5C7GfCeVMHo@public.gmane.org>
2019-09-03 5:16 ` Gui, Jack
@ 2019-09-08 23:05 ` Deucher, Alexander
2019-11-08 20:50 ` Matt Coffin
2 siblings, 0 replies; 9+ messages in thread
From: Deucher, Alexander @ 2019-09-08 23:05 UTC (permalink / raw)
To: Quan, Evan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Li, Candice, Gui, Jack
[-- Attachment #1.1: Type: text/plain, Size: 5213 bytes --]
We should probably also add the callbacks for set_mp1_state like we have for the older powerplay code so we properly send the SMU shutdown or reset message on GPU reset and driver unload.
Alex
________________________________
From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> on behalf of Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org>
Sent: Monday, September 2, 2019 11:44 PM
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org <amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Cc: Li, Candice <Candice.Li-5C7GfCeVMHo@public.gmane.org>; Gui, Jack <Jack.Gui-5C7GfCeVMHo@public.gmane.org>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org>
Subject: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
These are needed for smu_reset support.
Change-Id: If29ede4b99758adb08fd4e16665f44fd893ec99b
Signed-off-by: Evan Quan <evan.quan-5C7GfCeVMHo@public.gmane.org>
---
drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 17 +++++++++++++++++
drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 3 +++
drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 10 ++++++++++
3 files changed, 30 insertions(+)
diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index d5ee13a78eb7..3cf8d944f890 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -1286,6 +1286,11 @@ static int smu_hw_init(void *handle)
return ret;
}
+static int smu_stop_dpms(struct smu_context *smu)
+{
+ return smu_send_smc_msg(smu, SMU_MSG_DisableAllSmuFeatures);
+}
+
static int smu_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1298,6 +1303,18 @@ static int smu_hw_fini(void *handle)
smu_powergate_vcn(&adev->smu, true);
}
+ ret = smu_stop_thermal_control(smu);
+ if (ret) {
+ pr_warn("Fail to stop thermal control!\n");
+ return ret;
+ }
+
+ ret = smu_stop_dpms(smu);
+ if (ret) {
+ pr_warn("Fail to stop Dpms!\n");
+ return ret;
+ }
+
kfree(table_context->driver_pptable);
table_context->driver_pptable = NULL;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
index b19224cb6d6d..8e4b0ad24712 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
@@ -498,6 +498,7 @@ struct smu_funcs
int (*get_current_clk_freq)(struct smu_context *smu, enum smu_clk_type clk_id, uint32_t *value);
int (*init_max_sustainable_clocks)(struct smu_context *smu);
int (*start_thermal_control)(struct smu_context *smu);
+ int (*stop_thermal_control)(struct smu_context *smu);
int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors sensor,
void *data, uint32_t *size);
int (*set_deep_sleep_dcefclk)(struct smu_context *smu, uint32_t clk);
@@ -647,6 +648,8 @@ struct smu_funcs
((smu)->ppt_funcs->set_thermal_fan_table ? (smu)->ppt_funcs->set_thermal_fan_table((smu)) : 0)
#define smu_start_thermal_control(smu) \
((smu)->funcs->start_thermal_control? (smu)->funcs->start_thermal_control((smu)) : 0)
+#define smu_stop_thermal_control(smu) \
+ ((smu)->funcs->stop_thermal_control? (smu)->funcs->stop_thermal_control((smu)) : 0)
#define smu_read_sensor(smu, sensor, data, size) \
((smu)->ppt_funcs->read_sensor? (smu)->ppt_funcs->read_sensor((smu), (sensor), (data), (size)) : 0)
#define smu_smc_read_sensor(smu, sensor, data, size) \
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index db5e94ce54af..1a38af84394e 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -1209,6 +1209,15 @@ static int smu_v11_0_start_thermal_control(struct smu_context *smu)
return ret;
}
+static int smu_v11_0_stop_thermal_control(struct smu_context *smu)
+{
+ struct amdgpu_device *adev = smu->adev;
+
+ WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
+
+ return 0;
+}
+
static uint16_t convert_to_vddc(uint8_t vid)
{
return (uint16_t) ((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE);
@@ -1783,6 +1792,7 @@ static const struct smu_funcs smu_v11_0_funcs = {
.get_current_clk_freq = smu_v11_0_get_current_clk_freq,
.init_max_sustainable_clocks = smu_v11_0_init_max_sustainable_clocks,
.start_thermal_control = smu_v11_0_start_thermal_control,
+ .stop_thermal_control = smu_v11_0_stop_thermal_control,
.read_sensor = smu_v11_0_read_sensor,
.set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
.display_clock_voltage_request = smu_v11_0_display_clock_voltage_request,
--
2.23.0
_______________________________________________
amd-gfx mailing list
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[-- Attachment #1.2: Type: text/html, Size: 8983 bytes --]
[-- Attachment #2: Type: text/plain, Size: 153 bytes --]
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
@ 2019-11-08 20:50 ` Matt Coffin
0 siblings, 0 replies; 9+ messages in thread
From: Matt Coffin @ 2019-11-08 20:50 UTC (permalink / raw)
To: Quan, Evan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Alex Deucher, Li, Candice, Gui, Jack
Hey guys,
This patch caused some kind of reversion with smu_reset on Navi10. I'm
no expert since everything I know comes from just reading through the
code, so this could be some kind of intended behavior, but after this
patch, if you write a pptable to the sysfs pp_table interface on navi10,
then the SMU will fail to reset successfully, and the result is
seemingly an unrecoverable situation.
I put in a report on bugzilla with dmesg logs
:
https://bugs.freedesktop.org/show_bug.cgi?id=112234
Finding this change was the result of a bisect to find where the issue
started, and reverting the changes to smu_hw_fini resolved the issue.
Any advice on possible proper fixes?
Thanks in advance,
Matt
On 9/2/19 9:44 PM, Quan, Evan wrote:
> These are needed for smu_reset support.
>
> Change-Id: If29ede4b99758adb08fd4e16665f44fd893ec99b
> Signed-off-by: Evan Quan <evan.quan@amd.com>
> ---
> drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 17 +++++++++++++++++
> drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 3 +++
> drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 10 ++++++++++
> 3 files changed, 30 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> index d5ee13a78eb7..3cf8d944f890 100644
> --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> @@ -1286,6 +1286,11 @@ static int smu_hw_init(void *handle)
> return ret;
> }
>
> +static int smu_stop_dpms(struct smu_context *smu)
> +{
> + return smu_send_smc_msg(smu, SMU_MSG_DisableAllSmuFeatures);
> +}
> +
> static int smu_hw_fini(void *handle)
> {
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> @@ -1298,6 +1303,18 @@ static int smu_hw_fini(void *handle)
> smu_powergate_vcn(&adev->smu, true);
> }
>
> + ret = smu_stop_thermal_control(smu);
> + if (ret) {
> + pr_warn("Fail to stop thermal control!\n");
> + return ret;
> + }
> +
> + ret = smu_stop_dpms(smu);
> + if (ret) {
> + pr_warn("Fail to stop Dpms!\n");
> + return ret;
> + }
> +
> kfree(table_context->driver_pptable);
> table_context->driver_pptable = NULL;
>
> diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> index b19224cb6d6d..8e4b0ad24712 100644
> --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> @@ -498,6 +498,7 @@ struct smu_funcs
> int (*get_current_clk_freq)(struct smu_context *smu, enum smu_clk_type clk_id, uint32_t *value);
> int (*init_max_sustainable_clocks)(struct smu_context *smu);
> int (*start_thermal_control)(struct smu_context *smu);
> + int (*stop_thermal_control)(struct smu_context *smu);
> int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors sensor,
> void *data, uint32_t *size);
> int (*set_deep_sleep_dcefclk)(struct smu_context *smu, uint32_t clk);
> @@ -647,6 +648,8 @@ struct smu_funcs
> ((smu)->ppt_funcs->set_thermal_fan_table ? (smu)->ppt_funcs->set_thermal_fan_table((smu)) : 0)
> #define smu_start_thermal_control(smu) \
> ((smu)->funcs->start_thermal_control? (smu)->funcs->start_thermal_control((smu)) : 0)
> +#define smu_stop_thermal_control(smu) \
> + ((smu)->funcs->stop_thermal_control? (smu)->funcs->stop_thermal_control((smu)) : 0)
> #define smu_read_sensor(smu, sensor, data, size) \
> ((smu)->ppt_funcs->read_sensor? (smu)->ppt_funcs->read_sensor((smu), (sensor), (data), (size)) : 0)
> #define smu_smc_read_sensor(smu, sensor, data, size) \
> diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> index db5e94ce54af..1a38af84394e 100644
> --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> @@ -1209,6 +1209,15 @@ static int smu_v11_0_start_thermal_control(struct smu_context *smu)
> return ret;
> }
>
> +static int smu_v11_0_stop_thermal_control(struct smu_context *smu)
> +{
> + struct amdgpu_device *adev = smu->adev;
> +
> + WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
> +
> + return 0;
> +}
> +
> static uint16_t convert_to_vddc(uint8_t vid)
> {
> return (uint16_t) ((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE);
> @@ -1783,6 +1792,7 @@ static const struct smu_funcs smu_v11_0_funcs = {
> .get_current_clk_freq = smu_v11_0_get_current_clk_freq,
> .init_max_sustainable_clocks = smu_v11_0_init_max_sustainable_clocks,
> .start_thermal_control = smu_v11_0_start_thermal_control,
> + .stop_thermal_control = smu_v11_0_stop_thermal_control,
> .read_sensor = smu_v11_0_read_sensor,
> .set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
> .display_clock_voltage_request = smu_v11_0_display_clock_voltage_request,
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
@ 2019-11-08 20:50 ` Matt Coffin
0 siblings, 0 replies; 9+ messages in thread
From: Matt Coffin @ 2019-11-08 20:50 UTC (permalink / raw)
To: Quan, Evan, amd-gfx; +Cc: Alex Deucher, Li, Candice, Gui, Jack
Hey guys,
This patch caused some kind of reversion with smu_reset on Navi10. I'm
no expert since everything I know comes from just reading through the
code, so this could be some kind of intended behavior, but after this
patch, if you write a pptable to the sysfs pp_table interface on navi10,
then the SMU will fail to reset successfully, and the result is
seemingly an unrecoverable situation.
I put in a report on bugzilla with dmesg logs
:
https://bugs.freedesktop.org/show_bug.cgi?id=112234
Finding this change was the result of a bisect to find where the issue
started, and reverting the changes to smu_hw_fini resolved the issue.
Any advice on possible proper fixes?
Thanks in advance,
Matt
On 9/2/19 9:44 PM, Quan, Evan wrote:
> These are needed for smu_reset support.
>
> Change-Id: If29ede4b99758adb08fd4e16665f44fd893ec99b
> Signed-off-by: Evan Quan <evan.quan@amd.com>
> ---
> drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 17 +++++++++++++++++
> drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 3 +++
> drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 10 ++++++++++
> 3 files changed, 30 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> index d5ee13a78eb7..3cf8d944f890 100644
> --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> @@ -1286,6 +1286,11 @@ static int smu_hw_init(void *handle)
> return ret;
> }
>
> +static int smu_stop_dpms(struct smu_context *smu)
> +{
> + return smu_send_smc_msg(smu, SMU_MSG_DisableAllSmuFeatures);
> +}
> +
> static int smu_hw_fini(void *handle)
> {
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> @@ -1298,6 +1303,18 @@ static int smu_hw_fini(void *handle)
> smu_powergate_vcn(&adev->smu, true);
> }
>
> + ret = smu_stop_thermal_control(smu);
> + if (ret) {
> + pr_warn("Fail to stop thermal control!\n");
> + return ret;
> + }
> +
> + ret = smu_stop_dpms(smu);
> + if (ret) {
> + pr_warn("Fail to stop Dpms!\n");
> + return ret;
> + }
> +
> kfree(table_context->driver_pptable);
> table_context->driver_pptable = NULL;
>
> diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> index b19224cb6d6d..8e4b0ad24712 100644
> --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> @@ -498,6 +498,7 @@ struct smu_funcs
> int (*get_current_clk_freq)(struct smu_context *smu, enum smu_clk_type clk_id, uint32_t *value);
> int (*init_max_sustainable_clocks)(struct smu_context *smu);
> int (*start_thermal_control)(struct smu_context *smu);
> + int (*stop_thermal_control)(struct smu_context *smu);
> int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors sensor,
> void *data, uint32_t *size);
> int (*set_deep_sleep_dcefclk)(struct smu_context *smu, uint32_t clk);
> @@ -647,6 +648,8 @@ struct smu_funcs
> ((smu)->ppt_funcs->set_thermal_fan_table ? (smu)->ppt_funcs->set_thermal_fan_table((smu)) : 0)
> #define smu_start_thermal_control(smu) \
> ((smu)->funcs->start_thermal_control? (smu)->funcs->start_thermal_control((smu)) : 0)
> +#define smu_stop_thermal_control(smu) \
> + ((smu)->funcs->stop_thermal_control? (smu)->funcs->stop_thermal_control((smu)) : 0)
> #define smu_read_sensor(smu, sensor, data, size) \
> ((smu)->ppt_funcs->read_sensor? (smu)->ppt_funcs->read_sensor((smu), (sensor), (data), (size)) : 0)
> #define smu_smc_read_sensor(smu, sensor, data, size) \
> diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> index db5e94ce54af..1a38af84394e 100644
> --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> @@ -1209,6 +1209,15 @@ static int smu_v11_0_start_thermal_control(struct smu_context *smu)
> return ret;
> }
>
> +static int smu_v11_0_stop_thermal_control(struct smu_context *smu)
> +{
> + struct amdgpu_device *adev = smu->adev;
> +
> + WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
> +
> + return 0;
> +}
> +
> static uint16_t convert_to_vddc(uint8_t vid)
> {
> return (uint16_t) ((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE);
> @@ -1783,6 +1792,7 @@ static const struct smu_funcs smu_v11_0_funcs = {
> .get_current_clk_freq = smu_v11_0_get_current_clk_freq,
> .init_max_sustainable_clocks = smu_v11_0_init_max_sustainable_clocks,
> .start_thermal_control = smu_v11_0_start_thermal_control,
> + .stop_thermal_control = smu_v11_0_stop_thermal_control,
> .read_sensor = smu_v11_0_read_sensor,
> .set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
> .display_clock_voltage_request = smu_v11_0_display_clock_voltage_request,
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
@ 2019-11-11 9:28 ` Quan, Evan
0 siblings, 0 replies; 9+ messages in thread
From: Quan, Evan @ 2019-11-11 9:28 UTC (permalink / raw)
To: Matt Coffin, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Alex Deucher, Li, Candice, Gui, Jack
Just sent out a patch which should be able to address this issue.
https://lists.freedesktop.org/archives/amd-gfx/2019-November/042458.html
Regards,
Evan
> -----Original Message-----
> From: Matt Coffin <mcoffin13@gmail.com>
> Sent: Saturday, November 9, 2019 4:50 AM
> To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Li, Candice <Candice.Li@amd.com>; Gui, Jack <Jack.Gui@amd.com>; Alex
> Deucher <alexdeucher@gmail.com>
> Subject: Re: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
>
> Hey guys,
>
>
>
> This patch caused some kind of reversion with smu_reset on Navi10. I'm no
> expert since everything I know comes from just reading through the code, so
> this could be some kind of intended behavior, but after this patch, if you write a
> pptable to the sysfs pp_table interface on navi10, then the SMU will fail to reset
> successfully, and the result is seemingly an unrecoverable situation.
>
>
>
> I put in a report on bugzilla with dmesg logs
> :
> https://bugs.freedesktop.org/show_bug.cgi?id=112234
>
>
> Finding this change was the result of a bisect to find where the issue started,
> and reverting the changes to smu_hw_fini resolved the issue.
> Any advice on possible proper fixes?
>
> Thanks in advance,
>
> Matt
>
> On 9/2/19 9:44 PM, Quan, Evan wrote:
> > These are needed for smu_reset support.
> >
> > Change-Id: If29ede4b99758adb08fd4e16665f44fd893ec99b
> > Signed-off-by: Evan Quan <evan.quan@amd.com>
> > ---
> > drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 17
> +++++++++++++++++
> > drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 3 +++
> > drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 10 ++++++++++
> > 3 files changed, 30 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > index d5ee13a78eb7..3cf8d944f890 100644
> > --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > @@ -1286,6 +1286,11 @@ static int smu_hw_init(void *handle)
> > return ret;
> > }
> >
> > +static int smu_stop_dpms(struct smu_context *smu) {
> > + return smu_send_smc_msg(smu, SMU_MSG_DisableAllSmuFeatures); }
> > +
> > static int smu_hw_fini(void *handle)
> > {
> > struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@
> > -1298,6 +1303,18 @@ static int smu_hw_fini(void *handle)
> > smu_powergate_vcn(&adev->smu, true);
> > }
> >
> > + ret = smu_stop_thermal_control(smu);
> > + if (ret) {
> > + pr_warn("Fail to stop thermal control!\n");
> > + return ret;
> > + }
> > +
> > + ret = smu_stop_dpms(smu);
> > + if (ret) {
> > + pr_warn("Fail to stop Dpms!\n");
> > + return ret;
> > + }
> > +
> > kfree(table_context->driver_pptable);
> > table_context->driver_pptable = NULL;
> >
> > diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> > b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> > index b19224cb6d6d..8e4b0ad24712 100644
> > --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> > +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> > @@ -498,6 +498,7 @@ struct smu_funcs
> > int (*get_current_clk_freq)(struct smu_context *smu, enum
> smu_clk_type clk_id, uint32_t *value);
> > int (*init_max_sustainable_clocks)(struct smu_context *smu);
> > int (*start_thermal_control)(struct smu_context *smu);
> > + int (*stop_thermal_control)(struct smu_context *smu);
> > int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors
> sensor,
> > void *data, uint32_t *size);
> > int (*set_deep_sleep_dcefclk)(struct smu_context *smu, uint32_t
> > clk); @@ -647,6 +648,8 @@ struct smu_funcs
> > ((smu)->ppt_funcs->set_thermal_fan_table ?
> > (smu)->ppt_funcs->set_thermal_fan_table((smu)) : 0) #define
> smu_start_thermal_control(smu) \
> > ((smu)->funcs->start_thermal_control?
> > (smu)->funcs->start_thermal_control((smu)) : 0)
> > +#define smu_stop_thermal_control(smu) \
> > + ((smu)->funcs->stop_thermal_control?
> > +(smu)->funcs->stop_thermal_control((smu)) : 0)
> > #define smu_read_sensor(smu, sensor, data, size) \
> > ((smu)->ppt_funcs->read_sensor? (smu)->ppt_funcs-
> >read_sensor((smu),
> > (sensor), (data), (size)) : 0) #define smu_smc_read_sensor(smu,
> > sensor, data, size) \ diff --git
> > a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> > b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> > index db5e94ce54af..1a38af84394e 100644
> > --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> > +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> > @@ -1209,6 +1209,15 @@ static int
> smu_v11_0_start_thermal_control(struct smu_context *smu)
> > return ret;
> > }
> >
> > +static int smu_v11_0_stop_thermal_control(struct smu_context *smu) {
> > + struct amdgpu_device *adev = smu->adev;
> > +
> > + WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
> > +
> > + return 0;
> > +}
> > +
> > static uint16_t convert_to_vddc(uint8_t vid) {
> > return (uint16_t) ((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE); @@
> > -1783,6 +1792,7 @@ static const struct smu_funcs smu_v11_0_funcs = {
> > .get_current_clk_freq = smu_v11_0_get_current_clk_freq,
> > .init_max_sustainable_clocks =
> smu_v11_0_init_max_sustainable_clocks,
> > .start_thermal_control = smu_v11_0_start_thermal_control,
> > + .stop_thermal_control = smu_v11_0_stop_thermal_control,
> > .read_sensor = smu_v11_0_read_sensor,
> > .set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
> > .display_clock_voltage_request =
> > smu_v11_0_display_clock_voltage_request,
> >
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
@ 2019-11-11 9:28 ` Quan, Evan
0 siblings, 0 replies; 9+ messages in thread
From: Quan, Evan @ 2019-11-11 9:28 UTC (permalink / raw)
To: Matt Coffin, amd-gfx; +Cc: Alex Deucher, Li, Candice, Gui, Jack
Just sent out a patch which should be able to address this issue.
https://lists.freedesktop.org/archives/amd-gfx/2019-November/042458.html
Regards,
Evan
> -----Original Message-----
> From: Matt Coffin <mcoffin13@gmail.com>
> Sent: Saturday, November 9, 2019 4:50 AM
> To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Li, Candice <Candice.Li@amd.com>; Gui, Jack <Jack.Gui@amd.com>; Alex
> Deucher <alexdeucher@gmail.com>
> Subject: Re: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
>
> Hey guys,
>
>
>
> This patch caused some kind of reversion with smu_reset on Navi10. I'm no
> expert since everything I know comes from just reading through the code, so
> this could be some kind of intended behavior, but after this patch, if you write a
> pptable to the sysfs pp_table interface on navi10, then the SMU will fail to reset
> successfully, and the result is seemingly an unrecoverable situation.
>
>
>
> I put in a report on bugzilla with dmesg logs
> :
> https://bugs.freedesktop.org/show_bug.cgi?id=112234
>
>
> Finding this change was the result of a bisect to find where the issue started,
> and reverting the changes to smu_hw_fini resolved the issue.
> Any advice on possible proper fixes?
>
> Thanks in advance,
>
> Matt
>
> On 9/2/19 9:44 PM, Quan, Evan wrote:
> > These are needed for smu_reset support.
> >
> > Change-Id: If29ede4b99758adb08fd4e16665f44fd893ec99b
> > Signed-off-by: Evan Quan <evan.quan@amd.com>
> > ---
> > drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 17
> +++++++++++++++++
> > drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 3 +++
> > drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 10 ++++++++++
> > 3 files changed, 30 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > index d5ee13a78eb7..3cf8d944f890 100644
> > --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> > @@ -1286,6 +1286,11 @@ static int smu_hw_init(void *handle)
> > return ret;
> > }
> >
> > +static int smu_stop_dpms(struct smu_context *smu) {
> > + return smu_send_smc_msg(smu, SMU_MSG_DisableAllSmuFeatures); }
> > +
> > static int smu_hw_fini(void *handle)
> > {
> > struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@
> > -1298,6 +1303,18 @@ static int smu_hw_fini(void *handle)
> > smu_powergate_vcn(&adev->smu, true);
> > }
> >
> > + ret = smu_stop_thermal_control(smu);
> > + if (ret) {
> > + pr_warn("Fail to stop thermal control!\n");
> > + return ret;
> > + }
> > +
> > + ret = smu_stop_dpms(smu);
> > + if (ret) {
> > + pr_warn("Fail to stop Dpms!\n");
> > + return ret;
> > + }
> > +
> > kfree(table_context->driver_pptable);
> > table_context->driver_pptable = NULL;
> >
> > diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> > b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> > index b19224cb6d6d..8e4b0ad24712 100644
> > --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> > +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
> > @@ -498,6 +498,7 @@ struct smu_funcs
> > int (*get_current_clk_freq)(struct smu_context *smu, enum
> smu_clk_type clk_id, uint32_t *value);
> > int (*init_max_sustainable_clocks)(struct smu_context *smu);
> > int (*start_thermal_control)(struct smu_context *smu);
> > + int (*stop_thermal_control)(struct smu_context *smu);
> > int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors
> sensor,
> > void *data, uint32_t *size);
> > int (*set_deep_sleep_dcefclk)(struct smu_context *smu, uint32_t
> > clk); @@ -647,6 +648,8 @@ struct smu_funcs
> > ((smu)->ppt_funcs->set_thermal_fan_table ?
> > (smu)->ppt_funcs->set_thermal_fan_table((smu)) : 0) #define
> smu_start_thermal_control(smu) \
> > ((smu)->funcs->start_thermal_control?
> > (smu)->funcs->start_thermal_control((smu)) : 0)
> > +#define smu_stop_thermal_control(smu) \
> > + ((smu)->funcs->stop_thermal_control?
> > +(smu)->funcs->stop_thermal_control((smu)) : 0)
> > #define smu_read_sensor(smu, sensor, data, size) \
> > ((smu)->ppt_funcs->read_sensor? (smu)->ppt_funcs-
> >read_sensor((smu),
> > (sensor), (data), (size)) : 0) #define smu_smc_read_sensor(smu,
> > sensor, data, size) \ diff --git
> > a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> > b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> > index db5e94ce54af..1a38af84394e 100644
> > --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> > +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
> > @@ -1209,6 +1209,15 @@ static int
> smu_v11_0_start_thermal_control(struct smu_context *smu)
> > return ret;
> > }
> >
> > +static int smu_v11_0_stop_thermal_control(struct smu_context *smu) {
> > + struct amdgpu_device *adev = smu->adev;
> > +
> > + WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
> > +
> > + return 0;
> > +}
> > +
> > static uint16_t convert_to_vddc(uint8_t vid) {
> > return (uint16_t) ((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE); @@
> > -1783,6 +1792,7 @@ static const struct smu_funcs smu_v11_0_funcs = {
> > .get_current_clk_freq = smu_v11_0_get_current_clk_freq,
> > .init_max_sustainable_clocks =
> smu_v11_0_init_max_sustainable_clocks,
> > .start_thermal_control = smu_v11_0_start_thermal_control,
> > + .stop_thermal_control = smu_v11_0_stop_thermal_control,
> > .read_sensor = smu_v11_0_read_sensor,
> > .set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
> > .display_clock_voltage_request =
> > smu_v11_0_display_clock_voltage_request,
> >
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
@ 2019-11-11 23:28 ` Matt Coffin
0 siblings, 0 replies; 9+ messages in thread
From: Matt Coffin @ 2019-11-11 23:28 UTC (permalink / raw)
To: Quan, Evan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Alex Deucher, Li, Candice, Gui, Jack
Thanks Evan. I can confirm that the linked patch resolves the issue for me.
I commented and resolved the bug as well in case other people find it.
Cheers,
Matt
On 11/11/19 2:28 AM, Quan, Evan wrote:
> Just sent out a patch which should be able to address this issue.
> https://lists.freedesktop.org/archives/amd-gfx/2019-November/042458.html
>
> Regards,
> Evan
>> -----Original Message-----
>> From: Matt Coffin <mcoffin13@gmail.com>
>> Sent: Saturday, November 9, 2019 4:50 AM
>> To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org
>> Cc: Li, Candice <Candice.Li@amd.com>; Gui, Jack <Jack.Gui@amd.com>; Alex
>> Deucher <alexdeucher@gmail.com>
>> Subject: Re: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
>>
>> Hey guys,
>>
>>
>>
>> This patch caused some kind of reversion with smu_reset on Navi10. I'm no
>> expert since everything I know comes from just reading through the code, so
>> this could be some kind of intended behavior, but after this patch, if you write a
>> pptable to the sysfs pp_table interface on navi10, then the SMU will fail to reset
>> successfully, and the result is seemingly an unrecoverable situation.
>>
>>
>>
>> I put in a report on bugzilla with dmesg logs
>> :
>> https://bugs.freedesktop.org/show_bug.cgi?id=112234
>>
>>
>> Finding this change was the result of a bisect to find where the issue started,
>> and reverting the changes to smu_hw_fini resolved the issue.
>> Any advice on possible proper fixes?
>>
>> Thanks in advance,
>>
>> Matt
>>
>> On 9/2/19 9:44 PM, Quan, Evan wrote:
>>> These are needed for smu_reset support.
>>>
>>> Change-Id: If29ede4b99758adb08fd4e16665f44fd893ec99b
>>> Signed-off-by: Evan Quan <evan.quan@amd.com>
>>> ---
>>> drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 17
>> +++++++++++++++++
>>> drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 3 +++
>>> drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 10 ++++++++++
>>> 3 files changed, 30 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
>>> b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
>>> index d5ee13a78eb7..3cf8d944f890 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
>>> +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
>>> @@ -1286,6 +1286,11 @@ static int smu_hw_init(void *handle)
>>> return ret;
>>> }
>>>
>>> +static int smu_stop_dpms(struct smu_context *smu) {
>>> + return smu_send_smc_msg(smu, SMU_MSG_DisableAllSmuFeatures); }
>>> +
>>> static int smu_hw_fini(void *handle)
>>> {
>>> struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@
>>> -1298,6 +1303,18 @@ static int smu_hw_fini(void *handle)
>>> smu_powergate_vcn(&adev->smu, true);
>>> }
>>>
>>> + ret = smu_stop_thermal_control(smu);
>>> + if (ret) {
>>> + pr_warn("Fail to stop thermal control!\n");
>>> + return ret;
>>> + }
>>> +
>>> + ret = smu_stop_dpms(smu);
>>> + if (ret) {
>>> + pr_warn("Fail to stop Dpms!\n");
>>> + return ret;
>>> + }
>>> +
>>> kfree(table_context->driver_pptable);
>>> table_context->driver_pptable = NULL;
>>>
>>> diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
>>> b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
>>> index b19224cb6d6d..8e4b0ad24712 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
>>> +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
>>> @@ -498,6 +498,7 @@ struct smu_funcs
>>> int (*get_current_clk_freq)(struct smu_context *smu, enum
>> smu_clk_type clk_id, uint32_t *value);
>>> int (*init_max_sustainable_clocks)(struct smu_context *smu);
>>> int (*start_thermal_control)(struct smu_context *smu);
>>> + int (*stop_thermal_control)(struct smu_context *smu);
>>> int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors
>> sensor,
>>> void *data, uint32_t *size);
>>> int (*set_deep_sleep_dcefclk)(struct smu_context *smu, uint32_t
>>> clk); @@ -647,6 +648,8 @@ struct smu_funcs
>>> ((smu)->ppt_funcs->set_thermal_fan_table ?
>>> (smu)->ppt_funcs->set_thermal_fan_table((smu)) : 0) #define
>> smu_start_thermal_control(smu) \
>>> ((smu)->funcs->start_thermal_control?
>>> (smu)->funcs->start_thermal_control((smu)) : 0)
>>> +#define smu_stop_thermal_control(smu) \
>>> + ((smu)->funcs->stop_thermal_control?
>>> +(smu)->funcs->stop_thermal_control((smu)) : 0)
>>> #define smu_read_sensor(smu, sensor, data, size) \
>>> ((smu)->ppt_funcs->read_sensor? (smu)->ppt_funcs-
>>> read_sensor((smu),
>>> (sensor), (data), (size)) : 0) #define smu_smc_read_sensor(smu,
>>> sensor, data, size) \ diff --git
>>> a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
>>> b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
>>> index db5e94ce54af..1a38af84394e 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
>>> +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
>>> @@ -1209,6 +1209,15 @@ static int
>> smu_v11_0_start_thermal_control(struct smu_context *smu)
>>> return ret;
>>> }
>>>
>>> +static int smu_v11_0_stop_thermal_control(struct smu_context *smu) {
>>> + struct amdgpu_device *adev = smu->adev;
>>> +
>>> + WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> static uint16_t convert_to_vddc(uint8_t vid) {
>>> return (uint16_t) ((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE); @@
>>> -1783,6 +1792,7 @@ static const struct smu_funcs smu_v11_0_funcs = {
>>> .get_current_clk_freq = smu_v11_0_get_current_clk_freq,
>>> .init_max_sustainable_clocks =
>> smu_v11_0_init_max_sustainable_clocks,
>>> .start_thermal_control = smu_v11_0_start_thermal_control,
>>> + .stop_thermal_control = smu_v11_0_stop_thermal_control,
>>> .read_sensor = smu_v11_0_read_sensor,
>>> .set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
>>> .display_clock_voltage_request =
>>> smu_v11_0_display_clock_voltage_request,
>>>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
@ 2019-11-11 23:28 ` Matt Coffin
0 siblings, 0 replies; 9+ messages in thread
From: Matt Coffin @ 2019-11-11 23:28 UTC (permalink / raw)
To: Quan, Evan, amd-gfx; +Cc: Alex Deucher, Li, Candice, Gui, Jack
Thanks Evan. I can confirm that the linked patch resolves the issue for me.
I commented and resolved the bug as well in case other people find it.
Cheers,
Matt
On 11/11/19 2:28 AM, Quan, Evan wrote:
> Just sent out a patch which should be able to address this issue.
> https://lists.freedesktop.org/archives/amd-gfx/2019-November/042458.html
>
> Regards,
> Evan
>> -----Original Message-----
>> From: Matt Coffin <mcoffin13@gmail.com>
>> Sent: Saturday, November 9, 2019 4:50 AM
>> To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org
>> Cc: Li, Candice <Candice.Li@amd.com>; Gui, Jack <Jack.Gui@amd.com>; Alex
>> Deucher <alexdeucher@gmail.com>
>> Subject: Re: [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini
>>
>> Hey guys,
>>
>>
>>
>> This patch caused some kind of reversion with smu_reset on Navi10. I'm no
>> expert since everything I know comes from just reading through the code, so
>> this could be some kind of intended behavior, but after this patch, if you write a
>> pptable to the sysfs pp_table interface on navi10, then the SMU will fail to reset
>> successfully, and the result is seemingly an unrecoverable situation.
>>
>>
>>
>> I put in a report on bugzilla with dmesg logs
>> :
>> https://bugs.freedesktop.org/show_bug.cgi?id=112234
>>
>>
>> Finding this change was the result of a bisect to find where the issue started,
>> and reverting the changes to smu_hw_fini resolved the issue.
>> Any advice on possible proper fixes?
>>
>> Thanks in advance,
>>
>> Matt
>>
>> On 9/2/19 9:44 PM, Quan, Evan wrote:
>>> These are needed for smu_reset support.
>>>
>>> Change-Id: If29ede4b99758adb08fd4e16665f44fd893ec99b
>>> Signed-off-by: Evan Quan <evan.quan@amd.com>
>>> ---
>>> drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 17
>> +++++++++++++++++
>>> drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 3 +++
>>> drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 10 ++++++++++
>>> 3 files changed, 30 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
>>> b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
>>> index d5ee13a78eb7..3cf8d944f890 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
>>> +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
>>> @@ -1286,6 +1286,11 @@ static int smu_hw_init(void *handle)
>>> return ret;
>>> }
>>>
>>> +static int smu_stop_dpms(struct smu_context *smu) {
>>> + return smu_send_smc_msg(smu, SMU_MSG_DisableAllSmuFeatures); }
>>> +
>>> static int smu_hw_fini(void *handle)
>>> {
>>> struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@
>>> -1298,6 +1303,18 @@ static int smu_hw_fini(void *handle)
>>> smu_powergate_vcn(&adev->smu, true);
>>> }
>>>
>>> + ret = smu_stop_thermal_control(smu);
>>> + if (ret) {
>>> + pr_warn("Fail to stop thermal control!\n");
>>> + return ret;
>>> + }
>>> +
>>> + ret = smu_stop_dpms(smu);
>>> + if (ret) {
>>> + pr_warn("Fail to stop Dpms!\n");
>>> + return ret;
>>> + }
>>> +
>>> kfree(table_context->driver_pptable);
>>> table_context->driver_pptable = NULL;
>>>
>>> diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
>>> b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
>>> index b19224cb6d6d..8e4b0ad24712 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
>>> +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
>>> @@ -498,6 +498,7 @@ struct smu_funcs
>>> int (*get_current_clk_freq)(struct smu_context *smu, enum
>> smu_clk_type clk_id, uint32_t *value);
>>> int (*init_max_sustainable_clocks)(struct smu_context *smu);
>>> int (*start_thermal_control)(struct smu_context *smu);
>>> + int (*stop_thermal_control)(struct smu_context *smu);
>>> int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors
>> sensor,
>>> void *data, uint32_t *size);
>>> int (*set_deep_sleep_dcefclk)(struct smu_context *smu, uint32_t
>>> clk); @@ -647,6 +648,8 @@ struct smu_funcs
>>> ((smu)->ppt_funcs->set_thermal_fan_table ?
>>> (smu)->ppt_funcs->set_thermal_fan_table((smu)) : 0) #define
>> smu_start_thermal_control(smu) \
>>> ((smu)->funcs->start_thermal_control?
>>> (smu)->funcs->start_thermal_control((smu)) : 0)
>>> +#define smu_stop_thermal_control(smu) \
>>> + ((smu)->funcs->stop_thermal_control?
>>> +(smu)->funcs->stop_thermal_control((smu)) : 0)
>>> #define smu_read_sensor(smu, sensor, data, size) \
>>> ((smu)->ppt_funcs->read_sensor? (smu)->ppt_funcs-
>>> read_sensor((smu),
>>> (sensor), (data), (size)) : 0) #define smu_smc_read_sensor(smu,
>>> sensor, data, size) \ diff --git
>>> a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
>>> b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
>>> index db5e94ce54af..1a38af84394e 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
>>> +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
>>> @@ -1209,6 +1209,15 @@ static int
>> smu_v11_0_start_thermal_control(struct smu_context *smu)
>>> return ret;
>>> }
>>>
>>> +static int smu_v11_0_stop_thermal_control(struct smu_context *smu) {
>>> + struct amdgpu_device *adev = smu->adev;
>>> +
>>> + WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> static uint16_t convert_to_vddc(uint8_t vid) {
>>> return (uint16_t) ((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE); @@
>>> -1783,6 +1792,7 @@ static const struct smu_funcs smu_v11_0_funcs = {
>>> .get_current_clk_freq = smu_v11_0_get_current_clk_freq,
>>> .init_max_sustainable_clocks =
>> smu_v11_0_init_max_sustainable_clocks,
>>> .start_thermal_control = smu_v11_0_start_thermal_control,
>>> + .stop_thermal_control = smu_v11_0_stop_thermal_control,
>>> .read_sensor = smu_v11_0_read_sensor,
>>> .set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
>>> .display_clock_voltage_request =
>>> smu_v11_0_display_clock_voltage_request,
>>>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2019-11-11 23:28 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-03 3:44 [PATCH] drm/amd/powerplay: do proper cleanups on hw_fini Quan, Evan
[not found] ` <20190903034336.16005-1-evan.quan-5C7GfCeVMHo@public.gmane.org>
2019-09-03 5:16 ` Gui, Jack
2019-09-08 23:05 ` Deucher, Alexander
2019-11-08 20:50 ` Matt Coffin
2019-11-08 20:50 ` Matt Coffin
[not found] ` <50104e7b-e38f-6750-7aae-dfef399790f3-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-11-11 9:28 ` Quan, Evan
2019-11-11 9:28 ` Quan, Evan
[not found] ` <MN2PR12MB3344AE3F4B75313B425FAB9EE4740-rweVpJHSKToDMgCC8P//OwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-11-11 23:28 ` Matt Coffin
2019-11-11 23:28 ` Matt Coffin
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.