* [PATCH] drm/amdgpu: support new mode-1 reset interface (v2)
@ 2021-11-16 10:28 Tao Zhou
2021-11-16 10:40 ` Lazar, Lijo
2021-11-17 1:49 ` Quan, Evan
0 siblings, 2 replies; 5+ messages in thread
From: Tao Zhou @ 2021-11-16 10:28 UTC (permalink / raw)
To: amd-gfx, hawking.zhang, john.clements, stanley.yang, equan,
Lijo.Lazar, KevinYang.Wang
Cc: Tao Zhou
If a GPU reset is triggered by a RAS fatal error, report that to the SMU
in the mode-1 reset message.
v2: move the mode-1 reset function to aldebaran_ppt.c, since it is
currently Aldebaran-specific.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
---
drivers/gpu/drm/amd/pm/inc/smu_v13_0.h | 3 +-
.../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 36 ++++++++++++++++++-
.../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 21 -----------
3 files changed, 37 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
index e5d3b0d1a032..bbc608c990b0 100644
--- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
@@ -29,6 +29,8 @@
#define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04
#define SMU13_DRIVER_IF_VERSION_ALDE 0x07
+#define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms
+
/* MP Apertures */
#define MP0_Public 0x03800000
#define MP0_SRAM 0x03900000
@@ -216,7 +218,6 @@ int smu_v13_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state)
int smu_v13_0_baco_enter(struct smu_context *smu);
int smu_v13_0_baco_exit(struct smu_context *smu);
-int smu_v13_0_mode1_reset(struct smu_context *smu);
int smu_v13_0_mode2_reset(struct smu_context *smu);
int smu_v13_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index 59a7d276541d..e50d4491aa96 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -1765,6 +1765,40 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
return sizeof(struct gpu_metrics_v1_3);
}
+static int aldebaran_mode1_reset(struct smu_context *smu)
+{
+ u32 smu_version, fatal_err, param;
+ int ret = 0;
+ struct amdgpu_device *adev = smu->adev;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ fatal_err = 0;
+ param = SMU_RESET_MODE_1;
+
+ /*
+ * PM FW support SMU_MSG_GfxDeviceDriverReset from 68.07
+ */
+ smu_cmn_get_smc_version(smu, NULL, &smu_version);
+ if (smu_version < 0x00440700)
+ ret = smu_cmn_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL);
+ else {
+ /* fatal error triggered by ras, PMFW supports the flag
+ from 68.44.0 */
+ if ((smu_version >= 0x00442c00) && ras &&
+ atomic_read(&ras->in_recovery))
+ fatal_err = 1;
+
+ param |= (fatal_err << 16);
+ ret = smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_GfxDeviceDriverReset, param, NULL);
+ }
+
+ if (!ret)
+ msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS);
+
+ return ret;
+}
+
static int aldebaran_mode2_reset(struct smu_context *smu)
{
u32 smu_version;
@@ -1925,7 +1959,7 @@ static const struct pptable_funcs aldebaran_ppt_funcs = {
.get_gpu_metrics = aldebaran_get_gpu_metrics,
.mode1_reset_is_support = aldebaran_is_mode1_reset_supported,
.mode2_reset_is_support = aldebaran_is_mode2_reset_supported,
- .mode1_reset = smu_v13_0_mode1_reset,
+ .mode1_reset = aldebaran_mode1_reset,
.set_mp1_state = aldebaran_set_mp1_state,
.mode2_reset = aldebaran_mode2_reset,
.wait_for_event = smu_v13_0_wait_for_event,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 35145db6eedf..4d96099a9bb1 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -60,8 +60,6 @@ MODULE_FIRMWARE("amdgpu/aldebaran_smc.bin");
#define SMU13_VOLTAGE_SCALE 4
-#define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms
-
#define LINK_WIDTH_MAX 6
#define LINK_SPEED_MAX 3
@@ -1424,25 +1422,6 @@ int smu_v13_0_set_azalia_d3_pme(struct smu_context *smu)
return ret;
}
-int smu_v13_0_mode1_reset(struct smu_context *smu)
-{
- u32 smu_version;
- int ret = 0;
- /*
- * PM FW support SMU_MSG_GfxDeviceDriverReset from 68.07
- */
- smu_cmn_get_smc_version(smu, NULL, &smu_version);
- if (smu_version < 0x00440700)
- ret = smu_cmn_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL);
- else
- ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, SMU_RESET_MODE_1, NULL);
-
- if (!ret)
- msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS);
-
- return ret;
-}
-
static int smu_v13_0_wait_for_reset_complete(struct smu_context *smu,
uint64_t event_arg)
{
--
2.17.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] drm/amdgpu: support new mode-1 reset interface (v2)
2021-11-16 10:28 [PATCH] drm/amdgpu: support new mode-1 reset interface (v2) Tao Zhou
@ 2021-11-16 10:40 ` Lazar, Lijo
2021-11-16 11:57 ` Chen, Guchun
2021-11-17 1:49 ` Quan, Evan
1 sibling, 1 reply; 5+ messages in thread
From: Lazar, Lijo @ 2021-11-16 10:40 UTC (permalink / raw)
To: Tao Zhou, amd-gfx, hawking.zhang, john.clements, stanley.yang,
equan, KevinYang.Wang
On 11/16/2021 3:58 PM, Tao Zhou wrote:
> If gpu reset is triggered by ras fatal error, tell it to smu in mode-1
> reset message.
>
> v2: move mode-1 reset function to aldebaran_ppt.c since it's aldebaran
> specific currently.
>
> Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Thanks,
Lijo
> ---
> drivers/gpu/drm/amd/pm/inc/smu_v13_0.h | 3 +-
> .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 36 ++++++++++++++++++-
> .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 21 -----------
> 3 files changed, 37 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
> index e5d3b0d1a032..bbc608c990b0 100644
> --- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
> +++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
> @@ -29,6 +29,8 @@
> #define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04
> #define SMU13_DRIVER_IF_VERSION_ALDE 0x07
>
> +#define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms
> +
> /* MP Apertures */
> #define MP0_Public 0x03800000
> #define MP0_SRAM 0x03900000
> @@ -216,7 +218,6 @@ int smu_v13_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state)
> int smu_v13_0_baco_enter(struct smu_context *smu);
> int smu_v13_0_baco_exit(struct smu_context *smu);
>
> -int smu_v13_0_mode1_reset(struct smu_context *smu);
> int smu_v13_0_mode2_reset(struct smu_context *smu);
>
> int smu_v13_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type,
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
> index 59a7d276541d..e50d4491aa96 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
> @@ -1765,6 +1765,40 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
> return sizeof(struct gpu_metrics_v1_3);
> }
>
> +static int aldebaran_mode1_reset(struct smu_context *smu)
> +{
> + u32 smu_version, fatal_err, param;
> + int ret = 0;
> + struct amdgpu_device *adev = smu->adev;
> + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> +
> + fatal_err = 0;
> + param = SMU_RESET_MODE_1;
> +
> + /*
> + * PM FW support SMU_MSG_GfxDeviceDriverReset from 68.07
> + */
> + smu_cmn_get_smc_version(smu, NULL, &smu_version);
> + if (smu_version < 0x00440700)
> + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL);
> + else {
> + /* fatal error triggered by ras, PMFW supports the flag
> + from 68.44.0 */
> + if ((smu_version >= 0x00442c00) && ras &&
> + atomic_read(&ras->in_recovery))
> + fatal_err = 1;
> +
> + param |= (fatal_err << 16);
> + ret = smu_cmn_send_smc_msg_with_param(smu,
> + SMU_MSG_GfxDeviceDriverReset, param, NULL);
> + }
> +
> + if (!ret)
> + msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS);
> +
> + return ret;
> +}
> +
> static int aldebaran_mode2_reset(struct smu_context *smu)
> {
> u32 smu_version;
> @@ -1925,7 +1959,7 @@ static const struct pptable_funcs aldebaran_ppt_funcs = {
> .get_gpu_metrics = aldebaran_get_gpu_metrics,
> .mode1_reset_is_support = aldebaran_is_mode1_reset_supported,
> .mode2_reset_is_support = aldebaran_is_mode2_reset_supported,
> - .mode1_reset = smu_v13_0_mode1_reset,
> + .mode1_reset = aldebaran_mode1_reset,
> .set_mp1_state = aldebaran_set_mp1_state,
> .mode2_reset = aldebaran_mode2_reset,
> .wait_for_event = smu_v13_0_wait_for_event,
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
> index 35145db6eedf..4d96099a9bb1 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
> @@ -60,8 +60,6 @@ MODULE_FIRMWARE("amdgpu/aldebaran_smc.bin");
>
> #define SMU13_VOLTAGE_SCALE 4
>
> -#define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms
> -
> #define LINK_WIDTH_MAX 6
> #define LINK_SPEED_MAX 3
>
> @@ -1424,25 +1422,6 @@ int smu_v13_0_set_azalia_d3_pme(struct smu_context *smu)
> return ret;
> }
>
> -int smu_v13_0_mode1_reset(struct smu_context *smu)
> -{
> - u32 smu_version;
> - int ret = 0;
> - /*
> - * PM FW support SMU_MSG_GfxDeviceDriverReset from 68.07
> - */
> - smu_cmn_get_smc_version(smu, NULL, &smu_version);
> - if (smu_version < 0x00440700)
> - ret = smu_cmn_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL);
> - else
> - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, SMU_RESET_MODE_1, NULL);
> -
> - if (!ret)
> - msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS);
> -
> - return ret;
> -}
> -
> static int smu_v13_0_wait_for_reset_complete(struct smu_context *smu,
> uint64_t event_arg)
> {
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH] drm/amdgpu: support new mode-1 reset interface (v2)
2021-11-16 10:40 ` Lazar, Lijo
@ 2021-11-16 11:57 ` Chen, Guchun
0 siblings, 0 replies; 5+ messages in thread
From: Chen, Guchun @ 2021-11-16 11:57 UTC (permalink / raw)
To: Lazar, Lijo, Zhou1, Tao, amd-gfx, Zhang, Hawking, Clements, John,
Yang, Stanley, Quan, Evan, Wang, Yang(Kevin)
[Public]
There is a coding style problem.
Braces ({}) are also needed around the branch after if (smu_version < 0x00440700), because the else branch already uses them.
if (smu_version < 0x00440700)
> + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL);
> + else {
> + /* fatal error triggered by ras, PMFW supports the flag
> + from 68.44.0 */
> + if ((smu_version >= 0x00442c00) && ras &&
> + atomic_read(&ras->in_recovery))
> + fatal_err = 1;
> +
> + param |= (fatal_err << 16);
> + ret = smu_cmn_send_smc_msg_with_param(smu,
> + SMU_MSG_GfxDeviceDriverReset, param, NULL);
> + }
Regards,
Guchun
-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Lazar, Lijo
Sent: Tuesday, November 16, 2021 6:41 PM
To: Zhou1, Tao <Tao.Zhou1@amd.com>; amd-gfx@lists.freedesktop.org; Zhang, Hawking <Hawking.Zhang@amd.com>; Clements, John <John.Clements@amd.com>; Yang, Stanley <Stanley.Yang@amd.com>; Quan, Evan <Evan.Quan@amd.com>; Wang, Yang(Kevin) <KevinYang.Wang@amd.com>
Subject: Re: [PATCH] drm/amdgpu: support new mode-1 reset interface (v2)
On 11/16/2021 3:58 PM, Tao Zhou wrote:
> If gpu reset is triggered by ras fatal error, tell it to smu in mode-1
> reset message.
>
> v2: move mode-1 reset function to aldebaran_ppt.c since it's aldebaran
> specific currently.
>
> Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Thanks,
Lijo
> ---
> drivers/gpu/drm/amd/pm/inc/smu_v13_0.h | 3 +-
> .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 36 ++++++++++++++++++-
> .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 21 -----------
> 3 files changed, 37 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
> b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
> index e5d3b0d1a032..bbc608c990b0 100644
> --- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
> +++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
> @@ -29,6 +29,8 @@
> #define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04
> #define SMU13_DRIVER_IF_VERSION_ALDE 0x07
>
> +#define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms
> +
> /* MP Apertures */
> #define MP0_Public 0x03800000
> #define MP0_SRAM 0x03900000
> @@ -216,7 +218,6 @@ int smu_v13_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state)
> int smu_v13_0_baco_enter(struct smu_context *smu);
> int smu_v13_0_baco_exit(struct smu_context *smu);
>
> -int smu_v13_0_mode1_reset(struct smu_context *smu);
> int smu_v13_0_mode2_reset(struct smu_context *smu);
>
> int smu_v13_0_get_dpm_ultimate_freq(struct smu_context *smu, enum
> smu_clk_type clk_type, diff --git
> a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
> index 59a7d276541d..e50d4491aa96 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
> @@ -1765,6 +1765,40 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
> return sizeof(struct gpu_metrics_v1_3);
> }
>
> +static int aldebaran_mode1_reset(struct smu_context *smu) {
> + u32 smu_version, fatal_err, param;
> + int ret = 0;
> + struct amdgpu_device *adev = smu->adev;
> + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> +
> + fatal_err = 0;
> + param = SMU_RESET_MODE_1;
> +
> + /*
> + * PM FW support SMU_MSG_GfxDeviceDriverReset from 68.07
> + */
> + smu_cmn_get_smc_version(smu, NULL, &smu_version);
> + if (smu_version < 0x00440700)
> + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL);
> + else {
> + /* fatal error triggered by ras, PMFW supports the flag
> + from 68.44.0 */
> + if ((smu_version >= 0x00442c00) && ras &&
> + atomic_read(&ras->in_recovery))
> + fatal_err = 1;
> +
> + param |= (fatal_err << 16);
> + ret = smu_cmn_send_smc_msg_with_param(smu,
> + SMU_MSG_GfxDeviceDriverReset, param, NULL);
> + }
> +
> + if (!ret)
> + msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS);
> +
> + return ret;
> +}
> +
> static int aldebaran_mode2_reset(struct smu_context *smu)
> {
> u32 smu_version;
> @@ -1925,7 +1959,7 @@ static const struct pptable_funcs aldebaran_ppt_funcs = {
> .get_gpu_metrics = aldebaran_get_gpu_metrics,
> .mode1_reset_is_support = aldebaran_is_mode1_reset_supported,
> .mode2_reset_is_support = aldebaran_is_mode2_reset_supported,
> - .mode1_reset = smu_v13_0_mode1_reset,
> + .mode1_reset = aldebaran_mode1_reset,
> .set_mp1_state = aldebaran_set_mp1_state,
> .mode2_reset = aldebaran_mode2_reset,
> .wait_for_event = smu_v13_0_wait_for_event, diff --git
> a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
> index 35145db6eedf..4d96099a9bb1 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
> @@ -60,8 +60,6 @@ MODULE_FIRMWARE("amdgpu/aldebaran_smc.bin");
>
> #define SMU13_VOLTAGE_SCALE 4
>
> -#define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms
> -
> #define LINK_WIDTH_MAX 6
> #define LINK_SPEED_MAX 3
>
> @@ -1424,25 +1422,6 @@ int smu_v13_0_set_azalia_d3_pme(struct smu_context *smu)
> return ret;
> }
>
> -int smu_v13_0_mode1_reset(struct smu_context *smu) -{
> - u32 smu_version;
> - int ret = 0;
> - /*
> - * PM FW support SMU_MSG_GfxDeviceDriverReset from 68.07
> - */
> - smu_cmn_get_smc_version(smu, NULL, &smu_version);
> - if (smu_version < 0x00440700)
> - ret = smu_cmn_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL);
> - else
> - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, SMU_RESET_MODE_1, NULL);
> -
> - if (!ret)
> - msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS);
> -
> - return ret;
> -}
> -
> static int smu_v13_0_wait_for_reset_complete(struct smu_context *smu,
> uint64_t event_arg)
> {
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH] drm/amdgpu: support new mode-1 reset interface (v2)
2021-11-16 10:28 [PATCH] drm/amdgpu: support new mode-1 reset interface (v2) Tao Zhou
2021-11-16 10:40 ` Lazar, Lijo
@ 2021-11-17 1:49 ` Quan, Evan
2021-11-17 2:49 ` Zhou1, Tao
1 sibling, 1 reply; 5+ messages in thread
From: Quan, Evan @ 2021-11-17 1:49 UTC (permalink / raw)
To: Zhou1, Tao, amd-gfx, Zhang, Hawking, Clements, John, Yang,
Stanley, Lazar, Lijo, Wang, Yang(Kevin)
[AMD Official Use Only]
With the concern from Guchun addressed, the patch is reviewed-by: Evan Quan <evan.quan@amd.com>
> -----Original Message-----
> From: Zhou1, Tao <Tao.Zhou1@amd.com>
> Sent: Tuesday, November 16, 2021 6:29 PM
> To: amd-gfx@lists.freedesktop.org; Zhang, Hawking
> <Hawking.Zhang@amd.com>; Clements, John <John.Clements@amd.com>;
> Yang, Stanley <Stanley.Yang@amd.com>; Quan, Evan
> <Evan.Quan@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; Wang,
> Yang(Kevin) <KevinYang.Wang@amd.com>
> Cc: Zhou1, Tao <Tao.Zhou1@amd.com>
> Subject: [PATCH] drm/amdgpu: support new mode-1 reset interface (v2)
>
> If gpu reset is triggered by ras fatal error, tell it to smu in mode-1
> reset message.
>
> v2: move mode-1 reset function to aldebaran_ppt.c since it's aldebaran
> specific currently.
>
> Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
> ---
> drivers/gpu/drm/amd/pm/inc/smu_v13_0.h | 3 +-
> .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 36
> ++++++++++++++++++-
> .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 21 -----------
> 3 files changed, 37 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
> b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
> index e5d3b0d1a032..bbc608c990b0 100644
> --- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
> +++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
> @@ -29,6 +29,8 @@
> #define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04
> #define SMU13_DRIVER_IF_VERSION_ALDE 0x07
>
> +#define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms
> +
> /* MP Apertures */
> #define MP0_Public 0x03800000
> #define MP0_SRAM 0x03900000
> @@ -216,7 +218,6 @@ int smu_v13_0_baco_set_state(struct smu_context
> *smu, enum smu_baco_state state)
> int smu_v13_0_baco_enter(struct smu_context *smu);
> int smu_v13_0_baco_exit(struct smu_context *smu);
>
> -int smu_v13_0_mode1_reset(struct smu_context *smu);
> int smu_v13_0_mode2_reset(struct smu_context *smu);
>
> int smu_v13_0_get_dpm_ultimate_freq(struct smu_context *smu, enum
> smu_clk_type clk_type,
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
> index 59a7d276541d..e50d4491aa96 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
> @@ -1765,6 +1765,40 @@ static ssize_t aldebaran_get_gpu_metrics(struct
> smu_context *smu,
> return sizeof(struct gpu_metrics_v1_3);
> }
>
> +static int aldebaran_mode1_reset(struct smu_context *smu)
> +{
> + u32 smu_version, fatal_err, param;
> + int ret = 0;
> + struct amdgpu_device *adev = smu->adev;
> + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> +
> + fatal_err = 0;
> + param = SMU_RESET_MODE_1;
> +
> + /*
> + * PM FW support SMU_MSG_GfxDeviceDriverReset from 68.07
> + */
> + smu_cmn_get_smc_version(smu, NULL, &smu_version);
> + if (smu_version < 0x00440700)
> + ret = smu_cmn_send_smc_msg(smu,
> SMU_MSG_Mode1Reset, NULL);
> + else {
> + /* fatal error triggered by ras, PMFW supports the flag
> + from 68.44.0 */
> + if ((smu_version >= 0x00442c00) && ras &&
> + atomic_read(&ras->in_recovery))
> + fatal_err = 1;
> +
> + param |= (fatal_err << 16);
> + ret = smu_cmn_send_smc_msg_with_param(smu,
> + SMU_MSG_GfxDeviceDriverReset,
> param, NULL);
> + }
> +
> + if (!ret)
> + msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS);
> +
> + return ret;
> +}
> +
> static int aldebaran_mode2_reset(struct smu_context *smu)
> {
> u32 smu_version;
> @@ -1925,7 +1959,7 @@ static const struct pptable_funcs
> aldebaran_ppt_funcs = {
> .get_gpu_metrics = aldebaran_get_gpu_metrics,
> .mode1_reset_is_support = aldebaran_is_mode1_reset_supported,
> .mode2_reset_is_support = aldebaran_is_mode2_reset_supported,
> - .mode1_reset = smu_v13_0_mode1_reset,
> + .mode1_reset = aldebaran_mode1_reset,
> .set_mp1_state = aldebaran_set_mp1_state,
> .mode2_reset = aldebaran_mode2_reset,
> .wait_for_event = smu_v13_0_wait_for_event,
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
> index 35145db6eedf..4d96099a9bb1 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
> @@ -60,8 +60,6 @@ MODULE_FIRMWARE("amdgpu/aldebaran_smc.bin");
>
> #define SMU13_VOLTAGE_SCALE 4
>
> -#define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms
> -
> #define LINK_WIDTH_MAX 6
> #define LINK_SPEED_MAX 3
>
> @@ -1424,25 +1422,6 @@ int smu_v13_0_set_azalia_d3_pme(struct
> smu_context *smu)
> return ret;
> }
>
> -int smu_v13_0_mode1_reset(struct smu_context *smu)
> -{
> - u32 smu_version;
> - int ret = 0;
> - /*
> - * PM FW support SMU_MSG_GfxDeviceDriverReset from 68.07
> - */
> - smu_cmn_get_smc_version(smu, NULL, &smu_version);
> - if (smu_version < 0x00440700)
> - ret = smu_cmn_send_smc_msg(smu,
> SMU_MSG_Mode1Reset, NULL);
> - else
> - ret = smu_cmn_send_smc_msg_with_param(smu,
> SMU_MSG_GfxDeviceDriverReset, SMU_RESET_MODE_1, NULL);
> -
> - if (!ret)
> - msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS);
> -
> - return ret;
> -}
> -
> static int smu_v13_0_wait_for_reset_complete(struct smu_context *smu,
> uint64_t event_arg)
> {
> --
> 2.17.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH] drm/amdgpu: support new mode-1 reset interface (v2)
2021-11-17 1:49 ` Quan, Evan
@ 2021-11-17 2:49 ` Zhou1, Tao
0 siblings, 0 replies; 5+ messages in thread
From: Zhou1, Tao @ 2021-11-17 2:49 UTC (permalink / raw)
To: Quan, Evan, amd-gfx, Zhang, Hawking, Clements, John, Yang,
Stanley, Lazar, Lijo, Wang, Yang(Kevin)
Thanks for your review; I'll add the {} before pushing.
> -----Original Message-----
> From: Quan, Evan <Evan.Quan@amd.com>
> Sent: Wednesday, November 17, 2021 9:50 AM
> To: Zhou1, Tao <Tao.Zhou1@amd.com>; amd-gfx@lists.freedesktop.org; Zhang,
> Hawking <Hawking.Zhang@amd.com>; Clements, John
> <John.Clements@amd.com>; Yang, Stanley <Stanley.Yang@amd.com>; Lazar,
> Lijo <Lijo.Lazar@amd.com>; Wang, Yang(Kevin) <KevinYang.Wang@amd.com>
> Subject: RE: [PATCH] drm/amdgpu: support new mode-1 reset interface (v2)
>
> [AMD Official Use Only]
>
> With the concern from Guchun addressed, the patch is reviewed-by: Evan Quan
> <evan.quan@amd.com>
>
> > -----Original Message-----
> > From: Zhou1, Tao <Tao.Zhou1@amd.com>
> > Sent: Tuesday, November 16, 2021 6:29 PM
> > To: amd-gfx@lists.freedesktop.org; Zhang, Hawking
> > <Hawking.Zhang@amd.com>; Clements, John <John.Clements@amd.com>;
> Yang,
> > Stanley <Stanley.Yang@amd.com>; Quan, Evan <Evan.Quan@amd.com>;
> Lazar,
> > Lijo <Lijo.Lazar@amd.com>; Wang,
> > Yang(Kevin) <KevinYang.Wang@amd.com>
> > Cc: Zhou1, Tao <Tao.Zhou1@amd.com>
> > Subject: [PATCH] drm/amdgpu: support new mode-1 reset interface (v2)
> >
> > If gpu reset is triggered by ras fatal error, tell it to smu in mode-1
> > reset message.
> >
> > v2: move mode-1 reset function to aldebaran_ppt.c since it's aldebaran
> > specific currently.
> >
> > Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
> > ---
> > drivers/gpu/drm/amd/pm/inc/smu_v13_0.h | 3 +-
> > .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 36
> > ++++++++++++++++++-
> > .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 21 -----------
> > 3 files changed, 37 insertions(+), 23 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
> > b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
> > index e5d3b0d1a032..bbc608c990b0 100644
> > --- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
> > +++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
> > @@ -29,6 +29,8 @@
> > #define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04 #define
> > SMU13_DRIVER_IF_VERSION_ALDE 0x07
> >
> > +#define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms
> > +
> > /* MP Apertures */
> > #define MP0_Public 0x03800000
> > #define MP0_SRAM 0x03900000
> > @@ -216,7 +218,6 @@ int smu_v13_0_baco_set_state(struct smu_context
> > *smu, enum smu_baco_state state) int smu_v13_0_baco_enter(struct
> > smu_context *smu); int smu_v13_0_baco_exit(struct smu_context *smu);
> >
> > -int smu_v13_0_mode1_reset(struct smu_context *smu); int
> > smu_v13_0_mode2_reset(struct smu_context *smu);
> >
> > int smu_v13_0_get_dpm_ultimate_freq(struct smu_context *smu, enum
> > smu_clk_type clk_type, diff --git
> > a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
> > b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
> > index 59a7d276541d..e50d4491aa96 100644
> > --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
> > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
> > @@ -1765,6 +1765,40 @@ static ssize_t aldebaran_get_gpu_metrics(struct
> > smu_context *smu,
> > return sizeof(struct gpu_metrics_v1_3); }
> >
> > +static int aldebaran_mode1_reset(struct smu_context *smu) {
> > + u32 smu_version, fatal_err, param;
> > + int ret = 0;
> > + struct amdgpu_device *adev = smu->adev;
> > + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> > +
> > + fatal_err = 0;
> > + param = SMU_RESET_MODE_1;
> > +
> > + /*
> > + * PM FW support SMU_MSG_GfxDeviceDriverReset from 68.07
> > + */
> > + smu_cmn_get_smc_version(smu, NULL, &smu_version);
> > + if (smu_version < 0x00440700)
> > + ret = smu_cmn_send_smc_msg(smu,
> > SMU_MSG_Mode1Reset, NULL);
> > + else {
> > + /* fatal error triggered by ras, PMFW supports the flag
> > + from 68.44.0 */
> > + if ((smu_version >= 0x00442c00) && ras &&
> > + atomic_read(&ras->in_recovery))
> > + fatal_err = 1;
> > +
> > + param |= (fatal_err << 16);
> > + ret = smu_cmn_send_smc_msg_with_param(smu,
> > + SMU_MSG_GfxDeviceDriverReset,
> > param, NULL);
> > + }
> > +
> > + if (!ret)
> > + msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS);
> > +
> > + return ret;
> > +}
> > +
> > static int aldebaran_mode2_reset(struct smu_context *smu) {
> > u32 smu_version;
> > @@ -1925,7 +1959,7 @@ static const struct pptable_funcs
> > aldebaran_ppt_funcs = {
> > .get_gpu_metrics = aldebaran_get_gpu_metrics,
> > .mode1_reset_is_support = aldebaran_is_mode1_reset_supported,
> > .mode2_reset_is_support = aldebaran_is_mode2_reset_supported,
> > - .mode1_reset = smu_v13_0_mode1_reset,
> > + .mode1_reset = aldebaran_mode1_reset,
> > .set_mp1_state = aldebaran_set_mp1_state,
> > .mode2_reset = aldebaran_mode2_reset,
> > .wait_for_event = smu_v13_0_wait_for_event, diff --git
> > a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
> > b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
> > index 35145db6eedf..4d96099a9bb1 100644
> > --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
> > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
> > @@ -60,8 +60,6 @@ MODULE_FIRMWARE("amdgpu/aldebaran_smc.bin");
> >
> > #define SMU13_VOLTAGE_SCALE 4
> >
> > -#define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms
> > -
> > #define LINK_WIDTH_MAX 6
> > #define LINK_SPEED_MAX 3
> >
> > @@ -1424,25 +1422,6 @@ int smu_v13_0_set_azalia_d3_pme(struct
> > smu_context *smu)
> > return ret;
> > }
> >
> > -int smu_v13_0_mode1_reset(struct smu_context *smu) -{
> > - u32 smu_version;
> > - int ret = 0;
> > - /*
> > - * PM FW support SMU_MSG_GfxDeviceDriverReset from 68.07
> > - */
> > - smu_cmn_get_smc_version(smu, NULL, &smu_version);
> > - if (smu_version < 0x00440700)
> > - ret = smu_cmn_send_smc_msg(smu,
> > SMU_MSG_Mode1Reset, NULL);
> > - else
> > - ret = smu_cmn_send_smc_msg_with_param(smu,
> > SMU_MSG_GfxDeviceDriverReset, SMU_RESET_MODE_1, NULL);
> > -
> > - if (!ret)
> > - msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS);
> > -
> > - return ret;
> > -}
> > -
> > static int smu_v13_0_wait_for_reset_complete(struct smu_context *smu,
> > uint64_t event_arg)
> > {
> > --
> > 2.17.1
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2021-11-17 2:49 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-16 10:28 [PATCH] drm/amdgpu: support new mode-1 reset interface (v2) Tao Zhou
2021-11-16 10:40 ` Lazar, Lijo
2021-11-16 11:57 ` Chen, Guchun
2021-11-17 1:49 ` Quan, Evan
2021-11-17 2:49 ` Zhou1, Tao
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.