All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/4] drm/amd/powerplay: add SMU mode1 reset
@ 2020-07-14  2:29 Wenhui Sheng
  2020-07-14  2:29 ` [PATCH 2/4] drm/amdgpu: RAS emergency restart logic refine Wenhui Sheng
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Wenhui Sheng @ 2020-07-14  2:29 UTC (permalink / raw)
  To: amd-gfx; +Cc: Likun Gao, Wenhui Sheng, Hawking Zhang

From PM FW 58.26.0 for sienna cichlid, SMU mode1 reset
is support, driver sends PPSMC_MSG_Mode1Reset message
to PM FW could trigger this reset.

v2: add mode1 reset dpm interface
v3: change maro name

Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Signed-off-by: Wenhui Sheng <Wenhui.Sheng@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c       | 20 +++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h       |  3 ++
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c    | 34 +++++++++++++++++++
 .../gpu/drm/amd/powerplay/inc/amdgpu_smu.h    |  4 +++
 drivers/gpu/drm/amd/powerplay/inc/smu_types.h |  1 +
 drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h |  2 ++
 .../drm/amd/powerplay/sienna_cichlid_ppt.c    | 29 ++++++++++++++--
 drivers/gpu/drm/amd/powerplay/smu_v11_0.c     | 13 +++++++
 8 files changed, 104 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
index 65472b3dd815..16668fc52d0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
@@ -1141,6 +1141,26 @@ int amdgpu_dpm_baco_reset(struct amdgpu_device *adev)
 	return 0;
 }
 
+bool amdgpu_dpm_is_mode1_reset_supported(struct amdgpu_device *adev)
+{
+	struct smu_context *smu = &adev->smu;
+
+	if (is_support_sw_smu(adev))
+		return smu_mode1_reset_is_support(smu);
+
+	return false;
+}
+
+int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev)
+{
+	struct smu_context *smu = &adev->smu;
+
+	if (is_support_sw_smu(adev))
+		return smu_mode1_reset(smu);
+
+	return -EOPNOTSUPP;
+}
+
 int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev,
 				    enum PP_SMC_POWER_PROFILE type,
 				    bool en)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index 6a8aae70a0e6..7f3cd7185650 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -529,6 +529,9 @@ int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev);
 
 bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev);
 
+bool amdgpu_dpm_is_mode1_reset_supported(struct amdgpu_device *adev);
+int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev);
+
 int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev,
 			     enum pp_mp1_state mp1_state);
 
diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index fe4948aa662f..b5a7422d9548 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -2737,6 +2737,40 @@ int smu_baco_exit(struct smu_context *smu)
 	return ret;
 }
 
+bool smu_mode1_reset_is_support(struct smu_context *smu)
+{
+	bool ret = false;
+
+	if (!smu->pm_enabled)
+		return false;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs && smu->ppt_funcs->mode1_reset_is_support)
+		ret = smu->ppt_funcs->mode1_reset_is_support(smu);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
+int smu_mode1_reset(struct smu_context *smu)
+{
+	int ret = 0;
+
+	if (!smu->pm_enabled)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&smu->mutex);
+
+	if (smu->ppt_funcs->mode1_reset)
+		ret = smu->ppt_funcs->mode1_reset(smu);
+
+	mutex_unlock(&smu->mutex);
+
+	return ret;
+}
+
 int smu_mode2_reset(struct smu_context *smu)
 {
 	int ret = 0;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
index 7b349e038972..ba59620950d7 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
@@ -561,6 +561,8 @@ struct pptable_funcs {
 	int (*baco_set_state)(struct smu_context *smu, enum smu_baco_state state);
 	int (*baco_enter)(struct smu_context *smu);
 	int (*baco_exit)(struct smu_context *smu);
+	bool (*mode1_reset_is_support)(struct smu_context *smu);
+	int (*mode1_reset)(struct smu_context *smu);
 	int (*mode2_reset)(struct smu_context *smu);
 	int (*get_dpm_ultimate_freq)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *min, uint32_t *max);
 	int (*set_soft_freq_limited_range)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t min, uint32_t max);
@@ -672,6 +674,8 @@ int smu_baco_get_state(struct smu_context *smu, enum smu_baco_state *state);
 int smu_baco_enter(struct smu_context *smu);
 int smu_baco_exit(struct smu_context *smu);
 
+bool smu_mode1_reset_is_support(struct smu_context *smu);
+int smu_mode1_reset(struct smu_context *smu);
 int smu_mode2_reset(struct smu_context *smu);
 
 extern int smu_get_atom_data_table(struct smu_context *smu, uint32_t table,
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_types.h b/drivers/gpu/drm/amd/powerplay/inc/smu_types.h
index dff2295705be..7b585e205a5a 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu_types.h
@@ -173,6 +173,7 @@
 	__SMU_DUMMY_MAP(GmiPwrDnControl), \
 	__SMU_DUMMY_MAP(DAL_DISABLE_DUMMY_PSTATE_CHANGE), \
 	__SMU_DUMMY_MAP(DAL_ENABLE_DUMMY_PSTATE_CHANGE), \
+	__SMU_DUMMY_MAP(Mode1Reset), \
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)	SMU_MSG_##type
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
index b2f65438ad8d..6a496e70917d 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
@@ -252,6 +252,8 @@ int smu_v11_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state)
 int smu_v11_0_baco_enter(struct smu_context *smu);
 int smu_v11_0_baco_exit(struct smu_context *smu);
 
+int smu_v11_0_mode1_reset(struct smu_context *smu);
+
 int smu_v11_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type,
 						 uint32_t *min, uint32_t *max);
 
diff --git a/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c
index 3efa41444ddf..cd8590aac85d 100644
--- a/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c
@@ -39,8 +39,8 @@
 #include "nbio/nbio_2_3_sh_mask.h"
 #include "thm/thm_11_0_2_offset.h"
 #include "thm/thm_11_0_2_sh_mask.h"
-
-#include "asic_reg/mp/mp_11_0_sh_mask.h"
+#include "mp/mp_11_0_offset.h"
+#include "mp/mp_11_0_sh_mask.h"
 
 /*
  * DO NOT use these for err/warn/info/debug messages.
@@ -116,6 +116,7 @@ static struct smu_11_0_cmn2aisc_mapping sienna_cichlid_message_map[SMU_MSG_MAX_C
 	MSG_MAP(PowerDownJpeg,			PPSMC_MSG_PowerDownJpeg),
 	MSG_MAP(BacoAudioD3PME,			PPSMC_MSG_BacoAudioD3PME),
 	MSG_MAP(ArmD3,				PPSMC_MSG_ArmD3),
+	MSG_MAP(Mode1Reset,			PPSMC_MSG_Mode1Reset),
 };
 
 static struct smu_11_0_cmn2aisc_mapping sienna_cichlid_clk_map[SMU_CLK_COUNT] = {
@@ -1767,6 +1768,28 @@ static bool sienna_cichlid_is_baco_supported(struct smu_context *smu)
 	return (val & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) ? true : false;
 }
 
+static bool sienna_cichlid_is_mode1_reset_supported(struct smu_context *smu)
+{
+	struct amdgpu_device *adev = smu->adev;
+	uint32_t val;
+	u32 smu_version;
+
+	/**
+	 * SRIOV env will not support SMU mode1 reset
+	 * PM FW support mode1 reset from 58.26
+	 */
+	smu_get_smc_version(smu, NULL, &smu_version);
+	if (amdgpu_sriov_vf(adev) || (smu_version < 0x003a1a00))
+		return false;
+
+	/**
+	 * mode1 reset relies on PSP, so we should check if
+	 * PSP is alive.
+	 */
+	val = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+	return val != 0x0;
+}
+
 static int sienna_cichlid_set_thermal_range(struct smu_context *smu,
 				       struct smu_temperature_range range)
 {
@@ -2537,6 +2560,8 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = {
 	.baco_set_state = smu_v11_0_baco_set_state,
 	.baco_enter = smu_v11_0_baco_enter,
 	.baco_exit = smu_v11_0_baco_exit,
+	.mode1_reset_is_support = sienna_cichlid_is_mode1_reset_supported,
+	.mode1_reset = smu_v11_0_mode1_reset,
 	.get_dpm_ultimate_freq = sienna_cichlid_get_dpm_ultimate_freq,
 	.set_soft_freq_limited_range = sienna_cichlid_set_soft_freq_limited_range,
 	.override_pcie_parameters = smu_v11_0_override_pcie_parameters,
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index 48e15885e9c3..85073d0b94f7 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -63,6 +63,8 @@ MODULE_FIRMWARE("amdgpu/sienna_cichlid_smc.bin");
 
 #define SMU11_VOLTAGE_SCALE 4
 
+#define SMU11_MODE1_RESET_WAIT_TIME_IN_MS 500  //500ms
+
 static int smu_v11_0_send_msg_without_waiting(struct smu_context *smu,
 					      uint16_t msg)
 {
@@ -1741,6 +1743,17 @@ int smu_v11_0_baco_exit(struct smu_context *smu)
 	return ret;
 }
 
+int smu_v11_0_mode1_reset(struct smu_context *smu)
+{
+	int ret = 0;
+
+	ret = smu_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL);
+	if (!ret)
+		msleep(SMU11_MODE1_RESET_WAIT_TIME_IN_MS);
+
+	return ret;
+}
+
 int smu_v11_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type,
 						 uint32_t *min, uint32_t *max)
 {
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/4] drm/amdgpu: RAS emergency restart logic refine
  2020-07-14  2:29 [PATCH 1/4] drm/amd/powerplay: add SMU mode1 reset Wenhui Sheng
@ 2020-07-14  2:29 ` Wenhui Sheng
  2020-07-14  2:29 ` [PATCH 3/4] drm/amdgpu: enable mode1 reset Wenhui Sheng
  2020-07-14  2:29 ` [PATCH 4/4] drm/amdgpu: add module parameter choose reset mode Wenhui Sheng
  2 siblings, 0 replies; 10+ messages in thread
From: Wenhui Sheng @ 2020-07-14  2:29 UTC (permalink / raw)
  To: amd-gfx; +Cc: Likun Gao, Wenhui Sheng, Hawking Zhang

If we are in RAS triggered situation and
BACO isn't support, emergency restart is needed,
and this code is only needed for some specific
cases(vega20 with given smu fw version).

After we add smu mode1 reset for sienna cichlid, we
need to share AMD_RESET_METHOD_MODE1 with psp mode1 reset,
so in amdgpu_device_gpu_recover, we need differentiate
which mode1 reset we are using, then decide if it's
a full reset and then decide if emergency restart is needed,
the logic will become much more complex.

After discussion with Hawking, move emergency restart logic
to an independent function.

Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Signed-off-by: Wenhui Sheng <Wenhui.Sheng@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 23 +++++++++++-----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c    | 11 +++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h    |  1 +
 3 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9a0319605489..e0e7da8573b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4275,18 +4275,19 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	struct amdgpu_hive_info *hive = NULL;
 	struct amdgpu_device *tmp_adev = NULL;
 	int i, r = 0;
-	bool in_ras_intr = amdgpu_ras_intr_triggered();
-	bool use_baco =
-		(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
-		true : false;
+	bool need_emergency_restart = false;
 	bool audio_suspended = false;
 
+	/**
+	 * Special case: RAS triggered and full reset isn't supported
+	 */
+	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
+
 	/*
 	 * Flush RAM to disk so that after reboot
 	 * the user can read log and see why the system rebooted.
 	 */
-	if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
-
+	if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
 		DRM_WARN("Emergency reboot.");
 
 		ksys_sync_helper();
@@ -4294,7 +4295,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	}
 
 	dev_info(adev->dev, "GPU %s begin!\n",
-		(in_ras_intr && !use_baco) ? "jobs stop":"reset");
+		need_emergency_restart ? "jobs stop":"reset");
 
 	/*
 	 * Here we trylock to avoid chain of resets executing from
@@ -4366,7 +4367,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 		amdgpu_fbdev_set_suspend(tmp_adev, 1);
 
 		/* disable ras on ALL IPs */
-		if (!(in_ras_intr && !use_baco) &&
+		if (!need_emergency_restart &&
 		      amdgpu_device_ip_need_full_reset(tmp_adev))
 			amdgpu_ras_suspend(tmp_adev);
 
@@ -4378,12 +4379,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
 
-			if (in_ras_intr && !use_baco)
+			if (need_emergency_restart)
 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
 		}
 	}
 
-	if (in_ras_intr && !use_baco)
+	if (need_emergency_restart)
 		goto skip_sched_resume;
 
 	/*
@@ -4460,7 +4461,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 skip_sched_resume:
 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
 		/*unlock kfd: SRIOV would do it separately */
-		if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
+		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
 	                amdgpu_amdkfd_post_reset(tmp_adev);
 		if (audio_suspended)
 			amdgpu_device_resume_display_audio(tmp_adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 3a3fa8567c94..6f06e1214622 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2135,3 +2135,14 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
 		amdgpu_ras_reset_gpu(adev);
 	}
 }
+
+bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev)
+{
+	if (adev->asic_type == CHIP_VEGA20 &&
+	    adev->pm.fw_version <= 0x283400) {
+		return !(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) &&
+				amdgpu_ras_intr_triggered();
+	}
+
+	return false;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index e7df5d8429f8..b2667342cf67 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -633,4 +633,5 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev);
 
 void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready);
 
+bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev);
 #endif
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 3/4] drm/amdgpu: enable mode1 reset
  2020-07-14  2:29 [PATCH 1/4] drm/amd/powerplay: add SMU mode1 reset Wenhui Sheng
  2020-07-14  2:29 ` [PATCH 2/4] drm/amdgpu: RAS emergency restart logic refine Wenhui Sheng
@ 2020-07-14  2:29 ` Wenhui Sheng
  2020-07-14  2:29 ` [PATCH 4/4] drm/amdgpu: add module parameter choose reset mode Wenhui Sheng
  2 siblings, 0 replies; 10+ messages in thread
From: Wenhui Sheng @ 2020-07-14  2:29 UTC (permalink / raw)
  To: amd-gfx; +Cc: Likun Gao, Wenhui Sheng, Hawking Zhang

For sienna cichlid, add mode1 reset path for sGPU.

v2: hiding MP0/MP1 mode1 reset under AMD_RESET_METHOD_MODE1
v3: split emergency restart logic to a new patch

Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Signed-off-by: Wenhui Sheng <Wenhui.Sheng@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/nv.c               | 19 ++++++++++++-------
 drivers/gpu/drm/amd/powerplay/navi10_ppt.c    |  2 +-
 .../drm/amd/powerplay/sienna_cichlid_ppt.c    |  2 +-
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 356849136d1d..9f1240bd0310 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -265,17 +265,21 @@ static int nv_asic_mode1_reset(struct amdgpu_device *adev)
 
 	amdgpu_atombios_scratch_regs_engine_hung(adev, true);
 
-	dev_info(adev->dev, "GPU mode1 reset\n");
-
 	/* disable BM */
 	pci_clear_master(adev->pdev);
 
 	pci_save_state(adev->pdev);
 
-	ret = psp_gpu_reset(adev);
+	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
+		dev_info(adev->dev, "GPU smu mode1 reset\n");
+		ret = amdgpu_dpm_mode1_reset(adev);
+	} else {
+		dev_info(adev->dev, "GPU psp mode1 reset\n");
+		ret = psp_gpu_reset(adev);
+	}
+
 	if (ret)
 		dev_err(adev->dev, "GPU mode1 reset failed\n");
-
 	pci_restore_state(adev->pdev);
 
 	/* wait for asic to come out of reset */
@@ -307,7 +311,7 @@ nv_asic_reset_method(struct amdgpu_device *adev)
 {
 	struct smu_context *smu = &adev->smu;
 
-	if (!amdgpu_sriov_vf(adev) && smu_baco_is_support(smu))
+	if (smu_baco_is_support(smu))
 		return AMD_RESET_METHOD_BACO;
 	else
 		return AMD_RESET_METHOD_MODE1;
@@ -319,15 +323,16 @@ static int nv_asic_reset(struct amdgpu_device *adev)
 	struct smu_context *smu = &adev->smu;
 
 	if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+		dev_info(adev->dev, "GPU BACO reset\n");
+
 		ret = smu_baco_enter(smu);
 		if (ret)
 			return ret;
 		ret = smu_baco_exit(smu);
 		if (ret)
 			return ret;
-	} else {
+	} else
 		ret = nv_asic_mode1_reset(adev);
-	}
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
index 4f7d064e16e4..014815bcae93 100644
--- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
@@ -2039,7 +2039,7 @@ static bool navi10_is_baco_supported(struct smu_context *smu)
 	struct amdgpu_device *adev = smu->adev;
 	uint32_t val;
 
-	if (!smu_v11_0_baco_is_support(smu))
+	if (amdgpu_sriov_vf(adev) || (!smu_v11_0_baco_is_support(smu)))
 		return false;
 
 	val = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP0);
diff --git a/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c
index cd8590aac85d..1949ec3e6b83 100644
--- a/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c
@@ -1761,7 +1761,7 @@ static bool sienna_cichlid_is_baco_supported(struct smu_context *smu)
 	struct amdgpu_device *adev = smu->adev;
 	uint32_t val;
 
-	if (!smu_v11_0_baco_is_support(smu))
+	if (amdgpu_sriov_vf(adev) || (!smu_v11_0_baco_is_support(smu)))
 		return false;
 
 	val = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP0);
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 4/4] drm/amdgpu: add module parameter choose reset mode
  2020-07-14  2:29 [PATCH 1/4] drm/amd/powerplay: add SMU mode1 reset Wenhui Sheng
  2020-07-14  2:29 ` [PATCH 2/4] drm/amdgpu: RAS emergency restart logic refine Wenhui Sheng
  2020-07-14  2:29 ` [PATCH 3/4] drm/amdgpu: enable mode1 reset Wenhui Sheng
@ 2020-07-14  2:29 ` Wenhui Sheng
  2020-07-14  6:39   ` Zhang, Hawking
  2020-07-14  8:35   ` Christian König
  2 siblings, 2 replies; 10+ messages in thread
From: Wenhui Sheng @ 2020-07-14  2:29 UTC (permalink / raw)
  To: amd-gfx; +Cc: Likun Gao, Wenhui Sheng, Hawking Zhang

Default value is auto, doesn't change
original reset method logic.

v2: change to use parameter reset_method
v3: add warn msg if specified mode isn't supported

Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Signed-off-by: Wenhui Sheng <Wenhui.Sheng@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 ++++++++
 drivers/gpu/drm/amd/amdgpu/cik.c        | 7 +++++++
 drivers/gpu/drm/amd/amdgpu/nv.c         | 7 +++++++
 drivers/gpu/drm/amd/amdgpu/si.c         | 5 +++++
 drivers/gpu/drm/amd/amdgpu/soc15.c      | 8 ++++++++
 drivers/gpu/drm/amd/amdgpu/vi.c         | 7 +++++++
 7 files changed, 43 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4de93cef79b9..06bfb8658dec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -196,6 +196,7 @@ static const bool debug_evictions; /* = false */
 #endif
 
 extern int amdgpu_tmz;
+extern int amdgpu_reset_method;
 
 #ifdef CONFIG_DRM_AMDGPU_SI
 extern int amdgpu_si_support;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 94c83a9d4987..581d5fcac361 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -154,6 +154,7 @@ int amdgpu_mes = 0;
 int amdgpu_noretry = 1;
 int amdgpu_force_asic_type = -1;
 int amdgpu_tmz = 0;
+int amdgpu_reset_method = -1; /* auto */
 
 struct amdgpu_mgpu_info mgpu_info = {
 	.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@@ -793,6 +794,13 @@ module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444);
 MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto, 0 = off (default), 1 = on)");
 module_param_named(tmz, amdgpu_tmz, int, 0444);
 
+/**
+ * DOC: reset_method (int)
+ * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)
+ */
+MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
+module_param_named(reset_method, amdgpu_reset_method, int, 0444);
+
 static const struct pci_device_id pciidlist[] = {
 #ifdef  CONFIG_DRM_AMDGPU_SI
 	{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index fe306d0f73f7..310bcf81256f 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1326,6 +1326,13 @@ cik_asic_reset_method(struct amdgpu_device *adev)
 {
 	bool baco_reset;
 
+	if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
+	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
+		return amdgpu_reset_method;
+	else if (amdgpu_reset_method != -1)
+		dev_warn(adev->dev, "Specified reset:%d isn't supported, using AUTO instead.\n",
+				  amdgpu_reset_method);
+
 	switch (adev->asic_type) {
 	case CHIP_BONAIRE:
 	case CHIP_HAWAII:
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 9f1240bd0310..486321bcab8f 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -311,6 +311,13 @@ nv_asic_reset_method(struct amdgpu_device *adev)
 {
 	struct smu_context *smu = &adev->smu;
 
+	if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
+	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
+		return amdgpu_reset_method;
+	else if (amdgpu_reset_method != -1)
+		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
+				  amdgpu_reset_method);
+
 	if (smu_baco_is_support(smu))
 		return AMD_RESET_METHOD_BACO;
 	else
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 9d7b4ccd17b8..1b449291f068 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -1229,6 +1229,11 @@ static bool si_asic_supports_baco(struct amdgpu_device *adev)
 static enum amd_reset_method
 si_asic_reset_method(struct amdgpu_device *adev)
 {
+	if (amdgpu_reset_method != AMD_RESET_METHOD_LEGACY &&
+	    amdgpu_reset_method != -1)
+		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
+				  amdgpu_reset_method);
+
 	return AMD_RESET_METHOD_LEGACY;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 8c739b285915..40b343b25588 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -532,6 +532,14 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
 	bool baco_reset = false;
 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
+	if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
+	    amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
+		amdgpu_reset_method == AMD_RESET_METHOD_BACO)
+		return amdgpu_reset_method;
+	else if (amdgpu_reset_method != -1)
+		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
+				  amdgpu_reset_method);
+
 	switch (adev->asic_type) {
 	case CHIP_RAVEN:
 	case CHIP_RENOIR:
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 4e5e91888d87..e4628c17802f 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -710,6 +710,13 @@ vi_asic_reset_method(struct amdgpu_device *adev)
 {
 	bool baco_reset;
 
+	if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
+	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
+		return amdgpu_reset_method;
+	else if (amdgpu_reset_method != -1)
+		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
+				  amdgpu_reset_method);
+
 	switch (adev->asic_type) {
 	case CHIP_FIJI:
 	case CHIP_TONGA:
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* RE: [PATCH 4/4] drm/amdgpu: add module parameter choose reset mode
  2020-07-14  2:29 ` [PATCH 4/4] drm/amdgpu: add module parameter choose reset mode Wenhui Sheng
@ 2020-07-14  6:39   ` Zhang, Hawking
  2020-07-14  8:35   ` Christian König
  1 sibling, 0 replies; 10+ messages in thread
From: Zhang, Hawking @ 2020-07-14  6:39 UTC (permalink / raw)
  To: Sheng, Wenhui, amd-gfx, Chen, Guchun; +Cc: Gao, Likun

[AMD Public Use]

Patch #4

+	else if (amdgpu_reset_method != -1)
+		dev_warn(adev->dev, "Specified reset:%d isn't supported, using AUTO instead.\n",
+				  amdgpu_reset_method);

I would suggest explicitly specify the reset_method enum that is not supported per ASIC. Other than that the series is

Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>

Please work with @Chen, Guchun to validate the RAS Recovery before the submit

Regards,
Hawking

-----Original Message-----
From: Sheng, Wenhui <Wenhui.Sheng@amd.com> 
Sent: Tuesday, July 14, 2020 10:29
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking <Hawking.Zhang@amd.com>; Sheng, Wenhui <Wenhui.Sheng@amd.com>; Gao, Likun <Likun.Gao@amd.com>
Subject: [PATCH 4/4] drm/amdgpu: add module parameter choose reset mode

Default value is auto, doesn't change
original reset method logic.

v2: change to use parameter reset_method
v3: add warn msg if specified mode isn't supported

Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Signed-off-by: Wenhui Sheng <Wenhui.Sheng@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 ++++++++
 drivers/gpu/drm/amd/amdgpu/cik.c        | 7 +++++++
 drivers/gpu/drm/amd/amdgpu/nv.c         | 7 +++++++
 drivers/gpu/drm/amd/amdgpu/si.c         | 5 +++++
 drivers/gpu/drm/amd/amdgpu/soc15.c      | 8 ++++++++
 drivers/gpu/drm/amd/amdgpu/vi.c         | 7 +++++++
 7 files changed, 43 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4de93cef79b9..06bfb8658dec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -196,6 +196,7 @@ static const bool debug_evictions; /* = false */  #endif
 
 extern int amdgpu_tmz;
+extern int amdgpu_reset_method;
 
 #ifdef CONFIG_DRM_AMDGPU_SI
 extern int amdgpu_si_support;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 94c83a9d4987..581d5fcac361 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -154,6 +154,7 @@ int amdgpu_mes = 0;
 int amdgpu_noretry = 1;
 int amdgpu_force_asic_type = -1;
 int amdgpu_tmz = 0;
+int amdgpu_reset_method = -1; /* auto */
 
 struct amdgpu_mgpu_info mgpu_info = {
 	.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@@ -793,6 +794,13 @@ module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444);  MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto, 0 = off (default), 1 = on)");  module_param_named(tmz, amdgpu_tmz, int, 0444);
 
+/**
+ * DOC: reset_method (int)
+ * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = 
+mode1, 3 = mode2, 4 = baco)  */ MODULE_PARM_DESC(reset_method, "GPU 
+reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 
+= mode2, 4 = baco)"); module_param_named(reset_method, 
+amdgpu_reset_method, int, 0444);
+
 static const struct pci_device_id pciidlist[] = {  #ifdef  CONFIG_DRM_AMDGPU_SI
 	{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index fe306d0f73f7..310bcf81256f 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1326,6 +1326,13 @@ cik_asic_reset_method(struct amdgpu_device *adev)  {
 	bool baco_reset;
 
+	if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
+	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
+		return amdgpu_reset_method;
+	else if (amdgpu_reset_method != -1)
+		dev_warn(adev->dev, "Specified reset:%d isn't supported, using AUTO instead.\n",
+				  amdgpu_reset_method);
+
 	switch (adev->asic_type) {
 	case CHIP_BONAIRE:
 	case CHIP_HAWAII:
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 9f1240bd0310..486321bcab8f 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -311,6 +311,13 @@ nv_asic_reset_method(struct amdgpu_device *adev)  {
 	struct smu_context *smu = &adev->smu;
 
+	if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
+	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
+		return amdgpu_reset_method;
+	else if (amdgpu_reset_method != -1)
+		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
+				  amdgpu_reset_method);
+
 	if (smu_baco_is_support(smu))
 		return AMD_RESET_METHOD_BACO;
 	else
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 9d7b4ccd17b8..1b449291f068 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -1229,6 +1229,11 @@ static bool si_asic_supports_baco(struct amdgpu_device *adev)  static enum amd_reset_method  si_asic_reset_method(struct amdgpu_device *adev)  {
+	if (amdgpu_reset_method != AMD_RESET_METHOD_LEGACY &&
+	    amdgpu_reset_method != -1)
+		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
+				  amdgpu_reset_method);
+
 	return AMD_RESET_METHOD_LEGACY;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 8c739b285915..40b343b25588 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -532,6 +532,14 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
 	bool baco_reset = false;
 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
+	if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
+	    amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
+		amdgpu_reset_method == AMD_RESET_METHOD_BACO)
+		return amdgpu_reset_method;
+	else if (amdgpu_reset_method != -1)
+		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
+				  amdgpu_reset_method);
+
 	switch (adev->asic_type) {
 	case CHIP_RAVEN:
 	case CHIP_RENOIR:
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 4e5e91888d87..e4628c17802f 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -710,6 +710,13 @@ vi_asic_reset_method(struct amdgpu_device *adev)  {
 	bool baco_reset;
 
+	if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
+	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
+		return amdgpu_reset_method;
+	else if (amdgpu_reset_method != -1)
+		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
+				  amdgpu_reset_method);
+
 	switch (adev->asic_type) {
 	case CHIP_FIJI:
 	case CHIP_TONGA:
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 4/4] drm/amdgpu: add module parameter choose reset mode
  2020-07-14  2:29 ` [PATCH 4/4] drm/amdgpu: add module parameter choose reset mode Wenhui Sheng
  2020-07-14  6:39   ` Zhang, Hawking
@ 2020-07-14  8:35   ` Christian König
  2020-07-14  9:58     ` Sheng, Wenhui
  1 sibling, 1 reply; 10+ messages in thread
From: Christian König @ 2020-07-14  8:35 UTC (permalink / raw)
  To: Wenhui Sheng, amd-gfx; +Cc: Likun Gao, Hawking Zhang

Am 14.07.20 um 04:29 schrieb Wenhui Sheng:
> Default value is auto, doesn't change
> original reset method logic.
>
> v2: change to use parameter reset_method
> v3: add warn msg if specified mode isn't supported
>
> Signed-off-by: Likun Gao <Likun.Gao@amd.com>
> Signed-off-by: Wenhui Sheng <Wenhui.Sheng@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 ++++++++
>   drivers/gpu/drm/amd/amdgpu/cik.c        | 7 +++++++
>   drivers/gpu/drm/amd/amdgpu/nv.c         | 7 +++++++
>   drivers/gpu/drm/amd/amdgpu/si.c         | 5 +++++
>   drivers/gpu/drm/amd/amdgpu/soc15.c      | 8 ++++++++
>   drivers/gpu/drm/amd/amdgpu/vi.c         | 7 +++++++
>   7 files changed, 43 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 4de93cef79b9..06bfb8658dec 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -196,6 +196,7 @@ static const bool debug_evictions; /* = false */
>   #endif
>   
>   extern int amdgpu_tmz;
> +extern int amdgpu_reset_method;
>   
>   #ifdef CONFIG_DRM_AMDGPU_SI
>   extern int amdgpu_si_support;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 94c83a9d4987..581d5fcac361 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -154,6 +154,7 @@ int amdgpu_mes = 0;
>   int amdgpu_noretry = 1;
>   int amdgpu_force_asic_type = -1;
>   int amdgpu_tmz = 0;
> +int amdgpu_reset_method = -1; /* auto */
>   
>   struct amdgpu_mgpu_info mgpu_info = {
>   	.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
> @@ -793,6 +794,13 @@ module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444);
>   MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto, 0 = off (default), 1 = on)");
>   module_param_named(tmz, amdgpu_tmz, int, 0444);
>   
> +/**
> + * DOC: reset_method (int)
> + * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)
> + */
> +MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
> +module_param_named(reset_method, amdgpu_reset_method, int, 0444);
> +
>   static const struct pci_device_id pciidlist[] = {
>   #ifdef  CONFIG_DRM_AMDGPU_SI
>   	{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
> diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
> index fe306d0f73f7..310bcf81256f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cik.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cik.c
> @@ -1326,6 +1326,13 @@ cik_asic_reset_method(struct amdgpu_device *adev)
>   {
>   	bool baco_reset;
>   
> +	if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
> +	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
> +		return amdgpu_reset_method;
> +	else if (amdgpu_reset_method != -1)

When you return anyway you can also drop the else here and on other 
occasions as well.

Apart from that the patch looks good to me.

We usually try to avoid adding more module parameters, but I think this 
one is really justified.

Thanks,
Christian.

> +		dev_warn(adev->dev, "Specified reset:%d isn't supported, using AUTO instead.\n",
> +				  amdgpu_reset_method);
> +
>   	switch (adev->asic_type) {
>   	case CHIP_BONAIRE:
>   	case CHIP_HAWAII:
> diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
> index 9f1240bd0310..486321bcab8f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nv.c
> @@ -311,6 +311,13 @@ nv_asic_reset_method(struct amdgpu_device *adev)
>   {
>   	struct smu_context *smu = &adev->smu;
>   
> +	if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
> +	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
> +		return amdgpu_reset_method;
> +	else if (amdgpu_reset_method != -1)
> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
> +				  amdgpu_reset_method);
> +
>   	if (smu_baco_is_support(smu))
>   		return AMD_RESET_METHOD_BACO;
>   	else
> diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
> index 9d7b4ccd17b8..1b449291f068 100644
> --- a/drivers/gpu/drm/amd/amdgpu/si.c
> +++ b/drivers/gpu/drm/amd/amdgpu/si.c
> @@ -1229,6 +1229,11 @@ static bool si_asic_supports_baco(struct amdgpu_device *adev)
>   static enum amd_reset_method
>   si_asic_reset_method(struct amdgpu_device *adev)
>   {
> +	if (amdgpu_reset_method != AMD_RESET_METHOD_LEGACY &&
> +	    amdgpu_reset_method != -1)
> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
> +				  amdgpu_reset_method);
> +
>   	return AMD_RESET_METHOD_LEGACY;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index 8c739b285915..40b343b25588 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -532,6 +532,14 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
>   	bool baco_reset = false;
>   	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
>   
> +	if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
> +	    amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
> +		amdgpu_reset_method == AMD_RESET_METHOD_BACO)
> +		return amdgpu_reset_method;
> +	else if (amdgpu_reset_method != -1)
> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
> +				  amdgpu_reset_method);
> +
>   	switch (adev->asic_type) {
>   	case CHIP_RAVEN:
>   	case CHIP_RENOIR:
> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
> index 4e5e91888d87..e4628c17802f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> @@ -710,6 +710,13 @@ vi_asic_reset_method(struct amdgpu_device *adev)
>   {
>   	bool baco_reset;
>   
> +	if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
> +	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
> +		return amdgpu_reset_method;
> +	else if (amdgpu_reset_method != -1)
> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
> +				  amdgpu_reset_method);
> +
>   	switch (adev->asic_type) {
>   	case CHIP_FIJI:
>   	case CHIP_TONGA:

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH 4/4] drm/amdgpu: add module parameter choose reset mode
  2020-07-14  8:35   ` Christian König
@ 2020-07-14  9:58     ` Sheng, Wenhui
  2020-07-14 11:45       ` Christian König
  0 siblings, 1 reply; 10+ messages in thread
From: Sheng, Wenhui @ 2020-07-14  9:58 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx; +Cc: Gao, Likun, Zhang, Hawking

[AMD Official Use Only - Internal Distribution Only]

Well,  drop else will make code much more clean.
I use "else if" because I don't want to break the reset_method checking context, it will be much easier for code reviewing later I think.

Will refine it anyway:)

Thanks
Wenhui
-----Original Message-----
From: Christian König <ckoenig.leichtzumerken@gmail.com> 
Sent: Tuesday, July 14, 2020 4:36 PM
To: Sheng, Wenhui <Wenhui.Sheng@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Gao, Likun <Likun.Gao@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>
Subject: Re: [PATCH 4/4] drm/amdgpu: add module parameter choose reset mode

Am 14.07.20 um 04:29 schrieb Wenhui Sheng:
> Default value is auto, doesn't change
> original reset method logic.
>
> v2: change to use parameter reset_method
> v3: add warn msg if specified mode isn't supported
>
> Signed-off-by: Likun Gao <Likun.Gao@amd.com>
> Signed-off-by: Wenhui Sheng <Wenhui.Sheng@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 ++++++++
>   drivers/gpu/drm/amd/amdgpu/cik.c        | 7 +++++++
>   drivers/gpu/drm/amd/amdgpu/nv.c         | 7 +++++++
>   drivers/gpu/drm/amd/amdgpu/si.c         | 5 +++++
>   drivers/gpu/drm/amd/amdgpu/soc15.c      | 8 ++++++++
>   drivers/gpu/drm/amd/amdgpu/vi.c         | 7 +++++++
>   7 files changed, 43 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 4de93cef79b9..06bfb8658dec 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -196,6 +196,7 @@ static const bool debug_evictions; /* = false */
>   #endif
>   
>   extern int amdgpu_tmz;
> +extern int amdgpu_reset_method;
>   
>   #ifdef CONFIG_DRM_AMDGPU_SI
>   extern int amdgpu_si_support;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 94c83a9d4987..581d5fcac361 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -154,6 +154,7 @@ int amdgpu_mes = 0;
>   int amdgpu_noretry = 1;
>   int amdgpu_force_asic_type = -1;
>   int amdgpu_tmz = 0;
> +int amdgpu_reset_method = -1; /* auto */
>   
>   struct amdgpu_mgpu_info mgpu_info = {
>   	.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
> @@ -793,6 +794,13 @@ module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444);
>   MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto, 0 = off (default), 1 = on)");
>   module_param_named(tmz, amdgpu_tmz, int, 0444);
>   
> +/**
> + * DOC: reset_method (int)
> + * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = 
> +mode1, 3 = mode2, 4 = baco)  */ MODULE_PARM_DESC(reset_method, "GPU 
> +reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 
> +3 = mode2, 4 = baco)"); module_param_named(reset_method, 
> +amdgpu_reset_method, int, 0444);
> +
>   static const struct pci_device_id pciidlist[] = {
>   #ifdef  CONFIG_DRM_AMDGPU_SI
>   	{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, diff 
> --git a/drivers/gpu/drm/amd/amdgpu/cik.c 
> b/drivers/gpu/drm/amd/amdgpu/cik.c
> index fe306d0f73f7..310bcf81256f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cik.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cik.c
> @@ -1326,6 +1326,13 @@ cik_asic_reset_method(struct amdgpu_device *adev)
>   {
>   	bool baco_reset;
>   
> +	if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
> +	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
> +		return amdgpu_reset_method;
> +	else if (amdgpu_reset_method != -1)

When you return anyway you can also drop the else here and on other occasions as well.

Apart from that the patch looks good to me.

We usually try to avoid adding more module parameters, but I think this one is really justified.

Thanks,
Christian.

> +		dev_warn(adev->dev, "Specified reset:%d isn't supported, using AUTO instead.\n",
> +				  amdgpu_reset_method);
> +
>   	switch (adev->asic_type) {
>   	case CHIP_BONAIRE:
>   	case CHIP_HAWAII:
> diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c 
> b/drivers/gpu/drm/amd/amdgpu/nv.c index 9f1240bd0310..486321bcab8f 
> 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nv.c
> @@ -311,6 +311,13 @@ nv_asic_reset_method(struct amdgpu_device *adev)
>   {
>   	struct smu_context *smu = &adev->smu;
>   
> +	if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
> +	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
> +		return amdgpu_reset_method;
> +	else if (amdgpu_reset_method != -1)
> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
> +				  amdgpu_reset_method);
> +
>   	if (smu_baco_is_support(smu))
>   		return AMD_RESET_METHOD_BACO;
>   	else
> diff --git a/drivers/gpu/drm/amd/amdgpu/si.c 
> b/drivers/gpu/drm/amd/amdgpu/si.c index 9d7b4ccd17b8..1b449291f068 
> 100644
> --- a/drivers/gpu/drm/amd/amdgpu/si.c
> +++ b/drivers/gpu/drm/amd/amdgpu/si.c
> @@ -1229,6 +1229,11 @@ static bool si_asic_supports_baco(struct amdgpu_device *adev)
>   static enum amd_reset_method
>   si_asic_reset_method(struct amdgpu_device *adev)
>   {
> +	if (amdgpu_reset_method != AMD_RESET_METHOD_LEGACY &&
> +	    amdgpu_reset_method != -1)
> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
> +				  amdgpu_reset_method);
> +
>   	return AMD_RESET_METHOD_LEGACY;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c 
> b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index 8c739b285915..40b343b25588 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -532,6 +532,14 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
>   	bool baco_reset = false;
>   	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
>   
> +	if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
> +	    amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
> +		amdgpu_reset_method == AMD_RESET_METHOD_BACO)
> +		return amdgpu_reset_method;
> +	else if (amdgpu_reset_method != -1)
> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
> +				  amdgpu_reset_method);
> +
>   	switch (adev->asic_type) {
>   	case CHIP_RAVEN:
>   	case CHIP_RENOIR:
> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c 
> b/drivers/gpu/drm/amd/amdgpu/vi.c index 4e5e91888d87..e4628c17802f 
> 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> @@ -710,6 +710,13 @@ vi_asic_reset_method(struct amdgpu_device *adev)
>   {
>   	bool baco_reset;
>   
> +	if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
> +	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
> +		return amdgpu_reset_method;
> +	else if (amdgpu_reset_method != -1)
> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
> +				  amdgpu_reset_method);
> +
>   	switch (adev->asic_type) {
>   	case CHIP_FIJI:
>   	case CHIP_TONGA:
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 4/4] drm/amdgpu: add module parameter choose reset mode
  2020-07-14  9:58     ` Sheng, Wenhui
@ 2020-07-14 11:45       ` Christian König
  2020-07-15  3:06         ` Sheng, Wenhui
  0 siblings, 1 reply; 10+ messages in thread
From: Christian König @ 2020-07-14 11:45 UTC (permalink / raw)
  To: Sheng, Wenhui, Koenig, Christian, amd-gfx; +Cc: Gao, Likun, Zhang, Hawking

> I use "else if" because I don't want to break the reset_method checking context, it will be much easier for code reviewing later I think.

What do you mean with that? To just keep the two "if" more closely together?

See we usually avoid extra "else" when there is a return in the if to 
make sure not to confuse the reader.

E.g. and "else" usually implies that both branches converge again after 
the if and that's not the case if you have a return or goto.

Regards,
Christian.

Am 14.07.20 um 11:58 schrieb Sheng, Wenhui:
> [AMD Official Use Only - Internal Distribution Only]
>
> Well,  drop else will make code much more clean.
> I use "else if" because I don't want to break the reset_method checking context, it will be much easier for code reviewing later I think.
>
> Will refine it anyway:)
>
> Thanks
> Wenhui
> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Tuesday, July 14, 2020 4:36 PM
> To: Sheng, Wenhui <Wenhui.Sheng@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Gao, Likun <Likun.Gao@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>
> Subject: Re: [PATCH 4/4] drm/amdgpu: add module parameter choose reset mode
>
> Am 14.07.20 um 04:29 schrieb Wenhui Sheng:
>> Default value is auto, doesn't change
>> original reset method logic.
>>
>> v2: change to use parameter reset_method
>> v3: add warn msg if specified mode isn't supported
>>
>> Signed-off-by: Likun Gao <Likun.Gao@amd.com>
>> Signed-off-by: Wenhui Sheng <Wenhui.Sheng@amd.com>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 1 +
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 ++++++++
>>    drivers/gpu/drm/amd/amdgpu/cik.c        | 7 +++++++
>>    drivers/gpu/drm/amd/amdgpu/nv.c         | 7 +++++++
>>    drivers/gpu/drm/amd/amdgpu/si.c         | 5 +++++
>>    drivers/gpu/drm/amd/amdgpu/soc15.c      | 8 ++++++++
>>    drivers/gpu/drm/amd/amdgpu/vi.c         | 7 +++++++
>>    7 files changed, 43 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 4de93cef79b9..06bfb8658dec 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -196,6 +196,7 @@ static const bool debug_evictions; /* = false */
>>    #endif
>>    
>>    extern int amdgpu_tmz;
>> +extern int amdgpu_reset_method;
>>    
>>    #ifdef CONFIG_DRM_AMDGPU_SI
>>    extern int amdgpu_si_support;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index 94c83a9d4987..581d5fcac361 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -154,6 +154,7 @@ int amdgpu_mes = 0;
>>    int amdgpu_noretry = 1;
>>    int amdgpu_force_asic_type = -1;
>>    int amdgpu_tmz = 0;
>> +int amdgpu_reset_method = -1; /* auto */
>>    
>>    struct amdgpu_mgpu_info mgpu_info = {
>>    	.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
>> @@ -793,6 +794,13 @@ module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444);
>>    MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto, 0 = off (default), 1 = on)");
>>    module_param_named(tmz, amdgpu_tmz, int, 0444);
>>    
>> +/**
>> + * DOC: reset_method (int)
>> + * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 =
>> +mode1, 3 = mode2, 4 = baco)  */ MODULE_PARM_DESC(reset_method, "GPU
>> +reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1,
>> +3 = mode2, 4 = baco)"); module_param_named(reset_method,
>> +amdgpu_reset_method, int, 0444);
>> +
>>    static const struct pci_device_id pciidlist[] = {
>>    #ifdef  CONFIG_DRM_AMDGPU_SI
>>    	{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, diff
>> --git a/drivers/gpu/drm/amd/amdgpu/cik.c
>> b/drivers/gpu/drm/amd/amdgpu/cik.c
>> index fe306d0f73f7..310bcf81256f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/cik.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/cik.c
>> @@ -1326,6 +1326,13 @@ cik_asic_reset_method(struct amdgpu_device *adev)
>>    {
>>    	bool baco_reset;
>>    
>> +	if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
>> +	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
>> +		return amdgpu_reset_method;
>> +	else if (amdgpu_reset_method != -1)
> When you return anyway you can also drop the else here and on other occasions as well.
>
> Apart from that the patch looks good to me.
>
> We usually try to avoid adding more module parameters, but I think this one is really justified.
>
> Thanks,
> Christian.
>
>> +		dev_warn(adev->dev, "Specified reset:%d isn't supported, using AUTO instead.\n",
>> +				  amdgpu_reset_method);
>> +
>>    	switch (adev->asic_type) {
>>    	case CHIP_BONAIRE:
>>    	case CHIP_HAWAII:
>> diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c
>> b/drivers/gpu/drm/amd/amdgpu/nv.c index 9f1240bd0310..486321bcab8f
>> 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/nv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/nv.c
>> @@ -311,6 +311,13 @@ nv_asic_reset_method(struct amdgpu_device *adev)
>>    {
>>    	struct smu_context *smu = &adev->smu;
>>    
>> +	if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
>> +	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
>> +		return amdgpu_reset_method;
>> +	else if (amdgpu_reset_method != -1)
>> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
>> +				  amdgpu_reset_method);
>> +
>>    	if (smu_baco_is_support(smu))
>>    		return AMD_RESET_METHOD_BACO;
>>    	else
>> diff --git a/drivers/gpu/drm/amd/amdgpu/si.c
>> b/drivers/gpu/drm/amd/amdgpu/si.c index 9d7b4ccd17b8..1b449291f068
>> 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/si.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/si.c
>> @@ -1229,6 +1229,11 @@ static bool si_asic_supports_baco(struct amdgpu_device *adev)
>>    static enum amd_reset_method
>>    si_asic_reset_method(struct amdgpu_device *adev)
>>    {
>> +	if (amdgpu_reset_method != AMD_RESET_METHOD_LEGACY &&
>> +	    amdgpu_reset_method != -1)
>> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
>> +				  amdgpu_reset_method);
>> +
>>    	return AMD_RESET_METHOD_LEGACY;
>>    }
>>    
>> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c
>> b/drivers/gpu/drm/amd/amdgpu/soc15.c
>> index 8c739b285915..40b343b25588 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
>> @@ -532,6 +532,14 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
>>    	bool baco_reset = false;
>>    	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
>>    
>> +	if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
>> +	    amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
>> +		amdgpu_reset_method == AMD_RESET_METHOD_BACO)
>> +		return amdgpu_reset_method;
>> +	else if (amdgpu_reset_method != -1)
>> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
>> +				  amdgpu_reset_method);
>> +
>>    	switch (adev->asic_type) {
>>    	case CHIP_RAVEN:
>>    	case CHIP_RENOIR:
>> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c
>> b/drivers/gpu/drm/amd/amdgpu/vi.c index 4e5e91888d87..e4628c17802f
>> 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
>> @@ -710,6 +710,13 @@ vi_asic_reset_method(struct amdgpu_device *adev)
>>    {
>>    	bool baco_reset;
>>    
>> +	if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
>> +	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
>> +		return amdgpu_reset_method;
>> +	else if (amdgpu_reset_method != -1)
>> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
>> +				  amdgpu_reset_method);
>> +
>>    	switch (adev->asic_type) {
>>    	case CHIP_FIJI:
>>    	case CHIP_TONGA:
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH 4/4] drm/amdgpu: add module parameter choose reset mode
  2020-07-14 11:45       ` Christian König
@ 2020-07-15  3:06         ` Sheng, Wenhui
  2020-07-15  9:08           ` Christian König
  0 siblings, 1 reply; 10+ messages in thread
From: Sheng, Wenhui @ 2020-07-15  3:06 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx; +Cc: Gao, Likun, Zhang, Hawking

[AMD Official Use Only - Internal Distribution Only]

> What do you mean with that? To just keep the two "if" more closely together?

I mean generally we want to use if to check value A, we always do like :
	if (cond1(A))
	    xxxx;
	else if (cond2(A))
	    xxxx;
 or
	if (cond1(A))
	    xxxx;
	if (cond2(A))
	    xxxx;

In second case, two conditions could be  true at the same, so when we want to indicate that the program should choose either one or none of the options, the first one should be much more cleaner and better.


Brs
Wenhui
-----Original Message-----
From: Christian König <ckoenig.leichtzumerken@gmail.com> 
Sent: Tuesday, July 14, 2020 7:46 PM
To: Sheng, Wenhui <Wenhui.Sheng@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Gao, Likun <Likun.Gao@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>
Subject: Re: [PATCH 4/4] drm/amdgpu: add module parameter choose reset mode

> I use "else if" because I don't want to break the reset_method checking context, it will be much easier for code reviewing later I think.

What do you mean with that? To just keep the two "if" more closely together?

See we usually avoid extra "else" when there is a return in the if to make sure not to confuse the reader.

E.g. and "else" usually implies that both branches converge again after the if and that's not the case if you have a return or goto.

Regards,
Christian.

Am 14.07.20 um 11:58 schrieb Sheng, Wenhui:
> [AMD Official Use Only - Internal Distribution Only]
>
> Well,  drop else will make code much more clean.
> I use "else if" because I don't want to break the reset_method checking context, it will be much easier for code reviewing later I think.
>
> Will refine it anyway:)
>
> Thanks
> Wenhui
> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Tuesday, July 14, 2020 4:36 PM
> To: Sheng, Wenhui <Wenhui.Sheng@amd.com>; 
> amd-gfx@lists.freedesktop.org
> Cc: Gao, Likun <Likun.Gao@amd.com>; Zhang, Hawking 
> <Hawking.Zhang@amd.com>
> Subject: Re: [PATCH 4/4] drm/amdgpu: add module parameter choose reset 
> mode
>
> Am 14.07.20 um 04:29 schrieb Wenhui Sheng:
>> Default value is auto, doesn't change original reset method logic.
>>
>> v2: change to use parameter reset_method
>> v3: add warn msg if specified mode isn't supported
>>
>> Signed-off-by: Likun Gao <Likun.Gao@amd.com>
>> Signed-off-by: Wenhui Sheng <Wenhui.Sheng@amd.com>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 1 +
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 ++++++++
>>    drivers/gpu/drm/amd/amdgpu/cik.c        | 7 +++++++
>>    drivers/gpu/drm/amd/amdgpu/nv.c         | 7 +++++++
>>    drivers/gpu/drm/amd/amdgpu/si.c         | 5 +++++
>>    drivers/gpu/drm/amd/amdgpu/soc15.c      | 8 ++++++++
>>    drivers/gpu/drm/amd/amdgpu/vi.c         | 7 +++++++
>>    7 files changed, 43 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 4de93cef79b9..06bfb8658dec 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -196,6 +196,7 @@ static const bool debug_evictions; /* = false */
>>    #endif
>>    
>>    extern int amdgpu_tmz;
>> +extern int amdgpu_reset_method;
>>    
>>    #ifdef CONFIG_DRM_AMDGPU_SI
>>    extern int amdgpu_si_support;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index 94c83a9d4987..581d5fcac361 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -154,6 +154,7 @@ int amdgpu_mes = 0;
>>    int amdgpu_noretry = 1;
>>    int amdgpu_force_asic_type = -1;
>>    int amdgpu_tmz = 0;
>> +int amdgpu_reset_method = -1; /* auto */
>>    
>>    struct amdgpu_mgpu_info mgpu_info = {
>>    	.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
>> @@ -793,6 +794,13 @@ module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444);
>>    MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto, 0 = off (default), 1 = on)");
>>    module_param_named(tmz, amdgpu_tmz, int, 0444);
>>    
>> +/**
>> + * DOC: reset_method (int)
>> + * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = 
>> +mode1, 3 = mode2, 4 = baco)  */ MODULE_PARM_DESC(reset_method, "GPU 
>> +reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1,
>> +3 = mode2, 4 = baco)"); module_param_named(reset_method, 
>> +amdgpu_reset_method, int, 0444);
>> +
>>    static const struct pci_device_id pciidlist[] = {
>>    #ifdef  CONFIG_DRM_AMDGPU_SI
>>    	{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, diff 
>> --git a/drivers/gpu/drm/amd/amdgpu/cik.c
>> b/drivers/gpu/drm/amd/amdgpu/cik.c
>> index fe306d0f73f7..310bcf81256f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/cik.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/cik.c
>> @@ -1326,6 +1326,13 @@ cik_asic_reset_method(struct amdgpu_device *adev)
>>    {
>>    	bool baco_reset;
>>    
>> +	if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
>> +	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
>> +		return amdgpu_reset_method;
>> +	else if (amdgpu_reset_method != -1)
> When you return anyway you can also drop the else here and on other occasions as well.
>
> Apart from that the patch looks good to me.
>
> We usually try to avoid adding more module parameters, but I think this one is really justified.
>
> Thanks,
> Christian.
>
>> +		dev_warn(adev->dev, "Specified reset:%d isn't supported, using AUTO instead.\n",
>> +				  amdgpu_reset_method);
>> +
>>    	switch (adev->asic_type) {
>>    	case CHIP_BONAIRE:
>>    	case CHIP_HAWAII:
>> diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c 
>> b/drivers/gpu/drm/amd/amdgpu/nv.c index 9f1240bd0310..486321bcab8f
>> 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/nv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/nv.c
>> @@ -311,6 +311,13 @@ nv_asic_reset_method(struct amdgpu_device *adev)
>>    {
>>    	struct smu_context *smu = &adev->smu;
>>    
>> +	if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
>> +	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
>> +		return amdgpu_reset_method;
>> +	else if (amdgpu_reset_method != -1)
>> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
>> +				  amdgpu_reset_method);
>> +
>>    	if (smu_baco_is_support(smu))
>>    		return AMD_RESET_METHOD_BACO;
>>    	else
>> diff --git a/drivers/gpu/drm/amd/amdgpu/si.c 
>> b/drivers/gpu/drm/amd/amdgpu/si.c index 9d7b4ccd17b8..1b449291f068
>> 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/si.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/si.c
>> @@ -1229,6 +1229,11 @@ static bool si_asic_supports_baco(struct amdgpu_device *adev)
>>    static enum amd_reset_method
>>    si_asic_reset_method(struct amdgpu_device *adev)
>>    {
>> +	if (amdgpu_reset_method != AMD_RESET_METHOD_LEGACY &&
>> +	    amdgpu_reset_method != -1)
>> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
>> +				  amdgpu_reset_method);
>> +
>>    	return AMD_RESET_METHOD_LEGACY;
>>    }
>>    
>> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c
>> b/drivers/gpu/drm/amd/amdgpu/soc15.c
>> index 8c739b285915..40b343b25588 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
>> @@ -532,6 +532,14 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
>>    	bool baco_reset = false;
>>    	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
>>    
>> +	if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
>> +	    amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
>> +		amdgpu_reset_method == AMD_RESET_METHOD_BACO)
>> +		return amdgpu_reset_method;
>> +	else if (amdgpu_reset_method != -1)
>> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
>> +				  amdgpu_reset_method);
>> +
>>    	switch (adev->asic_type) {
>>    	case CHIP_RAVEN:
>>    	case CHIP_RENOIR:
>> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c 
>> b/drivers/gpu/drm/amd/amdgpu/vi.c index 4e5e91888d87..e4628c17802f
>> 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
>> @@ -710,6 +710,13 @@ vi_asic_reset_method(struct amdgpu_device *adev)
>>    {
>>    	bool baco_reset;
>>    
>> +	if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
>> +	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
>> +		return amdgpu_reset_method;
>> +	else if (amdgpu_reset_method != -1)
>> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
>> +				  amdgpu_reset_method);
>> +
>>    	switch (adev->asic_type) {
>>    	case CHIP_FIJI:
>>    	case CHIP_TONGA:
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flist
> s.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7CWe
> nhui.Sheng%40amd.com%7C7379730b0fe24aaebf7908d827eb71f4%7C3dd8961fe488
> 4e608e11a82d994e183d%7C0%7C0%7C637303239467554138&amp;sdata=m0Nc6gbZGR
> KLlAbTvQFJe4XJPA2gyVrK42FzG9H6OPE%3D&amp;reserved=0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 4/4] drm/amdgpu: add module parameter choose reset mode
  2020-07-15  3:06         ` Sheng, Wenhui
@ 2020-07-15  9:08           ` Christian König
  0 siblings, 0 replies; 10+ messages in thread
From: Christian König @ 2020-07-15  9:08 UTC (permalink / raw)
  To: Sheng, Wenhui, amd-gfx; +Cc: Gao, Likun, Zhang, Hawking

> In second case, two conditions could be  true at the same, so when we want to indicate that the program should choose either one or none of the options, the first one should be much more cleaner and better.

When you check two independent conditions which just happen to use the 
same value? Well that sounds like a rather bad idea to me.

"else if" should be used when you can simplify cond2 because part of it 
already contains !cond1.

If both conditions are truly independent using "else if" just confuses 
the reader and is rather error prone.

Regards,
Christian.

Am 15.07.20 um 05:06 schrieb Sheng, Wenhui:
> [AMD Official Use Only - Internal Distribution Only]
>
>> What do you mean with that? To just keep the two "if" more closely together?
> I mean generally we want to use if to check value A, we always do like :
> 	if (cond1(A))
> 	    xxxx;
> 	else if (cond2(A))
> 	    xxxx;
>   or
> 	if (cond1(A))
> 	    xxxx;
> 	if (cond2(A))
> 	    xxxx;
>
> In second case, two conditions could be  true at the same, so when we want to indicate that the program should choose either one or none of the options, the first one should be much more cleaner and better.
>
>
> Brs
> Wenhui
> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Tuesday, July 14, 2020 7:46 PM
> To: Sheng, Wenhui <Wenhui.Sheng@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Gao, Likun <Likun.Gao@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>
> Subject: Re: [PATCH 4/4] drm/amdgpu: add module parameter choose reset mode
>
>> I use "else if" because I don't want to break the reset_method checking context, it will be much easier for code reviewing later I think.
> What do you mean with that? To just keep the two "if" more closely together?
>
> See we usually avoid extra "else" when there is a return in the if to make sure not to confuse the reader.
>
> E.g. and "else" usually implies that both branches converge again after the if and that's not the case if you have a return or goto.
>
> Regards,
> Christian.
>
> Am 14.07.20 um 11:58 schrieb Sheng, Wenhui:
>> [AMD Official Use Only - Internal Distribution Only]
>>
>> Well,  drop else will make code much more clean.
>> I use "else if" because I don't want to break the reset_method checking context, it will be much easier for code reviewing later I think.
>>
>> Will refine it anyway:)
>>
>> Thanks
>> Wenhui
>> -----Original Message-----
>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>> Sent: Tuesday, July 14, 2020 4:36 PM
>> To: Sheng, Wenhui <Wenhui.Sheng@amd.com>;
>> amd-gfx@lists.freedesktop.org
>> Cc: Gao, Likun <Likun.Gao@amd.com>; Zhang, Hawking
>> <Hawking.Zhang@amd.com>
>> Subject: Re: [PATCH 4/4] drm/amdgpu: add module parameter choose reset
>> mode
>>
>> Am 14.07.20 um 04:29 schrieb Wenhui Sheng:
>>> Default value is auto, doesn't change original reset method logic.
>>>
>>> v2: change to use parameter reset_method
>>> v3: add warn msg if specified mode isn't supported
>>>
>>> Signed-off-by: Likun Gao <Likun.Gao@amd.com>
>>> Signed-off-by: Wenhui Sheng <Wenhui.Sheng@amd.com>
>>> ---
>>>     drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 1 +
>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 ++++++++
>>>     drivers/gpu/drm/amd/amdgpu/cik.c        | 7 +++++++
>>>     drivers/gpu/drm/amd/amdgpu/nv.c         | 7 +++++++
>>>     drivers/gpu/drm/amd/amdgpu/si.c         | 5 +++++
>>>     drivers/gpu/drm/amd/amdgpu/soc15.c      | 8 ++++++++
>>>     drivers/gpu/drm/amd/amdgpu/vi.c         | 7 +++++++
>>>     7 files changed, 43 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index 4de93cef79b9..06bfb8658dec 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -196,6 +196,7 @@ static const bool debug_evictions; /* = false */
>>>     #endif
>>>     
>>>     extern int amdgpu_tmz;
>>> +extern int amdgpu_reset_method;
>>>     
>>>     #ifdef CONFIG_DRM_AMDGPU_SI
>>>     extern int amdgpu_si_support;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> index 94c83a9d4987..581d5fcac361 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> @@ -154,6 +154,7 @@ int amdgpu_mes = 0;
>>>     int amdgpu_noretry = 1;
>>>     int amdgpu_force_asic_type = -1;
>>>     int amdgpu_tmz = 0;
>>> +int amdgpu_reset_method = -1; /* auto */
>>>     
>>>     struct amdgpu_mgpu_info mgpu_info = {
>>>     	.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
>>> @@ -793,6 +794,13 @@ module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444);
>>>     MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto, 0 = off (default), 1 = on)");
>>>     module_param_named(tmz, amdgpu_tmz, int, 0444);
>>>     
>>> +/**
>>> + * DOC: reset_method (int)
>>> + * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 =
>>> +mode1, 3 = mode2, 4 = baco)  */ MODULE_PARM_DESC(reset_method, "GPU
>>> +reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1,
>>> +3 = mode2, 4 = baco)"); module_param_named(reset_method,
>>> +amdgpu_reset_method, int, 0444);
>>> +
>>>     static const struct pci_device_id pciidlist[] = {
>>>     #ifdef  CONFIG_DRM_AMDGPU_SI
>>>     	{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, diff
>>> --git a/drivers/gpu/drm/amd/amdgpu/cik.c
>>> b/drivers/gpu/drm/amd/amdgpu/cik.c
>>> index fe306d0f73f7..310bcf81256f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/cik.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/cik.c
>>> @@ -1326,6 +1326,13 @@ cik_asic_reset_method(struct amdgpu_device *adev)
>>>     {
>>>     	bool baco_reset;
>>>     
>>> +	if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
>>> +	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
>>> +		return amdgpu_reset_method;
>>> +	else if (amdgpu_reset_method != -1)
>> When you return anyway you can also drop the else here and on other occasions as well.
>>
>> Apart from that the patch looks good to me.
>>
>> We usually try to avoid adding more module parameters, but I think this one is really justified.
>>
>> Thanks,
>> Christian.
>>
>>> +		dev_warn(adev->dev, "Specified reset:%d isn't supported, using AUTO instead.\n",
>>> +				  amdgpu_reset_method);
>>> +
>>>     	switch (adev->asic_type) {
>>>     	case CHIP_BONAIRE:
>>>     	case CHIP_HAWAII:
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c
>>> b/drivers/gpu/drm/amd/amdgpu/nv.c index 9f1240bd0310..486321bcab8f
>>> 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/nv.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/nv.c
>>> @@ -311,6 +311,13 @@ nv_asic_reset_method(struct amdgpu_device *adev)
>>>     {
>>>     	struct smu_context *smu = &adev->smu;
>>>     
>>> +	if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
>>> +	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
>>> +		return amdgpu_reset_method;
>>> +	else if (amdgpu_reset_method != -1)
>>> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
>>> +				  amdgpu_reset_method);
>>> +
>>>     	if (smu_baco_is_support(smu))
>>>     		return AMD_RESET_METHOD_BACO;
>>>     	else
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/si.c
>>> b/drivers/gpu/drm/amd/amdgpu/si.c index 9d7b4ccd17b8..1b449291f068
>>> 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/si.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/si.c
>>> @@ -1229,6 +1229,11 @@ static bool si_asic_supports_baco(struct amdgpu_device *adev)
>>>     static enum amd_reset_method
>>>     si_asic_reset_method(struct amdgpu_device *adev)
>>>     {
>>> +	if (amdgpu_reset_method != AMD_RESET_METHOD_LEGACY &&
>>> +	    amdgpu_reset_method != -1)
>>> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
>>> +				  amdgpu_reset_method);
>>> +
>>>     	return AMD_RESET_METHOD_LEGACY;
>>>     }
>>>     
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c
>>> b/drivers/gpu/drm/amd/amdgpu/soc15.c
>>> index 8c739b285915..40b343b25588 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
>>> @@ -532,6 +532,14 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
>>>     	bool baco_reset = false;
>>>     	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
>>>     
>>> +	if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
>>> +	    amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
>>> +		amdgpu_reset_method == AMD_RESET_METHOD_BACO)
>>> +		return amdgpu_reset_method;
>>> +	else if (amdgpu_reset_method != -1)
>>> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
>>> +				  amdgpu_reset_method);
>>> +
>>>     	switch (adev->asic_type) {
>>>     	case CHIP_RAVEN:
>>>     	case CHIP_RENOIR:
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c
>>> b/drivers/gpu/drm/amd/amdgpu/vi.c index 4e5e91888d87..e4628c17802f
>>> 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
>>> @@ -710,6 +710,13 @@ vi_asic_reset_method(struct amdgpu_device *adev)
>>>     {
>>>     	bool baco_reset;
>>>     
>>> +	if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
>>> +	    amdgpu_reset_method == AMD_RESET_METHOD_BACO)
>>> +		return amdgpu_reset_method;
>>> +	else if (amdgpu_reset_method != -1)
>>> +		dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
>>> +				  amdgpu_reset_method);
>>> +
>>>     	switch (adev->asic_type) {
>>>     	case CHIP_FIJI:
>>>     	case CHIP_TONGA:
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flist
>> s.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7CWe
>> nhui.Sheng%40amd.com%7C7379730b0fe24aaebf7908d827eb71f4%7C3dd8961fe488
>> 4e608e11a82d994e183d%7C0%7C0%7C637303239467554138&amp;sdata=m0Nc6gbZGR
>> KLlAbTvQFJe4XJPA2gyVrK42FzG9H6OPE%3D&amp;reserved=0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2020-07-15  9:08 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-14  2:29 [PATCH 1/4] drm/amd/powerplay: add SMU mode1 reset Wenhui Sheng
2020-07-14  2:29 ` [PATCH 2/4] drm/amdgpu: RAS emergency restart logic refine Wenhui Sheng
2020-07-14  2:29 ` [PATCH 3/4] drm/amdgpu: enable mode1 reset Wenhui Sheng
2020-07-14  2:29 ` [PATCH 4/4] drm/amdgpu: add module parameter choose reset mode Wenhui Sheng
2020-07-14  6:39   ` Zhang, Hawking
2020-07-14  8:35   ` Christian König
2020-07-14  9:58     ` Sheng, Wenhui
2020-07-14 11:45       ` Christian König
2020-07-15  3:06         ` Sheng, Wenhui
2020-07-15  9:08           ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.