[PATCH] drm/amdgpu: add support to SMU debug option

* [PATCH] drm/amdgpu: add support to SMU debug option
@ 2021-12-01  9:24 Lang Yu
  2021-12-01  9:29 ` Christian König
                   ` (2 more replies)
  0 siblings, 3 replies; 18+ messages in thread
From: Lang Yu @ 2021-12-01  9:24 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Deucher, Lang Yu, Lijo Lazar, Huang Rui, Christian Koenig

To maintain system error state when SMU errors occurred,
which will aid in debugging SMU firmware issues, add SMU
debug option support.

It can be enabled or disabled via amdgpu_smu_debug
debugfs file. When enabled, it makes SMU errors fatal.
It is disabled by default.

== Command Guide ==

1, enable SMU debug option

 # echo 1 > /sys/kernel/debug/dri/0/amdgpu_smu_debug

2, disable SMU debug option

 # echo 0 > /sys/kernel/debug/dri/0/amdgpu_smu_debug

v3:
 - Use debugfs_create_bool().(Christian)
 - Put variable into smu_context struct.
 - Don't resend command when timeout.

v2:
 - Resend command when timeout.(Lijo)
 - Use debugfs file instead of module parameter.

Signed-off-by: Lang Yu <lang.yu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c        | 3 +++
 drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h            | 5 +++++
 drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 2 ++
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c             | 8 +++++++-
 4 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 164d6a9e9fbb..86cd888c7822 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1618,6 +1618,9 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
 	if (!debugfs_initialized())
 		return 0;
 
+	debugfs_create_bool("amdgpu_smu_debug", 0600, root,
+				  &adev->smu.smu_debug_mode);
+
 	ent = debugfs_create_file("amdgpu_preempt_ib", 0600, root, adev,
 				  &fops_ib_preempt);
 	if (IS_ERR(ent)) {
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
index f738f7dc20c9..50dbf5594a9d 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
@@ -569,6 +569,11 @@ struct smu_context
 	struct smu_user_dpm_profile user_dpm_profile;
 
 	struct stb_context stb_context;
+	/*
+	 * When enabled, it makes SMU errors fatal.
+	 * (0 = disabled (default), 1 = enabled)
+	 */
+	bool smu_debug_mode;
 };
 
 struct i2c_adapter;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index 6e781cee8bb6..d3797a2d6451 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -1919,6 +1919,8 @@ static int aldebaran_mode2_reset(struct smu_context *smu)
 out:
 	mutex_unlock(&smu->message_lock);
 
+	BUG_ON(unlikely(smu->smu_debug_mode) && ret);
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 048ca1673863..9be005eb4241 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -349,15 +349,21 @@ int smu_cmn_send_smc_msg_with_param(struct smu_context *smu,
 		__smu_cmn_reg_print_error(smu, reg, index, param, msg);
 		goto Out;
 	}
+
 	__smu_cmn_send_msg(smu, (uint16_t) index, param);
 	reg = __smu_cmn_poll_stat(smu);
 	res = __smu_cmn_reg2errno(smu, reg);
-	if (res != 0)
+	if (res != 0) {
 		__smu_cmn_reg_print_error(smu, reg, index, param, msg);
+		goto Out;
+	}
 	if (read_arg)
 		smu_cmn_read_arg(smu, read_arg);
 Out:
 	mutex_unlock(&smu->message_lock);
+
+	BUG_ON(unlikely(smu->smu_debug_mode) && res);
+
 	return res;
 }
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread