dri-devel.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/amdgpu: add the IP information of the soc
@ 2024-03-12 12:41 Sunil Khatri
  2024-03-12 12:41 ` [PATCH 2/2] drm:amdgpu: add firmware information of all IP's Sunil Khatri
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: Sunil Khatri @ 2024-03-12 12:41 UTC (permalink / raw)
  To: Alex Deucher, Christian König, Shashank Sharma
  Cc: amd-gfx, dri-devel, linux-kernel, Sunil Khatri

Add all the IP's information on a SOC to the
devcoredump.

Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index a0dbccad2f53..611fdb90a1fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -196,6 +196,25 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
 			   coredump->reset_task_info.process_name,
 			   coredump->reset_task_info.pid);
 
+	/* GPU IP's information of the SOC */
+	if (coredump->adev) {
+		drm_printf(&p, "\nIP Information\n");
+		drm_printf(&p, "SOC Family: %d\n", coredump->adev->family);
+		drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id);
+
+		for (int i = 0; i < coredump->adev->num_ip_blocks; i++) {
+			struct amdgpu_ip_block *ip =
+				&coredump->adev->ip_blocks[i];
+			drm_printf(&p, "IP type: %d IP name: %s\n",
+				   ip->version->type,
+				   ip->version->funcs->name);
+			drm_printf(&p, "IP version: (%d,%d,%d)\n\n",
+				   ip->version->major,
+				   ip->version->minor,
+				   ip->version->rev);
+		}
+	}
+
 	if (coredump->ring) {
 		drm_printf(&p, "\nRing timed out details\n");
 		drm_printf(&p, "IP Type: %d Ring Name: %s\n",
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/2] drm:amdgpu: add firmware information of all IP's
  2024-03-12 12:41 [PATCH 1/2] drm/amdgpu: add the IP information of the soc Sunil Khatri
@ 2024-03-12 12:41 ` Sunil Khatri
  2024-03-13 19:48   ` Khatri, Sunil
  2024-03-13 20:36   ` Alex Deucher
  2024-03-13 19:45 ` [PATCH 1/2] drm/amdgpu: add the IP information of the soc Khatri, Sunil
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 15+ messages in thread
From: Sunil Khatri @ 2024-03-12 12:41 UTC (permalink / raw)
  To: Alex Deucher, Christian König, Shashank Sharma
  Cc: amd-gfx, dri-devel, linux-kernel, Sunil Khatri

Add firmware version information of each
IP and each instance where applicable.

Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 122 ++++++++++++++++++++++
 1 file changed, 122 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index 611fdb90a1fc..78ddc58aef67 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -168,6 +168,123 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
 {
 }
 #else
+static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev, struct drm_printer *p)
+{
+	uint32_t version;
+	uint32_t feature;
+	uint8_t smu_program, smu_major, smu_minor, smu_debug;
+
+	drm_printf(p, "VCE feature version: %u, fw version: 0x%08x\n",
+		   adev->vce.fb_version, adev->vce.fw_version);
+	drm_printf(p, "UVD feature version: %u, fw version: 0x%08x\n",
+		   0, adev->uvd.fw_version);
+	drm_printf(p, "GMC feature version: %u, fw version: 0x%08x\n",
+		   0, adev->gmc.fw_version);
+	drm_printf(p, "ME feature version: %u, fw version: 0x%08x\n",
+		   adev->gfx.me_feature_version, adev->gfx.me_fw_version);
+	drm_printf(p, "PFP feature version: %u, fw version: 0x%08x\n",
+		   adev->gfx.pfp_feature_version, adev->gfx.pfp_fw_version);
+	drm_printf(p, "CE feature version: %u, fw version: 0x%08x\n",
+		   adev->gfx.ce_feature_version, adev->gfx.ce_fw_version);
+	drm_printf(p, "RLC feature version: %u, fw version: 0x%08x\n",
+		   adev->gfx.rlc_feature_version, adev->gfx.rlc_fw_version);
+
+	drm_printf(p, "RLC SRLC feature version: %u, fw version: 0x%08x\n",
+		   adev->gfx.rlc_srlc_feature_version,
+		   adev->gfx.rlc_srlc_fw_version);
+	drm_printf(p, "RLC SRLG feature version: %u, fw version: 0x%08x\n",
+		   adev->gfx.rlc_srlg_feature_version,
+		   adev->gfx.rlc_srlg_fw_version);
+	drm_printf(p, "RLC SRLS feature version: %u, fw version: 0x%08x\n",
+		   adev->gfx.rlc_srls_feature_version,
+		   adev->gfx.rlc_srls_fw_version);
+	drm_printf(p, "RLCP feature version: %u, fw version: 0x%08x\n",
+		   adev->gfx.rlcp_ucode_feature_version,
+		   adev->gfx.rlcp_ucode_version);
+	drm_printf(p, "RLCV feature version: %u, fw version: 0x%08x\n",
+		   adev->gfx.rlcv_ucode_feature_version,
+		   adev->gfx.rlcv_ucode_version);
+	drm_printf(p, "MEC feature version: %u, fw version: 0x%08x\n",
+		   adev->gfx.mec_feature_version,
+		   adev->gfx.mec_fw_version);
+
+	if (adev->gfx.mec2_fw)
+		drm_printf(p,
+			   "MEC2 feature version: %u, fw version: 0x%08x\n",
+			   adev->gfx.mec2_feature_version,
+			   adev->gfx.mec2_fw_version);
+
+	drm_printf(p, "IMU feature version: %u, fw version: 0x%08x\n",
+		   0, adev->gfx.imu_fw_version);
+	drm_printf(p, "PSP SOS feature version: %u, fw version: 0x%08x\n",
+		   adev->psp.sos.feature_version,
+		   adev->psp.sos.fw_version);
+	drm_printf(p, "PSP ASD feature version: %u, fw version: 0x%08x\n",
+		   adev->psp.asd_context.bin_desc.feature_version,
+		   adev->psp.asd_context.bin_desc.fw_version);
+
+	drm_printf(p, "TA XGMI feature version: 0x%08x, fw version: 0x%08x\n",
+		   adev->psp.xgmi_context.context.bin_desc.feature_version,
+		   adev->psp.xgmi_context.context.bin_desc.fw_version);
+	drm_printf(p, "TA RAS feature version: 0x%08x, fw version: 0x%08x\n",
+		   adev->psp.ras_context.context.bin_desc.feature_version,
+		   adev->psp.ras_context.context.bin_desc.fw_version);
+	drm_printf(p, "TA HDCP feature version: 0x%08x, fw version: 0x%08x\n",
+		   adev->psp.hdcp_context.context.bin_desc.feature_version,
+		   adev->psp.hdcp_context.context.bin_desc.fw_version);
+	drm_printf(p, "TA DTM feature version: 0x%08x, fw version: 0x%08x\n",
+		   adev->psp.dtm_context.context.bin_desc.feature_version,
+		   adev->psp.dtm_context.context.bin_desc.fw_version);
+	drm_printf(p, "TA RAP feature version: 0x%08x, fw version: 0x%08x\n",
+		   adev->psp.rap_context.context.bin_desc.feature_version,
+		   adev->psp.rap_context.context.bin_desc.fw_version);
+	drm_printf(p, "TA SECURE DISPLAY feature version: 0x%08x, fw version: 0x%08x\n",
+		adev->psp.securedisplay_context.context.bin_desc.feature_version,
+		adev->psp.securedisplay_context.context.bin_desc.fw_version);
+
+	/* SMC firmware */
+	version = adev->pm.fw_version;
+
+	smu_program = (version >> 24) & 0xff;
+	smu_major = (version >> 16) & 0xff;
+	smu_minor = (version >> 8) & 0xff;
+	smu_debug = (version >> 0) & 0xff;
+	drm_printf(p, "SMC feature version: %u, program: %d, fw version: 0x%08x (%d.%d.%d)\n",
+		   0, smu_program, version, smu_major, smu_minor, smu_debug);
+
+	/* SDMA firmware */
+	for (int i = 0; i < adev->sdma.num_instances; i++) {
+		drm_printf(p, "SDMA%d feature version: %u, firmware version: 0x%08x\n",
+			   i, adev->sdma.instance[i].feature_version,
+			   adev->sdma.instance[i].fw_version);
+	}
+
+	drm_printf(p, "VCN feature version: %u, fw version: 0x%08x\n",
+		   0, adev->vcn.fw_version);
+	drm_printf(p, "DMCU feature version: %u, fw version: 0x%08x\n",
+		   0, adev->dm.dmcu_fw_version);
+	drm_printf(p, "DMCUB feature version: %u, fw version: 0x%08x\n",
+		   0, adev->dm.dmcub_fw_version);
+	drm_printf(p, "PSP TOC feature version: %u, fw version: 0x%08x\n",
+		   adev->psp.toc.feature_version, adev->psp.toc.fw_version);
+
+	version = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK;
+	feature = (adev->mes.kiq_version & AMDGPU_MES_FEAT_VERSION_MASK)
+					>> AMDGPU_MES_FEAT_VERSION_SHIFT;
+	drm_printf(p, "MES_KIQ feature version: %u, fw version: 0x%08x\n",
+		   feature, version);
+
+	version = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
+	feature = (adev->mes.sched_version & AMDGPU_MES_FEAT_VERSION_MASK)
+					>> AMDGPU_MES_FEAT_VERSION_SHIFT;
+	drm_printf(p, "MES feature version: %u, fw version: 0x%08x\n",
+		   feature, version);
+
+	drm_printf(p, "VPE feature version: %u, fw version: 0x%08x\n",
+		   adev->vpe.feature_version, adev->vpe.fw_version);
+
+}
+
 static ssize_t
 amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
 			void *data, size_t datalen)
@@ -215,6 +332,11 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
 		}
 	}
 
+	if (coredump->adev) {
+		drm_printf(&p, "IP Firmwares\n");
+		amdgpu_devcoredump_fw_info(coredump->adev, &p);
+	}
+
 	if (coredump->ring) {
 		drm_printf(&p, "\nRing timed out details\n");
 		drm_printf(&p, "IP Type: %d Ring Name: %s\n",
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: add the IP information of the soc
  2024-03-12 12:41 [PATCH 1/2] drm/amdgpu: add the IP information of the soc Sunil Khatri
  2024-03-12 12:41 ` [PATCH 2/2] drm:amdgpu: add firmware information of all IP's Sunil Khatri
@ 2024-03-13 19:45 ` Khatri, Sunil
  2024-03-13 19:46 ` Khatri, Sunil
  2024-03-13 20:28 ` Alex Deucher
  3 siblings, 0 replies; 15+ messages in thread
From: Khatri, Sunil @ 2024-03-13 19:45 UTC (permalink / raw)
  To: Deucher, Alexander, Koenig, Christian, Sharma, Shashank
  Cc: amd-gfx, dri-devel, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 2504 bytes --]

[AMD Official Use Only - General]

Gentle Reminder for review.

Regards,
Sunil

Get Outlook for Android<https://aka.ms/AAb9ysg>
________________________________
From: Sunil Khatri <sunil.khatri@amd.com>
Sent: Tuesday, March 12, 2024 6:11:47 PM
To: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Sharma, Shashank <Shashank.Sharma@amd.com>
Cc: amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>; dri-devel@lists.freedesktop.org <dri-devel@lists.freedesktop.org>; linux-kernel@vger.kernel.org <linux-kernel@vger.kernel.org>; Khatri, Sunil <Sunil.Khatri@amd.com>
Subject: [PATCH 1/2] drm/amdgpu: add the IP information of the soc

Add all the IP's information on a SOC to the
devcoredump.

Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index a0dbccad2f53..611fdb90a1fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -196,6 +196,25 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
                            coredump->reset_task_info.process_name,
                            coredump->reset_task_info.pid);

+       /* GPU IP's information of the SOC */
+       if (coredump->adev) {
+               drm_printf(&p, "\nIP Information\n");
+               drm_printf(&p, "SOC Family: %d\n", coredump->adev->family);
+               drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id);
+
+               for (int i = 0; i < coredump->adev->num_ip_blocks; i++) {
+                       struct amdgpu_ip_block *ip =
+                               &coredump->adev->ip_blocks[i];
+                       drm_printf(&p, "IP type: %d IP name: %s\n",
+                                  ip->version->type,
+                                  ip->version->funcs->name);
+                       drm_printf(&p, "IP version: (%d,%d,%d)\n\n",
+                                  ip->version->major,
+                                  ip->version->minor,
+                                  ip->version->rev);
+               }
+       }
+
         if (coredump->ring) {
                 drm_printf(&p, "\nRing timed out details\n");
                 drm_printf(&p, "IP Type: %d Ring Name: %s\n",
--
2.34.1


[-- Attachment #2: Type: text/html, Size: 5939 bytes --]

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: add the IP information of the soc
  2024-03-12 12:41 [PATCH 1/2] drm/amdgpu: add the IP information of the soc Sunil Khatri
  2024-03-12 12:41 ` [PATCH 2/2] drm:amdgpu: add firmware information of all IP's Sunil Khatri
  2024-03-13 19:45 ` [PATCH 1/2] drm/amdgpu: add the IP information of the soc Khatri, Sunil
@ 2024-03-13 19:46 ` Khatri, Sunil
  2024-03-13 20:28 ` Alex Deucher
  3 siblings, 0 replies; 15+ messages in thread
From: Khatri, Sunil @ 2024-03-13 19:46 UTC (permalink / raw)
  To: Deucher, Alexander, Koenig, Christian, Sharma, Shashank
  Cc: amd-gfx, dri-devel, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 2503 bytes --]

[AMD Official Use Only - General]

Gentle reminder for review.

Regards
Sunil

Get Outlook for Android<https://aka.ms/AAb9ysg>
________________________________
From: Sunil Khatri <sunil.khatri@amd.com>
Sent: Tuesday, March 12, 2024 6:11:47 PM
To: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Sharma, Shashank <Shashank.Sharma@amd.com>
Cc: amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>; dri-devel@lists.freedesktop.org <dri-devel@lists.freedesktop.org>; linux-kernel@vger.kernel.org <linux-kernel@vger.kernel.org>; Khatri, Sunil <Sunil.Khatri@amd.com>
Subject: [PATCH 1/2] drm/amdgpu: add the IP information of the soc

Add all the IP's information on a SOC to the
devcoredump.

Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index a0dbccad2f53..611fdb90a1fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -196,6 +196,25 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
                            coredump->reset_task_info.process_name,
                            coredump->reset_task_info.pid);

+       /* GPU IP's information of the SOC */
+       if (coredump->adev) {
+               drm_printf(&p, "\nIP Information\n");
+               drm_printf(&p, "SOC Family: %d\n", coredump->adev->family);
+               drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id);
+
+               for (int i = 0; i < coredump->adev->num_ip_blocks; i++) {
+                       struct amdgpu_ip_block *ip =
+                               &coredump->adev->ip_blocks[i];
+                       drm_printf(&p, "IP type: %d IP name: %s\n",
+                                  ip->version->type,
+                                  ip->version->funcs->name);
+                       drm_printf(&p, "IP version: (%d,%d,%d)\n\n",
+                                  ip->version->major,
+                                  ip->version->minor,
+                                  ip->version->rev);
+               }
+       }
+
         if (coredump->ring) {
                 drm_printf(&p, "\nRing timed out details\n");
                 drm_printf(&p, "IP Type: %d Ring Name: %s\n",
--
2.34.1


[-- Attachment #2: Type: text/html, Size: 5949 bytes --]

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/2] drm:amdgpu: add firmware information of all IP's
  2024-03-12 12:41 ` [PATCH 2/2] drm:amdgpu: add firmware information of all IP's Sunil Khatri
@ 2024-03-13 19:48   ` Khatri, Sunil
  2024-03-13 20:36   ` Alex Deucher
  1 sibling, 0 replies; 15+ messages in thread
From: Khatri, Sunil @ 2024-03-13 19:48 UTC (permalink / raw)
  To: Deucher, Alexander, Koenig, Christian, Sharma, Shashank
  Cc: amd-gfx, dri-devel, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 8427 bytes --]

[AMD Official Use Only - General]

Gentle reminder

Regards
Sunil

Get Outlook for Android<https://aka.ms/AAb9ysg>
________________________________
From: Sunil Khatri <sunil.khatri@amd.com>
Sent: Tuesday, March 12, 2024 6:11:48 PM
To: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Sharma, Shashank <Shashank.Sharma@amd.com>
Cc: amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>; dri-devel@lists.freedesktop.org <dri-devel@lists.freedesktop.org>; linux-kernel@vger.kernel.org <linux-kernel@vger.kernel.org>; Khatri, Sunil <Sunil.Khatri@amd.com>
Subject: [PATCH 2/2] drm:amdgpu: add firmware information of all IP's

Add firmware version information of each
IP and each instance where applicable.

Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 122 ++++++++++++++++++++++
 1 file changed, 122 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index 611fdb90a1fc..78ddc58aef67 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -168,6 +168,123 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
 {
 }
 #else
+static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev, struct drm_printer *p)
+{
+       uint32_t version;
+       uint32_t feature;
+       uint8_t smu_program, smu_major, smu_minor, smu_debug;
+
+       drm_printf(p, "VCE feature version: %u, fw version: 0x%08x\n",
+                  adev->vce.fb_version, adev->vce.fw_version);
+       drm_printf(p, "UVD feature version: %u, fw version: 0x%08x\n",
+                  0, adev->uvd.fw_version);
+       drm_printf(p, "GMC feature version: %u, fw version: 0x%08x\n",
+                  0, adev->gmc.fw_version);
+       drm_printf(p, "ME feature version: %u, fw version: 0x%08x\n",
+                  adev->gfx.me_feature_version, adev->gfx.me_fw_version);
+       drm_printf(p, "PFP feature version: %u, fw version: 0x%08x\n",
+                  adev->gfx.pfp_feature_version, adev->gfx.pfp_fw_version);
+       drm_printf(p, "CE feature version: %u, fw version: 0x%08x\n",
+                  adev->gfx.ce_feature_version, adev->gfx.ce_fw_version);
+       drm_printf(p, "RLC feature version: %u, fw version: 0x%08x\n",
+                  adev->gfx.rlc_feature_version, adev->gfx.rlc_fw_version);
+
+       drm_printf(p, "RLC SRLC feature version: %u, fw version: 0x%08x\n",
+                  adev->gfx.rlc_srlc_feature_version,
+                  adev->gfx.rlc_srlc_fw_version);
+       drm_printf(p, "RLC SRLG feature version: %u, fw version: 0x%08x\n",
+                  adev->gfx.rlc_srlg_feature_version,
+                  adev->gfx.rlc_srlg_fw_version);
+       drm_printf(p, "RLC SRLS feature version: %u, fw version: 0x%08x\n",
+                  adev->gfx.rlc_srls_feature_version,
+                  adev->gfx.rlc_srls_fw_version);
+       drm_printf(p, "RLCP feature version: %u, fw version: 0x%08x\n",
+                  adev->gfx.rlcp_ucode_feature_version,
+                  adev->gfx.rlcp_ucode_version);
+       drm_printf(p, "RLCV feature version: %u, fw version: 0x%08x\n",
+                  adev->gfx.rlcv_ucode_feature_version,
+                  adev->gfx.rlcv_ucode_version);
+       drm_printf(p, "MEC feature version: %u, fw version: 0x%08x\n",
+                  adev->gfx.mec_feature_version,
+                  adev->gfx.mec_fw_version);
+
+       if (adev->gfx.mec2_fw)
+               drm_printf(p,
+                          "MEC2 feature version: %u, fw version: 0x%08x\n",
+                          adev->gfx.mec2_feature_version,
+                          adev->gfx.mec2_fw_version);
+
+       drm_printf(p, "IMU feature version: %u, fw version: 0x%08x\n",
+                  0, adev->gfx.imu_fw_version);
+       drm_printf(p, "PSP SOS feature version: %u, fw version: 0x%08x\n",
+                  adev->psp.sos.feature_version,
+                  adev->psp.sos.fw_version);
+       drm_printf(p, "PSP ASD feature version: %u, fw version: 0x%08x\n",
+                  adev->psp.asd_context.bin_desc.feature_version,
+                  adev->psp.asd_context.bin_desc.fw_version);
+
+       drm_printf(p, "TA XGMI feature version: 0x%08x, fw version: 0x%08x\n",
+                  adev->psp.xgmi_context.context.bin_desc.feature_version,
+                  adev->psp.xgmi_context.context.bin_desc.fw_version);
+       drm_printf(p, "TA RAS feature version: 0x%08x, fw version: 0x%08x\n",
+                  adev->psp.ras_context.context.bin_desc.feature_version,
+                  adev->psp.ras_context.context.bin_desc.fw_version);
+       drm_printf(p, "TA HDCP feature version: 0x%08x, fw version: 0x%08x\n",
+                  adev->psp.hdcp_context.context.bin_desc.feature_version,
+                  adev->psp.hdcp_context.context.bin_desc.fw_version);
+       drm_printf(p, "TA DTM feature version: 0x%08x, fw version: 0x%08x\n",
+                  adev->psp.dtm_context.context.bin_desc.feature_version,
+                  adev->psp.dtm_context.context.bin_desc.fw_version);
+       drm_printf(p, "TA RAP feature version: 0x%08x, fw version: 0x%08x\n",
+                  adev->psp.rap_context.context.bin_desc.feature_version,
+                  adev->psp.rap_context.context.bin_desc.fw_version);
+       drm_printf(p, "TA SECURE DISPLAY feature version: 0x%08x, fw version: 0x%08x\n",
+               adev->psp.securedisplay_context.context.bin_desc.feature_version,
+               adev->psp.securedisplay_context.context.bin_desc.fw_version);
+
+       /* SMC firmware */
+       version = adev->pm.fw_version;
+
+       smu_program = (version >> 24) & 0xff;
+       smu_major = (version >> 16) & 0xff;
+       smu_minor = (version >> 8) & 0xff;
+       smu_debug = (version >> 0) & 0xff;
+       drm_printf(p, "SMC feature version: %u, program: %d, fw version: 0x%08x (%d.%d.%d)\n",
+                  0, smu_program, version, smu_major, smu_minor, smu_debug);
+
+       /* SDMA firmware */
+       for (int i = 0; i < adev->sdma.num_instances; i++) {
+               drm_printf(p, "SDMA%d feature version: %u, firmware version: 0x%08x\n",
+                          i, adev->sdma.instance[i].feature_version,
+                          adev->sdma.instance[i].fw_version);
+       }
+
+       drm_printf(p, "VCN feature version: %u, fw version: 0x%08x\n",
+                  0, adev->vcn.fw_version);
+       drm_printf(p, "DMCU feature version: %u, fw version: 0x%08x\n",
+                  0, adev->dm.dmcu_fw_version);
+       drm_printf(p, "DMCUB feature version: %u, fw version: 0x%08x\n",
+                  0, adev->dm.dmcub_fw_version);
+       drm_printf(p, "PSP TOC feature version: %u, fw version: 0x%08x\n",
+                  adev->psp.toc.feature_version, adev->psp.toc.fw_version);
+
+       version = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK;
+       feature = (adev->mes.kiq_version & AMDGPU_MES_FEAT_VERSION_MASK)
+                                       >> AMDGPU_MES_FEAT_VERSION_SHIFT;
+       drm_printf(p, "MES_KIQ feature version: %u, fw version: 0x%08x\n",
+                  feature, version);
+
+       version = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
+       feature = (adev->mes.sched_version & AMDGPU_MES_FEAT_VERSION_MASK)
+                                       >> AMDGPU_MES_FEAT_VERSION_SHIFT;
+       drm_printf(p, "MES feature version: %u, fw version: 0x%08x\n",
+                  feature, version);
+
+       drm_printf(p, "VPE feature version: %u, fw version: 0x%08x\n",
+                  adev->vpe.feature_version, adev->vpe.fw_version);
+
+}
+
 static ssize_t
 amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
                         void *data, size_t datalen)
@@ -215,6 +332,11 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
                 }
         }

+       if (coredump->adev) {
+               drm_printf(&p, "IP Firmwares\n");
+               amdgpu_devcoredump_fw_info(coredump->adev, &p);
+       }
+
         if (coredump->ring) {
                 drm_printf(&p, "\nRing timed out details\n");
                 drm_printf(&p, "IP Type: %d Ring Name: %s\n",
--
2.34.1


[-- Attachment #2: Type: text/html, Size: 17471 bytes --]

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: add the IP information of the soc
  2024-03-12 12:41 [PATCH 1/2] drm/amdgpu: add the IP information of the soc Sunil Khatri
                   ` (2 preceding siblings ...)
  2024-03-13 19:46 ` Khatri, Sunil
@ 2024-03-13 20:28 ` Alex Deucher
  2024-03-14  5:44   ` Khatri, Sunil
  3 siblings, 1 reply; 15+ messages in thread
From: Alex Deucher @ 2024-03-13 20:28 UTC (permalink / raw)
  To: Sunil Khatri
  Cc: Alex Deucher, Christian König, Shashank Sharma, amd-gfx,
	dri-devel, linux-kernel

On Tue, Mar 12, 2024 at 8:41 AM Sunil Khatri <sunil.khatri@amd.com> wrote:
>
> Add all the IP's information on a SOC to the
> devcoredump.
>
> Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 19 +++++++++++++++++++
>  1 file changed, 19 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> index a0dbccad2f53..611fdb90a1fc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> @@ -196,6 +196,25 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
>                            coredump->reset_task_info.process_name,
>                            coredump->reset_task_info.pid);
>
> +       /* GPU IP's information of the SOC */
> +       if (coredump->adev) {
> +               drm_printf(&p, "\nIP Information\n");
> +               drm_printf(&p, "SOC Family: %d\n", coredump->adev->family);
> +               drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id);
> +
> +               for (int i = 0; i < coredump->adev->num_ip_blocks; i++) {
> +                       struct amdgpu_ip_block *ip =
> +                               &coredump->adev->ip_blocks[i];
> +                       drm_printf(&p, "IP type: %d IP name: %s\n",
> +                                  ip->version->type,
> +                                  ip->version->funcs->name);
> +                       drm_printf(&p, "IP version: (%d,%d,%d)\n\n",
> +                                  ip->version->major,
> +                                  ip->version->minor,
> +                                  ip->version->rev);
> +               }
> +       }

I think the IP discovery table would be more useful.  Either walk the
adev->ip_versions structure, or just include the IP discovery binary.

Alex

> +
>         if (coredump->ring) {
>                 drm_printf(&p, "\nRing timed out details\n");
>                 drm_printf(&p, "IP Type: %d Ring Name: %s\n",
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/2] drm:amdgpu: add firmware information of all IP's
  2024-03-12 12:41 ` [PATCH 2/2] drm:amdgpu: add firmware information of all IP's Sunil Khatri
  2024-03-13 19:48   ` Khatri, Sunil
@ 2024-03-13 20:36   ` Alex Deucher
  2024-03-14  5:58     ` Khatri, Sunil
  1 sibling, 1 reply; 15+ messages in thread
From: Alex Deucher @ 2024-03-13 20:36 UTC (permalink / raw)
  To: Sunil Khatri
  Cc: Alex Deucher, Christian König, Shashank Sharma, amd-gfx,
	dri-devel, linux-kernel

On Tue, Mar 12, 2024 at 8:42 AM Sunil Khatri <sunil.khatri@amd.com> wrote:
>
> Add firmware version information of each
> IP and each instance where applicable.
>

Is there a way we can share some common code with devcoredump,
debugfs, and the info IOCTL?  All three places need to query this
information and the same logic is repeated in each case.

Alex


> Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 122 ++++++++++++++++++++++
>  1 file changed, 122 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> index 611fdb90a1fc..78ddc58aef67 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> @@ -168,6 +168,123 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
>  {
>  }
>  #else
> +static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev, struct drm_printer *p)
> +{
> +       uint32_t version;
> +       uint32_t feature;
> +       uint8_t smu_program, smu_major, smu_minor, smu_debug;
> +
> +       drm_printf(p, "VCE feature version: %u, fw version: 0x%08x\n",
> +                  adev->vce.fb_version, adev->vce.fw_version);
> +       drm_printf(p, "UVD feature version: %u, fw version: 0x%08x\n",
> +                  0, adev->uvd.fw_version);
> +       drm_printf(p, "GMC feature version: %u, fw version: 0x%08x\n",
> +                  0, adev->gmc.fw_version);
> +       drm_printf(p, "ME feature version: %u, fw version: 0x%08x\n",
> +                  adev->gfx.me_feature_version, adev->gfx.me_fw_version);
> +       drm_printf(p, "PFP feature version: %u, fw version: 0x%08x\n",
> +                  adev->gfx.pfp_feature_version, adev->gfx.pfp_fw_version);
> +       drm_printf(p, "CE feature version: %u, fw version: 0x%08x\n",
> +                  adev->gfx.ce_feature_version, adev->gfx.ce_fw_version);
> +       drm_printf(p, "RLC feature version: %u, fw version: 0x%08x\n",
> +                  adev->gfx.rlc_feature_version, adev->gfx.rlc_fw_version);
> +
> +       drm_printf(p, "RLC SRLC feature version: %u, fw version: 0x%08x\n",
> +                  adev->gfx.rlc_srlc_feature_version,
> +                  adev->gfx.rlc_srlc_fw_version);
> +       drm_printf(p, "RLC SRLG feature version: %u, fw version: 0x%08x\n",
> +                  adev->gfx.rlc_srlg_feature_version,
> +                  adev->gfx.rlc_srlg_fw_version);
> +       drm_printf(p, "RLC SRLS feature version: %u, fw version: 0x%08x\n",
> +                  adev->gfx.rlc_srls_feature_version,
> +                  adev->gfx.rlc_srls_fw_version);
> +       drm_printf(p, "RLCP feature version: %u, fw version: 0x%08x\n",
> +                  adev->gfx.rlcp_ucode_feature_version,
> +                  adev->gfx.rlcp_ucode_version);
> +       drm_printf(p, "RLCV feature version: %u, fw version: 0x%08x\n",
> +                  adev->gfx.rlcv_ucode_feature_version,
> +                  adev->gfx.rlcv_ucode_version);
> +       drm_printf(p, "MEC feature version: %u, fw version: 0x%08x\n",
> +                  adev->gfx.mec_feature_version,
> +                  adev->gfx.mec_fw_version);
> +
> +       if (adev->gfx.mec2_fw)
> +               drm_printf(p,
> +                          "MEC2 feature version: %u, fw version: 0x%08x\n",
> +                          adev->gfx.mec2_feature_version,
> +                          adev->gfx.mec2_fw_version);
> +
> +       drm_printf(p, "IMU feature version: %u, fw version: 0x%08x\n",
> +                  0, adev->gfx.imu_fw_version);
> +       drm_printf(p, "PSP SOS feature version: %u, fw version: 0x%08x\n",
> +                  adev->psp.sos.feature_version,
> +                  adev->psp.sos.fw_version);
> +       drm_printf(p, "PSP ASD feature version: %u, fw version: 0x%08x\n",
> +                  adev->psp.asd_context.bin_desc.feature_version,
> +                  adev->psp.asd_context.bin_desc.fw_version);
> +
> +       drm_printf(p, "TA XGMI feature version: 0x%08x, fw version: 0x%08x\n",
> +                  adev->psp.xgmi_context.context.bin_desc.feature_version,
> +                  adev->psp.xgmi_context.context.bin_desc.fw_version);
> +       drm_printf(p, "TA RAS feature version: 0x%08x, fw version: 0x%08x\n",
> +                  adev->psp.ras_context.context.bin_desc.feature_version,
> +                  adev->psp.ras_context.context.bin_desc.fw_version);
> +       drm_printf(p, "TA HDCP feature version: 0x%08x, fw version: 0x%08x\n",
> +                  adev->psp.hdcp_context.context.bin_desc.feature_version,
> +                  adev->psp.hdcp_context.context.bin_desc.fw_version);
> +       drm_printf(p, "TA DTM feature version: 0x%08x, fw version: 0x%08x\n",
> +                  adev->psp.dtm_context.context.bin_desc.feature_version,
> +                  adev->psp.dtm_context.context.bin_desc.fw_version);
> +       drm_printf(p, "TA RAP feature version: 0x%08x, fw version: 0x%08x\n",
> +                  adev->psp.rap_context.context.bin_desc.feature_version,
> +                  adev->psp.rap_context.context.bin_desc.fw_version);
> +       drm_printf(p, "TA SECURE DISPLAY feature version: 0x%08x, fw version: 0x%08x\n",
> +               adev->psp.securedisplay_context.context.bin_desc.feature_version,
> +               adev->psp.securedisplay_context.context.bin_desc.fw_version);
> +
> +       /* SMC firmware */
> +       version = adev->pm.fw_version;
> +
> +       smu_program = (version >> 24) & 0xff;
> +       smu_major = (version >> 16) & 0xff;
> +       smu_minor = (version >> 8) & 0xff;
> +       smu_debug = (version >> 0) & 0xff;
> +       drm_printf(p, "SMC feature version: %u, program: %d, fw version: 0x%08x (%d.%d.%d)\n",
> +                  0, smu_program, version, smu_major, smu_minor, smu_debug);
> +
> +       /* SDMA firmware */
> +       for (int i = 0; i < adev->sdma.num_instances; i++) {
> +               drm_printf(p, "SDMA%d feature version: %u, firmware version: 0x%08x\n",
> +                          i, adev->sdma.instance[i].feature_version,
> +                          adev->sdma.instance[i].fw_version);
> +       }
> +
> +       drm_printf(p, "VCN feature version: %u, fw version: 0x%08x\n",
> +                  0, adev->vcn.fw_version);
> +       drm_printf(p, "DMCU feature version: %u, fw version: 0x%08x\n",
> +                  0, adev->dm.dmcu_fw_version);
> +       drm_printf(p, "DMCUB feature version: %u, fw version: 0x%08x\n",
> +                  0, adev->dm.dmcub_fw_version);
> +       drm_printf(p, "PSP TOC feature version: %u, fw version: 0x%08x\n",
> +                  adev->psp.toc.feature_version, adev->psp.toc.fw_version);
> +
> +       version = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK;
> +       feature = (adev->mes.kiq_version & AMDGPU_MES_FEAT_VERSION_MASK)
> +                                       >> AMDGPU_MES_FEAT_VERSION_SHIFT;
> +       drm_printf(p, "MES_KIQ feature version: %u, fw version: 0x%08x\n",
> +                  feature, version);
> +
> +       version = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
> +       feature = (adev->mes.sched_version & AMDGPU_MES_FEAT_VERSION_MASK)
> +                                       >> AMDGPU_MES_FEAT_VERSION_SHIFT;
> +       drm_printf(p, "MES feature version: %u, fw version: 0x%08x\n",
> +                  feature, version);
> +
> +       drm_printf(p, "VPE feature version: %u, fw version: 0x%08x\n",
> +                  adev->vpe.feature_version, adev->vpe.fw_version);
> +
> +}
> +
>  static ssize_t
>  amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
>                         void *data, size_t datalen)
> @@ -215,6 +332,11 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
>                 }
>         }
>
> +       if (coredump->adev) {
> +               drm_printf(&p, "IP Firmwares\n");
> +               amdgpu_devcoredump_fw_info(coredump->adev, &p);
> +       }
> +
>         if (coredump->ring) {
>                 drm_printf(&p, "\nRing timed out details\n");
>                 drm_printf(&p, "IP Type: %d Ring Name: %s\n",
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: add the IP information of the soc
  2024-03-13 20:28 ` Alex Deucher
@ 2024-03-14  5:44   ` Khatri, Sunil
  2024-03-14 14:42     ` Alex Deucher
  0 siblings, 1 reply; 15+ messages in thread
From: Khatri, Sunil @ 2024-03-14  5:44 UTC (permalink / raw)
  To: Alex Deucher, Sunil Khatri
  Cc: Alex Deucher, Christian König, Shashank Sharma, amd-gfx,
	dri-devel, linux-kernel


On 3/14/2024 1:58 AM, Alex Deucher wrote:
> On Tue, Mar 12, 2024 at 8:41 AM Sunil Khatri <sunil.khatri@amd.com> wrote:
>> Add all the IP's information on a SOC to the
>> devcoredump.
>>
>> Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 19 +++++++++++++++++++
>>   1 file changed, 19 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
>> index a0dbccad2f53..611fdb90a1fc 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
>> @@ -196,6 +196,25 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
>>                             coredump->reset_task_info.process_name,
>>                             coredump->reset_task_info.pid);
>>
>> +       /* GPU IP's information of the SOC */
>> +       if (coredump->adev) {
>> +               drm_printf(&p, "\nIP Information\n");
>> +               drm_printf(&p, "SOC Family: %d\n", coredump->adev->family);
>> +               drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id);
>> +
>> +               for (int i = 0; i < coredump->adev->num_ip_blocks; i++) {
>> +                       struct amdgpu_ip_block *ip =
>> +                               &coredump->adev->ip_blocks[i];
>> +                       drm_printf(&p, "IP type: %d IP name: %s\n",
>> +                                  ip->version->type,
>> +                                  ip->version->funcs->name);
>> +                       drm_printf(&p, "IP version: (%d,%d,%d)\n\n",
>> +                                  ip->version->major,
>> +                                  ip->version->minor,
>> +                                  ip->version->rev);
>> +               }
>> +       }
> I think the IP discovery table would be more useful.  Either walk the
> adev->ip_versions structure, or just include the IP discovery binary.

I did explore the adev->ip_versions and if i just go through the array 
it doesn't give any useful information directly.
There are no ways to find directly from adev->ip_versions below things 
until i also reparse the discovery binary again like done the discovery 
amdgpu_discovery_reg_base_init and walk through the headers of various 
ips using discovery binary.
a. Which IP is available on soc or not.
b. How many instances are there
Also i again have to change back to major, minor and rev convention for 
this information to be useful. I am exploring it more if i find some 
other information i will update.

adev->ip_block[] is derived from ip discovery only for each block which 
is there on the SOC, so we are not reading information which isnt 
applicable for the soc. We have name , type and version no of the IPs 
available on the soc. If you want i could add no of instances of each IP 
too if you think that's useful information here. Could you share what 
information is missing in this approach so i can include that.

For dumping the IP discovery binary, i dont understand how that 
information would be useful directly and needs to be decoded like we are 
doing in discovery init. Please correct me if my understanding is wrong 
here.
> Alex
>
>> +
>>          if (coredump->ring) {
>>                  drm_printf(&p, "\nRing timed out details\n");
>>                  drm_printf(&p, "IP Type: %d Ring Name: %s\n",
>> --
>> 2.34.1
>>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/2] drm:amdgpu: add firmware information of all IP's
  2024-03-13 20:36   ` Alex Deucher
@ 2024-03-14  5:58     ` Khatri, Sunil
  2024-03-14  6:10       ` Sharma, Shashank
  0 siblings, 1 reply; 15+ messages in thread
From: Khatri, Sunil @ 2024-03-14  5:58 UTC (permalink / raw)
  To: Alex Deucher, Sunil Khatri
  Cc: Alex Deucher, Christian König, Shashank Sharma, amd-gfx,
	dri-devel, linux-kernel


On 3/14/2024 2:06 AM, Alex Deucher wrote:
> On Tue, Mar 12, 2024 at 8:42 AM Sunil Khatri <sunil.khatri@amd.com> wrote:
>> Add firmware version information of each
>> IP and each instance where applicable.
>>
> Is there a way we can share some common code with devcoredump,
> debugfs, and the info IOCTL?  All three places need to query this
> information and the same logic is repeated in each case.

Hello Alex,

Yes you re absolutely right the same information is being retrieved 
again as done in debugfs. I can reorganize the code so same function 
could be used by debugfs and devcoredump but this is exactly what i 
tried to avoid here. I did try to use minimum functionality in 
devcoredump without shuffling a lot of code here and there.

Also our devcoredump is implemented in amdgpu_reset.c and not all the 
information is available here and there we might have to include lot of 
header and cross functions in amdgpu_reset until we want a dedicated 
file for devcoredump.

Info IOCTL does have a lot of information which also is in pipeline to 
be dumped but this if we want to reuse the functionality of IOCTL we 
need to reorganize a lot of code.

If that is the need of the hour i could work on that. Please let me know.

This is my suggestion if it makes sense:

1. If we want to reuse a lot of functionality then we need to modularize 
some of the functions further so they could be consumed directly by 
devcoredump.
2. We should also have a dedicated file for devcoredump.c/.h so its easy 
to include headers of needed functionality cleanly and easy to expand 
devcoredump.
3. based on the priority and importance of this task we can add 
information else some repetition is a real possibility.

>
> Alex
>
>
>> Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 122 ++++++++++++++++++++++
>>   1 file changed, 122 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
>> index 611fdb90a1fc..78ddc58aef67 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
>> @@ -168,6 +168,123 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
>>   {
>>   }
>>   #else
>> +static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev, struct drm_printer *p)
>> +{
>> +       uint32_t version;
>> +       uint32_t feature;
>> +       uint8_t smu_program, smu_major, smu_minor, smu_debug;
>> +
>> +       drm_printf(p, "VCE feature version: %u, fw version: 0x%08x\n",
>> +                  adev->vce.fb_version, adev->vce.fw_version);
>> +       drm_printf(p, "UVD feature version: %u, fw version: 0x%08x\n",
>> +                  0, adev->uvd.fw_version);
>> +       drm_printf(p, "GMC feature version: %u, fw version: 0x%08x\n",
>> +                  0, adev->gmc.fw_version);
>> +       drm_printf(p, "ME feature version: %u, fw version: 0x%08x\n",
>> +                  adev->gfx.me_feature_version, adev->gfx.me_fw_version);
>> +       drm_printf(p, "PFP feature version: %u, fw version: 0x%08x\n",
>> +                  adev->gfx.pfp_feature_version, adev->gfx.pfp_fw_version);
>> +       drm_printf(p, "CE feature version: %u, fw version: 0x%08x\n",
>> +                  adev->gfx.ce_feature_version, adev->gfx.ce_fw_version);
>> +       drm_printf(p, "RLC feature version: %u, fw version: 0x%08x\n",
>> +                  adev->gfx.rlc_feature_version, adev->gfx.rlc_fw_version);
>> +
>> +       drm_printf(p, "RLC SRLC feature version: %u, fw version: 0x%08x\n",
>> +                  adev->gfx.rlc_srlc_feature_version,
>> +                  adev->gfx.rlc_srlc_fw_version);
>> +       drm_printf(p, "RLC SRLG feature version: %u, fw version: 0x%08x\n",
>> +                  adev->gfx.rlc_srlg_feature_version,
>> +                  adev->gfx.rlc_srlg_fw_version);
>> +       drm_printf(p, "RLC SRLS feature version: %u, fw version: 0x%08x\n",
>> +                  adev->gfx.rlc_srls_feature_version,
>> +                  adev->gfx.rlc_srls_fw_version);
>> +       drm_printf(p, "RLCP feature version: %u, fw version: 0x%08x\n",
>> +                  adev->gfx.rlcp_ucode_feature_version,
>> +                  adev->gfx.rlcp_ucode_version);
>> +       drm_printf(p, "RLCV feature version: %u, fw version: 0x%08x\n",
>> +                  adev->gfx.rlcv_ucode_feature_version,
>> +                  adev->gfx.rlcv_ucode_version);
>> +       drm_printf(p, "MEC feature version: %u, fw version: 0x%08x\n",
>> +                  adev->gfx.mec_feature_version,
>> +                  adev->gfx.mec_fw_version);
>> +
>> +       if (adev->gfx.mec2_fw)
>> +               drm_printf(p,
>> +                          "MEC2 feature version: %u, fw version: 0x%08x\n",
>> +                          adev->gfx.mec2_feature_version,
>> +                          adev->gfx.mec2_fw_version);
>> +
>> +       drm_printf(p, "IMU feature version: %u, fw version: 0x%08x\n",
>> +                  0, adev->gfx.imu_fw_version);
>> +       drm_printf(p, "PSP SOS feature version: %u, fw version: 0x%08x\n",
>> +                  adev->psp.sos.feature_version,
>> +                  adev->psp.sos.fw_version);
>> +       drm_printf(p, "PSP ASD feature version: %u, fw version: 0x%08x\n",
>> +                  adev->psp.asd_context.bin_desc.feature_version,
>> +                  adev->psp.asd_context.bin_desc.fw_version);
>> +
>> +       drm_printf(p, "TA XGMI feature version: 0x%08x, fw version: 0x%08x\n",
>> +                  adev->psp.xgmi_context.context.bin_desc.feature_version,
>> +                  adev->psp.xgmi_context.context.bin_desc.fw_version);
>> +       drm_printf(p, "TA RAS feature version: 0x%08x, fw version: 0x%08x\n",
>> +                  adev->psp.ras_context.context.bin_desc.feature_version,
>> +                  adev->psp.ras_context.context.bin_desc.fw_version);
>> +       drm_printf(p, "TA HDCP feature version: 0x%08x, fw version: 0x%08x\n",
>> +                  adev->psp.hdcp_context.context.bin_desc.feature_version,
>> +                  adev->psp.hdcp_context.context.bin_desc.fw_version);
>> +       drm_printf(p, "TA DTM feature version: 0x%08x, fw version: 0x%08x\n",
>> +                  adev->psp.dtm_context.context.bin_desc.feature_version,
>> +                  adev->psp.dtm_context.context.bin_desc.fw_version);
>> +       drm_printf(p, "TA RAP feature version: 0x%08x, fw version: 0x%08x\n",
>> +                  adev->psp.rap_context.context.bin_desc.feature_version,
>> +                  adev->psp.rap_context.context.bin_desc.fw_version);
>> +       drm_printf(p, "TA SECURE DISPLAY feature version: 0x%08x, fw version: 0x%08x\n",
>> +               adev->psp.securedisplay_context.context.bin_desc.feature_version,
>> +               adev->psp.securedisplay_context.context.bin_desc.fw_version);
>> +
>> +       /* SMC firmware */
>> +       version = adev->pm.fw_version;
>> +
>> +       smu_program = (version >> 24) & 0xff;
>> +       smu_major = (version >> 16) & 0xff;
>> +       smu_minor = (version >> 8) & 0xff;
>> +       smu_debug = (version >> 0) & 0xff;
>> +       drm_printf(p, "SMC feature version: %u, program: %d, fw version: 0x%08x (%d.%d.%d)\n",
>> +                  0, smu_program, version, smu_major, smu_minor, smu_debug);
>> +
>> +       /* SDMA firmware */
>> +       for (int i = 0; i < adev->sdma.num_instances; i++) {
>> +               drm_printf(p, "SDMA%d feature version: %u, firmware version: 0x%08x\n",
>> +                          i, adev->sdma.instance[i].feature_version,
>> +                          adev->sdma.instance[i].fw_version);
>> +       }
>> +
>> +       drm_printf(p, "VCN feature version: %u, fw version: 0x%08x\n",
>> +                  0, adev->vcn.fw_version);
>> +       drm_printf(p, "DMCU feature version: %u, fw version: 0x%08x\n",
>> +                  0, adev->dm.dmcu_fw_version);
>> +       drm_printf(p, "DMCUB feature version: %u, fw version: 0x%08x\n",
>> +                  0, adev->dm.dmcub_fw_version);
>> +       drm_printf(p, "PSP TOC feature version: %u, fw version: 0x%08x\n",
>> +                  adev->psp.toc.feature_version, adev->psp.toc.fw_version);
>> +
>> +       version = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK;
>> +       feature = (adev->mes.kiq_version & AMDGPU_MES_FEAT_VERSION_MASK)
>> +                                       >> AMDGPU_MES_FEAT_VERSION_SHIFT;
>> +       drm_printf(p, "MES_KIQ feature version: %u, fw version: 0x%08x\n",
>> +                  feature, version);
>> +
>> +       version = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
>> +       feature = (adev->mes.sched_version & AMDGPU_MES_FEAT_VERSION_MASK)
>> +                                       >> AMDGPU_MES_FEAT_VERSION_SHIFT;
>> +       drm_printf(p, "MES feature version: %u, fw version: 0x%08x\n",
>> +                  feature, version);
>> +
>> +       drm_printf(p, "VPE feature version: %u, fw version: 0x%08x\n",
>> +                  adev->vpe.feature_version, adev->vpe.fw_version);
>> +
>> +}
>> +
>>   static ssize_t
>>   amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
>>                          void *data, size_t datalen)
>> @@ -215,6 +332,11 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
>>                  }
>>          }
>>
>> +       if (coredump->adev) {
>> +               drm_printf(&p, "IP Firmwares\n");
>> +               amdgpu_devcoredump_fw_info(coredump->adev, &p);
>> +       }
>> +
>>          if (coredump->ring) {
>>                  drm_printf(&p, "\nRing timed out details\n");
>>                  drm_printf(&p, "IP Type: %d Ring Name: %s\n",
>> --
>> 2.34.1
>>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/2] drm:amdgpu: add firmware information of all IP's
  2024-03-14  5:58     ` Khatri, Sunil
@ 2024-03-14  6:10       ` Sharma, Shashank
  2024-03-14  9:52         ` Khatri, Sunil
  2024-03-14 13:27         ` Alex Deucher
  0 siblings, 2 replies; 15+ messages in thread
From: Sharma, Shashank @ 2024-03-14  6:10 UTC (permalink / raw)
  To: Khatri, Sunil, Alex Deucher, Sunil Khatri
  Cc: Alex Deucher, Christian König, amd-gfx, dri-devel, linux-kernel


On 14/03/2024 06:58, Khatri, Sunil wrote:
>
> On 3/14/2024 2:06 AM, Alex Deucher wrote:
>> On Tue, Mar 12, 2024 at 8:42 AM Sunil Khatri <sunil.khatri@amd.com> 
>> wrote:
>>> Add firmware version information of each
>>> IP and each instance where applicable.
>>>
>> Is there a way we can share some common code with devcoredump,
>> debugfs, and the info IOCTL?  All three places need to query this
>> information and the same logic is repeated in each case.
>
> Hello Alex,
>
> Yes you re absolutely right the same information is being retrieved 
> again as done in debugfs. I can reorganize the code so same function 
> could be used by debugfs and devcoredump but this is exactly what i 
> tried to avoid here. I did try to use minimum functionality in 
> devcoredump without shuffling a lot of code here and there.
>
> Also our devcoredump is implemented in amdgpu_reset.c and not all the 
> information is available here and there we might have to include lot 
> of header and cross functions in amdgpu_reset until we want a 
> dedicated file for devcoredump.


I think Alex is suggesting to have one common backend to generate all 
the core debug info, and then different wrapper functions which can pack 
this raw info into the packets aligned with respective infra like 
devcore/debugfs/info IOCTL, which seems like a good idea to me.

If you think you need a new file for this backend it should be fine.


something like:

amdgpu_debug_core.c::

struct amdgpu_core_debug_info {

/* Superset of all the info you are collecting from HW */

};

- amdgpu_debug_generate_core_info

{

/* This function collects the core debug info from HW and saves in 
amdgpu_core_debug_info,

   we can update this periodically regardless of a request */

}

and then:

devcore_info *amdgpu_debug_pack_for_devcore(core_debug_info)

{

/* convert core debug info into devcore aligned format/data */

}

ioctl_info *amdgpu_debug_pack_for_info_ioctl(core_debug_info)

{

/* convert core debug info into info IOCTL aligned format/data */

}

debugfs_info *amdgpu_debug_pack_for_debugfs(core_debug_info)

{

/* convert core debug info into debugfs aligned format/data */

}

- Shashank

>
>
>
> Info IOCTL does have a lot of information which also is in pipeline to 
> be dumped but this if we want to reuse the functionality of IOCTL we 
> need to reorganize a lot of code.
>
> If that is the need of the hour i could work on that. Please let me know.
>
> This is my suggestion if it makes sense:
>
> 1. If we want to reuse a lot of functionality then we need to 
> modularize some of the functions further so they could be consumed 
> directly by devcoredump.
> 2. We should also have a dedicated file for devcoredump.c/.h so its 
> easy to include headers of needed functionality cleanly and easy to 
> expand devcoredump.
> 3. based on the priority and importance of this task we can add 
> information else some repetition is a real possibility.
>
>>
>> Alex
>>
>>
>>> Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 122 
>>> ++++++++++++++++++++++
>>>   1 file changed, 122 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
>>> index 611fdb90a1fc..78ddc58aef67 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
>>> @@ -168,6 +168,123 @@ void amdgpu_coredump(struct amdgpu_device 
>>> *adev, bool vram_lost,
>>>   {
>>>   }
>>>   #else
>>> +static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev, 
>>> struct drm_printer *p)
>>> +{
>>> +       uint32_t version;
>>> +       uint32_t feature;
>>> +       uint8_t smu_program, smu_major, smu_minor, smu_debug;
>>> +
>>> +       drm_printf(p, "VCE feature version: %u, fw version: 0x%08x\n",
>>> +                  adev->vce.fb_version, adev->vce.fw_version);
>>> +       drm_printf(p, "UVD feature version: %u, fw version: 0x%08x\n",
>>> +                  0, adev->uvd.fw_version);
>>> +       drm_printf(p, "GMC feature version: %u, fw version: 0x%08x\n",
>>> +                  0, adev->gmc.fw_version);
>>> +       drm_printf(p, "ME feature version: %u, fw version: 0x%08x\n",
>>> +                  adev->gfx.me_feature_version, 
>>> adev->gfx.me_fw_version);
>>> +       drm_printf(p, "PFP feature version: %u, fw version: 0x%08x\n",
>>> +                  adev->gfx.pfp_feature_version, 
>>> adev->gfx.pfp_fw_version);
>>> +       drm_printf(p, "CE feature version: %u, fw version: 0x%08x\n",
>>> +                  adev->gfx.ce_feature_version, 
>>> adev->gfx.ce_fw_version);
>>> +       drm_printf(p, "RLC feature version: %u, fw version: 0x%08x\n",
>>> +                  adev->gfx.rlc_feature_version, 
>>> adev->gfx.rlc_fw_version);
>>> +
>>> +       drm_printf(p, "RLC SRLC feature version: %u, fw version: 
>>> 0x%08x\n",
>>> +                  adev->gfx.rlc_srlc_feature_version,
>>> +                  adev->gfx.rlc_srlc_fw_version);
>>> +       drm_printf(p, "RLC SRLG feature version: %u, fw version: 
>>> 0x%08x\n",
>>> +                  adev->gfx.rlc_srlg_feature_version,
>>> +                  adev->gfx.rlc_srlg_fw_version);
>>> +       drm_printf(p, "RLC SRLS feature version: %u, fw version: 
>>> 0x%08x\n",
>>> +                  adev->gfx.rlc_srls_feature_version,
>>> +                  adev->gfx.rlc_srls_fw_version);
>>> +       drm_printf(p, "RLCP feature version: %u, fw version: 0x%08x\n",
>>> +                  adev->gfx.rlcp_ucode_feature_version,
>>> +                  adev->gfx.rlcp_ucode_version);
>>> +       drm_printf(p, "RLCV feature version: %u, fw version: 0x%08x\n",
>>> +                  adev->gfx.rlcv_ucode_feature_version,
>>> +                  adev->gfx.rlcv_ucode_version);
>>> +       drm_printf(p, "MEC feature version: %u, fw version: 0x%08x\n",
>>> +                  adev->gfx.mec_feature_version,
>>> +                  adev->gfx.mec_fw_version);
>>> +
>>> +       if (adev->gfx.mec2_fw)
>>> +               drm_printf(p,
>>> +                          "MEC2 feature version: %u, fw version: 
>>> 0x%08x\n",
>>> +                          adev->gfx.mec2_feature_version,
>>> +                          adev->gfx.mec2_fw_version);
>>> +
>>> +       drm_printf(p, "IMU feature version: %u, fw version: 0x%08x\n",
>>> +                  0, adev->gfx.imu_fw_version);
>>> +       drm_printf(p, "PSP SOS feature version: %u, fw version: 
>>> 0x%08x\n",
>>> +                  adev->psp.sos.feature_version,
>>> +                  adev->psp.sos.fw_version);
>>> +       drm_printf(p, "PSP ASD feature version: %u, fw version: 
>>> 0x%08x\n",
>>> + adev->psp.asd_context.bin_desc.feature_version,
>>> + adev->psp.asd_context.bin_desc.fw_version);
>>> +
>>> +       drm_printf(p, "TA XGMI feature version: 0x%08x, fw version: 
>>> 0x%08x\n",
>>> + adev->psp.xgmi_context.context.bin_desc.feature_version,
>>> + adev->psp.xgmi_context.context.bin_desc.fw_version);
>>> +       drm_printf(p, "TA RAS feature version: 0x%08x, fw version: 
>>> 0x%08x\n",
>>> + adev->psp.ras_context.context.bin_desc.feature_version,
>>> + adev->psp.ras_context.context.bin_desc.fw_version);
>>> +       drm_printf(p, "TA HDCP feature version: 0x%08x, fw version: 
>>> 0x%08x\n",
>>> + adev->psp.hdcp_context.context.bin_desc.feature_version,
>>> + adev->psp.hdcp_context.context.bin_desc.fw_version);
>>> +       drm_printf(p, "TA DTM feature version: 0x%08x, fw version: 
>>> 0x%08x\n",
>>> + adev->psp.dtm_context.context.bin_desc.feature_version,
>>> + adev->psp.dtm_context.context.bin_desc.fw_version);
>>> +       drm_printf(p, "TA RAP feature version: 0x%08x, fw version: 
>>> 0x%08x\n",
>>> + adev->psp.rap_context.context.bin_desc.feature_version,
>>> + adev->psp.rap_context.context.bin_desc.fw_version);
>>> +       drm_printf(p, "TA SECURE DISPLAY feature version: 0x%08x, fw 
>>> version: 0x%08x\n",
>>> + adev->psp.securedisplay_context.context.bin_desc.feature_version,
>>> + adev->psp.securedisplay_context.context.bin_desc.fw_version);
>>> +
>>> +       /* SMC firmware */
>>> +       version = adev->pm.fw_version;
>>> +
>>> +       smu_program = (version >> 24) & 0xff;
>>> +       smu_major = (version >> 16) & 0xff;
>>> +       smu_minor = (version >> 8) & 0xff;
>>> +       smu_debug = (version >> 0) & 0xff;
>>> +       drm_printf(p, "SMC feature version: %u, program: %d, fw 
>>> version: 0x%08x (%d.%d.%d)\n",
>>> +                  0, smu_program, version, smu_major, smu_minor, 
>>> smu_debug);
>>> +
>>> +       /* SDMA firmware */
>>> +       for (int i = 0; i < adev->sdma.num_instances; i++) {
>>> +               drm_printf(p, "SDMA%d feature version: %u, firmware 
>>> version: 0x%08x\n",
>>> +                          i, adev->sdma.instance[i].feature_version,
>>> + adev->sdma.instance[i].fw_version);
>>> +       }
>>> +
>>> +       drm_printf(p, "VCN feature version: %u, fw version: 0x%08x\n",
>>> +                  0, adev->vcn.fw_version);
>>> +       drm_printf(p, "DMCU feature version: %u, fw version: 0x%08x\n",
>>> +                  0, adev->dm.dmcu_fw_version);
>>> +       drm_printf(p, "DMCUB feature version: %u, fw version: 
>>> 0x%08x\n",
>>> +                  0, adev->dm.dmcub_fw_version);
>>> +       drm_printf(p, "PSP TOC feature version: %u, fw version: 
>>> 0x%08x\n",
>>> +                  adev->psp.toc.feature_version, 
>>> adev->psp.toc.fw_version);
>>> +
>>> +       version = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK;
>>> +       feature = (adev->mes.kiq_version & 
>>> AMDGPU_MES_FEAT_VERSION_MASK)
>>> +                                       >> 
>>> AMDGPU_MES_FEAT_VERSION_SHIFT;
>>> +       drm_printf(p, "MES_KIQ feature version: %u, fw version: 
>>> 0x%08x\n",
>>> +                  feature, version);
>>> +
>>> +       version = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
>>> +       feature = (adev->mes.sched_version & 
>>> AMDGPU_MES_FEAT_VERSION_MASK)
>>> +                                       >> 
>>> AMDGPU_MES_FEAT_VERSION_SHIFT;
>>> +       drm_printf(p, "MES feature version: %u, fw version: 0x%08x\n",
>>> +                  feature, version);
>>> +
>>> +       drm_printf(p, "VPE feature version: %u, fw version: 0x%08x\n",
>>> +                  adev->vpe.feature_version, adev->vpe.fw_version);
>>> +
>>> +}
>>> +
>>>   static ssize_t
>>>   amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
>>>                          void *data, size_t datalen)
>>> @@ -215,6 +332,11 @@ amdgpu_devcoredump_read(char *buffer, loff_t 
>>> offset, size_t count,
>>>                  }
>>>          }
>>>
>>> +       if (coredump->adev) {
>>> +               drm_printf(&p, "IP Firmwares\n");
>>> +               amdgpu_devcoredump_fw_info(coredump->adev, &p);
>>> +       }
>>> +
>>>          if (coredump->ring) {
>>>                  drm_printf(&p, "\nRing timed out details\n");
>>>                  drm_printf(&p, "IP Type: %d Ring Name: %s\n",
>>> -- 
>>> 2.34.1
>>>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/2] drm:amdgpu: add firmware information of all IP's
  2024-03-14  6:10       ` Sharma, Shashank
@ 2024-03-14  9:52         ` Khatri, Sunil
  2024-03-14 13:27         ` Alex Deucher
  1 sibling, 0 replies; 15+ messages in thread
From: Khatri, Sunil @ 2024-03-14  9:52 UTC (permalink / raw)
  To: Sharma, Shashank, Alex Deucher, Sunil Khatri
  Cc: Alex Deucher, Christian König, amd-gfx, dri-devel, linux-kernel


On 3/14/2024 11:40 AM, Sharma, Shashank wrote:
>
> On 14/03/2024 06:58, Khatri, Sunil wrote:
>>
>> On 3/14/2024 2:06 AM, Alex Deucher wrote:
>>> On Tue, Mar 12, 2024 at 8:42 AM Sunil Khatri <sunil.khatri@amd.com> 
>>> wrote:
>>>> Add firmware version information of each
>>>> IP and each instance where applicable.
>>>>
>>> Is there a way we can share some common code with devcoredump,
>>> debugfs, and the info IOCTL?  All three places need to query this
>>> information and the same logic is repeated in each case.
>>
>> Hello Alex,
>>
>> Yes you re absolutely right the same information is being retrieved 
>> again as done in debugfs. I can reorganize the code so same function 
>> could be used by debugfs and devcoredump but this is exactly what i 
>> tried to avoid here. I did try to use minimum functionality in 
>> devcoredump without shuffling a lot of code here and there.
>>
>> Also our devcoredump is implemented in amdgpu_reset.c and not all the 
>> information is available here and there we might have to include lot 
>> of header and cross functions in amdgpu_reset until we want a 
>> dedicated file for devcoredump.
>
>
> I think Alex is suggesting to have one common backend to generate all 
> the core debug info, and then different wrapper functions which can 
> pack this raw info into the packets aligned with respective infra like 
> devcore/debugfs/info IOCTL, which seems like a good idea to me.
>
> If you think you need a new file for this backend it should be fine.
My suggestion was on same lines that if we want to use the same infra to 
access information from different parts of the code then we need to 
reorganize. And at same time since there is quite some data we are 
planning to add in devcoredump so i recommend to have a dedicated .c/.h 
instead of using amdgpu_reset.c so a clean include is easy to maintain.

Once Alex confirms it i can start working on design and what all 
information we need on this.

Regards
Sunil

>
> something like:
>
> amdgpu_debug_core.c::
>
> struct amdgpu_core_debug_info {
>
> /* Superset of all the info you are collecting from HW */
>
> };
>
> - amdgpu_debug_generate_core_info
>
> {
>
> /* This function collects the core debug info from HW and saves in 
> amdgpu_core_debug_info,
>
>   we can update this periodically regardless of a request */
>
> }
>
> and then:
>
> devcore_info *amdgpu_debug_pack_for_devcore(core_debug_info)
>
> {
>
> /* convert core debug info into devcore aligned format/data */
>
> }
>
> ioctl_info *amdgpu_debug_pack_for_info_ioctl(core_debug_info)
>
> {
>
> /* convert core debug info into info IOCTL aligned format/data */
>
> }
>
> debugfs_info *amdgpu_debug_pack_for_debugfs(core_debug_info)
>
> {
>
> /* convert core debug info into debugfs aligned format/data */
>
> }
>
> - Shashank
>
>>
>>
>>
>> Info IOCTL does have a lot of information which also is in pipeline 
>> to be dumped but this if we want to reuse the functionality of IOCTL 
>> we need to reorganize a lot of code.
>>
>> If that is the need of the hour i could work on that. Please let me 
>> know.
>>
>> This is my suggestion if it makes sense:
>>
>> 1. If we want to reuse a lot of functionality then we need to 
>> modularize some of the functions further so they could be consumed 
>> directly by devcoredump.
>> 2. We should also have a dedicated file for devcoredump.c/.h so its 
>> easy to include headers of needed functionality cleanly and easy to 
>> expand devcoredump.
>> 3. based on the priority and importance of this task we can add 
>> information else some repetition is a real possibility.
>>
>>>
>>> Alex
>>>
>>>
>>>> Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 122 
>>>> ++++++++++++++++++++++
>>>>   1 file changed, 122 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
>>>> index 611fdb90a1fc..78ddc58aef67 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
>>>> @@ -168,6 +168,123 @@ void amdgpu_coredump(struct amdgpu_device 
>>>> *adev, bool vram_lost,
>>>>   {
>>>>   }
>>>>   #else
>>>> +static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev, 
>>>> struct drm_printer *p)
>>>> +{
>>>> +       uint32_t version;
>>>> +       uint32_t feature;
>>>> +       uint8_t smu_program, smu_major, smu_minor, smu_debug;
>>>> +
>>>> +       drm_printf(p, "VCE feature version: %u, fw version: 0x%08x\n",
>>>> +                  adev->vce.fb_version, adev->vce.fw_version);
>>>> +       drm_printf(p, "UVD feature version: %u, fw version: 0x%08x\n",
>>>> +                  0, adev->uvd.fw_version);
>>>> +       drm_printf(p, "GMC feature version: %u, fw version: 0x%08x\n",
>>>> +                  0, adev->gmc.fw_version);
>>>> +       drm_printf(p, "ME feature version: %u, fw version: 0x%08x\n",
>>>> +                  adev->gfx.me_feature_version, 
>>>> adev->gfx.me_fw_version);
>>>> +       drm_printf(p, "PFP feature version: %u, fw version: 0x%08x\n",
>>>> +                  adev->gfx.pfp_feature_version, 
>>>> adev->gfx.pfp_fw_version);
>>>> +       drm_printf(p, "CE feature version: %u, fw version: 0x%08x\n",
>>>> +                  adev->gfx.ce_feature_version, 
>>>> adev->gfx.ce_fw_version);
>>>> +       drm_printf(p, "RLC feature version: %u, fw version: 0x%08x\n",
>>>> +                  adev->gfx.rlc_feature_version, 
>>>> adev->gfx.rlc_fw_version);
>>>> +
>>>> +       drm_printf(p, "RLC SRLC feature version: %u, fw version: 
>>>> 0x%08x\n",
>>>> +                  adev->gfx.rlc_srlc_feature_version,
>>>> +                  adev->gfx.rlc_srlc_fw_version);
>>>> +       drm_printf(p, "RLC SRLG feature version: %u, fw version: 
>>>> 0x%08x\n",
>>>> +                  adev->gfx.rlc_srlg_feature_version,
>>>> +                  adev->gfx.rlc_srlg_fw_version);
>>>> +       drm_printf(p, "RLC SRLS feature version: %u, fw version: 
>>>> 0x%08x\n",
>>>> +                  adev->gfx.rlc_srls_feature_version,
>>>> +                  adev->gfx.rlc_srls_fw_version);
>>>> +       drm_printf(p, "RLCP feature version: %u, fw version: 
>>>> 0x%08x\n",
>>>> +                  adev->gfx.rlcp_ucode_feature_version,
>>>> +                  adev->gfx.rlcp_ucode_version);
>>>> +       drm_printf(p, "RLCV feature version: %u, fw version: 
>>>> 0x%08x\n",
>>>> +                  adev->gfx.rlcv_ucode_feature_version,
>>>> +                  adev->gfx.rlcv_ucode_version);
>>>> +       drm_printf(p, "MEC feature version: %u, fw version: 0x%08x\n",
>>>> +                  adev->gfx.mec_feature_version,
>>>> +                  adev->gfx.mec_fw_version);
>>>> +
>>>> +       if (adev->gfx.mec2_fw)
>>>> +               drm_printf(p,
>>>> +                          "MEC2 feature version: %u, fw version: 
>>>> 0x%08x\n",
>>>> + adev->gfx.mec2_feature_version,
>>>> +                          adev->gfx.mec2_fw_version);
>>>> +
>>>> +       drm_printf(p, "IMU feature version: %u, fw version: 0x%08x\n",
>>>> +                  0, adev->gfx.imu_fw_version);
>>>> +       drm_printf(p, "PSP SOS feature version: %u, fw version: 
>>>> 0x%08x\n",
>>>> +                  adev->psp.sos.feature_version,
>>>> +                  adev->psp.sos.fw_version);
>>>> +       drm_printf(p, "PSP ASD feature version: %u, fw version: 
>>>> 0x%08x\n",
>>>> + adev->psp.asd_context.bin_desc.feature_version,
>>>> + adev->psp.asd_context.bin_desc.fw_version);
>>>> +
>>>> +       drm_printf(p, "TA XGMI feature version: 0x%08x, fw version: 
>>>> 0x%08x\n",
>>>> + adev->psp.xgmi_context.context.bin_desc.feature_version,
>>>> + adev->psp.xgmi_context.context.bin_desc.fw_version);
>>>> +       drm_printf(p, "TA RAS feature version: 0x%08x, fw version: 
>>>> 0x%08x\n",
>>>> + adev->psp.ras_context.context.bin_desc.feature_version,
>>>> + adev->psp.ras_context.context.bin_desc.fw_version);
>>>> +       drm_printf(p, "TA HDCP feature version: 0x%08x, fw version: 
>>>> 0x%08x\n",
>>>> + adev->psp.hdcp_context.context.bin_desc.feature_version,
>>>> + adev->psp.hdcp_context.context.bin_desc.fw_version);
>>>> +       drm_printf(p, "TA DTM feature version: 0x%08x, fw version: 
>>>> 0x%08x\n",
>>>> + adev->psp.dtm_context.context.bin_desc.feature_version,
>>>> + adev->psp.dtm_context.context.bin_desc.fw_version);
>>>> +       drm_printf(p, "TA RAP feature version: 0x%08x, fw version: 
>>>> 0x%08x\n",
>>>> + adev->psp.rap_context.context.bin_desc.feature_version,
>>>> + adev->psp.rap_context.context.bin_desc.fw_version);
>>>> +       drm_printf(p, "TA SECURE DISPLAY feature version: 0x%08x, 
>>>> fw version: 0x%08x\n",
>>>> + adev->psp.securedisplay_context.context.bin_desc.feature_version,
>>>> + adev->psp.securedisplay_context.context.bin_desc.fw_version);
>>>> +
>>>> +       /* SMC firmware */
>>>> +       version = adev->pm.fw_version;
>>>> +
>>>> +       smu_program = (version >> 24) & 0xff;
>>>> +       smu_major = (version >> 16) & 0xff;
>>>> +       smu_minor = (version >> 8) & 0xff;
>>>> +       smu_debug = (version >> 0) & 0xff;
>>>> +       drm_printf(p, "SMC feature version: %u, program: %d, fw 
>>>> version: 0x%08x (%d.%d.%d)\n",
>>>> +                  0, smu_program, version, smu_major, smu_minor, 
>>>> smu_debug);
>>>> +
>>>> +       /* SDMA firmware */
>>>> +       for (int i = 0; i < adev->sdma.num_instances; i++) {
>>>> +               drm_printf(p, "SDMA%d feature version: %u, firmware 
>>>> version: 0x%08x\n",
>>>> +                          i, adev->sdma.instance[i].feature_version,
>>>> + adev->sdma.instance[i].fw_version);
>>>> +       }
>>>> +
>>>> +       drm_printf(p, "VCN feature version: %u, fw version: 0x%08x\n",
>>>> +                  0, adev->vcn.fw_version);
>>>> +       drm_printf(p, "DMCU feature version: %u, fw version: 
>>>> 0x%08x\n",
>>>> +                  0, adev->dm.dmcu_fw_version);
>>>> +       drm_printf(p, "DMCUB feature version: %u, fw version: 
>>>> 0x%08x\n",
>>>> +                  0, adev->dm.dmcub_fw_version);
>>>> +       drm_printf(p, "PSP TOC feature version: %u, fw version: 
>>>> 0x%08x\n",
>>>> +                  adev->psp.toc.feature_version, 
>>>> adev->psp.toc.fw_version);
>>>> +
>>>> +       version = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK;
>>>> +       feature = (adev->mes.kiq_version & 
>>>> AMDGPU_MES_FEAT_VERSION_MASK)
>>>> +                                       >> 
>>>> AMDGPU_MES_FEAT_VERSION_SHIFT;
>>>> +       drm_printf(p, "MES_KIQ feature version: %u, fw version: 
>>>> 0x%08x\n",
>>>> +                  feature, version);
>>>> +
>>>> +       version = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
>>>> +       feature = (adev->mes.sched_version & 
>>>> AMDGPU_MES_FEAT_VERSION_MASK)
>>>> +                                       >> 
>>>> AMDGPU_MES_FEAT_VERSION_SHIFT;
>>>> +       drm_printf(p, "MES feature version: %u, fw version: 0x%08x\n",
>>>> +                  feature, version);
>>>> +
>>>> +       drm_printf(p, "VPE feature version: %u, fw version: 0x%08x\n",
>>>> +                  adev->vpe.feature_version, adev->vpe.fw_version);
>>>> +
>>>> +}
>>>> +
>>>>   static ssize_t
>>>>   amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
>>>>                          void *data, size_t datalen)
>>>> @@ -215,6 +332,11 @@ amdgpu_devcoredump_read(char *buffer, loff_t 
>>>> offset, size_t count,
>>>>                  }
>>>>          }
>>>>
>>>> +       if (coredump->adev) {
>>>> +               drm_printf(&p, "IP Firmwares\n");
>>>> + amdgpu_devcoredump_fw_info(coredump->adev, &p);
>>>> +       }
>>>> +
>>>>          if (coredump->ring) {
>>>>                  drm_printf(&p, "\nRing timed out details\n");
>>>>                  drm_printf(&p, "IP Type: %d Ring Name: %s\n",
>>>> -- 
>>>> 2.34.1
>>>>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/2] drm:amdgpu: add firmware information of all IP's
  2024-03-14  6:10       ` Sharma, Shashank
  2024-03-14  9:52         ` Khatri, Sunil
@ 2024-03-14 13:27         ` Alex Deucher
  1 sibling, 0 replies; 15+ messages in thread
From: Alex Deucher @ 2024-03-14 13:27 UTC (permalink / raw)
  To: Sharma, Shashank
  Cc: Khatri, Sunil, Sunil Khatri, Alex Deucher, Christian König,
	amd-gfx, dri-devel, linux-kernel

On Thu, Mar 14, 2024 at 2:10 AM Sharma, Shashank
<shashank.sharma@amd.com> wrote:
>
>
> On 14/03/2024 06:58, Khatri, Sunil wrote:
> >
> > On 3/14/2024 2:06 AM, Alex Deucher wrote:
> >> On Tue, Mar 12, 2024 at 8:42 AM Sunil Khatri <sunil.khatri@amd.com>
> >> wrote:
> >>> Add firmware version information of each
> >>> IP and each instance where applicable.
> >>>
> >> Is there a way we can share some common code with devcoredump,
> >> debugfs, and the info IOCTL?  All three places need to query this
> >> information and the same logic is repeated in each case.
> >
> > Hello Alex,
> >
> > Yes you re absolutely right the same information is being retrieved
> > again as done in debugfs. I can reorganize the code so same function
> > could be used by debugfs and devcoredump but this is exactly what i
> > tried to avoid here. I did try to use minimum functionality in
> > devcoredump without shuffling a lot of code here and there.
> >
> > Also our devcoredump is implemented in amdgpu_reset.c and not all the
> > information is available here and there we might have to include lot
> > of header and cross functions in amdgpu_reset until we want a
> > dedicated file for devcoredump.
>
>
> I think Alex is suggesting to have one common backend to generate all
> the core debug info, and then different wrapper functions which can pack
> this raw info into the packets aligned with respective infra like
> devcore/debugfs/info IOCTL, which seems like a good idea to me.

Right, something like this.  I'm trying to avoid having to touch
several places every time we add a new firmware type or TA, etc.
Maybe something like an array of valid firmwares for each device and
then we can just walk the array and call a helper function to fetch
the firmware versions and name strings for the requested type.  Then
each use case can just call the helpers to get what it needs.

Alex

>
> If you think you need a new file for this backend it should be fine.
>
>
> something like:
>
> amdgpu_debug_core.c::
>
> struct amdgpu_core_debug_info {
>
> /* Superset of all the info you are collecting from HW */
>
> };
>
> - amdgpu_debug_generate_core_info
>
> {
>
> /* This function collects the core debug info from HW and saves in
> amdgpu_core_debug_info,
>
>    we can update this periodically regardless of a request */
>
> }
>
> and then:
>
> devcore_info *amdgpu_debug_pack_for_devcore(core_debug_info)
>
> {
>
> /* convert core debug info into devcore aligned format/data */
>
> }
>
> ioctl_info *amdgpu_debug_pack_for_info_ioctl(core_debug_info)
>
> {
>
> /* convert core debug info into info IOCTL aligned format/data */
>
> }
>
> debugfs_info *amdgpu_debug_pack_for_debugfs(core_debug_info)
>
> {
>
> /* convert core debug info into debugfs aligned format/data */
>
> }
>
> - Shashank
>
> >
> >
> >
> > Info IOCTL does have a lot of information which also is in pipeline to
> > be dumped but this if we want to reuse the functionality of IOCTL we
> > need to reorganize a lot of code.
> >
> > If that is the need of the hour i could work on that. Please let me know.
> >
> > This is my suggestion if it makes sense:
> >
> > 1. If we want to reuse a lot of functionality then we need to
> > modularize some of the functions further so they could be consumed
> > directly by devcoredump.
> > 2. We should also have a dedicated file for devcoredump.c/.h so its
> > easy to include headers of needed functionality cleanly and easy to
> > expand devcoredump.
> > 3. based on the priority and importance of this task we can add
> > information else some repetition is a real possibility.
> >
> >>
> >> Alex
> >>
> >>
> >>> Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
> >>> ---
> >>>   drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 122
> >>> ++++++++++++++++++++++
> >>>   1 file changed, 122 insertions(+)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> >>> index 611fdb90a1fc..78ddc58aef67 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> >>> @@ -168,6 +168,123 @@ void amdgpu_coredump(struct amdgpu_device
> >>> *adev, bool vram_lost,
> >>>   {
> >>>   }
> >>>   #else
> >>> +static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev,
> >>> struct drm_printer *p)
> >>> +{
> >>> +       uint32_t version;
> >>> +       uint32_t feature;
> >>> +       uint8_t smu_program, smu_major, smu_minor, smu_debug;
> >>> +
> >>> +       drm_printf(p, "VCE feature version: %u, fw version: 0x%08x\n",
> >>> +                  adev->vce.fb_version, adev->vce.fw_version);
> >>> +       drm_printf(p, "UVD feature version: %u, fw version: 0x%08x\n",
> >>> +                  0, adev->uvd.fw_version);
> >>> +       drm_printf(p, "GMC feature version: %u, fw version: 0x%08x\n",
> >>> +                  0, adev->gmc.fw_version);
> >>> +       drm_printf(p, "ME feature version: %u, fw version: 0x%08x\n",
> >>> +                  adev->gfx.me_feature_version,
> >>> adev->gfx.me_fw_version);
> >>> +       drm_printf(p, "PFP feature version: %u, fw version: 0x%08x\n",
> >>> +                  adev->gfx.pfp_feature_version,
> >>> adev->gfx.pfp_fw_version);
> >>> +       drm_printf(p, "CE feature version: %u, fw version: 0x%08x\n",
> >>> +                  adev->gfx.ce_feature_version,
> >>> adev->gfx.ce_fw_version);
> >>> +       drm_printf(p, "RLC feature version: %u, fw version: 0x%08x\n",
> >>> +                  adev->gfx.rlc_feature_version,
> >>> adev->gfx.rlc_fw_version);
> >>> +
> >>> +       drm_printf(p, "RLC SRLC feature version: %u, fw version:
> >>> 0x%08x\n",
> >>> +                  adev->gfx.rlc_srlc_feature_version,
> >>> +                  adev->gfx.rlc_srlc_fw_version);
> >>> +       drm_printf(p, "RLC SRLG feature version: %u, fw version:
> >>> 0x%08x\n",
> >>> +                  adev->gfx.rlc_srlg_feature_version,
> >>> +                  adev->gfx.rlc_srlg_fw_version);
> >>> +       drm_printf(p, "RLC SRLS feature version: %u, fw version:
> >>> 0x%08x\n",
> >>> +                  adev->gfx.rlc_srls_feature_version,
> >>> +                  adev->gfx.rlc_srls_fw_version);
> >>> +       drm_printf(p, "RLCP feature version: %u, fw version: 0x%08x\n",
> >>> +                  adev->gfx.rlcp_ucode_feature_version,
> >>> +                  adev->gfx.rlcp_ucode_version);
> >>> +       drm_printf(p, "RLCV feature version: %u, fw version: 0x%08x\n",
> >>> +                  adev->gfx.rlcv_ucode_feature_version,
> >>> +                  adev->gfx.rlcv_ucode_version);
> >>> +       drm_printf(p, "MEC feature version: %u, fw version: 0x%08x\n",
> >>> +                  adev->gfx.mec_feature_version,
> >>> +                  adev->gfx.mec_fw_version);
> >>> +
> >>> +       if (adev->gfx.mec2_fw)
> >>> +               drm_printf(p,
> >>> +                          "MEC2 feature version: %u, fw version:
> >>> 0x%08x\n",
> >>> +                          adev->gfx.mec2_feature_version,
> >>> +                          adev->gfx.mec2_fw_version);
> >>> +
> >>> +       drm_printf(p, "IMU feature version: %u, fw version: 0x%08x\n",
> >>> +                  0, adev->gfx.imu_fw_version);
> >>> +       drm_printf(p, "PSP SOS feature version: %u, fw version:
> >>> 0x%08x\n",
> >>> +                  adev->psp.sos.feature_version,
> >>> +                  adev->psp.sos.fw_version);
> >>> +       drm_printf(p, "PSP ASD feature version: %u, fw version:
> >>> 0x%08x\n",
> >>> + adev->psp.asd_context.bin_desc.feature_version,
> >>> + adev->psp.asd_context.bin_desc.fw_version);
> >>> +
> >>> +       drm_printf(p, "TA XGMI feature version: 0x%08x, fw version:
> >>> 0x%08x\n",
> >>> + adev->psp.xgmi_context.context.bin_desc.feature_version,
> >>> + adev->psp.xgmi_context.context.bin_desc.fw_version);
> >>> +       drm_printf(p, "TA RAS feature version: 0x%08x, fw version:
> >>> 0x%08x\n",
> >>> + adev->psp.ras_context.context.bin_desc.feature_version,
> >>> + adev->psp.ras_context.context.bin_desc.fw_version);
> >>> +       drm_printf(p, "TA HDCP feature version: 0x%08x, fw version:
> >>> 0x%08x\n",
> >>> + adev->psp.hdcp_context.context.bin_desc.feature_version,
> >>> + adev->psp.hdcp_context.context.bin_desc.fw_version);
> >>> +       drm_printf(p, "TA DTM feature version: 0x%08x, fw version:
> >>> 0x%08x\n",
> >>> + adev->psp.dtm_context.context.bin_desc.feature_version,
> >>> + adev->psp.dtm_context.context.bin_desc.fw_version);
> >>> +       drm_printf(p, "TA RAP feature version: 0x%08x, fw version:
> >>> 0x%08x\n",
> >>> + adev->psp.rap_context.context.bin_desc.feature_version,
> >>> + adev->psp.rap_context.context.bin_desc.fw_version);
> >>> +       drm_printf(p, "TA SECURE DISPLAY feature version: 0x%08x, fw
> >>> version: 0x%08x\n",
> >>> + adev->psp.securedisplay_context.context.bin_desc.feature_version,
> >>> + adev->psp.securedisplay_context.context.bin_desc.fw_version);
> >>> +
> >>> +       /* SMC firmware */
> >>> +       version = adev->pm.fw_version;
> >>> +
> >>> +       smu_program = (version >> 24) & 0xff;
> >>> +       smu_major = (version >> 16) & 0xff;
> >>> +       smu_minor = (version >> 8) & 0xff;
> >>> +       smu_debug = (version >> 0) & 0xff;
> >>> +       drm_printf(p, "SMC feature version: %u, program: %d, fw
> >>> version: 0x%08x (%d.%d.%d)\n",
> >>> +                  0, smu_program, version, smu_major, smu_minor,
> >>> smu_debug);
> >>> +
> >>> +       /* SDMA firmware */
> >>> +       for (int i = 0; i < adev->sdma.num_instances; i++) {
> >>> +               drm_printf(p, "SDMA%d feature version: %u, firmware
> >>> version: 0x%08x\n",
> >>> +                          i, adev->sdma.instance[i].feature_version,
> >>> + adev->sdma.instance[i].fw_version);
> >>> +       }
> >>> +
> >>> +       drm_printf(p, "VCN feature version: %u, fw version: 0x%08x\n",
> >>> +                  0, adev->vcn.fw_version);
> >>> +       drm_printf(p, "DMCU feature version: %u, fw version: 0x%08x\n",
> >>> +                  0, adev->dm.dmcu_fw_version);
> >>> +       drm_printf(p, "DMCUB feature version: %u, fw version:
> >>> 0x%08x\n",
> >>> +                  0, adev->dm.dmcub_fw_version);
> >>> +       drm_printf(p, "PSP TOC feature version: %u, fw version:
> >>> 0x%08x\n",
> >>> +                  adev->psp.toc.feature_version,
> >>> adev->psp.toc.fw_version);
> >>> +
> >>> +       version = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK;
> >>> +       feature = (adev->mes.kiq_version &
> >>> AMDGPU_MES_FEAT_VERSION_MASK)
> >>> +                                       >>
> >>> AMDGPU_MES_FEAT_VERSION_SHIFT;
> >>> +       drm_printf(p, "MES_KIQ feature version: %u, fw version:
> >>> 0x%08x\n",
> >>> +                  feature, version);
> >>> +
> >>> +       version = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
> >>> +       feature = (adev->mes.sched_version &
> >>> AMDGPU_MES_FEAT_VERSION_MASK)
> >>> +                                       >>
> >>> AMDGPU_MES_FEAT_VERSION_SHIFT;
> >>> +       drm_printf(p, "MES feature version: %u, fw version: 0x%08x\n",
> >>> +                  feature, version);
> >>> +
> >>> +       drm_printf(p, "VPE feature version: %u, fw version: 0x%08x\n",
> >>> +                  adev->vpe.feature_version, adev->vpe.fw_version);
> >>> +
> >>> +}
> >>> +
> >>>   static ssize_t
> >>>   amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
> >>>                          void *data, size_t datalen)
> >>> @@ -215,6 +332,11 @@ amdgpu_devcoredump_read(char *buffer, loff_t
> >>> offset, size_t count,
> >>>                  }
> >>>          }
> >>>
> >>> +       if (coredump->adev) {
> >>> +               drm_printf(&p, "IP Firmwares\n");
> >>> +               amdgpu_devcoredump_fw_info(coredump->adev, &p);
> >>> +       }
> >>> +
> >>>          if (coredump->ring) {
> >>>                  drm_printf(&p, "\nRing timed out details\n");
> >>>                  drm_printf(&p, "IP Type: %d Ring Name: %s\n",
> >>> --
> >>> 2.34.1
> >>>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: add the IP information of the soc
  2024-03-14  5:44   ` Khatri, Sunil
@ 2024-03-14 14:42     ` Alex Deucher
  2024-03-14 16:16       ` Khatri, Sunil
  0 siblings, 1 reply; 15+ messages in thread
From: Alex Deucher @ 2024-03-14 14:42 UTC (permalink / raw)
  To: Khatri, Sunil
  Cc: Sunil Khatri, Alex Deucher, Christian König,
	Shashank Sharma, amd-gfx, dri-devel, linux-kernel

On Thu, Mar 14, 2024 at 1:44 AM Khatri, Sunil <sukhatri@amd.com> wrote:
>
>
> On 3/14/2024 1:58 AM, Alex Deucher wrote:
> > On Tue, Mar 12, 2024 at 8:41 AM Sunil Khatri <sunil.khatri@amd.com> wrote:
> >> Add all the IP's information on a SOC to the
> >> devcoredump.
> >>
> >> Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 19 +++++++++++++++++++
> >>   1 file changed, 19 insertions(+)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> >> index a0dbccad2f53..611fdb90a1fc 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> >> @@ -196,6 +196,25 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
> >>                             coredump->reset_task_info.process_name,
> >>                             coredump->reset_task_info.pid);
> >>
> >> +       /* GPU IP's information of the SOC */
> >> +       if (coredump->adev) {
> >> +               drm_printf(&p, "\nIP Information\n");
> >> +               drm_printf(&p, "SOC Family: %d\n", coredump->adev->family);
> >> +               drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id);
> >> +
> >> +               for (int i = 0; i < coredump->adev->num_ip_blocks; i++) {
> >> +                       struct amdgpu_ip_block *ip =
> >> +                               &coredump->adev->ip_blocks[i];
> >> +                       drm_printf(&p, "IP type: %d IP name: %s\n",
> >> +                                  ip->version->type,
> >> +                                  ip->version->funcs->name);
> >> +                       drm_printf(&p, "IP version: (%d,%d,%d)\n\n",
> >> +                                  ip->version->major,
> >> +                                  ip->version->minor,
> >> +                                  ip->version->rev);
> >> +               }
> >> +       }
> > I think the IP discovery table would be more useful.  Either walk the
> > adev->ip_versions structure, or just include the IP discovery binary.
>
> I did explore the adev->ip_versions and if i just go through the array
> it doesn't give any useful information directly.
> There are no ways to find directly from adev->ip_versions below things
> until i also reparse the discovery binary again like done the discovery
> amdgpu_discovery_reg_base_init and walk through the headers of various
> ips using discovery binary.
> a. Which IP is available on soc or not.
> b. How many instances are there
> Also i again have to change back to major, minor and rev convention for
> this information to be useful. I am exploring it more if i find some
> other information i will update.
>
> adev->ip_block[] is derived from ip discovery only for each block which
> is there on the SOC, so we are not reading information which isnt
> applicable for the soc. We have name , type and version no of the IPs
> available on the soc. If you want i could add no of instances of each IP
> too if you think that's useful information here. Could you share what
> information is missing in this approach so i can include that.

I was hoping to get the actual IP versions for the IPs from IP
discovery rather than the versions from the ip_block array.  The
latter are common so you can end up with the same version used across
a wide variety of chips (e.g., all gfx10.x based chips use the same
gfx 10 IP code even if the actual IP version is different for most of
the chips).

>
> For dumping the IP discovery binary, i dont understand how that
> information would be useful directly and needs to be decoded like we are
> doing in discovery init. Please correct me if my understanding is wrong
> here.

It's probably not a high priority, I was just thinking it might be
useful to have in case there ended up being some problem related to
the IP discovery table on some boards.  E.g., we'd know that all
boards with a certain harvest config seem to align with a reported
problem.  Similar for vbios.  It's more for telemetry.  E.g., all the
boards reporting some problem have a particular powerplay config or
whatever.

Alex


> > Alex
> >
> >> +
> >>          if (coredump->ring) {
> >>                  drm_printf(&p, "\nRing timed out details\n");
> >>                  drm_printf(&p, "IP Type: %d Ring Name: %s\n",
> >> --
> >> 2.34.1
> >>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: add the IP information of the soc
  2024-03-14 14:42     ` Alex Deucher
@ 2024-03-14 16:16       ` Khatri, Sunil
  2024-03-14 17:51         ` Alex Deucher
  0 siblings, 1 reply; 15+ messages in thread
From: Khatri, Sunil @ 2024-03-14 16:16 UTC (permalink / raw)
  To: Alex Deucher
  Cc: Sunil Khatri, Alex Deucher, Christian König,
	Shashank Sharma, amd-gfx, dri-devel, linux-kernel


On 3/14/2024 8:12 PM, Alex Deucher wrote:
> On Thu, Mar 14, 2024 at 1:44 AM Khatri, Sunil <sukhatri@amd.com> wrote:
>>
>> On 3/14/2024 1:58 AM, Alex Deucher wrote:
>>> On Tue, Mar 12, 2024 at 8:41 AM Sunil Khatri <sunil.khatri@amd.com> wrote:
>>>> Add all the IP's information on a SOC to the
>>>> devcoredump.
>>>>
>>>> Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 19 +++++++++++++++++++
>>>>    1 file changed, 19 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
>>>> index a0dbccad2f53..611fdb90a1fc 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
>>>> @@ -196,6 +196,25 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
>>>>                              coredump->reset_task_info.process_name,
>>>>                              coredump->reset_task_info.pid);
>>>>
>>>> +       /* GPU IP's information of the SOC */
>>>> +       if (coredump->adev) {
>>>> +               drm_printf(&p, "\nIP Information\n");
>>>> +               drm_printf(&p, "SOC Family: %d\n", coredump->adev->family);
>>>> +               drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id);
>>>> +
>>>> +               for (int i = 0; i < coredump->adev->num_ip_blocks; i++) {
>>>> +                       struct amdgpu_ip_block *ip =
>>>> +                               &coredump->adev->ip_blocks[i];
>>>> +                       drm_printf(&p, "IP type: %d IP name: %s\n",
>>>> +                                  ip->version->type,
>>>> +                                  ip->version->funcs->name);
>>>> +                       drm_printf(&p, "IP version: (%d,%d,%d)\n\n",
>>>> +                                  ip->version->major,
>>>> +                                  ip->version->minor,
>>>> +                                  ip->version->rev);
>>>> +               }
>>>> +       }
>>> I think the IP discovery table would be more useful.  Either walk the
>>> adev->ip_versions structure, or just include the IP discovery binary.
>> I did explore the adev->ip_versions and if i just go through the array
>> it doesn't give any useful information directly.
>> There are no ways to find directly from adev->ip_versions below things
>> until i also reparse the discovery binary again like done the discovery
>> amdgpu_discovery_reg_base_init and walk through the headers of various
>> ips using discovery binary.
>> a. Which IP is available on soc or not.
>> b. How many instances are there
>> Also i again have to change back to major, minor and rev convention for
>> this information to be useful. I am exploring it more if i find some
>> other information i will update.
>>
>> adev->ip_block[] is derived from ip discovery only for each block which
>> is there on the SOC, so we are not reading information which isnt
>> applicable for the soc. We have name , type and version no of the IPs
>> available on the soc. If you want i could add no of instances of each IP
>> too if you think that's useful information here. Could you share what
>> information is missing in this approach so i can include that.
> I was hoping to get the actual IP versions for the IPs from IP
> discovery rather than the versions from the ip_block array.  The
> latter are common so you can end up with the same version used across
> a wide variety of chips (e.g., all gfx10.x based chips use the same
> gfx 10 IP code even if the actual IP version is different for most of
> the chips).
Got it. let me check how to get it could be done rightly.
>
>> For dumping the IP discovery binary, i dont understand how that
>> information would be useful directly and needs to be decoded like we are
>> doing in discovery init. Please correct me if my understanding is wrong
>> here.
> It's probably not a high priority, I was just thinking it might be
> useful to have in case there ended up being some problem related to
> the IP discovery table on some boards.  E.g., we'd know that all
> boards with a certain harvest config seem to align with a reported
> problem.  Similar for vbios.  It's more for telemetry.  E.g., all the
> boards reporting some problem have a particular powerplay config or
> whatever.
I got it.
But two points of contention here in my understanding. The dump works 
only where there is reset and not sure if it could be used very early in 
development of not. Second point is that devcoredump is 4096 
bytes/4Kbyte of memory where we are dumping all the information. Not 
sure if that could be increased but it might not be enough if we are 
planning to dump all to it.

Another point is since we have sysfs/debugfs/info ioctl etc information 
available. We should sort out what really is helpful in debugging GPU 
hang and that's added in devcore.

Regards
Sunil

>
> Alex
>
>
>>> Alex
>>>
>>>> +
>>>>           if (coredump->ring) {
>>>>                   drm_printf(&p, "\nRing timed out details\n");
>>>>                   drm_printf(&p, "IP Type: %d Ring Name: %s\n",
>>>> --
>>>> 2.34.1
>>>>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: add the IP information of the soc
  2024-03-14 16:16       ` Khatri, Sunil
@ 2024-03-14 17:51         ` Alex Deucher
  0 siblings, 0 replies; 15+ messages in thread
From: Alex Deucher @ 2024-03-14 17:51 UTC (permalink / raw)
  To: Khatri, Sunil
  Cc: Sunil Khatri, Alex Deucher, Christian König,
	Shashank Sharma, amd-gfx, dri-devel, linux-kernel

On Thu, Mar 14, 2024 at 12:16 PM Khatri, Sunil <sukhatri@amd.com> wrote:
>
>
> On 3/14/2024 8:12 PM, Alex Deucher wrote:
> > On Thu, Mar 14, 2024 at 1:44 AM Khatri, Sunil <sukhatri@amd.com> wrote:
> >>
> >> On 3/14/2024 1:58 AM, Alex Deucher wrote:
> >>> On Tue, Mar 12, 2024 at 8:41 AM Sunil Khatri <sunil.khatri@amd.com> wrote:
> >>>> Add all the IP's information on a SOC to the
> >>>> devcoredump.
> >>>>
> >>>> Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
> >>>> ---
> >>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 19 +++++++++++++++++++
> >>>>    1 file changed, 19 insertions(+)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> >>>> index a0dbccad2f53..611fdb90a1fc 100644
> >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
> >>>> @@ -196,6 +196,25 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
> >>>>                              coredump->reset_task_info.process_name,
> >>>>                              coredump->reset_task_info.pid);
> >>>>
> >>>> +       /* GPU IP's information of the SOC */
> >>>> +       if (coredump->adev) {
> >>>> +               drm_printf(&p, "\nIP Information\n");
> >>>> +               drm_printf(&p, "SOC Family: %d\n", coredump->adev->family);
> >>>> +               drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id);
> >>>> +
> >>>> +               for (int i = 0; i < coredump->adev->num_ip_blocks; i++) {
> >>>> +                       struct amdgpu_ip_block *ip =
> >>>> +                               &coredump->adev->ip_blocks[i];
> >>>> +                       drm_printf(&p, "IP type: %d IP name: %s\n",
> >>>> +                                  ip->version->type,
> >>>> +                                  ip->version->funcs->name);
> >>>> +                       drm_printf(&p, "IP version: (%d,%d,%d)\n\n",
> >>>> +                                  ip->version->major,
> >>>> +                                  ip->version->minor,
> >>>> +                                  ip->version->rev);
> >>>> +               }
> >>>> +       }
> >>> I think the IP discovery table would be more useful.  Either walk the
> >>> adev->ip_versions structure, or just include the IP discovery binary.
> >> I did explore the adev->ip_versions and if i just go through the array
> >> it doesn't give any useful information directly.
> >> There are no ways to find directly from adev->ip_versions below things
> >> until i also reparse the discovery binary again like done the discovery
> >> amdgpu_discovery_reg_base_init and walk through the headers of various
> >> ips using discovery binary.
> >> a. Which IP is available on soc or not.
> >> b. How many instances are there
> >> Also i again have to change back to major, minor and rev convention for
> >> this information to be useful. I am exploring it more if i find some
> >> other information i will update.
> >>
> >> adev->ip_block[] is derived from ip discovery only for each block which
> >> is there on the SOC, so we are not reading information which isnt
> >> applicable for the soc. We have name , type and version no of the IPs
> >> available on the soc. If you want i could add no of instances of each IP
> >> too if you think that's useful information here. Could you share what
> >> information is missing in this approach so i can include that.
> > I was hoping to get the actual IP versions for the IPs from IP
> > discovery rather than the versions from the ip_block array.  The
> > latter are common so you can end up with the same version used across
> > a wide variety of chips (e.g., all gfx10.x based chips use the same
> > gfx 10 IP code even if the actual IP version is different for most of
> > the chips).
> Got it. let me check how to get it could be done rightly.
> >
> >> For dumping the IP discovery binary, i dont understand how that
> >> information would be useful directly and needs to be decoded like we are
> >> doing in discovery init. Please correct me if my understanding is wrong
> >> here.
> > It's probably not a high priority, I was just thinking it might be
> > useful to have in case there ended up being some problem related to
> > the IP discovery table on some boards.  E.g., we'd know that all
> > boards with a certain harvest config seem to align with a reported
> > problem.  Similar for vbios.  It's more for telemetry.  E.g., all the
> > boards reporting some problem have a particular powerplay config or
> > whatever.
> I got it.
> But two points of contention here in my understanding. The dump works
> only where there is reset and not sure if it could be used very early in
> development of not. Second point is that devcoredump is 4096
> bytes/4Kbyte of memory where we are dumping all the information. Not
> sure if that could be increased but it might not be enough if we are
> planning to dump all to it.

ah, ok.  Let's skip the IP versions in that case, we can use the
family and rev_id and external_rev_id to look up the IP versions.

Alex

>
> Another point is since we have sysfs/debugfs/info ioctl etc information
> available. We should sort out what really is helpful in debugging GPU
> hang and that's added in devcore.
>
> Regards
> Sunil
>
> >
> > Alex
> >
> >
> >>> Alex
> >>>
> >>>> +
> >>>>           if (coredump->ring) {
> >>>>                   drm_printf(&p, "\nRing timed out details\n");
> >>>>                   drm_printf(&p, "IP Type: %d Ring Name: %s\n",
> >>>> --
> >>>> 2.34.1
> >>>>

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2024-03-14 17:52 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-03-12 12:41 [PATCH 1/2] drm/amdgpu: add the IP information of the soc Sunil Khatri
2024-03-12 12:41 ` [PATCH 2/2] drm:amdgpu: add firmware information of all IP's Sunil Khatri
2024-03-13 19:48   ` Khatri, Sunil
2024-03-13 20:36   ` Alex Deucher
2024-03-14  5:58     ` Khatri, Sunil
2024-03-14  6:10       ` Sharma, Shashank
2024-03-14  9:52         ` Khatri, Sunil
2024-03-14 13:27         ` Alex Deucher
2024-03-13 19:45 ` [PATCH 1/2] drm/amdgpu: add the IP information of the soc Khatri, Sunil
2024-03-13 19:46 ` Khatri, Sunil
2024-03-13 20:28 ` Alex Deucher
2024-03-14  5:44   ` Khatri, Sunil
2024-03-14 14:42     ` Alex Deucher
2024-03-14 16:16       ` Khatri, Sunil
2024-03-14 17:51         ` Alex Deucher

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).