* [PATCH 1/2] drm/amd/pm: Add STB dump function.
@ 2022-03-09 21:03 Andrey Grodzovsky
2022-03-09 21:03 ` [PATCH 2/2] drm/amdgpu: Dump STB during ASIC reset Andrey Grodzovsky
2022-03-10 5:17 ` [PATCH 1/2] drm/amd/pm: Add STB dump function Lazar, Lijo
0 siblings, 2 replies; 4+ messages in thread
From: Andrey Grodzovsky @ 2022-03-09 21:03 UTC (permalink / raw)
To: amd-gfx
Cc: Alexander.Deucher, Andrey Grodzovsky, Amaranath.Somalapuram,
Christian.Koenig, Shashank.Sharma
It will be used during GPU reset.
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 10 +++++++
drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 3 +++
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 26 +++++++++++++++++++
drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 2 ++
4 files changed, 41 insertions(+)
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 1d63f1e8884c..815a6367d834 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -1327,6 +1327,16 @@ void amdgpu_dpm_stb_debug_fs_init(struct amdgpu_device *adev)
amdgpu_smu_stb_debug_fs_init(adev);
}
+/*
+ * Forward an STB dump request to the SMU layer. Nothing is done when
+ * is_support_sw_smu() reports that the device does not use the software SMU.
+ */
+void amdgpu_dpm_stb_dump(struct amdgpu_device *adev)
+{
+	struct smu_context *smu_ctx = adev->powerplay.pp_handle;
+
+	if (is_support_sw_smu(adev))
+		smu_stb_dump(smu_ctx);
+}
+
int amdgpu_dpm_display_configuration_change(struct amdgpu_device *adev,
const struct amd_pp_display_configuration *input)
{
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
index ddfa55b59d02..99351d463a72 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
@@ -503,6 +503,7 @@ int amdgpu_dpm_set_pp_table(struct amdgpu_device *adev,
size_t size);
int amdgpu_dpm_get_num_cpu_cores(struct amdgpu_device *adev);
void amdgpu_dpm_stb_debug_fs_init(struct amdgpu_device *adev);
+void amdgpu_dpm_stb_dump(struct amdgpu_device *adev);
int amdgpu_dpm_display_configuration_change(struct amdgpu_device *adev,
const struct amd_pp_display_configuration *input);
int amdgpu_dpm_get_clock_by_type(struct amdgpu_device *adev,
@@ -540,4 +541,6 @@ enum pp_smu_status amdgpu_dpm_get_uclk_dpm_states(struct amdgpu_device *adev,
unsigned int *num_states);
int amdgpu_dpm_get_dpm_clock_table(struct amdgpu_device *adev,
struct dpm_clocks *clock_table);
+
+
#endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 7e79a67bb8ef..aff0ed9b6f9a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -2958,6 +2958,32 @@ int smu_stb_collect_info(struct smu_context *smu, void *buf, uint32_t size)
return smu->ppt_funcs->stb_collect_info(smu, buf, size);
}
+/**
+ * smu_stb_dump - print the current STB contents to the kernel log
+ * @smu: SMU context whose STB data is collected and dumped
+ *
+ * Does nothing unless DRM_UT_DRIVER debugging is enabled and the STB is
+ * marked enabled in @smu. The temporary buffer is released before returning
+ * on every path, whether or not collecting the STB data succeeded.
+ */
+void smu_stb_dump(struct smu_context *smu)
+{
+	unsigned char *buf;
+
+	/* Skip when driver debug output is off or the STB is disabled */
+	if (!drm_debug_enabled(DRM_UT_DRIVER) || !smu->stb_context.enabled)
+		return;
+
+	buf = kvmalloc_array(smu->stb_context.stb_buf_size, sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return;
+
+	if (smu_stb_collect_info(smu, buf, smu->stb_context.stb_buf_size))
+		goto out;
+
+	DRM_DEV_DEBUG_DRIVER(smu->adev->dev, "START PRINT STB DUMP");
+	/*
+	 * print_hex_dump() only supports a rowsize of 16 or 32; any other
+	 * value is treated as 16, so spell out the effective value.
+	 */
+	print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE,
+		       16, 4, buf, smu->stb_context.stb_buf_size, false);
+	DRM_DEV_DEBUG_DRIVER(smu->adev->dev, "END PRINT STB DUMP");
+
+out:
+	/* Shared exit: buf is freed on both the success and failure paths */
+	kvfree(buf);
+}
+
#if defined(CONFIG_DEBUG_FS)
static int smu_stb_debugfs_open(struct inode *inode, struct file *filp)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index fbef3ab8d487..991586f61a85 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -1426,6 +1426,8 @@ int smu_wait_for_event(struct smu_context *smu, enum smu_event_type event,
uint64_t event_arg);
int smu_get_ecc_info(struct smu_context *smu, void *umc_ecc);
int smu_stb_collect_info(struct smu_context *smu, void *buff, uint32_t size);
+void smu_stb_dump(struct smu_context *smu);
+
void amdgpu_smu_stb_debug_fs_init(struct amdgpu_device *adev);
int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t size);
#endif
--
2.25.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/2] drm/amdgpu: Dump STB during ASIC reset.
2022-03-09 21:03 [PATCH 1/2] drm/amd/pm: Add STB dump function Andrey Grodzovsky
@ 2022-03-09 21:03 ` Andrey Grodzovsky
2022-03-10 5:17 ` [PATCH 1/2] drm/amd/pm: Add STB dump function Lazar, Lijo
1 sibling, 0 replies; 4+ messages in thread
From: Andrey Grodzovsky @ 2022-03-09 21:03 UTC (permalink / raw)
To: amd-gfx
Cc: Alexander.Deucher, Andrey Grodzovsky, Amaranath.Somalapuram,
Christian.Koenig, Shashank.Sharma
This should provide more debug info for the driver.
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7abdf5e3dc05..22caff7c98a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4708,6 +4708,14 @@ static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
return 0;
}
+
+/* Dump the STB through the DPM layer; currently skipped on APUs. */
+static void amdgpu_stb_dump(struct amdgpu_device *adev)
+{
+	/* TODO: the APU case needs a PMC driver API to retrieve the STB */
+	if (adev->flags & AMD_IS_APU)
+		return;
+
+	amdgpu_dpm_stb_dump(adev);
+}
+
int amdgpu_do_asic_reset(struct list_head *device_list_handle,
struct amdgpu_reset_context *reset_context)
{
@@ -4719,6 +4727,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
reset_list);
amdgpu_reset_reg_dumps(tmp_adev);
+ amdgpu_stb_dump(tmp_adev);
r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
/* If reset handler not implemented, continue; otherwise return */
if (r == -ENOSYS)
--
2.25.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH 1/2] drm/amd/pm: Add STB dump function.
2022-03-09 21:03 [PATCH 1/2] drm/amd/pm: Add STB dump function Andrey Grodzovsky
2022-03-09 21:03 ` [PATCH 2/2] drm/amdgpu: Dump STB during ASIC reset Andrey Grodzovsky
@ 2022-03-10 5:17 ` Lazar, Lijo
2022-03-10 15:16 ` Andrey Grodzovsky
1 sibling, 1 reply; 4+ messages in thread
From: Lazar, Lijo @ 2022-03-10 5:17 UTC (permalink / raw)
To: Andrey Grodzovsky, amd-gfx
Cc: Alexander.Deucher, Amaranath.Somalapuram, Christian.Koenig,
Shashank.Sharma
On 3/10/2022 2:33 AM, Andrey Grodzovsky wrote:
> It will be used during GPU reset.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
> drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 10 +++++++
> drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 3 +++
> drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 26 +++++++++++++++++++
> drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 2 ++
> 4 files changed, 41 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> index 1d63f1e8884c..815a6367d834 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> @@ -1327,6 +1327,16 @@ void amdgpu_dpm_stb_debug_fs_init(struct amdgpu_device *adev)
> amdgpu_smu_stb_debug_fs_init(adev);
> }
>
> +void amdgpu_dpm_stb_dump(struct amdgpu_device *adev)
> +{
> + struct smu_context *smu = adev->powerplay.pp_handle;
> +
> + if (!is_support_sw_smu(adev))
> + return;
> +
> + smu_stb_dump(smu);
> +}
> +
> int amdgpu_dpm_display_configuration_change(struct amdgpu_device *adev,
> const struct amd_pp_display_configuration *input)
> {
> diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> index ddfa55b59d02..99351d463a72 100644
> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> @@ -503,6 +503,7 @@ int amdgpu_dpm_set_pp_table(struct amdgpu_device *adev,
> size_t size);
> int amdgpu_dpm_get_num_cpu_cores(struct amdgpu_device *adev);
> void amdgpu_dpm_stb_debug_fs_init(struct amdgpu_device *adev);
> +void amdgpu_dpm_stb_dump(struct amdgpu_device *adev);
> int amdgpu_dpm_display_configuration_change(struct amdgpu_device *adev,
> const struct amd_pp_display_configuration *input);
> int amdgpu_dpm_get_clock_by_type(struct amdgpu_device *adev,
> @@ -540,4 +541,6 @@ enum pp_smu_status amdgpu_dpm_get_uclk_dpm_states(struct amdgpu_device *adev,
> unsigned int *num_states);
> int amdgpu_dpm_get_dpm_clock_table(struct amdgpu_device *adev,
> struct dpm_clocks *clock_table);
> +
> +
> #endif
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> index 7e79a67bb8ef..aff0ed9b6f9a 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> @@ -2958,6 +2958,32 @@ int smu_stb_collect_info(struct smu_context *smu, void *buf, uint32_t size)
> return smu->ppt_funcs->stb_collect_info(smu, buf, size);
> }
>
> +void smu_stb_dump(struct smu_context *smu)
> +{
> + unsigned char *buf;
> +
> + /* STB is disabled */
> + if (!drm_debug_enabled(DRM_UT_DRIVER) || !smu->stb_context.enabled)
> + return;
> +
> + buf = kvmalloc_array(smu->stb_context.stb_buf_size, sizeof(*buf), GFP_KERNEL);
> + if (!buf)
> + return;
> +
> + if (smu_stb_collect_info(smu, buf, smu->stb_context.stb_buf_size))
> + goto out;
> +
> + DRM_DEV_DEBUG_DRIVER(smu->adev->dev, "START PRINT STB DUMP");
> + print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE,
> + 4, 4, buf, smu->stb_context.stb_buf_size, false);
Printing to kernel log with each reset doesn't look great. It makes it
difficult to associate this with a corresponding register dump. Instead,
it is better to have a buffer accepted from user through debugfs and
copy the data there for each reset. The app may keep the reset data for
a particular reset together at one place.
Thanks,
Lijo
> + DRM_DEV_DEBUG_DRIVER(smu->adev->dev, "END PRINT STB DUMP");
> +
> + return;
> +
> +out:
> + kvfree(buf);
> +}
> +
> #if defined(CONFIG_DEBUG_FS)
>
> static int smu_stb_debugfs_open(struct inode *inode, struct file *filp)
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> index fbef3ab8d487..991586f61a85 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> @@ -1426,6 +1426,8 @@ int smu_wait_for_event(struct smu_context *smu, enum smu_event_type event,
> uint64_t event_arg);
> int smu_get_ecc_info(struct smu_context *smu, void *umc_ecc);
> int smu_stb_collect_info(struct smu_context *smu, void *buff, uint32_t size);
> +void smu_stb_dump(struct smu_context *smu);
> +
> void amdgpu_smu_stb_debug_fs_init(struct amdgpu_device *adev);
> int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t size);
> #endif
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH 1/2] drm/amd/pm: Add STB dump function.
2022-03-10 5:17 ` [PATCH 1/2] drm/amd/pm: Add STB dump function Lazar, Lijo
@ 2022-03-10 15:16 ` Andrey Grodzovsky
0 siblings, 0 replies; 4+ messages in thread
From: Andrey Grodzovsky @ 2022-03-10 15:16 UTC (permalink / raw)
To: Lazar, Lijo, amd-gfx
Cc: Alexander.Deucher, Amaranath.Somalapuram, Christian.Koenig,
Shashank.Sharma
On 2022-03-10 00:17, Lazar, Lijo wrote:
>
>
> On 3/10/2022 2:33 AM, Andrey Grodzovsky wrote:
>> It will be used during GPU reset.
>>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>> ---
>> drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 10 +++++++
>> drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 3 +++
>> drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 26 +++++++++++++++++++
>> drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 2 ++
>> 4 files changed, 41 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>> b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>> index 1d63f1e8884c..815a6367d834 100644
>> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>> @@ -1327,6 +1327,16 @@ void amdgpu_dpm_stb_debug_fs_init(struct
>> amdgpu_device *adev)
>> amdgpu_smu_stb_debug_fs_init(adev);
>> }
>> +void amdgpu_dpm_stb_dump(struct amdgpu_device *adev)
>> +{
>> + struct smu_context *smu = adev->powerplay.pp_handle;
>> +
>> + if (!is_support_sw_smu(adev))
>> + return;
>> +
>> + smu_stb_dump(smu);
>> +}
>> +
>> int amdgpu_dpm_display_configuration_change(struct amdgpu_device
>> *adev,
>> const struct amd_pp_display_configuration
>> *input)
>> {
>> diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>> b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>> index ddfa55b59d02..99351d463a72 100644
>> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>> @@ -503,6 +503,7 @@ int amdgpu_dpm_set_pp_table(struct amdgpu_device
>> *adev,
>> size_t size);
>> int amdgpu_dpm_get_num_cpu_cores(struct amdgpu_device *adev);
>> void amdgpu_dpm_stb_debug_fs_init(struct amdgpu_device *adev);
>> +void amdgpu_dpm_stb_dump(struct amdgpu_device *adev);
>> int amdgpu_dpm_display_configuration_change(struct amdgpu_device
>> *adev,
>> const struct amd_pp_display_configuration
>> *input);
>> int amdgpu_dpm_get_clock_by_type(struct amdgpu_device *adev,
>> @@ -540,4 +541,6 @@ enum pp_smu_status
>> amdgpu_dpm_get_uclk_dpm_states(struct amdgpu_device *adev,
>> unsigned int *num_states);
>> int amdgpu_dpm_get_dpm_clock_table(struct amdgpu_device *adev,
>> struct dpm_clocks *clock_table);
>> +
>> +
>> #endif
>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>> index 7e79a67bb8ef..aff0ed9b6f9a 100644
>> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>> @@ -2958,6 +2958,32 @@ int smu_stb_collect_info(struct smu_context
>> *smu, void *buf, uint32_t size)
>> return smu->ppt_funcs->stb_collect_info(smu, buf, size);
>> }
>> +void smu_stb_dump(struct smu_context *smu)
>> +{
>> + unsigned char *buf;
>> +
>> + /* STB is disabled */
>> + if (!drm_debug_enabled(DRM_UT_DRIVER) || !smu->stb_context.enabled)
>> + return;
>> +
>> + buf = kvmalloc_array(smu->stb_context.stb_buf_size,
>> sizeof(*buf), GFP_KERNEL);
>> + if (!buf)
>> + return;
>> +
>> + if (smu_stb_collect_info(smu, buf, smu->stb_context.stb_buf_size))
>> + goto out;
>> +
>> + DRM_DEV_DEBUG_DRIVER(smu->adev->dev, "START PRINT STB DUMP");
>> + print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE,
>> + 4, 4, buf, smu->stb_context.stb_buf_size, false);
>
> Printing to kernel log with each reset doesn't look great. It makes it
> difficult to associate this with a corresponding register dump.
> Instead, it is better to have a buffer accepted from user through
> debugfs and copy the data there for each reset. The app may keep the
> reset data for a particular reset together at one place.
STB already provides a debugfs interface to retrieve the latest buffer if
needed. On top of that, not all hangs are related to user apps
submitting commands; there can be internal kernel driver issues or
FW issues which could
trigger a hang without any user app even being present (working in FB console
mode), so I wouldn't want to tie this functionality to the presence of any
user app.
Regarding association with a corresponding register dump - I can probably
switch this to event tracing, which is what the register dump is using,
and then you will see them in one flow in the trace dump. I am just not sure
how to dump a variable-length buffer into an event trace - how do I define the
argument? Is it just a char *?
Andrey
>
> Thanks,
> Lijo
>
>> + DRM_DEV_DEBUG_DRIVER(smu->adev->dev, "END PRINT STB DUMP");
>> +
>> + return;
>> +
>> +out:
>> + kvfree(buf);
>> +}
>> +
>> #if defined(CONFIG_DEBUG_FS)
>> static int smu_stb_debugfs_open(struct inode *inode, struct file
>> *filp)
>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>> b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>> index fbef3ab8d487..991586f61a85 100644
>> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>> @@ -1426,6 +1426,8 @@ int smu_wait_for_event(struct smu_context *smu,
>> enum smu_event_type event,
>> uint64_t event_arg);
>> int smu_get_ecc_info(struct smu_context *smu, void *umc_ecc);
>> int smu_stb_collect_info(struct smu_context *smu, void *buff,
>> uint32_t size);
>> +void smu_stb_dump(struct smu_context *smu);
>> +
>> void amdgpu_smu_stb_debug_fs_init(struct amdgpu_device *adev);
>> int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t
>> size);
>> #endif
>>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2022-03-10 15:16 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-09 21:03 [PATCH 1/2] drm/amd/pm: Add STB dump function Andrey Grodzovsky
2022-03-09 21:03 ` [PATCH 2/2] drm/amdgpu: Dump STB during ASIC reset Andrey Grodzovsky
2022-03-10 5:17 ` [PATCH 1/2] drm/amd/pm: Add STB dump function Lazar, Lijo
2022-03-10 15:16 ` Andrey Grodzovsky
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.