All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Sharma, Shashank" <shashank.sharma@amd.com>
To: Somalapuram Amaranath <Amaranath.Somalapuram@amd.com>,
	amd-gfx@lists.freedesktop.org
Cc: alexander.deucher@amd.com, christian.koenig@amd.com
Subject: Re: [PATCH v2 2/2] drm/amdgpu: adding device coredump support
Date: Thu, 26 May 2022 11:57:35 +0200	[thread overview]
Message-ID: <3a677ab8-42a7-4bee-6e94-d4cd43991d49@amd.com> (raw)
In-Reply-To: <20220526094839.36709-2-Amaranath.Somalapuram@amd.com>



On 5/26/2022 11:48 AM, Somalapuram Amaranath wrote:
> Added device coredump information:
> - Kernel version
> - Module
> - Time
> - VRAM status
> - Guilty process name and PID
> - GPU register dumps
> v1 -> v2: Variable name change
> v1 -> v2: NULL check
> v1 -> v2: Code alignment
> v1 -> v2: Adding dummy amdgpu_devcoredump_free
> v1 -> v2: memset reset_task_info to zero
> 
> Signed-off-by: Somalapuram Amaranath <Amaranath.Somalapuram@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  3 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 67 ++++++++++++++++++++++
>   2 files changed, 70 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index c79d9992b113..25a7b2c74928 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1044,6 +1044,9 @@ struct amdgpu_device {
>   	uint32_t                        *reset_dump_reg_list;
>   	uint32_t			*reset_dump_reg_value;
>   	int                             num_regs;
> +	struct amdgpu_task_info         reset_task_info;
> +	bool                            reset_vram_lost;
> +	struct timespec64               reset_time;
>   
>   	struct amdgpu_reset_domain	*reset_domain;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 866b4980a6fa..ca97afe5be63 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -32,6 +32,8 @@
>   #include <linux/slab.h>
>   #include <linux/iommu.h>
>   #include <linux/pci.h>
> +#include <linux/devcoredump.h>
> +#include <generated/utsrelease.h>
>   
>   #include <drm/drm_atomic_helper.h>
>   #include <drm/drm_probe_helper.h>
> @@ -4734,6 +4736,62 @@ static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
>   	return 0;
>   }
>   
> +#ifdef CONFIG_DEV_COREDUMP
> +static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
> +		size_t count, void *data, size_t datalen)
> +{
> +	struct drm_printer p;
> +	struct amdgpu_device *adev = data;
> +	struct drm_print_iterator iter;
> +	int i;
> +
> +	if (adev == NULL)
> +		return 0;
> +
> +	iter.data = buffer;
> +	iter.offset = 0;
> +	iter.start = offset;
> +	iter.remain = count;
> +
> +	p = drm_coredump_printer(&iter);
> +
> +	drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
> +	drm_printf(&p, "kernel: " UTS_RELEASE "\n");
> +	drm_printf(&p, "module: " KBUILD_MODNAME "\n");
> +	drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
> +	if (adev->reset_task_info.pid)
> +		drm_printf(&p, "process_name: %s PID: %d\n",
> +			   adev->reset_task_info.process_name,
> +			   adev->reset_task_info.pid);
> +
> +	if (adev->reset_vram_lost)
> +		drm_printf(&p, "VRAM is lost due to GPU reset!\n");
> +	if (adev->num_regs) {
> +		drm_printf(&p, "AMDGPU register dumps:\nOffset:     Value:\n");
> +
> +		for (i = 0; i < adev->num_regs; i++)
> +			drm_printf(&p, "0x%08x: 0x%08x\n",
> +				   adev->reset_dump_reg_list[i],
> +				   adev->reset_dump_reg_value[i]);
> +	}
> +
> +	return count - iter.remain;
> +}
> +
> +static void amdgpu_devcoredump_free(void *data)
> +{
> +}
> +
> +static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
> +{
> +	struct drm_device *dev = adev_to_drm(adev);
> +
> +	ktime_get_ts64(&adev->reset_time);
> +	dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
> +			amdgpu_devcoredump_read, amdgpu_devcoredump_free);
Please align this continuation line with the opening parenthesis of dev_coredumpm( on the line above.
> +}
> +#endif
> +
>   int amdgpu_do_asic_reset(struct list_head *device_list_handle,
>   			 struct amdgpu_reset_context *reset_context)
>   {
> @@ -4818,6 +4876,15 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
>   					goto out;
>   
>   				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
> +#ifdef CONFIG_DEV_COREDUMP
> +				tmp_adev->reset_vram_lost = vram_lost;
> +				memset(&tmp_adev->reset_task_info, 0,
> +						sizeof(tmp_adev->reset_task_info));
Please align this continuation line with the opening parenthesis of memset( on the line above.
> +				if (reset_context->job && reset_context->job->vm)
> +					tmp_adev->reset_task_info =
> +						reset_context->job->vm->task_info;
> +				amdgpu_reset_capture_coredumpm(tmp_adev);
> +#endif
>   				if (vram_lost) {
>   					DRM_INFO("VRAM is lost due to GPU reset!\n");
>   					amdgpu_inc_vram_lost(tmp_adev);

With above fixed, feel free to use:
Reviewed-by: Shashank Sharma <shashank.sharma@amd.com>

- Shashank

  reply	other threads:[~2022-05-26  9:57 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-26  9:48 [PATCH v2 1/2] drm/amdgpu: save the reset dump register value for devcoredump Somalapuram Amaranath
2022-05-26  9:48 ` [PATCH v2 2/2] drm/amdgpu: adding device coredump support Somalapuram Amaranath
2022-05-26  9:57   ` Sharma, Shashank [this message]
2022-05-26 10:26   ` Wang, Yang(Kevin)
2022-05-31 13:14     ` Somalapuram, Amaranath
2022-05-26  9:54 ` [PATCH v2 1/2] drm/amdgpu: save the reset dump register value for devcoredump Sharma, Shashank
2022-05-31 13:08   ` Somalapuram, Amaranath
2022-05-31 13:17     ` Sharma, Shashank
2022-05-31 13:24       ` Christian König

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3a677ab8-42a7-4bee-6e94-d4cd43991d49@amd.com \
    --to=shashank.sharma@amd.com \
    --cc=Amaranath.Somalapuram@amd.com \
    --cc=alexander.deucher@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=christian.koenig@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.