All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/4] drm/amdgpu: add work function for GPU reset
@ 2022-01-21 20:37 Sharma, Shashank
  2022-01-24  7:17 ` Christian König
  0 siblings, 1 reply; 5+ messages in thread
From: Sharma, Shashank @ 2022-01-21 20:37 UTC (permalink / raw)
  To: amd-gfx; +Cc: Deucher, Alexander, Somalapuram Amaranath, Christian König

 From c598dd586dd15fc5ae0a883a2e6f4094ec024085 Mon Sep 17 00:00:00 2001
From: Shashank Sharma <shashank.sharma@amd.com>
Date: Fri, 21 Jan 2022 17:33:10 +0100
Subject: [PATCH 2/4] drm/amdgpu: add work function for GPU reset

This patch adds a new work function, which will get scheduled
in the event of a GPU reset, and will send a uevent to indicate the same.
The userspace can do some post-processing work like collecting data
from a trace event.

Cc: Alexander Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 ++++++++++++++
  2 files changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 269437b01328..79192f43bb71 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1057,6 +1057,8 @@ struct amdgpu_device {
  	struct work_struct		xgmi_reset_work;
  	struct list_head		reset_list;

+	struct work_struct		gpu_reset_work;
+
  	long				gfx_timeout;
  	long				sdma_timeout;
  	long				video_timeout;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index af9bdf16eefd..e29e58240869 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -72,6 +72,7 @@
  #include <linux/pm_runtime.h>

  #include <drm/drm_drv.h>
+#include <drm/drm_sysfs.h>

  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
@@ -3274,6 +3275,18 @@ bool amdgpu_device_has_dc_support(struct 
amdgpu_device *adev)
  	return amdgpu_device_asic_has_dc_support(adev->asic_type);
  }

+static void amdgpu_device_gpu_reset_func(struct work_struct *__work)
+{
+	struct amdgpu_device *adev =
+		container_of(__work, struct amdgpu_device, gpu_reset_work);
+
+	/*
+	 * Inform userspace that a GPU reset happened, and it should collect
+	 * data from the trace event.
+	 */
+	drm_sysfs_gpu_reset_event(&adev->ddev);
+}
+
  static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
  {
  	struct amdgpu_device *adev =
@@ -3506,6 +3519,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
  			  amdgpu_device_delay_enable_gfx_off);

  	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
+	INIT_WORK(&adev->gpu_reset_work, amdgpu_device_gpu_reset_func);

  	adev->gfx.gfx_off_req_count = 1;
  	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/4] drm/amdgpu: add work function for GPU reset
  2022-01-21 20:37 [PATCH 2/4] drm/amdgpu: add work function for GPU reset Sharma, Shashank
@ 2022-01-24  7:17 ` Christian König
  2022-01-24 16:46   ` Sharma, Shashank
  0 siblings, 1 reply; 5+ messages in thread
From: Christian König @ 2022-01-24  7:17 UTC (permalink / raw)
  To: Sharma, Shashank, amd-gfx; +Cc: Deucher, Alexander, Somalapuram Amaranath

Am 21.01.22 um 21:37 schrieb Sharma, Shashank:
> From c598dd586dd15fc5ae0a883a2e6f4094ec024085 Mon Sep 17 00:00:00 2001
> From: Shashank Sharma <shashank.sharma@amd.com>
> Date: Fri, 21 Jan 2022 17:33:10 +0100
> Subject: [PATCH 2/4] drm/amdgpu: add work function for GPU reset
>
> This patch adds a new work function, which will get scheduled
> in event of a GPU reset, and will send a uevent to indicate the same.
> The userspace can do some post-processing work like collecting data
> from a trace event.
>
> Cc: Alexander Deucher <alexander.deucher@amd.com>
> Cc: Christian Koenig <christian.koenig@amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 ++++++++++++++
>  2 files changed, 16 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 269437b01328..79192f43bb71 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1057,6 +1057,8 @@ struct amdgpu_device {
>      struct work_struct        xgmi_reset_work;
>      struct list_head        reset_list;
>
> +    struct work_struct        gpu_reset_work;

This needs a different name. "gpu_reset_work" would indicate that it 
does the GPU reset, but this really only signals the reset to userspace.

Christian.

> +
>      long                gfx_timeout;
>      long                sdma_timeout;
>      long                video_timeout;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index af9bdf16eefd..e29e58240869 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -72,6 +72,7 @@
>  #include <linux/pm_runtime.h>
>
>  #include <drm/drm_drv.h>
> +#include <drm/drm_sysfs.h>
>
>  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>  MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
> @@ -3274,6 +3275,18 @@ bool amdgpu_device_has_dc_support(struct 
> amdgpu_device *adev)
>      return amdgpu_device_asic_has_dc_support(adev->asic_type);
>  }
>
> +static void amdgpu_device_gpu_reset_func(struct work_struct *__work)
> +{
> +    struct amdgpu_device *adev =
> +        container_of(__work, struct amdgpu_device, gpu_reset_work);
> +
> +    /*
> +     * Inform userspace that a GPU reset happened, and it should collect
> +     * data from the trace event.
> +     */
> +    drm_sysfs_gpu_reset_event(&adev->ddev);
> +}
> +
>  static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
>  {
>      struct amdgpu_device *adev =
> @@ -3506,6 +3519,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>                amdgpu_device_delay_enable_gfx_off);
>
>      INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
> +    INIT_WORK(&adev->gpu_reset_work, amdgpu_device_gpu_reset_func);
>
>      adev->gfx.gfx_off_req_count = 1;
>      adev->pm.ac_power = power_supply_is_system_supplied() > 0;


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/4] drm/amdgpu: add work function for GPU reset
  2022-01-24  7:17 ` Christian König
@ 2022-01-24 16:46   ` Sharma, Shashank
  2022-01-24 16:49     ` Christian König
  0 siblings, 1 reply; 5+ messages in thread
From: Sharma, Shashank @ 2022-01-24 16:46 UTC (permalink / raw)
  To: Christian König, amd-gfx; +Cc: Deucher, Alexander, Somalapuram Amaranath



On 1/24/2022 8:17 AM, Christian König wrote:
> Am 21.01.22 um 21:37 schrieb Sharma, Shashank:
>> From c598dd586dd15fc5ae0a883a2e6f4094ec024085 Mon Sep 17 00:00:00 2001
>> From: Shashank Sharma <shashank.sharma@amd.com>
>> Date: Fri, 21 Jan 2022 17:33:10 +0100
>> Subject: [PATCH 2/4] drm/amdgpu: add work function for GPU reset
>>
>> This patch adds a new work function, which will get scheduled
>> in event of a GPU reset, and will send a uevent to indicate the same.
>> The userspace can do some post-processing work like collecting data
>> from a trace event.
>>
>> Cc: Alexander Deucher <alexander.deucher@amd.com>
>> Cc: Christian Koenig <christian.koenig@amd.com>
>> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 ++
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 ++++++++++++++
>>  2 files changed, 16 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 269437b01328..79192f43bb71 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -1057,6 +1057,8 @@ struct amdgpu_device {
>>      struct work_struct        xgmi_reset_work;
>>      struct list_head        reset_list;
>>
>> +    struct work_struct        gpu_reset_work;
> 
> This needs a different name. "gpu_reset_work" would indicate that it 
> does the GPU reset, but this really only signals the reset to userspace.
> 
Agree, let me come back with something like gpu_reset_housekeeping or
something less weird than that :)

> Christian.
> 
>> +
>>      long                gfx_timeout;
>>      long                sdma_timeout;
>>      long                video_timeout;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index af9bdf16eefd..e29e58240869 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -72,6 +72,7 @@
>>  #include <linux/pm_runtime.h>
>>
>>  #include <drm/drm_drv.h>
>> +#include <drm/drm_sysfs.h>
>>
>>  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>>  MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>> @@ -3274,6 +3275,18 @@ bool amdgpu_device_has_dc_support(struct 
>> amdgpu_device *adev)
>>      return amdgpu_device_asic_has_dc_support(adev->asic_type);
>>  }
>>
>> +static void amdgpu_device_gpu_reset_func(struct work_struct *__work)
>> +{
>> +    struct amdgpu_device *adev =
>> +        container_of(__work, struct amdgpu_device, gpu_reset_work);
>> +
>> +    /*
>> +     * Inform userspace that a GPU reset happened, and it should collect
>> +     * data from the trace event.
>> +     */
>> +    drm_sysfs_gpu_reset_event(&adev->ddev);
>> +}
>> +
>>  static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
>>  {
>>      struct amdgpu_device *adev =
>> @@ -3506,6 +3519,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>                amdgpu_device_delay_enable_gfx_off);
>>
>>      INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
>> +    INIT_WORK(&adev->gpu_reset_work, amdgpu_device_gpu_reset_func);
>>
>>      adev->gfx.gfx_off_req_count = 1;
>>      adev->pm.ac_power = power_supply_is_system_supplied() > 0;
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/4] drm/amdgpu: add work function for GPU reset
  2022-01-24 16:46   ` Sharma, Shashank
@ 2022-01-24 16:49     ` Christian König
  2022-01-24 16:52       ` Sharma, Shashank
  0 siblings, 1 reply; 5+ messages in thread
From: Christian König @ 2022-01-24 16:49 UTC (permalink / raw)
  To: Sharma, Shashank, amd-gfx; +Cc: Deucher, Alexander, Somalapuram Amaranath



Am 24.01.22 um 17:46 schrieb Sharma, Shashank:
>
>
> On 1/24/2022 8:17 AM, Christian König wrote:
>> Am 21.01.22 um 21:37 schrieb Sharma, Shashank:
>>> From c598dd586dd15fc5ae0a883a2e6f4094ec024085 Mon Sep 17 00:00:00 2001
>>> From: Shashank Sharma <shashank.sharma@amd.com>
>>> Date: Fri, 21 Jan 2022 17:33:10 +0100
>>> Subject: [PATCH 2/4] drm/amdgpu: add work function for GPU reset
>>>
>>> This patch adds a new work function, which will get scheduled
>>> in event of a GPU reset, and will send a uevent to indicate the same.
>>> The userspace can do some post-processing work like collecting data
>>> from a trace event.
>>>
>>> Cc: Alexander Deucher <alexander.deucher@amd.com>
>>> Cc: Christian Koenig <christian.koenig@amd.com>
>>> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
>>> ---
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 ++
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 ++++++++++++++
>>>  2 files changed, 16 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index 269437b01328..79192f43bb71 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -1057,6 +1057,8 @@ struct amdgpu_device {
>>>      struct work_struct        xgmi_reset_work;
>>>      struct list_head        reset_list;
>>>
>>> +    struct work_struct        gpu_reset_work;
>>
>> This needs a different name. "gpu_reset_work" would indicate that it 
>> does the GPU reset, but this really only signals the reset to userspace.
>>
> Agree, let me come back with something like gpu_reset_housekeeping or
> something less weird than that :)

How about send_gpu_reset_signal ?

Christian.

>
>> Christian.
>>
>>> +
>>>      long                gfx_timeout;
>>>      long                sdma_timeout;
>>>      long                video_timeout;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index af9bdf16eefd..e29e58240869 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -72,6 +72,7 @@
>>>  #include <linux/pm_runtime.h>
>>>
>>>  #include <drm/drm_drv.h>
>>> +#include <drm/drm_sysfs.h>
>>>
>>>  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>>>  MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>>> @@ -3274,6 +3275,18 @@ bool amdgpu_device_has_dc_support(struct 
>>> amdgpu_device *adev)
>>>      return amdgpu_device_asic_has_dc_support(adev->asic_type);
>>>  }
>>>
>>> +static void amdgpu_device_gpu_reset_func(struct work_struct *__work)
>>> +{
>>> +    struct amdgpu_device *adev =
>>> +        container_of(__work, struct amdgpu_device, gpu_reset_work);
>>> +
>>> +    /*
>>> +     * Inform userspace that a GPU reset happened, and it should 
>>> collect
>>> +     * data from the trace event.
>>> +     */
>>> +    drm_sysfs_gpu_reset_event(&adev->ddev);
>>> +}
>>> +
>>>  static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
>>>  {
>>>      struct amdgpu_device *adev =
>>> @@ -3506,6 +3519,7 @@ int amdgpu_device_init(struct amdgpu_device 
>>> *adev,
>>>                amdgpu_device_delay_enable_gfx_off);
>>>
>>>      INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
>>> +    INIT_WORK(&adev->gpu_reset_work, amdgpu_device_gpu_reset_func);
>>>
>>>      adev->gfx.gfx_off_req_count = 1;
>>>      adev->pm.ac_power = power_supply_is_system_supplied() > 0;
>>


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/4] drm/amdgpu: add work function for GPU reset
  2022-01-24 16:49     ` Christian König
@ 2022-01-24 16:52       ` Sharma, Shashank
  0 siblings, 0 replies; 5+ messages in thread
From: Sharma, Shashank @ 2022-01-24 16:52 UTC (permalink / raw)
  To: Christian König, amd-gfx; +Cc: Deucher, Alexander, Somalapuram Amaranath



On 1/24/2022 5:49 PM, Christian König wrote:
> 
> 
> Am 24.01.22 um 17:46 schrieb Sharma, Shashank:
>>
>>
>> On 1/24/2022 8:17 AM, Christian König wrote:
>>> Am 21.01.22 um 21:37 schrieb Sharma, Shashank:
>>>> From c598dd586dd15fc5ae0a883a2e6f4094ec024085 Mon Sep 17 00:00:00 2001
>>>> From: Shashank Sharma <shashank.sharma@amd.com>
>>>> Date: Fri, 21 Jan 2022 17:33:10 +0100
>>>> Subject: [PATCH 2/4] drm/amdgpu: add work function for GPU reset
>>>>
>>>> This patch adds a new work function, which will get scheduled
>>>> in event of a GPU reset, and will send a uevent to indicate the same.
>>>> The userspace can do some post-processing work like collecting data
>>>> from a trace event.
>>>>
>>>> Cc: Alexander Deucher <alexander.deucher@amd.com>
>>>> Cc: Christian Koenig <christian.koenig@amd.com>
>>>> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
>>>> ---
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 ++
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 ++++++++++++++
>>>>  2 files changed, 16 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> index 269437b01328..79192f43bb71 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> @@ -1057,6 +1057,8 @@ struct amdgpu_device {
>>>>      struct work_struct        xgmi_reset_work;
>>>>      struct list_head        reset_list;
>>>>
>>>> +    struct work_struct        gpu_reset_work;
>>>
>>> This needs a different name. "gpu_reset_work" would indicate that it 
>>> does the GPU reset, but this really only signals the reset to userspace.
>>>
>> Agree, let me come back with something like gpu_reset_housekeeping or
>> something less weird than that :)
> 
> How about send_gpu_reset_signal ?

Yes, sure, that is exactly what it is supposed to do. I will accommodate
this in the next version.
- Shashank

> 
> Christian.
> 
>>
>>> Christian.
>>>
>>>> +
>>>>      long                gfx_timeout;
>>>>      long                sdma_timeout;
>>>>      long                video_timeout;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> index af9bdf16eefd..e29e58240869 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> @@ -72,6 +72,7 @@
>>>>  #include <linux/pm_runtime.h>
>>>>
>>>>  #include <drm/drm_drv.h>
>>>> +#include <drm/drm_sysfs.h>
>>>>
>>>>  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>>>>  MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>>>> @@ -3274,6 +3275,18 @@ bool amdgpu_device_has_dc_support(struct 
>>>> amdgpu_device *adev)
>>>>      return amdgpu_device_asic_has_dc_support(adev->asic_type);
>>>>  }
>>>>
>>>> +static void amdgpu_device_gpu_reset_func(struct work_struct *__work)
>>>> +{
>>>> +    struct amdgpu_device *adev =
>>>> +        container_of(__work, struct amdgpu_device, gpu_reset_work);
>>>> +
>>>> +    /*
>>>> +     * Inform userspace that a GPU reset happened, and it should 
>>>> collect
>>>> +     * data from the trace event.
>>>> +     */
>>>> +    drm_sysfs_gpu_reset_event(&adev->ddev);
>>>> +}
>>>> +
>>>>  static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
>>>>  {
>>>>      struct amdgpu_device *adev =
>>>> @@ -3506,6 +3519,7 @@ int amdgpu_device_init(struct amdgpu_device 
>>>> *adev,
>>>>                amdgpu_device_delay_enable_gfx_off);
>>>>
>>>>      INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
>>>> +    INIT_WORK(&adev->gpu_reset_work, amdgpu_device_gpu_reset_func);
>>>>
>>>>      adev->gfx.gfx_off_req_count = 1;
>>>>      adev->pm.ac_power = power_supply_is_system_supplied() > 0;
>>>
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-01-24 16:53 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-21 20:37 [PATCH 2/4] drm/amdgpu: add work function for GPU reset Sharma, Shashank
2022-01-24  7:17 ` Christian König
2022-01-24 16:46   ` Sharma, Shashank
2022-01-24 16:49     ` Christian König
2022-01-24 16:52       ` Sharma, Shashank

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.