* [PATCH 2/4] drm/amdgpu: add work function for GPU reset
@ 2022-01-21 20:37 Sharma, Shashank
2022-01-24 7:17 ` Christian König
0 siblings, 1 reply; 5+ messages in thread
From: Sharma, Shashank @ 2022-01-21 20:37 UTC (permalink / raw)
To: amd-gfx; +Cc: Deucher, Alexander, Somalapuram Amaranath, Christian König
From c598dd586dd15fc5ae0a883a2e6f4094ec024085 Mon Sep 17 00:00:00 2001
From: Shashank Sharma <shashank.sharma@amd.com>
Date: Fri, 21 Jan 2022 17:33:10 +0100
Subject: [PATCH 2/4] drm/amdgpu: add work function for GPU reset
This patch adds a new work function, which will get scheduled
in the event of a GPU reset, and will send a uevent to indicate the same.
The userspace can do some post-processing work like collecting data
from a trace event.
Cc: Alexander Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 ++++++++++++++
2 files changed, 16 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 269437b01328..79192f43bb71 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1057,6 +1057,8 @@ struct amdgpu_device {
struct work_struct xgmi_reset_work;
struct list_head reset_list;
+ struct work_struct gpu_reset_work;
+
long gfx_timeout;
long sdma_timeout;
long video_timeout;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index af9bdf16eefd..e29e58240869 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -72,6 +72,7 @@
#include <linux/pm_runtime.h>
#include <drm/drm_drv.h>
+#include <drm/drm_sysfs.h>
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
@@ -3274,6 +3275,18 @@ bool amdgpu_device_has_dc_support(struct
amdgpu_device *adev)
return amdgpu_device_asic_has_dc_support(adev->asic_type);
}
+static void amdgpu_device_gpu_reset_func(struct work_struct *__work)
+{
+ struct amdgpu_device *adev =
+ container_of(__work, struct amdgpu_device, gpu_reset_work);
+
+ /*
+ * Inform userspace that a GPU reset happened, and it should collect
+ * data from the trace event.
+ */
+ drm_sysfs_gpu_reset_event(&adev->ddev);
+}
+
static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
{
struct amdgpu_device *adev =
@@ -3506,6 +3519,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
amdgpu_device_delay_enable_gfx_off);
INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
+ INIT_WORK(&adev->gpu_reset_work, amdgpu_device_gpu_reset_func);
adev->gfx.gfx_off_req_count = 1;
adev->pm.ac_power = power_supply_is_system_supplied() > 0;
--
2.32.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 2/4] drm/amdgpu: add work function for GPU reset
2022-01-21 20:37 [PATCH 2/4] drm/amdgpu: add work function for GPU reset Sharma, Shashank
@ 2022-01-24 7:17 ` Christian König
2022-01-24 16:46 ` Sharma, Shashank
0 siblings, 1 reply; 5+ messages in thread
From: Christian König @ 2022-01-24 7:17 UTC (permalink / raw)
To: Sharma, Shashank, amd-gfx; +Cc: Deucher, Alexander, Somalapuram Amaranath
Am 21.01.22 um 21:37 schrieb Sharma, Shashank:
> From c598dd586dd15fc5ae0a883a2e6f4094ec024085 Mon Sep 17 00:00:00 2001
> From: Shashank Sharma <shashank.sharma@amd.com>
> Date: Fri, 21 Jan 2022 17:33:10 +0100
> Subject: [PATCH 2/4] drm/amdgpu: add work function for GPU reset
>
> This patch adds a new work function, which will get scheduled
> in event of a GPU reset, and will send a uevent to indicate the same.
> The userspace can do some post-processing work like collecting data
> from a trace event.
>
> Cc: Alexander Deucher <alexander.deucher@amd.com>
> Cc: Christian Koenig <christian.koenig@amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 ++++++++++++++
> 2 files changed, 16 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 269437b01328..79192f43bb71 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1057,6 +1057,8 @@ struct amdgpu_device {
> struct work_struct xgmi_reset_work;
> struct list_head reset_list;
>
> + struct work_struct gpu_reset_work;
This needs a different name. "gpu_reset_work" would indicate that it
does the GPU reset, but this really only signals the reset to userspace.
Christian.
> +
> long gfx_timeout;
> long sdma_timeout;
> long video_timeout;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index af9bdf16eefd..e29e58240869 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -72,6 +72,7 @@
> #include <linux/pm_runtime.h>
>
> #include <drm/drm_drv.h>
> +#include <drm/drm_sysfs.h>
>
> MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
> MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
> @@ -3274,6 +3275,18 @@ bool amdgpu_device_has_dc_support(struct
> amdgpu_device *adev)
> return amdgpu_device_asic_has_dc_support(adev->asic_type);
> }
>
> +static void amdgpu_device_gpu_reset_func(struct work_struct *__work)
> +{
> + struct amdgpu_device *adev =
> + container_of(__work, struct amdgpu_device, gpu_reset_work);
> +
> + /*
> + * Inform userspace that a GPU reset happened, and it should collect
> + * data from the trace event.
> + */
> + drm_sysfs_gpu_reset_event(&adev->ddev);
> +}
> +
> static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
> {
> struct amdgpu_device *adev =
> @@ -3506,6 +3519,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> amdgpu_device_delay_enable_gfx_off);
>
> INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
> + INIT_WORK(&adev->gpu_reset_work, amdgpu_device_gpu_reset_func);
>
> adev->gfx.gfx_off_req_count = 1;
> adev->pm.ac_power = power_supply_is_system_supplied() > 0;
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 2/4] drm/amdgpu: add work function for GPU reset
2022-01-24 7:17 ` Christian König
@ 2022-01-24 16:46 ` Sharma, Shashank
2022-01-24 16:49 ` Christian König
0 siblings, 1 reply; 5+ messages in thread
From: Sharma, Shashank @ 2022-01-24 16:46 UTC (permalink / raw)
To: Christian König, amd-gfx; +Cc: Deucher, Alexander, Somalapuram Amaranath
On 1/24/2022 8:17 AM, Christian König wrote:
> Am 21.01.22 um 21:37 schrieb Sharma, Shashank:
>> From c598dd586dd15fc5ae0a883a2e6f4094ec024085 Mon Sep 17 00:00:00 2001
>> From: Shashank Sharma <shashank.sharma@amd.com>
>> Date: Fri, 21 Jan 2022 17:33:10 +0100
>> Subject: [PATCH 2/4] drm/amdgpu: add work function for GPU reset
>>
>> This patch adds a new work function, which will get scheduled
>> in event of a GPU reset, and will send a uevent to indicate the same.
>> The userspace can do some post-processing work like collecting data
>> from a trace event.
>>
>> Cc: Alexander Deucher <alexander.deucher@amd.com>
>> Cc: Christian Koenig <christian.koenig@amd.com>
>> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 ++++++++++++++
>> 2 files changed, 16 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 269437b01328..79192f43bb71 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -1057,6 +1057,8 @@ struct amdgpu_device {
>> struct work_struct xgmi_reset_work;
>> struct list_head reset_list;
>>
>> + struct work_struct gpu_reset_work;
>
> This needs a different name. "gpu_reset_work" would indicate that it
> does the GPU reset, but this really only signals the reset to userspace.
>
Agree, let me come back with something like gpu_reset_housekeeping or
something less weird than that :)
> Christian.
>
>> +
>> long gfx_timeout;
>> long sdma_timeout;
>> long video_timeout;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index af9bdf16eefd..e29e58240869 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -72,6 +72,7 @@
>> #include <linux/pm_runtime.h>
>>
>> #include <drm/drm_drv.h>
>> +#include <drm/drm_sysfs.h>
>>
>> MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>> MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>> @@ -3274,6 +3275,18 @@ bool amdgpu_device_has_dc_support(struct
>> amdgpu_device *adev)
>> return amdgpu_device_asic_has_dc_support(adev->asic_type);
>> }
>>
>> +static void amdgpu_device_gpu_reset_func(struct work_struct *__work)
>> +{
>> + struct amdgpu_device *adev =
>> + container_of(__work, struct amdgpu_device, gpu_reset_work);
>> +
>> + /*
>> + * Inform userspace that a GPU reset happened, and it should collect
>> + * data from the trace event.
>> + */
>> + drm_sysfs_gpu_reset_event(&adev->ddev);
>> +}
>> +
>> static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
>> {
>> struct amdgpu_device *adev =
>> @@ -3506,6 +3519,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>> amdgpu_device_delay_enable_gfx_off);
>>
>> INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
>> + INIT_WORK(&adev->gpu_reset_work, amdgpu_device_gpu_reset_func);
>>
>> adev->gfx.gfx_off_req_count = 1;
>> adev->pm.ac_power = power_supply_is_system_supplied() > 0;
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 2/4] drm/amdgpu: add work function for GPU reset
2022-01-24 16:46 ` Sharma, Shashank
@ 2022-01-24 16:49 ` Christian König
2022-01-24 16:52 ` Sharma, Shashank
0 siblings, 1 reply; 5+ messages in thread
From: Christian König @ 2022-01-24 16:49 UTC (permalink / raw)
To: Sharma, Shashank, amd-gfx; +Cc: Deucher, Alexander, Somalapuram Amaranath
Am 24.01.22 um 17:46 schrieb Sharma, Shashank:
>
>
> On 1/24/2022 8:17 AM, Christian König wrote:
>> Am 21.01.22 um 21:37 schrieb Sharma, Shashank:
>>> From c598dd586dd15fc5ae0a883a2e6f4094ec024085 Mon Sep 17 00:00:00 2001
>>> From: Shashank Sharma <shashank.sharma@amd.com>
>>> Date: Fri, 21 Jan 2022 17:33:10 +0100
>>> Subject: [PATCH 2/4] drm/amdgpu: add work function for GPU reset
>>>
>>> This patch adds a new work function, which will get scheduled
>>> in event of a GPU reset, and will send a uevent to indicate the same.
>>> The userspace can do some post-processing work like collecting data
>>> from a trace event.
>>>
>>> Cc: Alexander Deucher <alexander.deucher@amd.com>
>>> Cc: Christian Koenig <christian.koenig@amd.com>
>>> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 ++++++++++++++
>>> 2 files changed, 16 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index 269437b01328..79192f43bb71 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -1057,6 +1057,8 @@ struct amdgpu_device {
>>> struct work_struct xgmi_reset_work;
>>> struct list_head reset_list;
>>>
>>> + struct work_struct gpu_reset_work;
>>
>> This needs a different name. "gpu_reset_work" would indicate that it
>> does the GPU reset, but this really only signals the reset to userspace.
>>
> Agree, let me come back with something like gpu_reset_housekeeping or
> something less weird than that :)
How about send_gpu_reset_signal ?
Christian.
>
>> Christian.
>>
>>> +
>>> long gfx_timeout;
>>> long sdma_timeout;
>>> long video_timeout;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index af9bdf16eefd..e29e58240869 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -72,6 +72,7 @@
>>> #include <linux/pm_runtime.h>
>>>
>>> #include <drm/drm_drv.h>
>>> +#include <drm/drm_sysfs.h>
>>>
>>> MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>>> MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>>> @@ -3274,6 +3275,18 @@ bool amdgpu_device_has_dc_support(struct
>>> amdgpu_device *adev)
>>> return amdgpu_device_asic_has_dc_support(adev->asic_type);
>>> }
>>>
>>> +static void amdgpu_device_gpu_reset_func(struct work_struct *__work)
>>> +{
>>> + struct amdgpu_device *adev =
>>> + container_of(__work, struct amdgpu_device, gpu_reset_work);
>>> +
>>> + /*
>>> + * Inform userspace that a GPU reset happened, and it should
>>> collect
>>> + * data from the trace event.
>>> + */
>>> + drm_sysfs_gpu_reset_event(&adev->ddev);
>>> +}
>>> +
>>> static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
>>> {
>>> struct amdgpu_device *adev =
>>> @@ -3506,6 +3519,7 @@ int amdgpu_device_init(struct amdgpu_device
>>> *adev,
>>> amdgpu_device_delay_enable_gfx_off);
>>>
>>> INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
>>> + INIT_WORK(&adev->gpu_reset_work, amdgpu_device_gpu_reset_func);
>>>
>>> adev->gfx.gfx_off_req_count = 1;
>>> adev->pm.ac_power = power_supply_is_system_supplied() > 0;
>>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 2/4] drm/amdgpu: add work function for GPU reset
2022-01-24 16:49 ` Christian König
@ 2022-01-24 16:52 ` Sharma, Shashank
0 siblings, 0 replies; 5+ messages in thread
From: Sharma, Shashank @ 2022-01-24 16:52 UTC (permalink / raw)
To: Christian König, amd-gfx; +Cc: Deucher, Alexander, Somalapuram Amaranath
On 1/24/2022 5:49 PM, Christian König wrote:
>
>
> Am 24.01.22 um 17:46 schrieb Sharma, Shashank:
>>
>>
>> On 1/24/2022 8:17 AM, Christian König wrote:
>>> Am 21.01.22 um 21:37 schrieb Sharma, Shashank:
>>>> From c598dd586dd15fc5ae0a883a2e6f4094ec024085 Mon Sep 17 00:00:00 2001
>>>> From: Shashank Sharma <shashank.sharma@amd.com>
>>>> Date: Fri, 21 Jan 2022 17:33:10 +0100
>>>> Subject: [PATCH 2/4] drm/amdgpu: add work function for GPU reset
>>>>
>>>> This patch adds a new work function, which will get scheduled
>>>> in event of a GPU reset, and will send a uevent to indicate the same.
>>>> The userspace can do some post-processing work like collecting data
>>>> from a trace event.
>>>>
>>>> Cc: Alexander Deucher <alexander.deucher@amd.com>
>>>> Cc: Christian Koenig <christian.koenig@amd.com>
>>>> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
>>>> ---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 ++++++++++++++
>>>> 2 files changed, 16 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> index 269437b01328..79192f43bb71 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> @@ -1057,6 +1057,8 @@ struct amdgpu_device {
>>>> struct work_struct xgmi_reset_work;
>>>> struct list_head reset_list;
>>>>
>>>> + struct work_struct gpu_reset_work;
>>>
>>> This needs a different name. "gpu_reset_work" would indicate that it
>>> does the GPU reset, but this really only signals the reset to userspace.
>>>
>> Agree, let me come back with something like gpu_reset_housekeeping or
>> something less weird than that :)
>
> How about send_gpu_reset_signal ?
Yes, sure, that is exactly what it is supposed to do. I will accommodate
this in the next version.
- Shashank
>
> Christian.
>
>>
>>> Christian.
>>>
>>>> +
>>>> long gfx_timeout;
>>>> long sdma_timeout;
>>>> long video_timeout;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> index af9bdf16eefd..e29e58240869 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> @@ -72,6 +72,7 @@
>>>> #include <linux/pm_runtime.h>
>>>>
>>>> #include <drm/drm_drv.h>
>>>> +#include <drm/drm_sysfs.h>
>>>>
>>>> MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>>>> MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>>>> @@ -3274,6 +3275,18 @@ bool amdgpu_device_has_dc_support(struct
>>>> amdgpu_device *adev)
>>>> return amdgpu_device_asic_has_dc_support(adev->asic_type);
>>>> }
>>>>
>>>> +static void amdgpu_device_gpu_reset_func(struct work_struct *__work)
>>>> +{
>>>> + struct amdgpu_device *adev =
>>>> + container_of(__work, struct amdgpu_device, gpu_reset_work);
>>>> +
>>>> + /*
>>>> + * Inform userspace that a GPU reset happened, and it should
>>>> collect
>>>> + * data from the trace event.
>>>> + */
>>>> + drm_sysfs_gpu_reset_event(&adev->ddev);
>>>> +}
>>>> +
>>>> static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
>>>> {
>>>> struct amdgpu_device *adev =
>>>> @@ -3506,6 +3519,7 @@ int amdgpu_device_init(struct amdgpu_device
>>>> *adev,
>>>> amdgpu_device_delay_enable_gfx_off);
>>>>
>>>> INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
>>>> + INIT_WORK(&adev->gpu_reset_work, amdgpu_device_gpu_reset_func);
>>>>
>>>> adev->gfx.gfx_off_req_count = 1;
>>>> adev->pm.ac_power = power_supply_is_system_supplied() > 0;
>>>
>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2022-01-24 16:53 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-21 20:37 [PATCH 2/4] drm/amdgpu: add work function for GPU reset Sharma, Shashank
2022-01-24 7:17 ` Christian König
2022-01-24 16:46 ` Sharma, Shashank
2022-01-24 16:49 ` Christian König
2022-01-24 16:52 ` Sharma, Shashank
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.