* [PATCH] drm/amdgpu: defer test IBs on the rings at boot
@ 2018-04-13 4:07 Shirish S
[not found] ` <1523592440-16460-1-git-send-email-shirish.s-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 5+ messages in thread
From: Shirish S @ 2018-04-13 4:07 UTC (permalink / raw)
To: Alexander.Deucher-5C7GfCeVMHo,
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
Christian.Koenig-5C7GfCeVMHo
Cc: Shirish S
amdgpu_ib_ring_tests() runs test IB's on rings at boot
contributes to ~500 ms of amdgpu driver's boot time.
This patch defers it and adds a check to report
in amdgpu_info_ioctl() if it was scheduled or not.
Signed-off-by: Shirish S <shirish.s@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 +++++++++++++++++---
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +++
3 files changed, 22 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5734871..ae8f722 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1611,6 +1611,8 @@ struct amdgpu_device {
/* delayed work_func for deferring clockgating during resume */
struct delayed_work late_init_work;
+ /* delayed work_func to defer testing IB's on rings during boot */
+ struct delayed_work late_init_test_ib_work;
struct amdgpu_virt virt;
/* firmware VRAM reservation */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1762eb4..e65a5e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -63,6 +63,7 @@ MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
#define AMDGPU_RESUME_MS 2000
+#define AMDGPU_IB_TEST_SCHED_MS 2000
static const char *amdgpu_asic_name[] = {
"TAHITI",
@@ -2105,6 +2106,16 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
}
}
+static void amdgpu_device_late_init_test_ib_func_handler(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, late_init_test_ib_work.work);
+ int r = amdgpu_ib_ring_tests(adev);
+
+ if (r)
+ DRM_ERROR("ib ring test failed (%d).\n", r);
+}
+
/**
* amdgpu_device_has_dc_support - check if dc is supported
*
@@ -2212,6 +2223,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_LIST_HEAD(&adev->ring_lru_list);
spin_lock_init(&adev->ring_lru_list_lock);
+ INIT_DELAYED_WORK(&adev->late_init_test_ib_work,
+ amdgpu_device_late_init_test_ib_func_handler);
INIT_DELAYED_WORK(&adev->late_init_work,
amdgpu_device_ip_late_init_func_handler);
@@ -2374,9 +2387,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
goto failed;
}
- r = amdgpu_ib_ring_tests(adev);
- if (r)
- DRM_ERROR("ib ring test failed (%d).\n", r);
+ /* Schedule amdgpu_ib_ring_tests() */
+ mod_delayed_work(system_wq, &adev->late_init_test_ib_work,
+ msecs_to_jiffies(AMDGPU_IB_TEST_SCHED_MS));
if (amdgpu_sriov_vf(adev))
amdgpu_virt_init_data_exchange(adev);
@@ -2469,6 +2482,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
}
adev->accel_working = false;
cancel_delayed_work_sync(&adev->late_init_work);
+ cancel_delayed_work_sync(&adev->late_init_test_ib_work);
/* free i2c buses */
if (!amdgpu_device_has_dc_support(adev))
amdgpu_i2c_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 487d39e..057bd9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -279,6 +279,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
if (!info->return_size || !info->return_pointer)
return -EINVAL;
+ if (delayed_work_pending(&adev->late_init_test_ib_work))
+ DRM_ERROR("IB test on ring not executed\n");
+
switch (info->query) {
case AMDGPU_INFO_ACCEL_WORKING:
ui32 = adev->accel_working;
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] drm/amdgpu: defer test IBs on the rings at boot
[not found] ` <1523592440-16460-1-git-send-email-shirish.s-5C7GfCeVMHo@public.gmane.org>
@ 2018-04-13 6:23 ` Christian König
[not found] ` <92585f86-7b1f-a310-7ad1-058f56e235ab-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 5+ messages in thread
From: Christian König @ 2018-04-13 6:23 UTC (permalink / raw)
To: Shirish S, Alexander.Deucher-5C7GfCeVMHo,
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Am 13.04.2018 um 06:07 schrieb Shirish S:
> amdgpu_ib_ring_tests() runs test IB's on rings at boot
> contributes to ~500 ms of amdgpu driver's boot time.
>
> This patch defers it and adds a check to report
> in amdgpu_info_ioctl() if it was scheduled or not.
That is rather suboptimal, but see below.
>
> Signed-off-by: Shirish S <shirish.s@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 +++++++++++++++++---
> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +++
> 3 files changed, 22 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 5734871..ae8f722 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1611,6 +1611,8 @@ struct amdgpu_device {
>
> /* delayed work_func for deferring clockgating during resume */
> struct delayed_work late_init_work;
> + /* delayed work_func to defer testing IB's on rings during boot */
> + struct delayed_work late_init_test_ib_work;
You must put the IB test into the late_init_work as well, otherwise the
two delayed workers can race with each other.
>
> struct amdgpu_virt virt;
> /* firmware VRAM reservation */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 1762eb4..e65a5e6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -63,6 +63,7 @@ MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
> MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
>
> #define AMDGPU_RESUME_MS 2000
> +#define AMDGPU_IB_TEST_SCHED_MS 2000
>
> static const char *amdgpu_asic_name[] = {
> "TAHITI",
> @@ -2105,6 +2106,16 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
> }
> }
>
> +static void amdgpu_device_late_init_test_ib_func_handler(struct work_struct *work)
> +{
> + struct amdgpu_device *adev =
> + container_of(work, struct amdgpu_device, late_init_test_ib_work.work);
> + int r = amdgpu_ib_ring_tests(adev);
> +
> + if (r)
> + DRM_ERROR("ib ring test failed (%d).\n", r);
> +}
> +
> /**
> * amdgpu_device_has_dc_support - check if dc is supported
> *
> @@ -2212,6 +2223,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> INIT_LIST_HEAD(&adev->ring_lru_list);
> spin_lock_init(&adev->ring_lru_list_lock);
>
> + INIT_DELAYED_WORK(&adev->late_init_test_ib_work,
> + amdgpu_device_late_init_test_ib_func_handler);
> INIT_DELAYED_WORK(&adev->late_init_work,
> amdgpu_device_ip_late_init_func_handler);
>
> @@ -2374,9 +2387,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> goto failed;
> }
>
> - r = amdgpu_ib_ring_tests(adev);
> - if (r)
> - DRM_ERROR("ib ring test failed (%d).\n", r);
> + /* Schedule amdgpu_ib_ring_tests() */
> + mod_delayed_work(system_wq, &adev->late_init_test_ib_work,
> + msecs_to_jiffies(AMDGPU_IB_TEST_SCHED_MS));
That doesn't work like you intended. mod_delayed_work() overrides the
existing handler.
What you wanted to use is queue_delayed_work(), but as I said we should
only have one delayed worker.
>
> if (amdgpu_sriov_vf(adev))
> amdgpu_virt_init_data_exchange(adev);
> @@ -2469,6 +2482,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
> }
> adev->accel_working = false;
> cancel_delayed_work_sync(&adev->late_init_work);
> + cancel_delayed_work_sync(&adev->late_init_test_ib_work);
> /* free i2c buses */
> if (!amdgpu_device_has_dc_support(adev))
> amdgpu_i2c_fini(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 487d39e..057bd9a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -279,6 +279,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
> if (!info->return_size || !info->return_pointer)
> return -EINVAL;
>
> + if (delayed_work_pending(&adev->late_init_test_ib_work))
> + DRM_ERROR("IB test on ring not executed\n");
> +
Please use flush_delayed_work() instead of issuing and error here.
Regards,
Christian.
> switch (info->query) {
> case AMDGPU_INFO_ACCEL_WORKING:
> ui32 = adev->accel_working;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] drm/amdgpu: defer test IBs on the rings at boot
[not found] ` <92585f86-7b1f-a310-7ad1-058f56e235ab-5C7GfCeVMHo@public.gmane.org>
@ 2018-04-13 7:01 ` S, Shirish
[not found] ` <a296832e-1b2e-45d7-32b9-8f1e8197ce3a-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 5+ messages in thread
From: S, Shirish @ 2018-04-13 7:01 UTC (permalink / raw)
To: Christian König, Shirish S, Alexander.Deucher-5C7GfCeVMHo,
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
On 4/13/2018 11:53 AM, Christian König wrote:
> Am 13.04.2018 um 06:07 schrieb Shirish S:
>> amdgpu_ib_ring_tests() runs test IB's on rings at boot
>> contributes to ~500 ms of amdgpu driver's boot time.
>>
>> This patch defers it and adds a check to report
>> in amdgpu_info_ioctl() if it was scheduled or not.
>
> That is rather suboptimal, but see below.
>
Which part is sub-optimal, deferring or checking if the work is scheduled?
>>
>> Signed-off-by: Shirish S <shirish.s@amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 +++++++++++++++++---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +++
>> 3 files changed, 22 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 5734871..ae8f722 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -1611,6 +1611,8 @@ struct amdgpu_device {
>> /* delayed work_func for deferring clockgating during resume */
>> struct delayed_work late_init_work;
>> + /* delayed work_func to defer testing IB's on rings during boot */
>> + struct delayed_work late_init_test_ib_work;
>
> You must put the IB test into the late_init_work as well, otherwise
> the two delayed workers can race with each other.
>
I thought from the comment above the declaration, its clear why i am
creating 2 work structures.
late_init_work is to optimize resume time and late_init_test_ib_work is
to optimize the boot time.
There cant be race as the context in which they are called are totally
different.
>> struct amdgpu_virt virt;
>> /* firmware VRAM reservation */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 1762eb4..e65a5e6 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -63,6 +63,7 @@ MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>> MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
>> #define AMDGPU_RESUME_MS 2000
>> +#define AMDGPU_IB_TEST_SCHED_MS 2000
>> static const char *amdgpu_asic_name[] = {
>> "TAHITI",
>> @@ -2105,6 +2106,16 @@ bool amdgpu_device_asic_has_dc_support(enum
>> amd_asic_type asic_type)
>> }
>> }
>> +static void amdgpu_device_late_init_test_ib_func_handler(struct
>> work_struct *work)
>> +{
>> + struct amdgpu_device *adev =
>> + container_of(work, struct amdgpu_device,
>> late_init_test_ib_work.work);
>> + int r = amdgpu_ib_ring_tests(adev);
>> +
>> + if (r)
>> + DRM_ERROR("ib ring test failed (%d).\n", r);
>> +}
>> +
>> /**
>> * amdgpu_device_has_dc_support - check if dc is supported
>> *
>> @@ -2212,6 +2223,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>> INIT_LIST_HEAD(&adev->ring_lru_list);
>> spin_lock_init(&adev->ring_lru_list_lock);
>> + INIT_DELAYED_WORK(&adev->late_init_test_ib_work,
>> + amdgpu_device_late_init_test_ib_func_handler);
>> INIT_DELAYED_WORK(&adev->late_init_work,
>> amdgpu_device_ip_late_init_func_handler);
>> @@ -2374,9 +2387,9 @@ int amdgpu_device_init(struct amdgpu_device
>> *adev,
>> goto failed;
>> }
>> - r = amdgpu_ib_ring_tests(adev);
>> - if (r)
>> - DRM_ERROR("ib ring test failed (%d).\n", r);
>> + /* Schedule amdgpu_ib_ring_tests() */
>> + mod_delayed_work(system_wq, &adev->late_init_test_ib_work,
>> + msecs_to_jiffies(AMDGPU_IB_TEST_SCHED_MS));
>
> That doesn't work like you intended. mod_delayed_work() overrides the
> existing handler.
>
> What you wanted to use is queue_delayed_work(), but as I said we
> should only have one delayed worker.
mod_delayed_work() is safer and optimal method that replaces
cancel_delayed_work() followed by queue_delayed_work().
(https://lkml.org/lkml/2011/2/3/175)
But if you strongly insist i don't mind changing it.
>
>> if (amdgpu_sriov_vf(adev))
>> amdgpu_virt_init_data_exchange(adev);
>> @@ -2469,6 +2482,7 @@ void amdgpu_device_fini(struct amdgpu_device
>> *adev)
>> }
>> adev->accel_working = false;
>> cancel_delayed_work_sync(&adev->late_init_work);
>> + cancel_delayed_work_sync(&adev->late_init_test_ib_work);
>> /* free i2c buses */
>> if (!amdgpu_device_has_dc_support(adev))
>> amdgpu_i2c_fini(adev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> index 487d39e..057bd9a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> @@ -279,6 +279,9 @@ static int amdgpu_info_ioctl(struct drm_device
>> *dev, void *data, struct drm_file
>> if (!info->return_size || !info->return_pointer)
>> return -EINVAL;
>> + if (delayed_work_pending(&adev->late_init_test_ib_work))
>> + DRM_ERROR("IB test on ring not executed\n");
>> +
>
> Please use flush_delayed_work() instead of issuing and error here.
>
Agree, wasn't sure of what to do here :).
So i will re-spin with the flush part added. Hope this reply clarifies
your comments.
Thanks.
Regards,
Shirish S
> Regards,
> Christian.
>
>> switch (info->query) {
>> case AMDGPU_INFO_ACCEL_WORKING:
>> ui32 = adev->accel_working;
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] drm/amdgpu: defer test IBs on the rings at boot
[not found] ` <a296832e-1b2e-45d7-32b9-8f1e8197ce3a-5C7GfCeVMHo@public.gmane.org>
@ 2018-04-13 7:08 ` Christian König
[not found] ` <c6ef7e71-18cb-2ced-25d6-14f90070fda0-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 5+ messages in thread
From: Christian König @ 2018-04-13 7:08 UTC (permalink / raw)
To: S, Shirish, Shirish S, Alexander.Deucher-5C7GfCeVMHo,
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Am 13.04.2018 um 09:01 schrieb S, Shirish:
>
>
> On 4/13/2018 11:53 AM, Christian König wrote:
>> Am 13.04.2018 um 06:07 schrieb Shirish S:
>>> amdgpu_ib_ring_tests() runs test IB's on rings at boot
>>> contributes to ~500 ms of amdgpu driver's boot time.
>>>
>>> This patch defers it and adds a check to report
>>> in amdgpu_info_ioctl() if it was scheduled or not.
>>
>> That is rather suboptimal, but see below.
>>
> Which part is sub-optimal, deferring or checking if the work is
> scheduled?
That was about the check. We should wait for the test to finish instead
of printing an error and continuing.
>>>
>>> Signed-off-by: Shirish S <shirish.s@amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 +++++++++++++++++---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +++
>>> 3 files changed, 22 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index 5734871..ae8f722 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -1611,6 +1611,8 @@ struct amdgpu_device {
>>> /* delayed work_func for deferring clockgating during resume */
>>> struct delayed_work late_init_work;
>>> + /* delayed work_func to defer testing IB's on rings during boot */
>>> + struct delayed_work late_init_test_ib_work;
>>
>> You must put the IB test into the late_init_work as well, otherwise
>> the two delayed workers can race with each other.
>>
> I thought from the comment above the declaration, its clear why i am
> creating 2 work structures.
> late_init_work is to optimize resume time and late_init_test_ib_work
> is to optimize the boot time.
> There cant be race as the context in which they are called are totally
> different.
Late init enables power and clock gating. If I'm not completely mistaken
we don't do the power/clock gating earlier because we had to wait for
the IB test to finish.
Could be that modern ASICs have additional logic to prevent that, but
the last time I worked on this power gating a block while you run
something on it could even crash the whole system.
>>> struct amdgpu_virt virt;
>>> /* firmware VRAM reservation */
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index 1762eb4..e65a5e6 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -63,6 +63,7 @@ MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>>> MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
>>> #define AMDGPU_RESUME_MS 2000
>>> +#define AMDGPU_IB_TEST_SCHED_MS 2000
>>> static const char *amdgpu_asic_name[] = {
>>> "TAHITI",
>>> @@ -2105,6 +2106,16 @@ bool amdgpu_device_asic_has_dc_support(enum
>>> amd_asic_type asic_type)
>>> }
>>> }
>>> +static void amdgpu_device_late_init_test_ib_func_handler(struct
>>> work_struct *work)
>>> +{
>>> + struct amdgpu_device *adev =
>>> + container_of(work, struct amdgpu_device,
>>> late_init_test_ib_work.work);
>>> + int r = amdgpu_ib_ring_tests(adev);
>>> +
>>> + if (r)
>>> + DRM_ERROR("ib ring test failed (%d).\n", r);
>>> +}
>>> +
>>> /**
>>> * amdgpu_device_has_dc_support - check if dc is supported
>>> *
>>> @@ -2212,6 +2223,8 @@ int amdgpu_device_init(struct amdgpu_device
>>> *adev,
>>> INIT_LIST_HEAD(&adev->ring_lru_list);
>>> spin_lock_init(&adev->ring_lru_list_lock);
>>> + INIT_DELAYED_WORK(&adev->late_init_test_ib_work,
>>> + amdgpu_device_late_init_test_ib_func_handler);
>>> INIT_DELAYED_WORK(&adev->late_init_work,
>>> amdgpu_device_ip_late_init_func_handler);
>>> @@ -2374,9 +2387,9 @@ int amdgpu_device_init(struct amdgpu_device
>>> *adev,
>>> goto failed;
>>> }
>>> - r = amdgpu_ib_ring_tests(adev);
>>> - if (r)
>>> - DRM_ERROR("ib ring test failed (%d).\n", r);
>>> + /* Schedule amdgpu_ib_ring_tests() */
>>> + mod_delayed_work(system_wq, &adev->late_init_test_ib_work,
>>> + msecs_to_jiffies(AMDGPU_IB_TEST_SCHED_MS));
>>
>> That doesn't work like you intended. mod_delayed_work() overrides the
>> existing handler.
>>
>> What you wanted to use is queue_delayed_work(), but as I said we
>> should only have one delayed worker.
> mod_delayed_work() is safer and optimal method that replaces
> cancel_delayed_work() followed by queue_delayed_work().
> (https://lkml.org/lkml/2011/2/3/175)
> But if you strongly insist i don't mind changing it.
Well, mod_delayed_work() does NOT replace queue_delayed_work(). Those
two functions are for different use cases.
The link you posted actually explains it quite well:
> So, cancel_delayed_work() followed by queue_delayed_work() schedules
> the work to be executed at the specified time regardless of the
> current pending state while queue_delayed_work() takes effect iff
> currently the work item is not pending.
queue_delayed_work() takes only effect if the work item is not already
pending/executing.
In other words with queue_delayed_work() you don't risk executing things
twice when the work is already executing.
While with mod_delayed_work() you absolutely make sure to execute things
twice when it is already running.
If I'm not completely mistaken it should not matter in this case, but
queue_delayed_work() is used more commonly I think.
>>
>>> if (amdgpu_sriov_vf(adev))
>>> amdgpu_virt_init_data_exchange(adev);
>>> @@ -2469,6 +2482,7 @@ void amdgpu_device_fini(struct amdgpu_device
>>> *adev)
>>> }
>>> adev->accel_working = false;
>>> cancel_delayed_work_sync(&adev->late_init_work);
>>> + cancel_delayed_work_sync(&adev->late_init_test_ib_work);
>>> /* free i2c buses */
>>> if (!amdgpu_device_has_dc_support(adev))
>>> amdgpu_i2c_fini(adev);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> index 487d39e..057bd9a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> @@ -279,6 +279,9 @@ static int amdgpu_info_ioctl(struct drm_device
>>> *dev, void *data, struct drm_file
>>> if (!info->return_size || !info->return_pointer)
>>> return -EINVAL;
>>> + if (delayed_work_pending(&adev->late_init_test_ib_work))
>>> + DRM_ERROR("IB test on ring not executed\n");
>>> +
>>
>> Please use flush_delayed_work() instead of issuing and error here.
>>
> Agree, wasn't sure of what to do here :).
> So i will re-spin with the flush part added. Hope this reply clarifies
> your comments.
> Thanks.
> Regards,
> Shirish S
>> Regards,
>> Christian.
>>
>>> switch (info->query) {
>>> case AMDGPU_INFO_ACCEL_WORKING:
>>> ui32 = adev->accel_working;
>>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] drm/amdgpu: defer test IBs on the rings at boot
[not found] ` <c6ef7e71-18cb-2ced-25d6-14f90070fda0-5C7GfCeVMHo@public.gmane.org>
@ 2018-04-13 8:38 ` S, Shirish
0 siblings, 0 replies; 5+ messages in thread
From: S, Shirish @ 2018-04-13 8:38 UTC (permalink / raw)
To: Christian König, Shirish S, Alexander.Deucher-5C7GfCeVMHo,
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
On 4/13/2018 12:38 PM, Christian König wrote:
> Am 13.04.2018 um 09:01 schrieb S, Shirish:
>>
>>
>> On 4/13/2018 11:53 AM, Christian König wrote:
>>> Am 13.04.2018 um 06:07 schrieb Shirish S:
>>>> amdgpu_ib_ring_tests() runs test IB's on rings at boot
>>>> contributes to ~500 ms of amdgpu driver's boot time.
>>>>
>>>> This patch defers it and adds a check to report
>>>> in amdgpu_info_ioctl() if it was scheduled or not.
>>>
>>> That is rather suboptimal, but see below.
>>>
>> Which part is sub-optimal, deferring or checking if the work is
>> scheduled?
>
> That was about the check. We should wait for the test to finish
> instead of printing an error and continuing.
>
Done. Have made this change in V2.
>>>>
>>>> Signed-off-by: Shirish S <shirish.s@amd.com>
>>>> ---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 +++++++++++++++++---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +++
>>>> 3 files changed, 22 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> index 5734871..ae8f722 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> @@ -1611,6 +1611,8 @@ struct amdgpu_device {
>>>> /* delayed work_func for deferring clockgating during
>>>> resume */
>>>> struct delayed_work late_init_work;
>>>> + /* delayed work_func to defer testing IB's on rings during
>>>> boot */
>>>> + struct delayed_work late_init_test_ib_work;
>>>
>>> You must put the IB test into the late_init_work as well, otherwise
>>> the two delayed workers can race with each other.
>>>
>> I thought from the comment above the declaration, its clear why i am
>> creating 2 work structures.
>> late_init_work is to optimize resume time and late_init_test_ib_work
>> is to optimize the boot time.
>> There cant be race as the context in which they are called are
>> totally different.
>
> Late init enables power and clock gating. If I'm not completely
> mistaken we don't do the power/clock gating earlier because we had to
> wait for the IB test to finish.
>
> Could be that modern ASICs have additional logic to prevent that, but
> the last time I worked on this power gating a block while you run
> something on it could even crash the whole system.
>
>>>> struct amdgpu_virt virt;
>>>> /* firmware VRAM reservation */
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> index 1762eb4..e65a5e6 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> @@ -63,6 +63,7 @@ MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>>>> MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
>>>> #define AMDGPU_RESUME_MS 2000
>>>> +#define AMDGPU_IB_TEST_SCHED_MS 2000
>>>> static const char *amdgpu_asic_name[] = {
>>>> "TAHITI",
>>>> @@ -2105,6 +2106,16 @@ bool amdgpu_device_asic_has_dc_support(enum
>>>> amd_asic_type asic_type)
>>>> }
>>>> }
>>>> +static void amdgpu_device_late_init_test_ib_func_handler(struct
>>>> work_struct *work)
>>>> +{
>>>> + struct amdgpu_device *adev =
>>>> + container_of(work, struct amdgpu_device,
>>>> late_init_test_ib_work.work);
>>>> + int r = amdgpu_ib_ring_tests(adev);
>>>> +
>>>> + if (r)
>>>> + DRM_ERROR("ib ring test failed (%d).\n", r);
>>>> +}
>>>> +
>>>> /**
>>>> * amdgpu_device_has_dc_support - check if dc is supported
>>>> *
>>>> @@ -2212,6 +2223,8 @@ int amdgpu_device_init(struct amdgpu_device
>>>> *adev,
>>>> INIT_LIST_HEAD(&adev->ring_lru_list);
>>>> spin_lock_init(&adev->ring_lru_list_lock);
>>>> + INIT_DELAYED_WORK(&adev->late_init_test_ib_work,
>>>> + amdgpu_device_late_init_test_ib_func_handler);
>>>> INIT_DELAYED_WORK(&adev->late_init_work,
>>>> amdgpu_device_ip_late_init_func_handler);
>>>> @@ -2374,9 +2387,9 @@ int amdgpu_device_init(struct amdgpu_device
>>>> *adev,
>>>> goto failed;
>>>> }
>>>> - r = amdgpu_ib_ring_tests(adev);
>>>> - if (r)
>>>> - DRM_ERROR("ib ring test failed (%d).\n", r);
>>>> + /* Schedule amdgpu_ib_ring_tests() */
>>>> + mod_delayed_work(system_wq, &adev->late_init_test_ib_work,
>>>> + msecs_to_jiffies(AMDGPU_IB_TEST_SCHED_MS));
>>>
>>> That doesn't work like you intended. mod_delayed_work() overrides
>>> the existing handler.
>>>
>>> What you wanted to use is queue_delayed_work(), but as I said we
>>> should only have one delayed worker.
>> mod_delayed_work() is safer and optimal method that replaces
>> cancel_delayed_work() followed by queue_delayed_work().
>> (https://lkml.org/lkml/2011/2/3/175)
>> But if you strongly insist i don't mind changing it.
>
> Well, mod_delayed_work() does NOT replace queue_delayed_work(). Those
> two functions are for different use cases.
>
> The link you posted actually explains it quite well:
>> So, cancel_delayed_work() followed by queue_delayed_work() schedules
>> the work to be executed at the specified time regardless of the
>> current pending state while queue_delayed_work() takes effect iff
>> currently the work item is not pending.
>
> queue_delayed_work() takes only effect if the work item is not already
> pending/executing.
>
> In other words with queue_delayed_work() you don't risk executing
> things twice when the work is already executing.
>
> While with mod_delayed_work() you absolutely make sure to execute
> things twice when it is already running.
>
> If I'm not completely mistaken it should not matter in this case, but
> queue_delayed_work() is used more commonly I think.
>
Since this work is run only once that too at the boot, we wont have a
case where in it will be required to
check if its pending or not.
Anyway, i have changed mod_delayed_work() to queue_delayed_work() and
added flush_delayed_work() in amdgpu_info_ioctl() in V2.
Thanks.
Regards,
Shirish S
>>>
>>>> if (amdgpu_sriov_vf(adev))
>>>> amdgpu_virt_init_data_exchange(adev);
>>>> @@ -2469,6 +2482,7 @@ void amdgpu_device_fini(struct amdgpu_device
>>>> *adev)
>>>> }
>>>> adev->accel_working = false;
>>>> cancel_delayed_work_sync(&adev->late_init_work);
>>>> + cancel_delayed_work_sync(&adev->late_init_test_ib_work);
>>>> /* free i2c buses */
>>>> if (!amdgpu_device_has_dc_support(adev))
>>>> amdgpu_i2c_fini(adev);
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>> index 487d39e..057bd9a 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>> @@ -279,6 +279,9 @@ static int amdgpu_info_ioctl(struct drm_device
>>>> *dev, void *data, struct drm_file
>>>> if (!info->return_size || !info->return_pointer)
>>>> return -EINVAL;
>>>> + if (delayed_work_pending(&adev->late_init_test_ib_work))
>>>> + DRM_ERROR("IB test on ring not executed\n");
>>>> +
>>>
>>> Please use flush_delayed_work() instead of issuing and error here.
>>>
>> Agree, wasn't sure of what to do here :).
>> So i will re-spin with the flush part added. Hope this reply
>> clarifies your comments.
>> Thanks.
>> Regards,
>> Shirish S
>>> Regards,
>>> Christian.
>>>
>>>> switch (info->query) {
>>>> case AMDGPU_INFO_ACCEL_WORKING:
>>>> ui32 = adev->accel_working;
>>>
>>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2018-04-13 8:38 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-13 4:07 [PATCH] drm/amdgpu: defer test IBs on the rings at boot Shirish S
[not found] ` <1523592440-16460-1-git-send-email-shirish.s-5C7GfCeVMHo@public.gmane.org>
2018-04-13 6:23 ` Christian König
[not found] ` <92585f86-7b1f-a310-7ad1-058f56e235ab-5C7GfCeVMHo@public.gmane.org>
2018-04-13 7:01 ` S, Shirish
[not found] ` <a296832e-1b2e-45d7-32b9-8f1e8197ce3a-5C7GfCeVMHo@public.gmane.org>
2018-04-13 7:08 ` Christian König
[not found] ` <c6ef7e71-18cb-2ced-25d6-14f90070fda0-5C7GfCeVMHo@public.gmane.org>
2018-04-13 8:38 ` S, Shirish
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.