From: "Christian König" <christian.koenig@amd.com>
To: Dennis Li <Dennis.Li@amd.com>,
amd-gfx@lists.freedesktop.org, Alexander.Deucher@amd.com,
felix.kuehling@amd.com, Hawking.Zhang@amd.com
Subject: Re: [PATCH 2/4] drm/amdgpu: refine the GPU recovery sequence
Date: Thu, 18 Mar 2021 08:56:55 +0100 [thread overview]
Message-ID: <123ba10f-f0d7-bbf8-8e99-7948cfb5024d@amd.com> (raw)
In-Reply-To: <20210318072339.28736-3-Dennis.Li@amd.com>
Am 18.03.21 um 08:23 schrieb Dennis Li:
> Changed to only set in_gpu_reset as 1 when the recovery thread begin,
> and delay hold reset_sem after pre-reset but before reset. It make sure
> that other threads have exited or been blocked before doing GPU reset.
> Compared with the old codes, it could make some threads exit more early
> without waiting for timeout.
>
> Introduce a event recovery_fini_event which is used to block new threads
> when recovery thread has begun. These threads are only waked up when recovery
> thread exit.
>
> v2: remove codes to check the usage of adev->reset_sem, because lockdep
> will show all locks held in the system, when system detect hung timeout
> in the recovery thread.
>
> Signed-off-by: Dennis Li <Dennis.Li@amd.com>
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 02a34f9a26aa..67c716e5ee8d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1044,6 +1044,8 @@ struct amdgpu_device {
> atomic_t in_gpu_reset;
> enum pp_mp1_state mp1_state;
> struct rw_semaphore reset_sem;
> + wait_queue_head_t recovery_fini_event;
> +
> struct amdgpu_doorbell_index doorbell_index;
>
> struct mutex notifier_lock;
> @@ -1406,4 +1408,8 @@ static inline int amdgpu_in_reset(struct amdgpu_device *adev)
> {
> return atomic_read(&adev->in_gpu_reset);
> }
> +
> +int amdgpu_read_lock(struct drm_device *dev, bool interruptible);
> +void amdgpu_read_unlock(struct drm_device *dev);
> +
> #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 24ff5992cb02..15235610cc54 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -211,6 +211,60 @@ static ssize_t amdgpu_device_get_serial_number(struct device *dev,
> static DEVICE_ATTR(serial_number, S_IRUGO,
> amdgpu_device_get_serial_number, NULL);
>
> +int amdgpu_read_lock(struct drm_device *dev, bool interruptible)
> +{
> + struct amdgpu_device *adev = drm_to_adev(dev);
> + int ret = 0;
> +
> + /**
> + * if a thread hold the read lock, but recovery thread has started,
> + * it should release the read lock and wait for recovery thread finished
> + * Because pre-reset functions have begun, which stops old threads but no
> + * include the current thread.
> + */
> + if (interruptible) {
> + while (!(ret = down_read_killable(&adev->reset_sem)) &&
> + amdgpu_in_reset(adev)) {
> + up_read(&adev->reset_sem);
> + ret = wait_event_interruptible(adev->recovery_fini_event,
> + !amdgpu_in_reset(adev));
> + if (ret)
> + break;
> + }
> + } else {
> + down_read(&adev->reset_sem);
> + while (amdgpu_in_reset(adev)) {
> + up_read(&adev->reset_sem);
> + wait_event(adev->recovery_fini_event,
> + !amdgpu_in_reset(adev));
> + down_read(&adev->reset_sem);
> + }
> + }
Ok once more. This general approach is a NAK. We have already tried this
and it doesn't work.
All you do here is to replace the gpu reset lock with
wait_event_interruptible().
From an upstream perspective that is strictly illegal since it will
just prevent lockdep warning from filling the logs and doesn't really
solve any problem.
Regards,
Christian.
> +
> + return ret;
> +}
> +
> +void amdgpu_read_unlock(struct drm_device *dev)
> +{
> + struct amdgpu_device *adev = drm_to_adev(dev);
> +
> + up_read(&adev->reset_sem);
> +}
> +
> +static void amdgpu_write_lock(struct amdgpu_device *adev, struct amdgpu_hive_info *hive)
> +{
> + if (hive) {
> + down_write_nest_lock(&adev->reset_sem, &hive->hive_lock);
> + } else {
> + down_write(&adev->reset_sem);
> + }
> +}
> +
> +static void amdgpu_write_unlock(struct amdgpu_device *adev)
> +{
> + up_write(&adev->reset_sem);
> +}
> +
> /**
> * amdgpu_device_supports_atpx - Is the device a dGPU with HG/PX power control
> *
> @@ -3280,6 +3334,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> hash_init(adev->mn_hash);
> atomic_set(&adev->in_gpu_reset, 0);
> init_rwsem(&adev->reset_sem);
> + init_waitqueue_head(&adev->recovery_fini_event);
> mutex_init(&adev->psp.mutex);
> mutex_init(&adev->notifier_lock);
>
> @@ -4509,39 +4564,18 @@ int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
> return r;
> }
>
> -static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
> - struct amdgpu_hive_info *hive)
> +static bool amdgpu_device_recovery_enter(struct amdgpu_device *adev)
> {
> if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
> return false;
>
> - if (hive) {
> - down_write_nest_lock(&adev->reset_sem, &hive->hive_lock);
> - } else {
> - down_write(&adev->reset_sem);
> - }
> -
> - switch (amdgpu_asic_reset_method(adev)) {
> - case AMD_RESET_METHOD_MODE1:
> - adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
> - break;
> - case AMD_RESET_METHOD_MODE2:
> - adev->mp1_state = PP_MP1_STATE_RESET;
> - break;
> - default:
> - adev->mp1_state = PP_MP1_STATE_NONE;
> - break;
> - }
> -
> return true;
> }
>
> -static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
> +static void amdgpu_device_recovery_exit(struct amdgpu_device *adev)
> {
> - amdgpu_vf_error_trans_all(adev);
> - adev->mp1_state = PP_MP1_STATE_NONE;
> atomic_set(&adev->in_gpu_reset, 0);
> - up_write(&adev->reset_sem);
> + wake_up_interruptible_all(&adev->recovery_fini_event);
> }
>
> /*
> @@ -4550,7 +4584,7 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
> *
> * unlock won't require roll back.
> */
> -static int amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct amdgpu_hive_info *hive)
> +static int amdgpu_hive_recovery_enter(struct amdgpu_device *adev, struct amdgpu_hive_info *hive)
> {
> struct amdgpu_device *tmp_adev = NULL;
>
> @@ -4560,10 +4594,10 @@ static int amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct amdgp
> return -ENODEV;
> }
> list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
> - if (!amdgpu_device_lock_adev(tmp_adev, hive))
> + if (!amdgpu_device_recovery_enter(tmp_adev))
> goto roll_back;
> }
> - } else if (!amdgpu_device_lock_adev(adev, hive))
> + } else if (!amdgpu_device_recovery_enter(adev))
> return -EAGAIN;
>
> return 0;
> @@ -4578,12 +4612,61 @@ static int amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct amdgp
> */
> dev_warn(tmp_adev->dev, "Hive lock iteration broke in the middle. Rolling back to unlock");
> list_for_each_entry_continue_reverse(tmp_adev, &hive->device_list, gmc.xgmi.head) {
> - amdgpu_device_unlock_adev(tmp_adev);
> + amdgpu_device_recovery_exit(tmp_adev);
> }
> }
> return -EAGAIN;
> }
>
> +static void amdgpu_device_lock_adev(struct amdgpu_device *adev,
> + struct amdgpu_hive_info *hive)
> +{
> + amdgpu_write_lock(adev, hive);
> +
> + switch (amdgpu_asic_reset_method(adev)) {
> + case AMD_RESET_METHOD_MODE1:
> + adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
> + break;
> + case AMD_RESET_METHOD_MODE2:
> + adev->mp1_state = PP_MP1_STATE_RESET;
> + break;
> + default:
> + adev->mp1_state = PP_MP1_STATE_NONE;
> + break;
> + }
> +}
> +
> +static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
> +{
> + amdgpu_vf_error_trans_all(adev);
> + adev->mp1_state = PP_MP1_STATE_NONE;
> + amdgpu_write_unlock(adev);
> +}
> +
> +/*
> + * to lockup a list of amdgpu devices in a hive safely, if not a hive
> + * with multiple nodes, it will be similar as amdgpu_device_lock_adev.
> + *
> + * unlock won't require roll back.
> + */
> +static void amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct amdgpu_hive_info *hive)
> +{
> + struct amdgpu_device *tmp_adev = NULL;
> +
> + if (adev->gmc.xgmi.num_physical_nodes > 1) {
> + if (!hive) {
> + dev_err(adev->dev, "Hive is NULL while device has multiple xgmi nodes");
> + return;
> + }
> +
> + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
> + amdgpu_device_lock_adev(tmp_adev, hive);
> + }
> + } else {
> + amdgpu_device_lock_adev(adev, hive);
> + }
> +}
> +
> static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
> {
> struct pci_dev *p = NULL;
> @@ -4732,6 +4815,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
> bool need_emergency_restart = false;
> bool audio_suspended = false;
> int tmp_vram_lost_counter;
> + bool locked = false;
>
> /*
> * Special case: RAS triggered and full reset isn't supported
> @@ -4777,7 +4861,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
> * if didn't get the device lock, don't touch the linked list since
> * others may iterating it.
> */
> - r = amdgpu_device_lock_hive_adev(adev, hive);
> + r = amdgpu_hive_recovery_enter(adev, hive);
> if (r) {
> dev_info(adev->dev, "Bailing on TDR for s_job:%llx, as another already in progress",
> job ? job->base.id : -1);
> @@ -4884,6 +4968,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
> }
>
> tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter));
> + /*
> + * Pre reset functions called before lock, which make sure other threads
> + * who own reset lock exit successfully. No other thread runs in the driver
> + * while the recovery thread runs
> + */
> + if (!locked) {
> + amdgpu_device_lock_hive_adev(adev, hive);
> + locked = true;
> + }
> +
> /* Actual ASIC resets if needed.*/
> /* TODO Implement XGMI hive reset logic for SRIOV */
> if (amdgpu_sriov_vf(adev)) {
> @@ -4955,7 +5049,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>
> if (audio_suspended)
> amdgpu_device_resume_display_audio(tmp_adev);
> - amdgpu_device_unlock_adev(tmp_adev);
> + amdgpu_device_recovery_exit(tmp_adev);
> + if (locked)
> + amdgpu_device_unlock_adev(tmp_adev);
> }
>
> skip_recovery:
> @@ -5199,9 +5295,10 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
> * Locking adev->reset_sem will prevent any external access
> * to GPU during PCI error recovery
> */
> - while (!amdgpu_device_lock_adev(adev, NULL))
> + while (!amdgpu_device_recovery_enter(adev))
> amdgpu_cancel_all_tdr(adev);
>
> + amdgpu_device_lock_adev(adev, NULL);
> /*
> * Block any work scheduling as we do for regular GPU reset
> * for the duration of the recovery
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
next prev parent reply other threads:[~2021-03-18 7:57 UTC|newest]
Thread overview: 56+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-03-18 7:23 [PATCH 0/4] Refine GPU recovery sequence to enhance its stability Dennis Li
2021-03-18 7:23 ` [PATCH 1/4] drm/amdgpu: remove reset lock from low level functions Dennis Li
2021-03-18 7:23 ` [PATCH 2/4] drm/amdgpu: refine the GPU recovery sequence Dennis Li
2021-03-18 7:56 ` Christian König [this message]
2021-03-18 7:23 ` [PATCH 3/4] drm/amdgpu: instead of using down/up_read directly Dennis Li
2021-03-18 7:23 ` [PATCH 4/4] drm/amdkfd: add reset lock protection for kfd entry functions Dennis Li
2021-03-18 7:53 ` [PATCH 0/4] Refine GPU recovery sequence to enhance its stability Christian König
2021-03-18 8:28 ` Li, Dennis
2021-03-18 8:58 ` AW: " Koenig, Christian
2021-03-18 9:30 ` Li, Dennis
2021-03-18 9:51 ` Christian König
2021-04-05 17:58 ` Andrey Grodzovsky
2021-04-06 10:34 ` Christian König
2021-04-06 11:21 ` Christian König
2021-04-06 21:22 ` Andrey Grodzovsky
2021-04-07 10:28 ` Christian König
2021-04-07 19:44 ` Andrey Grodzovsky
2021-04-08 8:22 ` Christian König
2021-04-08 8:32 ` Christian König
2021-04-08 16:08 ` Andrey Grodzovsky
2021-04-08 18:58 ` Christian König
2021-04-08 20:39 ` Andrey Grodzovsky
2021-04-09 6:53 ` Christian König
2021-04-09 7:01 ` Christian König
2021-04-09 15:42 ` Andrey Grodzovsky
2021-04-09 16:39 ` Christian König
2021-04-09 18:18 ` Andrey Grodzovsky
2021-04-10 17:34 ` Christian König
2021-04-12 17:27 ` Andrey Grodzovsky
2021-04-12 17:44 ` Christian König
2021-04-12 18:01 ` Andrey Grodzovsky
2021-04-12 18:05 ` Christian König
2021-04-12 18:18 ` Andrey Grodzovsky
2021-04-12 18:23 ` Christian König
2021-04-12 19:12 ` Andrey Grodzovsky
2021-04-12 19:18 ` Christian König
2021-04-12 20:01 ` Andrey Grodzovsky
2021-04-13 7:10 ` Christian König
2021-04-13 9:13 ` Li, Dennis
2021-04-13 9:14 ` Christian König
2021-04-13 20:08 ` Daniel Vetter
2021-04-13 15:12 ` Andrey Grodzovsky
2021-04-13 18:03 ` Christian König
2021-04-13 18:18 ` Andrey Grodzovsky
2021-04-13 18:25 ` Christian König
2021-04-13 18:30 ` Andrey Grodzovsky
2021-04-14 7:01 ` Christian König
2021-04-14 14:36 ` Andrey Grodzovsky
2021-04-14 14:58 ` Christian König
2021-04-15 6:27 ` Andrey Grodzovsky
2021-04-15 7:02 ` Christian König
2021-04-15 14:11 ` Andrey Grodzovsky
2021-04-15 15:09 ` Christian König
2021-04-13 20:07 ` Daniel Vetter
2021-04-13 5:36 ` Andrey Grodzovsky
2021-04-13 7:07 ` Christian König
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=123ba10f-f0d7-bbf8-8e99-7948cfb5024d@amd.com \
--to=christian.koenig@amd.com \
--cc=Alexander.Deucher@amd.com \
--cc=Dennis.Li@amd.com \
--cc=Hawking.Zhang@amd.com \
--cc=amd-gfx@lists.freedesktop.org \
--cc=felix.kuehling@amd.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).