All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Christian König" <christian.koenig@amd.com>
To: Andrey Grodzovsky <andrey.grodzovsky@amd.com>,
	amd-gfx@lists.freedesktop.org
Cc: Zoy.Bai@amd.com, lijo.lazar@amd.com
Subject: Re: [PATCH v3 5/7] drm/amdgpu: Add work_struct for GPU reset from kfd.
Date: Mon, 30 May 2022 09:54:34 +0200	[thread overview]
Message-ID: <4fdeb19a-35e3-da7b-380f-ff5efda7b4b4@amd.com> (raw)
In-Reply-To: <20220525190447.239867-6-andrey.grodzovsky@amd.com>

Am 25.05.22 um 21:04 schrieb Andrey Grodzovsky:
> We need to have a work_struct so that this reset can be cancelled if
> another one is already in progress.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 15 ++++++++++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 31 ----------------------
>   3 files changed, 15 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 1f8161cd507f..a23abc0e86e7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -33,6 +33,7 @@
>   #include <uapi/linux/kfd_ioctl.h>
>   #include "amdgpu_ras.h"
>   #include "amdgpu_umc.h"
> +#include "amdgpu_reset.h"
>   
>   /* Total memory size in system memory and all GPU VRAM. Used to
>    * estimate worst case amount of memory to reserve for page tables
> @@ -122,6 +123,15 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
>   	}
>   }
>   
> +
> +static void amdgpu_amdkfd_reset_work(struct work_struct *work)
> +{
> +	struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
> +						  kfd.reset_work);
> +
> +	amdgpu_device_gpu_recover_imp(adev, NULL);
> +}
> +
>   void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
>   {
>   	int i;
> @@ -180,6 +190,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
>   
>   		adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
>   						adev_to_drm(adev), &gpu_resources);
> +
> +		INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work);
>   	}
>   }
>   
> @@ -247,7 +259,8 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
>   void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
>   {
>   	if (amdgpu_device_should_recover_gpu(adev))
> -		amdgpu_device_gpu_recover(adev, NULL);
> +		amdgpu_reset_domain_schedule(adev->reset_domain,
> +					     &adev->kfd.reset_work);
>   }
>   
>   int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index f8b9f27adcf5..e0709af5a326 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -96,6 +96,7 @@ struct amdgpu_kfd_dev {
>   	struct kfd_dev *dev;
>   	uint64_t vram_used;
>   	bool init_complete;
> +	struct work_struct reset_work;
>   };
>   
>   enum kgd_engine_type {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index bfdd8883089a..e3e2a5d17cc2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -5312,37 +5312,6 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
>   	return r;
>   }
>   
> -struct amdgpu_recover_work_struct {
> -	struct work_struct base;
> -	struct amdgpu_device *adev;
> -	struct amdgpu_job *job;
> -	int ret;
> -};
> -
> -static void amdgpu_device_queue_gpu_recover_work(struct work_struct *work)
> -{
> -	struct amdgpu_recover_work_struct *recover_work = container_of(work, struct amdgpu_recover_work_struct, base);
> -
> -	amdgpu_device_gpu_recover_imp(recover_work->adev, recover_work->job);
> -}
> -/*
> - * Serialize gpu recover into reset domain single threaded wq
> - */
> -int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
> -				    struct amdgpu_job *job)
> -{
> -	struct amdgpu_recover_work_struct work = {.adev = adev, .job = job};
> -
> -	INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work);
> -
> -	if (!amdgpu_reset_domain_schedule(adev->reset_domain, &work.base))
> -		return -EAGAIN;
> -
> -	flush_work(&work.base);
> -
> -	return atomic_read(&adev->reset_domain->reset_res);
> -}
> -
>   /**
>    * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
>    *


  reply	other threads:[~2022-05-30  7:54 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-25 19:04 [PATCH v3 0/7] Fix multiple GPU resets in XGMI hive Andrey Grodzovsky
2022-05-25 19:04 ` [PATCH v3 1/7] Revert "workqueue: remove unused cancel_work()" Andrey Grodzovsky
2022-05-25 19:04 ` [PATCH v3 2/7] drm/amdgpu: Cache result of last reset at reset domain level Andrey Grodzovsky
2022-05-30  7:47   ` Christian König
2022-05-25 19:04 ` [PATCH v3 3/7] drm/amdgpu: Serialize RAS recovery work directly into reset domain queue Andrey Grodzovsky
2022-05-30  7:49   ` Christian König
2022-05-31  3:02     ` Luben Tuikov
2022-05-25 19:04 ` [PATCH v3 4/7] drm/amdgpu: Add work_struct for GPU reset from debugfs Andrey Grodzovsky
2022-05-30  7:52   ` Christian König
2022-05-30 15:46     ` Andrey Grodzovsky
2022-05-25 19:04 ` [PATCH v3 5/7] drm/amdgpu: Add work_struct for GPU reset from kfd Andrey Grodzovsky
2022-05-30  7:54   ` Christian König [this message]
2022-05-31 15:31   ` Felix Kuehling
2022-05-25 19:04 ` [PATCH v3 6/7] drm/amdgpu: Rename amdgpu_device_gpu_recover_imp back to amdgpu_device_gpu_recover Andrey Grodzovsky
2022-05-30  7:55   ` Christian König
2022-05-25 19:04 ` [PATCH v3 7/7] drm/amdgpu: Stop any pending reset if another in progress Andrey Grodzovsky
2022-05-30  7:56   ` Christian König
2022-05-31 15:31   ` Felix Kuehling
2022-05-31 15:35     ` Felix Kuehling

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4fdeb19a-35e3-da7b-380f-ff5efda7b4b4@amd.com \
    --to=christian.koenig@amd.com \
    --cc=Zoy.Bai@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=andrey.grodzovsky@amd.com \
    --cc=lijo.lazar@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.