* [PATCH] drm/amdkfd: fix svm_bo release invalid wait context warning
@ 2021-11-30 20:13 Philip Yang
  2021-12-01  0:42 ` Felix Kuehling
  2021-12-09 21:18 ` [PATCH v2] " Philip Yang
  0 siblings, 2 replies; 4+ messages in thread
From: Philip Yang @ 2021-11-30 20:13 UTC (permalink / raw)
  To: amd-gfx; +Cc: alex.sierra, Philip Yang, Felix.Kuehling

svm_range_bo_release can be called from atomic context (put_page under
__do_munmap) if the process release work has already finished freeing
the pranges of the svm_bo. The release path must not sleep there, so
schedule release_work to wait for the svm_bo eviction work to finish
and then free the svm_bo.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 36 +++++++++++++++++++---------
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  1 +
 2 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index f2db49c7a8fd..8af87a662a0d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -327,11 +327,33 @@ static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo)
 	return true;
 }
 
+static void svm_range_bo_wq_release(struct work_struct *work)
+{
+	struct svm_range_bo *svm_bo;
+
+	svm_bo = container_of(work, struct svm_range_bo, release_work);
+	pr_debug("svm_bo 0x%p\n", svm_bo);
+
+	if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) {
+		/* We're not in the eviction worker.
+		 * Signal the fence and synchronize with any
+		 * pending eviction work.
+		 */
+		dma_fence_signal(&svm_bo->eviction_fence->base);
+		cancel_work_sync(&svm_bo->eviction_work);
+	}
+	dma_fence_put(&svm_bo->eviction_fence->base);
+	amdgpu_bo_unref(&svm_bo->bo);
+	kfree(svm_bo);
+}
+
 static void svm_range_bo_release(struct kref *kref)
 {
 	struct svm_range_bo *svm_bo;
 
 	svm_bo = container_of(kref, struct svm_range_bo, kref);
+	pr_debug("svm_bo 0x%p\n", svm_bo);
+
 	spin_lock(&svm_bo->list_lock);
 	while (!list_empty(&svm_bo->range_list)) {
 		struct svm_range *prange =
@@ -352,17 +374,9 @@ static void svm_range_bo_release(struct kref *kref)
 		spin_lock(&svm_bo->list_lock);
 	}
 	spin_unlock(&svm_bo->list_lock);
-	if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) {
-		/* We're not in the eviction worker.
-		 * Signal the fence and synchronize with any
-		 * pending eviction work.
-		 */
-		dma_fence_signal(&svm_bo->eviction_fence->base);
-		cancel_work_sync(&svm_bo->eviction_work);
-	}
-	dma_fence_put(&svm_bo->eviction_fence->base);
-	amdgpu_bo_unref(&svm_bo->bo);
-	kfree(svm_bo);
+
+	INIT_WORK(&svm_bo->release_work, svm_range_bo_wq_release);
+	schedule_work(&svm_bo->release_work);
 }
 
 void svm_range_bo_unref(struct svm_range_bo *svm_bo)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 6dc91c33e80f..23478ae7a7d9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -48,6 +48,7 @@ struct svm_range_bo {
 	struct work_struct		eviction_work;
 	struct svm_range_list		*svms;
 	uint32_t			evicting;
+	struct work_struct		release_work;
 };
 
 enum svm_work_list_ops {
-- 
2.17.1


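The fix applies a standard kernel idiom: a kref release function that
can run in atomic context must not call sleeping functions such as
cancel_work_sync(), so the sleeping teardown is deferred to a
workqueue. A minimal, generic sketch of the idiom (illustrative names,
not the driver's own):

#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct obj {
	struct kref kref;
	struct work_struct evict_work;		/* may still be pending at release */
	struct work_struct release_work;	/* deferred final teardown */
};

static void obj_wq_release(struct work_struct *work)
{
	struct obj *o = container_of(work, struct obj, release_work);

	/* The worker runs in process context, so sleeping is allowed. */
	cancel_work_sync(&o->evict_work);
	kfree(o);
}

static void obj_release(struct kref *kref)
{
	struct obj *o = container_of(kref, struct obj, kref);

	/*
	 * The last put can come from atomic context, so nothing here may
	 * sleep: defer cancel_work_sync() and kfree() to a worker.
	 */
	INIT_WORK(&o->release_work, obj_wq_release);
	schedule_work(&o->release_work);
}

static void obj_put(struct obj *o)
{
	if (o)
		kref_put(&o->kref, obj_release);
}

The cost, as the review below points out, is an extra scheduling
round-trip even for callers that could have torn down synchronously.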

* Re: [PATCH] drm/amdkfd: fix svm_bo release invalid wait context warning
  2021-11-30 20:13 [PATCH] drm/amdkfd: fix svm_bo release invalid wait context warning Philip Yang
@ 2021-12-01  0:42 ` Felix Kuehling
  2021-12-09 21:18 ` [PATCH v2] " Philip Yang
  1 sibling, 0 replies; 4+ messages in thread
From: Felix Kuehling @ 2021-12-01  0:42 UTC (permalink / raw)
  To: Philip Yang, amd-gfx; +Cc: alex.sierra

On 2021-11-30 at 3:13 p.m., Philip Yang wrote:
> svm_range_bo_release can be called from atomic context (put_page under
> __do_munmap) if the process release work has already finished freeing
> the pranges of the svm_bo. The release path must not sleep there, so
> schedule release_work to wait for the svm_bo eviction work to finish
> and then free the svm_bo.
>
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 36 +++++++++++++++++++---------
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  1 +
>  2 files changed, 26 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index f2db49c7a8fd..8af87a662a0d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -327,11 +327,33 @@ static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo)
>  	return true;
>  }
>  
> +static void svm_range_bo_wq_release(struct work_struct *work)
> +{
> +	struct svm_range_bo *svm_bo;
> +
> +	svm_bo = container_of(work, struct svm_range_bo, release_work);
> +	pr_debug("svm_bo 0x%p\n", svm_bo);
> +
> +	if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) {
> +		/* We're not in the eviction worker.
> +		 * Signal the fence and synchronize with any
> +		 * pending eviction work.
> +		 */

Now that this is its own worker, it's definitely never in the eviction
worker. So this doesn't need to be conditional here.

I'm thinking, in the eviction case, the extra scheduling is not actually
needed and only adds latency and overhead. The eviction already runs in
a worker thread and the svm_range_bo_unref there is definitely not in an
atomic context.

So I wonder if we should have two versions,
svm_range_bo_unref_sync/async and svm_range_bo_release_sync/async. The
synchronous version doesn't schedule a worker and is used when we're
sure we're not in atomic context. The asynchronous version we can use
in places that may be in atomic context.

Regards,
  Felix


> +		dma_fence_signal(&svm_bo->eviction_fence->base);
> +		cancel_work_sync(&svm_bo->eviction_work);
> +	}
> +	dma_fence_put(&svm_bo->eviction_fence->base);
> +	amdgpu_bo_unref(&svm_bo->bo);
> +	kfree(svm_bo);
> +}
> +
>  static void svm_range_bo_release(struct kref *kref)
>  {
>  	struct svm_range_bo *svm_bo;
>  
>  	svm_bo = container_of(kref, struct svm_range_bo, kref);
> +	pr_debug("svm_bo 0x%p\n", svm_bo);
> +
>  	spin_lock(&svm_bo->list_lock);
>  	while (!list_empty(&svm_bo->range_list)) {
>  		struct svm_range *prange =
> @@ -352,17 +374,9 @@ static void svm_range_bo_release(struct kref *kref)
>  		spin_lock(&svm_bo->list_lock);
>  	}
>  	spin_unlock(&svm_bo->list_lock);
> -	if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) {
> -		/* We're not in the eviction worker.
> -		 * Signal the fence and synchronize with any
> -		 * pending eviction work.
> -		 */
> -		dma_fence_signal(&svm_bo->eviction_fence->base);
> -		cancel_work_sync(&svm_bo->eviction_work);
> -	}
> -	dma_fence_put(&svm_bo->eviction_fence->base);
> -	amdgpu_bo_unref(&svm_bo->bo);
> -	kfree(svm_bo);
> +
> +	INIT_WORK(&svm_bo->release_work, svm_range_bo_wq_release);
> +	schedule_work(&svm_bo->release_work);
>  }
>  
>  void svm_range_bo_unref(struct svm_range_bo *svm_bo)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> index 6dc91c33e80f..23478ae7a7d9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> @@ -48,6 +48,7 @@ struct svm_range_bo {
>  	struct work_struct		eviction_work;
>  	struct svm_range_list		*svms;
>  	uint32_t			evicting;
> +	struct work_struct		release_work;
>  };
>  
>  enum svm_work_list_ops {

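The suggested split works because kref_put() takes the release callback
at each call site and only the final put invokes it: whichever context
drops the last reference picks synchronous or deferred teardown. A
reduced sketch of the mechanism, reusing struct obj from the sketch
above (generic names; the v2 patch below is the authoritative shape):

static void obj_release(struct kref *kref)		/* may sleep */
{
	struct obj *o = container_of(kref, struct obj, kref);

	cancel_work_sync(&o->evict_work);
	kfree(o);
}

static void obj_wq_release(struct work_struct *work)
{
	struct obj *o = container_of(work, struct obj, release_work);

	obj_release(&o->kref);	/* now safely in process context */
}

static void obj_release_async(struct kref *kref)	/* atomic-safe */
{
	struct obj *o = container_of(kref, struct obj, kref);

	INIT_WORK(&o->release_work, obj_wq_release);
	schedule_work(&o->release_work);
}

void obj_put(struct obj *o)		/* process context only */
{
	if (o)
		kref_put(&o->kref, obj_release);
}

void obj_put_async(struct obj *o)	/* safe in any context */
{
	if (o)
		kref_put(&o->kref, obj_release_async);
}

Because only the last kref_put() runs its callback, mixing the two
variants on the same object is safe as long as each call site matches
its own context.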

* [PATCH v2] drm/amdkfd: fix svm_bo release invalid wait context warning
  2021-11-30 20:13 [PATCH] drm/amdkfd: fix svm_bo release invalid wait context warning Philip Yang
  2021-12-01  0:42 ` Felix Kuehling
@ 2021-12-09 21:18 ` Philip Yang
  2021-12-11  1:11   ` Felix Kuehling
  1 sibling, 1 reply; 4+ messages in thread
From: Philip Yang @ 2021-12-09 21:18 UTC (permalink / raw)
  To: amd-gfx; +Cc: Philip Yang, Felix.Kuehling

Add svm_range_bo_unref_async, which schedules work to wait for the
svm_bo eviction work to finish and then frees the svm_bo. put_page
under __do_munmap runs in atomic context, so call
svm_range_bo_unref_async there to avoid an invalid wait context
warning. All other, non-atomic contexts call svm_range_bo_unref.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 31 +++++++++++++++++++++---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h     |  3 ++-
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 9731151b67d6..d5d2cf2ee788 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -550,7 +550,7 @@ static void svm_migrate_page_free(struct page *page)
 
 	if (svm_bo) {
 		pr_debug_ratelimited("ref: %d\n", kref_read(&svm_bo->kref));
-		svm_range_bo_unref(svm_bo);
+		svm_range_bo_unref_async(svm_bo);
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index c178d56361d6..b216842b5fe2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -332,6 +332,8 @@ static void svm_range_bo_release(struct kref *kref)
 	struct svm_range_bo *svm_bo;
 
 	svm_bo = container_of(kref, struct svm_range_bo, kref);
+	pr_debug("svm_bo 0x%p\n", svm_bo);
+
 	spin_lock(&svm_bo->list_lock);
 	while (!list_empty(&svm_bo->range_list)) {
 		struct svm_range *prange =
@@ -365,12 +367,33 @@ static void svm_range_bo_release(struct kref *kref)
 	kfree(svm_bo);
 }
 
-void svm_range_bo_unref(struct svm_range_bo *svm_bo)
+static void svm_range_bo_wq_release(struct work_struct *work)
 {
-	if (!svm_bo)
-		return;
+	struct svm_range_bo *svm_bo;
+
+	svm_bo = container_of(work, struct svm_range_bo, release_work);
+	svm_range_bo_release(&svm_bo->kref);
+}
+
+static void svm_range_bo_release_async(struct kref *kref)
+{
+	struct svm_range_bo *svm_bo;
+
+	svm_bo = container_of(kref, struct svm_range_bo, kref);
+	pr_debug("svm_bo 0x%p\n", svm_bo);
+	INIT_WORK(&svm_bo->release_work, svm_range_bo_wq_release);
+	schedule_work(&svm_bo->release_work);
+}
 
-	kref_put(&svm_bo->kref, svm_range_bo_release);
+void svm_range_bo_unref_async(struct svm_range_bo *svm_bo)
+{
+	kref_put(&svm_bo->kref, svm_range_bo_release_async);
+}
+
+static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
+{
+	if (svm_bo)
+		kref_put(&svm_bo->kref, svm_range_bo_release);
 }
 
 static bool
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 6dc91c33e80f..2f8a95e86dcb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -48,6 +48,7 @@ struct svm_range_bo {
 	struct work_struct		eviction_work;
 	struct svm_range_list		*svms;
 	uint32_t			evicting;
+	struct work_struct		release_work;
 };
 
 enum svm_work_list_ops {
@@ -195,7 +196,7 @@ void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_s
  */
 #define KFD_IS_SVM_API_SUPPORTED(dev) ((dev)->pgmap.type != 0)
 
-void svm_range_bo_unref(struct svm_range_bo *svm_bo);
+void svm_range_bo_unref_async(struct svm_range_bo *svm_bo);
 #else
 
 struct kfd_process;
-- 
2.17.1


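For context on why svm_migrate_page_free() needs the async path: it is
the driver's dev_pagemap page_free callback, which put_page() invokes
when the last reference to a device-private page drops, and that can
happen under __do_munmap() with non-sleeping locks held. The wiring is
roughly as follows (shape as in kfd_migrate.c of this period; treat as
illustrative):

/* struct dev_pagemap_ops is declared in <linux/memremap.h> */
static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
	.page_free	= svm_migrate_page_free,	/* may run in atomic context */
	.migrate_to_ram	= svm_migrate_to_ram,		/* CPU fault on a device page */
};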

* Re: [PATCH v2] drm/amdkfd: fix svm_bo release invalid wait context warning
  2021-12-09 21:18 ` [PATCH v2] " Philip Yang
@ 2021-12-11  1:11   ` Felix Kuehling
  0 siblings, 0 replies; 4+ messages in thread
From: Felix Kuehling @ 2021-12-11  1:11 UTC (permalink / raw)
  To: Philip Yang, amd-gfx

On 2021-12-09 4:18 p.m., Philip Yang wrote:
> Add svm_range_bo_unref_async, which schedules work to wait for the
> svm_bo eviction work to finish and then frees the svm_bo. put_page
> under __do_munmap runs in atomic context, so call
> svm_range_bo_unref_async there to avoid an invalid wait context
> warning. All other, non-atomic contexts call svm_range_bo_unref.
>
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>


> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  2 +-
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 31 +++++++++++++++++++++---
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.h     |  3 ++-
>   3 files changed, 30 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index 9731151b67d6..d5d2cf2ee788 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -550,7 +550,7 @@ static void svm_migrate_page_free(struct page *page)
>   
>   	if (svm_bo) {
>   		pr_debug_ratelimited("ref: %d\n", kref_read(&svm_bo->kref));
> -		svm_range_bo_unref(svm_bo);
> +		svm_range_bo_unref_async(svm_bo);
>   	}
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index c178d56361d6..b216842b5fe2 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -332,6 +332,8 @@ static void svm_range_bo_release(struct kref *kref)
>   	struct svm_range_bo *svm_bo;
>   
>   	svm_bo = container_of(kref, struct svm_range_bo, kref);
> +	pr_debug("svm_bo 0x%p\n", svm_bo);
> +
>   	spin_lock(&svm_bo->list_lock);
>   	while (!list_empty(&svm_bo->range_list)) {
>   		struct svm_range *prange =
> @@ -365,12 +367,33 @@ static void svm_range_bo_release(struct kref *kref)
>   	kfree(svm_bo);
>   }
>   
> -void svm_range_bo_unref(struct svm_range_bo *svm_bo)
> +static void svm_range_bo_wq_release(struct work_struct *work)
>   {
> -	if (!svm_bo)
> -		return;
> +	struct svm_range_bo *svm_bo;
> +
> +	svm_bo = container_of(work, struct svm_range_bo, release_work);
> +	svm_range_bo_release(&svm_bo->kref);
> +}
> +
> +static void svm_range_bo_release_async(struct kref *kref)
> +{
> +	struct svm_range_bo *svm_bo;
> +
> +	svm_bo = container_of(kref, struct svm_range_bo, kref);
> +	pr_debug("svm_bo 0x%p\n", svm_bo);
> +	INIT_WORK(&svm_bo->release_work, svm_range_bo_wq_release);
> +	schedule_work(&svm_bo->release_work);
> +}
>   
> -	kref_put(&svm_bo->kref, svm_range_bo_release);
> +void svm_range_bo_unref_async(struct svm_range_bo *svm_bo)
> +{
> +	kref_put(&svm_bo->kref, svm_range_bo_release_async);
> +}
> +
> +static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
> +{
> +	if (svm_bo)
> +		kref_put(&svm_bo->kref, svm_range_bo_release);
>   }
>   
>   static bool
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> index 6dc91c33e80f..2f8a95e86dcb 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> @@ -48,6 +48,7 @@ struct svm_range_bo {
>   	struct work_struct		eviction_work;
>   	struct svm_range_list		*svms;
>   	uint32_t			evicting;
> +	struct work_struct		release_work;
>   };
>   
>   enum svm_work_list_ops {
> @@ -195,7 +196,7 @@ void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_s
>    */
>   #define KFD_IS_SVM_API_SUPPORTED(dev) ((dev)->pgmap.type != 0)
>   
> -void svm_range_bo_unref(struct svm_range_bo *svm_bo);
> +void svm_range_bo_unref_async(struct svm_range_bo *svm_bo);
>   #else
>   
>   struct kfd_process;

