All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH] drm/amdgpu: Remove eviction fence before release bo
@ 2020-02-05 12:56 Pan, Xinhui
  2020-02-05 14:02 ` Christian König
  0 siblings, 1 reply; 2+ messages in thread
From: Pan, Xinhui @ 2020-02-05 12:56 UTC (permalink / raw)
  To: amd-gfx; +Cc: Deucher, Alexander, Kuehling, Felix, Koenig, Christian


No need to trigger eviction as the memory mapping will not be used anymore.

All pt/pd bos share the same resv, hence the same shared eviction fence. Every time a page table is freed, the fence will be signaled, and that causes unexpected kfd evictions.

A kfd bo uses its own resv, so it is not affected.

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
---

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 47b0f29..265b1ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -96,6 +96,7 @@
 						       struct mm_struct *mm);
 bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
 struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
 
 struct amdkfd_process_info {
 	/* List head of all VMs that belong to a KFD process */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index ef721cb..a3c55ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -276,6 +276,26 @@
 	return 0;
 }
 
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
+{
+	struct amdgpu_vm *vm;
+	int ret = 0;
+
+	if (bo->vm_bo && bo->vm_bo->vm) {
+		vm = bo->vm_bo->vm;
+		if (vm->process_info && vm->process_info->eviction_fence) {
+			BUG_ON(!dma_resv_trylock(&bo->tbo.base._resv));
+			if (bo->tbo.base.resv != &bo->tbo.base._resv) {
+				dma_resv_copy_fences(&bo->tbo.base._resv, bo->tbo.base.resv);
+				bo->tbo.base.resv = &bo->tbo.base._resv;
+			}
+			ret = amdgpu_amdkfd_remove_eviction_fence(bo, vm->process_info->eviction_fence);
+			dma_resv_unlock(bo->tbo.base.resv);
+		}
+	}
+	return ret;
+}
+
 static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
 				     bool wait)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 6f60a58..4b5bee0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1307,6 +1307,9 @@
 	if (abo->kfd_bo)
 		amdgpu_amdkfd_unreserve_memory_limit(abo);
 
+	amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo);
+	abo->vm_bo = NULL;
+
 	if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node ||
 	    !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE))
 		return;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index cc56eab..187cdb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -945,7 +945,6 @@
 static void amdgpu_vm_free_table(struct amdgpu_vm_pt *entry)
 {
 	if (entry->base.bo) {
-		entry->base.bo->vm_bo = NULL;
 		list_del(&entry->base.vm_status);
 		amdgpu_bo_unref(&entry->base.bo->shadow);
 		amdgpu_bo_unref(&entry->base.bo);
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [RFC PATCH] drm/amdgpu: Remove eviction fence before release bo
  2020-02-05 12:56 [RFC PATCH] drm/amdgpu: Remove eviction fence before release bo Pan, Xinhui
@ 2020-02-05 14:02 ` Christian König
  0 siblings, 0 replies; 2+ messages in thread
From: Christian König @ 2020-02-05 14:02 UTC (permalink / raw)
  To: Pan, Xinhui, amd-gfx; +Cc: Deucher, Alexander, Kuehling, Felix

Am 05.02.20 um 13:56 schrieb Pan, Xinhui:
> No need to trigger eviction as the memory mapping will not be used anymore.
>
> All pt/pd bos share same resv, hence the same shared eviction fence. Everytime page table is freed, the fence will be signled and that cuases kfd unexcepted evictions.
>
> kfd bo uses its own resv, so it is not affetced.
>
> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
> ---
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 47b0f29..265b1ed 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -96,6 +96,7 @@
>   						       struct mm_struct *mm);
>   bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
>   struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
> +int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
>   
>   struct amdkfd_process_info {
>   	/* List head of all VMs that belong to a KFD process */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index ef721cb..a3c55ad 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -276,6 +276,26 @@
>   	return 0;
>   }
>   
> +int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
> +{
> +	struct amdgpu_vm *vm;
> +	int ret = 0;
> +
> +	if (bo->vm_bo && bo->vm_bo->vm) {
> +		vm = bo->vm_bo->vm;
> +		if (vm->process_info && vm->process_info->eviction_fence) {

Better write that as checking of prerequisites, e.g. if (!...) return;

> +			BUG_ON(!dma_resv_trylock(&bo->tbo.base._resv));
> +			if (bo->tbo.base.resv != &bo->tbo.base._resv) {
> +				dma_resv_copy_fences(&bo->tbo.base._resv, bo->tbo.base.resv);
> +				bo->tbo.base.resv = &bo->tbo.base._resv;

That doesn't work correctly and could crash really really badly. We need 
to rework how deleted BOs are handled in TTM first for this.

Roughly a month or two ago I sent out a patch set which does that, but I 
never got around to finishing it up.

Regards,
Christian.

> +			}
> +			ret = amdgpu_amdkfd_remove_eviction_fence(bo, vm->process_info->eviction_fence);
> +			dma_resv_unlock(bo->tbo.base.resv);
> +		}
> +	}
> +	return ret;
> +}
> +
>   static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
>   				     bool wait)
>   {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 6f60a58..4b5bee0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -1307,6 +1307,9 @@
>   	if (abo->kfd_bo)
>   		amdgpu_amdkfd_unreserve_memory_limit(abo);
>   
> +	amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo);
> +	abo->vm_bo = NULL;
> +
>   	if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node ||
>   	    !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE))
>   		return;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index cc56eab..187cdb3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -945,7 +945,6 @@
>   static void amdgpu_vm_free_table(struct amdgpu_vm_pt *entry)
>   {
>   	if (entry->base.bo) {
> -		entry->base.bo->vm_bo = NULL;
>   		list_del(&entry->base.vm_status);
>   		amdgpu_bo_unref(&entry->base.bo->shadow);
>   		amdgpu_bo_unref(&entry->base.bo);

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2020-02-05 14:02 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-05 12:56 [RFC PATCH] drm/amdgpu: Remove eviction fence before release bo Pan, Xinhui
2020-02-05 14:02 ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.