* [RFC PATCH v2] drm/amdgpu: Remove kfd eviction fence before release bo
@ 2020-02-07 13:42 Pan, Xinhui
2020-02-07 13:46 ` Christian König
0 siblings, 1 reply; 4+ messages in thread
From: Pan, Xinhui @ 2020-02-07 13:42 UTC (permalink / raw)
To: amd-gfx; +Cc: Deucher, Alexander, Kuehling, Felix, Koenig, Christian
No need to trigger eviction as the memory mapping will not be used anymore.
All pt/pd bos share the same resv, hence the same shared eviction fence. Every time a page table is freed, the fence will be signaled and that causes unexpected kfd evictions.
Signed-off-by: xinhui pan <xinhui.pan@example.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 +
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 78 ++++++++++++++++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 ++-
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 +-
drivers/gpu/drm/ttm/ttm_bo.c | 38 +++++----
5 files changed, 111 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 47b0f2957d1f..265b1ed7264c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -96,6 +96,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
struct mm_struct *mm);
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
struct amdkfd_process_info {
/* List head of all VMs that belong to a KFD process */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index ef721cb65868..11315095c29b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -223,7 +223,7 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
struct amdgpu_amdkfd_fence *ef)
{
- struct dma_resv *resv = bo->tbo.base.resv;
+ struct dma_resv *resv = &bo->tbo.base._resv;
struct dma_resv_list *old, *new;
unsigned int i, j, k;
@@ -276,6 +276,78 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
return 0;
}
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
+{
+ struct amdgpu_vm_bo_base *vm_bo = bo->vm_bo;
+ struct amdgpu_vm *vm;
+ struct amdkfd_process_info *info;
+ struct amdgpu_amdkfd_fence *ef;
+ struct amdgpu_bo *parent;
+ int locked;
+ int ret;
+ struct ttm_bo_global *glob = &ttm_bo_glob;
+
+ if (vm_bo == NULL)
+ return 0;
+
+ /* Page table bo has a reference of the parent bo.
+ * BO itself can't guarantee the vm it points to is alive.
+ * for example, VM is going to free page tables, and the pt/pd bo might be
+ * freed by a workqueue. In that case, the vm might be freed already,
+ * leaving the bo->vm_bo points to vm.root.
+ *
+ * so to avoid that, when kfd frees its vms,
+ * 1) set vm->process_info to NULL if this is the last vm.
+ * 2) set root_bo->vm_bo to NULL.
+ *
+ * but there are still races, just like
+ * cpu 1 cpu 2
+ * !vm_bo
+ * ->info = NULL
+ * free(info)
+ * ->vm_bo = NULL
+ * free (vm)
+ * info = vm->info //invalid vm
+ *
+ * So to avoid the race, use ttm_bo_glob lru_lock.
+ * generally speaking, adding a new lock is acceptable.
+ * But reusing this lock is simple.
+ */
+ parent = bo;
+ while (parent->parent)
+ parent = parent->parent;
+
+ spin_lock(&glob->lru_lock);
+ vm_bo = parent->vm_bo;
+ if (!vm_bo) {
+ spin_unlock(&glob->lru_lock);
+ return 0;
+ }
+
+ vm = vm_bo->vm;
+ if (!vm) {
+ spin_unlock(&glob->lru_lock);
+ return 0;
+ }
+
+ info = vm->process_info;
+ if (!info || !info->eviction_fence) {
+ spin_unlock(&glob->lru_lock);
+ return 0;
+ }
+
+ ef = container_of(dma_fence_get(&info->eviction_fence->base),
+ struct amdgpu_amdkfd_fence, base);
+ spin_unlock(&glob->lru_lock);
+
+ locked = dma_resv_trylock(&bo->tbo.base._resv);
+ ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
+ dma_fence_put(&ef->base);
+ if (locked)
+ dma_resv_unlock(&bo->tbo.base._resv);
+ return ret;
+}
+
static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
bool wait)
{
@@ -1030,6 +1102,7 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
{
struct amdkfd_process_info *process_info = vm->process_info;
struct amdgpu_bo *pd = vm->root.base.bo;
+ struct ttm_bo_global *glob = &ttm_bo_glob;
if (!process_info)
return;
@@ -1051,6 +1124,9 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
WARN_ON(!list_empty(&process_info->userptr_valid_list));
WARN_ON(!list_empty(&process_info->userptr_inval_list));
+ spin_lock(&glob->lru_lock);
+ vm->process_info = NULL;
+ spin_unlock(&glob->lru_lock);
dma_fence_put(&process_info->eviction_fence->base);
cancel_delayed_work_sync(&process_info->restore_userptr_work);
put_pid(process_info->pid);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 6f60a581e3ba..6ad12298fa84 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1307,19 +1307,23 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
if (abo->kfd_bo)
amdgpu_amdkfd_unreserve_memory_limit(abo);
+ amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo);
+ abo->vm_bo = NULL;
+
if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node ||
!(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE))
return;
- dma_resv_lock(bo->base.resv, NULL);
+ /* Only kfd bo wipe vram on release. The resv is &_resv. */
+ dma_resv_lock(&bo->base._resv, NULL);
- r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence);
+ r = amdgpu_fill_buffer(abo, AMDGPU_POISON, &bo->base._resv, &fence);
if (!WARN_ON(r)) {
amdgpu_bo_fence(abo, fence, false);
dma_fence_put(fence);
}
- dma_resv_unlock(bo->base.resv);
+ dma_resv_unlock(&bo->base._resv);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index cc56eaba1911..2b96447e30e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -945,7 +945,6 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
static void amdgpu_vm_free_table(struct amdgpu_vm_pt *entry)
{
if (entry->base.bo) {
- entry->base.bo->vm_bo = NULL;
list_del(&entry->base.vm_status);
amdgpu_bo_unref(&entry->base.bo->shadow);
amdgpu_bo_unref(&entry->base.bo);
@@ -3074,6 +3073,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
struct amdgpu_bo_va_mapping *mapping, *tmp;
bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
struct amdgpu_bo *root;
+ struct ttm_bo_global *glob = &ttm_bo_glob;
int i;
amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
@@ -3105,6 +3105,9 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
}
amdgpu_vm_free_pts(adev, vm, NULL);
+ spin_lock(&glob->lru_lock);
+ root->vm_bo = NULL;
+ spin_unlock(&glob->lru_lock);
amdgpu_bo_unreserve(root);
amdgpu_bo_unref(&root);
WARN_ON(vm->root.base.bo);
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 1494aebb8128..706cd60eb9e0 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -429,8 +429,8 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
BUG_ON(!dma_resv_trylock(&bo->base._resv));
r = dma_resv_copy_fences(&bo->base._resv, bo->base.resv);
- if (r)
- dma_resv_unlock(&bo->base._resv);
+
+ dma_resv_unlock(&bo->base._resv);
return r;
}
@@ -455,23 +455,19 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
}
}
-static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
+static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo, bool enqueue)
{
struct ttm_bo_device *bdev = bo->bdev;
int ret;
- ret = ttm_bo_individualize_resv(bo);
- if (ret) {
- /* Last resort, if we fail to allocate memory for the
- * fences block for the BO to become idle
- */
- dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
- 30 * HZ);
- spin_lock(&ttm_bo_glob.lru_lock);
- goto error;
- }
-
spin_lock(&ttm_bo_glob.lru_lock);
+
+ if (enqueue)
+ goto queue;
+
+ if (bo->base.resv != &bo->base._resv)
+ BUG_ON(!dma_resv_trylock(&bo->base._resv));
+
ret = dma_resv_trylock(bo->base.resv) ? 0 : -EBUSY;
if (!ret) {
if (dma_resv_test_signaled_rcu(&bo->base._resv, true)) {
@@ -504,7 +500,7 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
dma_resv_unlock(&bo->base._resv);
}
-error:
+queue:
kref_get(&bo->list_kref);
list_add_tail(&bo->ddestroy, &bdev->ddestroy);
spin_unlock(&ttm_bo_glob.lru_lock);
@@ -655,6 +651,16 @@ static void ttm_bo_release(struct kref *kref)
container_of(kref, struct ttm_buffer_object, kref);
struct ttm_bo_device *bdev = bo->bdev;
struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type];
+ int ret;
+
+ ret = ttm_bo_individualize_resv(bo);
+ if (ret) {
+ /* Last resort, if we fail to allocate memory for the
+ * fences block for the BO to become idle
+ */
+ dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
+ 30 * HZ);
+ }
if (bo->bdev->driver->release_notify)
bo->bdev->driver->release_notify(bo);
@@ -663,7 +669,7 @@ static void ttm_bo_release(struct kref *kref)
ttm_mem_io_lock(man, false);
ttm_mem_io_free_vm(bo);
ttm_mem_io_unlock(man);
- ttm_bo_cleanup_refs_or_queue(bo);
+ ttm_bo_cleanup_refs_or_queue(bo, !!ret);
kref_put(&bo->list_kref, ttm_bo_release_list);
}
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [RFC PATCH v2] drm/amdgpu: Remove kfd eviction fence before release bo
2020-02-07 13:42 [RFC PATCH v2] drm/amdgpu: Remove kfd eviction fence before release bo Pan, Xinhui
@ 2020-02-07 13:46 ` Christian König
2020-02-07 13:50 ` 回复: " Pan, Xinhui
0 siblings, 1 reply; 4+ messages in thread
From: Christian König @ 2020-02-07 13:46 UTC (permalink / raw)
To: Pan, Xinhui, amd-gfx; +Cc: Deucher, Alexander, Kuehling, Felix
Am 07.02.20 um 14:42 schrieb Pan, Xinhui:
> No need to trigger eviction as the memory mapping will not be used anymore.
>
> All pt/pd bos share same resv, hence the same shared eviction fence. Everytime page table is freed, the fence will be signled and that cuases kfd unexcepted evictions.
>
> Signed-off-by: xinhui pab <xinhui.pan@example.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 +
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 78 ++++++++++++++++++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 ++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 +-
> drivers/gpu/drm/ttm/ttm_bo.c | 38 +++++----
> 5 files changed, 111 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 47b0f2957d1f..265b1ed7264c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -96,6 +96,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
> struct mm_struct *mm);
> bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
> struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
> +int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
>
> struct amdkfd_process_info {
> /* List head of all VMs that belong to a KFD process */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index ef721cb65868..11315095c29b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -223,7 +223,7 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
> static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
> struct amdgpu_amdkfd_fence *ef)
> {
> - struct dma_resv *resv = bo->tbo.base.resv;
> + struct dma_resv *resv = &bo->tbo.base._resv;
That won't work either and probably break a bunch of other cases.
Christian.
> struct dma_resv_list *old, *new;
> unsigned int i, j, k;
>
> @@ -276,6 +276,78 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
> return 0;
> }
>
> +int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
> +{
> + struct amdgpu_vm_bo_base *vm_bo = bo->vm_bo;
> + struct amdgpu_vm *vm;
> + struct amdkfd_process_info *info;
> + struct amdgpu_amdkfd_fence *ef;
> + struct amdgpu_bo *parent;
> + int locked;
> + int ret;
> + struct ttm_bo_global *glob = &ttm_bo_glob;
> +
> + if (vm_bo == NULL)
> + return 0;
> +
> + /* Page table bo has a reference of the parent bo.
> + * BO itself can't guarntee the vm it points to is alive.
> + * for example, VM is going to free page tables, and the pt/pd bo might be
> + * freed by a workqueue. In that case, the vm might be freed already,
> + * leaving the bo->vm_bo points to vm.root.
> + *
> + * so to avoid that, when kfd free its vms,
> + * 1) set vm->process_info to NULL if this is the last vm.
> + * 2) set root_bo->vm_bo to NULL.
> + *
> + * but there are still races, just like
> + * cpu 1 cpu 2
> + * !vm_bo
> + * ->info = NULL
> + * free(info)
> + * ->vm_bo = NULL
> + * free (vm)
> + * info = vm->info //invalid vm
> + *
> + * So to avoid the race, use ttm_bo_glob lru_lock.
> + * generally speaking, adding a new lock is accceptable.
> + * But reusing this lock is simple.
> + */
> + parent = bo;
> + while (parent->parent)
> + parent = parent->parent;
> +
> + spin_lock(&glob->lru_lock);
> + vm_bo = parent->vm_bo;
> + if (!vm_bo) {
> + spin_unlock(&glob->lru_lock);
> + return 0;
> + }
> +
> + vm = vm_bo->vm;
> + if (!vm) {
> + spin_unlock(&glob->lru_lock);
> + return 0;
> + }
> +
> + info = vm->process_info;
> + if (!info || !info->eviction_fence) {
> + spin_unlock(&glob->lru_lock);
> + return 0;
> + }
> +
> + ef = container_of(dma_fence_get(&info->eviction_fence->base),
> + struct amdgpu_amdkfd_fence, base);
> + spin_unlock(&glob->lru_lock);
> +
> + locked = dma_resv_trylock(&bo->tbo.base._resv);
> + ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
> + dma_fence_put(&ef->base);
> + if (locked)
> + dma_resv_unlock(&bo->tbo.base._resv);
> + return ret;
> +}
> +
> static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
> bool wait)
> {
> @@ -1030,6 +1102,7 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
> {
> struct amdkfd_process_info *process_info = vm->process_info;
> struct amdgpu_bo *pd = vm->root.base.bo;
> + struct ttm_bo_global *glob = &ttm_bo_glob;
>
> if (!process_info)
> return;
> @@ -1051,6 +1124,9 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
> WARN_ON(!list_empty(&process_info->userptr_valid_list));
> WARN_ON(!list_empty(&process_info->userptr_inval_list));
>
> + spin_lock(&glob->lru_lock);
> + vm->process_info = NULL;
> + spin_unlock(&glob->lru_lock);
> dma_fence_put(&process_info->eviction_fence->base);
> cancel_delayed_work_sync(&process_info->restore_userptr_work);
> put_pid(process_info->pid);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 6f60a581e3ba..6ad12298fa84 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -1307,19 +1307,23 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
> if (abo->kfd_bo)
> amdgpu_amdkfd_unreserve_memory_limit(abo);
>
> + amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo);
> + abo->vm_bo = NULL;
> +
> if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node ||
> !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE))
> return;
>
> - dma_resv_lock(bo->base.resv, NULL);
> + /* Only kfd bo wipe vram on release. The resv is &_resv. */
> + dma_resv_lock(&bo->base._resv, NULL);
>
> - r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence);
> + r = amdgpu_fill_buffer(abo, AMDGPU_POISON, &bo->base._resv, &fence);
> if (!WARN_ON(r)) {
> amdgpu_bo_fence(abo, fence, false);
> dma_fence_put(fence);
> }
>
> - dma_resv_unlock(bo->base.resv);
> + dma_resv_unlock(&bo->base._resv);
> }
>
> /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index cc56eaba1911..2b96447e30e5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -945,7 +945,6 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> static void amdgpu_vm_free_table(struct amdgpu_vm_pt *entry)
> {
> if (entry->base.bo) {
> - entry->base.bo->vm_bo = NULL;
> list_del(&entry->base.vm_status);
> amdgpu_bo_unref(&entry->base.bo->shadow);
> amdgpu_bo_unref(&entry->base.bo);
> @@ -3074,6 +3073,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> struct amdgpu_bo_va_mapping *mapping, *tmp;
> bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
> struct amdgpu_bo *root;
> + struct ttm_bo_global *glob = &ttm_bo_glob;
> int i;
>
> amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
> @@ -3105,6 +3105,9 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> }
>
> amdgpu_vm_free_pts(adev, vm, NULL);
> + spin_lock(&glob->lru_lock);
> + root->vm_bo = NULL;
> + spin_unlock(&glob->lru_lock);
> amdgpu_bo_unreserve(root);
> amdgpu_bo_unref(&root);
> WARN_ON(vm->root.base.bo);
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 1494aebb8128..706cd60eb9e0 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -429,8 +429,8 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
> BUG_ON(!dma_resv_trylock(&bo->base._resv));
>
> r = dma_resv_copy_fences(&bo->base._resv, bo->base.resv);
> - if (r)
> - dma_resv_unlock(&bo->base._resv);
> +
> + dma_resv_unlock(&bo->base._resv);
>
> return r;
> }
> @@ -455,23 +455,19 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
> }
> }
>
> -static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
> +static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo, bool enqueue)
> {
> struct ttm_bo_device *bdev = bo->bdev;
> int ret;
>
> - ret = ttm_bo_individualize_resv(bo);
> - if (ret) {
> - /* Last resort, if we fail to allocate memory for the
> - * fences block for the BO to become idle
> - */
> - dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
> - 30 * HZ);
> - spin_lock(&ttm_bo_glob.lru_lock);
> - goto error;
> - }
> -
> spin_lock(&ttm_bo_glob.lru_lock);
> +
> + if (enqueue)
> + goto queue;
> +
> + if (bo->base.resv != &bo->base._resv)
> + BUG_ON(!dma_resv_trylock(&bo->base._resv));
> +
> ret = dma_resv_trylock(bo->base.resv) ? 0 : -EBUSY;
> if (!ret) {
> if (dma_resv_test_signaled_rcu(&bo->base._resv, true)) {
> @@ -504,7 +500,7 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
> dma_resv_unlock(&bo->base._resv);
> }
>
> -error:
> +queue:
> kref_get(&bo->list_kref);
> list_add_tail(&bo->ddestroy, &bdev->ddestroy);
> spin_unlock(&ttm_bo_glob.lru_lock);
> @@ -655,6 +651,16 @@ static void ttm_bo_release(struct kref *kref)
> container_of(kref, struct ttm_buffer_object, kref);
> struct ttm_bo_device *bdev = bo->bdev;
> struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type];
> + int ret;
> +
> + ret = ttm_bo_individualize_resv(bo);
> + if (ret) {
> + /* Last resort, if we fail to allocate memory for the
> + * fences block for the BO to become idle
> + */
> + dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
> + 30 * HZ);
> + }
>
> if (bo->bdev->driver->release_notify)
> bo->bdev->driver->release_notify(bo);
> @@ -663,7 +669,7 @@ static void ttm_bo_release(struct kref *kref)
> ttm_mem_io_lock(man, false);
> ttm_mem_io_free_vm(bo);
> ttm_mem_io_unlock(man);
> - ttm_bo_cleanup_refs_or_queue(bo);
> + ttm_bo_cleanup_refs_or_queue(bo, !!ret);
> kref_put(&bo->list_kref, ttm_bo_release_list);
> }
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 4+ messages in thread
* 回复: [RFC PATCH v2] drm/amdgpu: Remove kfd eviction fence before release bo
2020-02-07 13:46 ` Christian König
@ 2020-02-07 13:50 ` Pan, Xinhui
2020-02-07 13:52 ` Christian König
0 siblings, 1 reply; 4+ messages in thread
From: Pan, Xinhui @ 2020-02-07 13:50 UTC (permalink / raw)
To: Koenig, Christian, amd-gfx; +Cc: Deucher, Alexander, Kuehling, Felix
[AMD Official Use Only - Internal Distribution Only]
________________________________________
发件人: Koenig, Christian <Christian.Koenig@amd.com>
发送时间: 2020年2月7日 21:46
收件人: Pan, Xinhui; amd-gfx@lists.freedesktop.org
抄送: Kuehling, Felix; Deucher, Alexander
主题: Re: [RFC PATCH v2] drm/amdgpu: Remove kfd eviction fence before release bo
Am 07.02.20 um 14:42 schrieb Pan, Xinhui:
> No need to trigger eviction as the memory mapping will not be used anymore.
>
> All pt/pd bos share same resv, hence the same shared eviction fence. Everytime page table is freed, the fence will be signled and that cuases kfd unexcepted evictions.
>
> Signed-off-by: xinhui pab <xinhui.pan@example.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 +
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 78 ++++++++++++++++++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 ++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 +-
> drivers/gpu/drm/ttm/ttm_bo.c | 38 +++++----
> 5 files changed, 111 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 47b0f2957d1f..265b1ed7264c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -96,6 +96,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
> struct mm_struct *mm);
> bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
> struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
> +int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
>
> struct amdkfd_process_info {
> /* List head of all VMs that belong to a KFD process */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index ef721cb65868..11315095c29b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -223,7 +223,7 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
> static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
> struct amdgpu_amdkfd_fence *ef)
> {
> - struct dma_resv *resv = bo->tbo.base.resv;
> + struct dma_resv *resv = &bo->tbo.base._resv;
That won't work either and probably break a bunch of other cases.
[xh] kfd bos which are allocated explicitly use _resv AFAIK.
only pt/pd bos share the root._resv
Christian.
> struct dma_resv_list *old, *new;
> unsigned int i, j, k;
>
> @@ -276,6 +276,78 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
> return 0;
> }
>
> +int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
> +{
> + struct amdgpu_vm_bo_base *vm_bo = bo->vm_bo;
> + struct amdgpu_vm *vm;
> + struct amdkfd_process_info *info;
> + struct amdgpu_amdkfd_fence *ef;
> + struct amdgpu_bo *parent;
> + int locked;
> + int ret;
> + struct ttm_bo_global *glob = &ttm_bo_glob;
> +
> + if (vm_bo == NULL)
> + return 0;
> +
> + /* Page table bo has a reference of the parent bo.
> + * BO itself can't guarntee the vm it points to is alive.
> + * for example, VM is going to free page tables, and the pt/pd bo might be
> + * freed by a workqueue. In that case, the vm might be freed already,
> + * leaving the bo->vm_bo points to vm.root.
> + *
> + * so to avoid that, when kfd free its vms,
> + * 1) set vm->process_info to NULL if this is the last vm.
> + * 2) set root_bo->vm_bo to NULL.
> + *
> + * but there are still races, just like
> + * cpu 1 cpu 2
> + * !vm_bo
> + * ->info = NULL
> + * free(info)
> + * ->vm_bo = NULL
> + * free (vm)
> + * info = vm->info //invalid vm
> + *
> + * So to avoid the race, use ttm_bo_glob lru_lock.
> + * generally speaking, adding a new lock is accceptable.
> + * But reusing this lock is simple.
> + */
> + parent = bo;
> + while (parent->parent)
> + parent = parent->parent;
> +
> + spin_lock(&glob->lru_lock);
> + vm_bo = parent->vm_bo;
> + if (!vm_bo) {
> + spin_unlock(&glob->lru_lock);
> + return 0;
> + }
> +
> + vm = vm_bo->vm;
> + if (!vm) {
> + spin_unlock(&glob->lru_lock);
> + return 0;
> + }
> +
> + info = vm->process_info;
> + if (!info || !info->eviction_fence) {
> + spin_unlock(&glob->lru_lock);
> + return 0;
> + }
> +
> + ef = container_of(dma_fence_get(&info->eviction_fence->base),
> + struct amdgpu_amdkfd_fence, base);
> + spin_unlock(&glob->lru_lock);
> +
> + locked = dma_resv_trylock(&bo->tbo.base._resv);
> + ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
> + dma_fence_put(&ef->base);
> + if (locked)
> + dma_resv_unlock(&bo->tbo.base._resv);
> + return ret;
> +}
> +
> static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
> bool wait)
> {
> @@ -1030,6 +1102,7 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
> {
> struct amdkfd_process_info *process_info = vm->process_info;
> struct amdgpu_bo *pd = vm->root.base.bo;
> + struct ttm_bo_global *glob = &ttm_bo_glob;
>
> if (!process_info)
> return;
> @@ -1051,6 +1124,9 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
> WARN_ON(!list_empty(&process_info->userptr_valid_list));
> WARN_ON(!list_empty(&process_info->userptr_inval_list));
>
> + spin_lock(&glob->lru_lock);
> + vm->process_info = NULL;
> + spin_unlock(&glob->lru_lock);
> dma_fence_put(&process_info->eviction_fence->base);
> cancel_delayed_work_sync(&process_info->restore_userptr_work);
> put_pid(process_info->pid);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 6f60a581e3ba..6ad12298fa84 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -1307,19 +1307,23 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
> if (abo->kfd_bo)
> amdgpu_amdkfd_unreserve_memory_limit(abo);
>
> + amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo);
> + abo->vm_bo = NULL;
> +
> if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node ||
> !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE))
> return;
>
> - dma_resv_lock(bo->base.resv, NULL);
> + /* Only kfd bo wipe vram on release. The resv is &_resv. */
> + dma_resv_lock(&bo->base._resv, NULL);
>
> - r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence);
> + r = amdgpu_fill_buffer(abo, AMDGPU_POISON, &bo->base._resv, &fence);
> if (!WARN_ON(r)) {
> amdgpu_bo_fence(abo, fence, false);
> dma_fence_put(fence);
> }
>
> - dma_resv_unlock(bo->base.resv);
> + dma_resv_unlock(&bo->base._resv);
> }
>
> /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index cc56eaba1911..2b96447e30e5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -945,7 +945,6 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> static void amdgpu_vm_free_table(struct amdgpu_vm_pt *entry)
> {
> if (entry->base.bo) {
> - entry->base.bo->vm_bo = NULL;
> list_del(&entry->base.vm_status);
> amdgpu_bo_unref(&entry->base.bo->shadow);
> amdgpu_bo_unref(&entry->base.bo);
> @@ -3074,6 +3073,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> struct amdgpu_bo_va_mapping *mapping, *tmp;
> bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
> struct amdgpu_bo *root;
> + struct ttm_bo_global *glob = &ttm_bo_glob;
> int i;
>
> amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
> @@ -3105,6 +3105,9 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> }
>
> amdgpu_vm_free_pts(adev, vm, NULL);
> + spin_lock(&glob->lru_lock);
> + root->vm_bo = NULL;
> + spin_unlock(&glob->lru_lock);
> amdgpu_bo_unreserve(root);
> amdgpu_bo_unref(&root);
> WARN_ON(vm->root.base.bo);
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 1494aebb8128..706cd60eb9e0 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -429,8 +429,8 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
> BUG_ON(!dma_resv_trylock(&bo->base._resv));
>
> r = dma_resv_copy_fences(&bo->base._resv, bo->base.resv);
> - if (r)
> - dma_resv_unlock(&bo->base._resv);
> +
> + dma_resv_unlock(&bo->base._resv);
>
> return r;
> }
> @@ -455,23 +455,19 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
> }
> }
>
> -static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
> +static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo, bool enqueue)
> {
> struct ttm_bo_device *bdev = bo->bdev;
> int ret;
>
> - ret = ttm_bo_individualize_resv(bo);
> - if (ret) {
> - /* Last resort, if we fail to allocate memory for the
> - * fences block for the BO to become idle
> - */
> - dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
> - 30 * HZ);
> - spin_lock(&ttm_bo_glob.lru_lock);
> - goto error;
> - }
> -
> spin_lock(&ttm_bo_glob.lru_lock);
> +
> + if (enqueue)
> + goto queue;
> +
> + if (bo->base.resv != &bo->base._resv)
> + BUG_ON(!dma_resv_trylock(&bo->base._resv));
> +
> ret = dma_resv_trylock(bo->base.resv) ? 0 : -EBUSY;
> if (!ret) {
> if (dma_resv_test_signaled_rcu(&bo->base._resv, true)) {
> @@ -504,7 +500,7 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
> dma_resv_unlock(&bo->base._resv);
> }
>
> -error:
> +queue:
> kref_get(&bo->list_kref);
> list_add_tail(&bo->ddestroy, &bdev->ddestroy);
> spin_unlock(&ttm_bo_glob.lru_lock);
> @@ -655,6 +651,16 @@ static void ttm_bo_release(struct kref *kref)
> container_of(kref, struct ttm_buffer_object, kref);
> struct ttm_bo_device *bdev = bo->bdev;
> struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type];
> + int ret;
> +
> + ret = ttm_bo_individualize_resv(bo);
> + if (ret) {
> + /* Last resort, if we fail to allocate memory for the
> + * fences block for the BO to become idle
> + */
> + dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
> + 30 * HZ);
> + }
>
> if (bo->bdev->driver->release_notify)
> bo->bdev->driver->release_notify(bo);
> @@ -663,7 +669,7 @@ static void ttm_bo_release(struct kref *kref)
> ttm_mem_io_lock(man, false);
> ttm_mem_io_free_vm(bo);
> ttm_mem_io_unlock(man);
> - ttm_bo_cleanup_refs_or_queue(bo);
> + ttm_bo_cleanup_refs_or_queue(bo, !!ret);
> kref_put(&bo->list_kref, ttm_bo_release_list);
> }
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: 回复: [RFC PATCH v2] drm/amdgpu: Remove kfd eviction fence before release bo
2020-02-07 13:50 ` 回复: " Pan, Xinhui
@ 2020-02-07 13:52 ` Christian König
0 siblings, 0 replies; 4+ messages in thread
From: Christian König @ 2020-02-07 13:52 UTC (permalink / raw)
To: Pan, Xinhui, amd-gfx; +Cc: Deucher, Alexander, Kuehling, Felix
Am 07.02.20 um 14:50 schrieb Pan, Xinhui:
> [AMD Official Use Only - Internal Distribution Only]
>
> ________________________________________
> 发件人: Koenig, Christian <Christian.Koenig@amd.com>
> 发送时间: 2020年2月7日 21:46
> 收件人: Pan, Xinhui; amd-gfx@lists.freedesktop.org
> 抄送: Kuehling, Felix; Deucher, Alexander
> 主题: Re: [RFC PATCH v2] drm/amdgpu: Remove kfd eviction fence before release bo
>
> Am 07.02.20 um 14:42 schrieb Pan, Xinhui:
>> No need to trigger eviction as the memory mapping will not be used anymore.
>>
>> All pt/pd bos share same resv, hence the same shared eviction fence. Every time a page table is freed, the fence will be signaled and that causes kfd unexpected evictions.
>>
>> Signed-off-by: xinhui pab <xinhui.pan@example.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 +
>> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 78 ++++++++++++++++++-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 ++-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 +-
>> drivers/gpu/drm/ttm/ttm_bo.c | 38 +++++----
>> 5 files changed, 111 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> index 47b0f2957d1f..265b1ed7264c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> @@ -96,6 +96,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
>> struct mm_struct *mm);
>> bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
>> struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
>> +int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
>>
>> struct amdkfd_process_info {
>> /* List head of all VMs that belong to a KFD process */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index ef721cb65868..11315095c29b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -223,7 +223,7 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
>> static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
>> struct amdgpu_amdkfd_fence *ef)
>> {
>> - struct dma_resv *resv = bo->tbo.base.resv;
>> + struct dma_resv *resv = &bo->tbo.base._resv;
> That won't work either and probably break a bunch of other cases.
>
> [xh] kfd bos which are allocated explicitly use _resv AFAIK.
> only pt/pd bos share the root._resv
No that is not correct. Imports potentially don't use _resv here.
Anyway please see the patchset I've pointed out in one of my last mails.
With that we can assign bo->tbo.base.resv to bo->tbo._base for deleted BOs.
Christian.
>
> Christian.
>
>> struct dma_resv_list *old, *new;
>> unsigned int i, j, k;
>>
>> @@ -276,6 +276,78 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
>> return 0;
>> }
>>
>> +int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
>> +{
>> + struct amdgpu_vm_bo_base *vm_bo = bo->vm_bo;
>> + struct amdgpu_vm *vm;
>> + struct amdkfd_process_info *info;
>> + struct amdgpu_amdkfd_fence *ef;
>> + struct amdgpu_bo *parent;
>> + int locked;
>> + int ret;
>> + struct ttm_bo_global *glob = &ttm_bo_glob;
>> +
>> + if (vm_bo == NULL)
>> + return 0;
>> +
>> + /* Page table bo has a reference of the parent bo.
>> + * BO itself can't guarantee the vm it points to is alive.
>> + * for example, VM is going to free page tables, and the pt/pd bo might be
>> + * freed by a workqueue. In that case, the vm might be freed already,
>> + * leaving the bo->vm_bo points to vm.root.
>> + *
>> + * so to avoid that, when kfd free its vms,
>> + * 1) set vm->process_info to NULL if this is the last vm.
>> + * 2) set root_bo->vm_bo to NULL.
>> + *
>> + * but there are still races, just like
>> + * cpu 1 cpu 2
>> + * !vm_bo
>> + * ->info = NULL
>> + * free(info)
>> + * ->vm_bo = NULL
>> + * free (vm)
>> + * info = vm->info //invalid vm
>> + *
>> + * So to avoid the race, use ttm_bo_glob lru_lock.
>> + * generally speaking, adding a new lock is acceptable.
>> + * But reusing this lock is simple.
>> + */
>> + parent = bo;
>> + while (parent->parent)
>> + parent = parent->parent;
>> +
>> + spin_lock(&glob->lru_lock);
>> + vm_bo = parent->vm_bo;
>> + if (!vm_bo) {
>> + spin_unlock(&glob->lru_lock);
>> + return 0;
>> + }
>> +
>> + vm = vm_bo->vm;
>> + if (!vm) {
>> + spin_unlock(&glob->lru_lock);
>> + return 0;
>> + }
>> +
>> + info = vm->process_info;
>> + if (!info || !info->eviction_fence) {
>> + spin_unlock(&glob->lru_lock);
>> + return 0;
>> + }
>> +
>> + ef = container_of(dma_fence_get(&info->eviction_fence->base),
>> + struct amdgpu_amdkfd_fence, base);
>> + spin_unlock(&glob->lru_lock);
>> +
>> + locked = dma_resv_trylock(&bo->tbo.base._resv);
>> + ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
>> + dma_fence_put(&ef->base);
>> + if (locked)
>> + dma_resv_unlock(&bo->tbo.base._resv);
>> + return ret;
>> +}
>> +
>> static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
>> bool wait)
>> {
>> @@ -1030,6 +1102,7 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
>> {
>> struct amdkfd_process_info *process_info = vm->process_info;
>> struct amdgpu_bo *pd = vm->root.base.bo;
>> + struct ttm_bo_global *glob = &ttm_bo_glob;
>>
>> if (!process_info)
>> return;
>> @@ -1051,6 +1124,9 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
>> WARN_ON(!list_empty(&process_info->userptr_valid_list));
>> WARN_ON(!list_empty(&process_info->userptr_inval_list));
>>
>> + spin_lock(&glob->lru_lock);
>> + vm->process_info = NULL;
>> + spin_unlock(&glob->lru_lock);
>> dma_fence_put(&process_info->eviction_fence->base);
>> cancel_delayed_work_sync(&process_info->restore_userptr_work);
>> put_pid(process_info->pid);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index 6f60a581e3ba..6ad12298fa84 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -1307,19 +1307,23 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
>> if (abo->kfd_bo)
>> amdgpu_amdkfd_unreserve_memory_limit(abo);
>>
>> + amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo);
>> + abo->vm_bo = NULL;
>> +
>> if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node ||
>> !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE))
>> return;
>>
>> - dma_resv_lock(bo->base.resv, NULL);
>> + /* Only kfd bos wipe vram on release. The resv is &_resv. */
>> + dma_resv_lock(&bo->base._resv, NULL);
>>
>> - r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence);
>> + r = amdgpu_fill_buffer(abo, AMDGPU_POISON, &bo->base._resv, &fence);
>> if (!WARN_ON(r)) {
>> amdgpu_bo_fence(abo, fence, false);
>> dma_fence_put(fence);
>> }
>>
>> - dma_resv_unlock(bo->base.resv);
>> + dma_resv_unlock(&bo->base._resv);
>> }
>>
>> /**
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index cc56eaba1911..2b96447e30e5 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -945,7 +945,6 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>> static void amdgpu_vm_free_table(struct amdgpu_vm_pt *entry)
>> {
>> if (entry->base.bo) {
>> - entry->base.bo->vm_bo = NULL;
>> list_del(&entry->base.vm_status);
>> amdgpu_bo_unref(&entry->base.bo->shadow);
>> amdgpu_bo_unref(&entry->base.bo);
>> @@ -3074,6 +3073,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>> struct amdgpu_bo_va_mapping *mapping, *tmp;
>> bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
>> struct amdgpu_bo *root;
>> + struct ttm_bo_global *glob = &ttm_bo_glob;
>> int i;
>>
>> amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
>> @@ -3105,6 +3105,9 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>> }
>>
>> amdgpu_vm_free_pts(adev, vm, NULL);
>> + spin_lock(&glob->lru_lock);
>> + root->vm_bo = NULL;
>> + spin_unlock(&glob->lru_lock);
>> amdgpu_bo_unreserve(root);
>> amdgpu_bo_unref(&root);
>> WARN_ON(vm->root.base.bo);
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> index 1494aebb8128..706cd60eb9e0 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -429,8 +429,8 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
>> BUG_ON(!dma_resv_trylock(&bo->base._resv));
>>
>> r = dma_resv_copy_fences(&bo->base._resv, bo->base.resv);
>> - if (r)
>> - dma_resv_unlock(&bo->base._resv);
>> +
>> + dma_resv_unlock(&bo->base._resv);
>>
>> return r;
>> }
>> @@ -455,23 +455,19 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
>> }
>> }
>>
>> -static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
>> +static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo, bool enqueue)
>> {
>> struct ttm_bo_device *bdev = bo->bdev;
>> int ret;
>>
>> - ret = ttm_bo_individualize_resv(bo);
>> - if (ret) {
>> - /* Last resort, if we fail to allocate memory for the
>> - * fences block for the BO to become idle
>> - */
>> - dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
>> - 30 * HZ);
>> - spin_lock(&ttm_bo_glob.lru_lock);
>> - goto error;
>> - }
>> -
>> spin_lock(&ttm_bo_glob.lru_lock);
>> +
>> + if (enqueue)
>> + goto queue;
>> +
>> + if (bo->base.resv != &bo->base._resv)
>> + BUG_ON(!dma_resv_trylock(&bo->base._resv));
>> +
>> ret = dma_resv_trylock(bo->base.resv) ? 0 : -EBUSY;
>> if (!ret) {
>> if (dma_resv_test_signaled_rcu(&bo->base._resv, true)) {
>> @@ -504,7 +500,7 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
>> dma_resv_unlock(&bo->base._resv);
>> }
>>
>> -error:
>> +queue:
>> kref_get(&bo->list_kref);
>> list_add_tail(&bo->ddestroy, &bdev->ddestroy);
>> spin_unlock(&ttm_bo_glob.lru_lock);
>> @@ -655,6 +651,16 @@ static void ttm_bo_release(struct kref *kref)
>> container_of(kref, struct ttm_buffer_object, kref);
>> struct ttm_bo_device *bdev = bo->bdev;
>> struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type];
>> + int ret;
>> +
>> + ret = ttm_bo_individualize_resv(bo);
>> + if (ret) {
>> + /* Last resort, if we fail to allocate memory for the
>> + * fences block for the BO to become idle
>> + */
>> + dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
>> + 30 * HZ);
>> + }
>>
>> if (bo->bdev->driver->release_notify)
>> bo->bdev->driver->release_notify(bo);
>> @@ -663,7 +669,7 @@ static void ttm_bo_release(struct kref *kref)
>> ttm_mem_io_lock(man, false);
>> ttm_mem_io_free_vm(bo);
>> ttm_mem_io_unlock(man);
>> - ttm_bo_cleanup_refs_or_queue(bo);
>> + ttm_bo_cleanup_refs_or_queue(bo, !!ret);
>> kref_put(&bo->list_kref, ttm_bo_release_list);
>> }
>>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2020-02-07 13:52 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-07 13:42 [RFC PATCH v2] drm/amdgpu: Remove kfd eviction fence before release bo Pan, Xinhui
2020-02-07 13:46 ` Christian König
2020-02-07 13:50 ` 回复: " Pan, Xinhui
2020-02-07 13:52 ` Christian König
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.