* [PATCH] drm/ttm: rework BO delayed delete. v2
@ 2020-02-11 13:43 Christian König
2020-02-11 14:10 ` Pan, Xinhui
0 siblings, 1 reply; 2+ messages in thread
From: Christian König @ 2020-02-11 13:43 UTC (permalink / raw)
To: Xinhui.Pan, amd-gfx, dri-devel
This patch reworks the whole delayed deletion of BOs which aren't idle.
Instead of having two counters for the BO structure we resurrect the BO
when we find that a deleted BO is not idle yet.
This has many advantages, especially that we don't need to
increment/decrement the BOs reference counter any more when it
moves on the LRUs.
v2: remove duplicate ttm_tt_destroy, fix holde lock for LRU move
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/ttm/ttm_bo.c | 217 +++++++++++++-----------------
drivers/gpu/drm/ttm/ttm_bo_util.c | 1 -
include/drm/ttm/ttm_bo_api.h | 11 +-
3 files changed, 97 insertions(+), 132 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 1fbc36f05d89..bfc42a9e4fb4 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -145,26 +145,6 @@ static inline uint32_t ttm_bo_type_flags(unsigned type)
return 1 << (type);
}
-static void ttm_bo_release_list(struct kref *list_kref)
-{
- struct ttm_buffer_object *bo =
- container_of(list_kref, struct ttm_buffer_object, list_kref);
- size_t acc_size = bo->acc_size;
-
- BUG_ON(kref_read(&bo->list_kref));
- BUG_ON(kref_read(&bo->kref));
- BUG_ON(bo->mem.mm_node != NULL);
- BUG_ON(!list_empty(&bo->lru));
- BUG_ON(!list_empty(&bo->ddestroy));
- ttm_tt_destroy(bo->ttm);
- atomic_dec(&ttm_bo_glob.bo_count);
- dma_fence_put(bo->moving);
- if (!ttm_bo_uses_embedded_gem_object(bo))
- dma_resv_fini(&bo->base._resv);
- bo->destroy(bo);
- ttm_mem_global_free(&ttm_mem_glob, acc_size);
-}
-
static void ttm_bo_add_mem_to_lru(struct ttm_buffer_object *bo,
struct ttm_mem_reg *mem)
{
@@ -181,21 +161,14 @@ static void ttm_bo_add_mem_to_lru(struct ttm_buffer_object *bo,
man = &bdev->man[mem->mem_type];
list_add_tail(&bo->lru, &man->lru[bo->priority]);
- kref_get(&bo->list_kref);
if (!(man->flags & TTM_MEMTYPE_FLAG_FIXED) && bo->ttm &&
!(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
TTM_PAGE_FLAG_SWAPPED))) {
list_add_tail(&bo->swap, &ttm_bo_glob.swap_lru[bo->priority]);
- kref_get(&bo->list_kref);
}
}
-static void ttm_bo_ref_bug(struct kref *list_kref)
-{
- BUG();
-}
-
static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
{
struct ttm_bo_device *bdev = bo->bdev;
@@ -203,12 +176,10 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
if (!list_empty(&bo->swap)) {
list_del_init(&bo->swap);
- kref_put(&bo->list_kref, ttm_bo_ref_bug);
notify = true;
}
if (!list_empty(&bo->lru)) {
list_del_init(&bo->lru);
- kref_put(&bo->list_kref, ttm_bo_ref_bug);
notify = true;
}
@@ -421,8 +392,7 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
BUG_ON(!dma_resv_trylock(&bo->base._resv));
r = dma_resv_copy_fences(&bo->base._resv, bo->base.resv);
- if (r)
- dma_resv_unlock(&bo->base._resv);
+ dma_resv_unlock(&bo->base._resv);
return r;
}
@@ -447,68 +417,10 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
}
}
-static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
-{
- struct ttm_bo_device *bdev = bo->bdev;
- int ret;
-
- ret = ttm_bo_individualize_resv(bo);
- if (ret) {
- /* Last resort, if we fail to allocate memory for the
- * fences block for the BO to become idle
- */
- dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
- 30 * HZ);
- spin_lock(&ttm_bo_glob.lru_lock);
- goto error;
- }
-
- spin_lock(&ttm_bo_glob.lru_lock);
- ret = dma_resv_trylock(bo->base.resv) ? 0 : -EBUSY;
- if (!ret) {
- if (dma_resv_test_signaled_rcu(&bo->base._resv, true)) {
- ttm_bo_del_from_lru(bo);
- spin_unlock(&ttm_bo_glob.lru_lock);
- if (bo->base.resv != &bo->base._resv)
- dma_resv_unlock(&bo->base._resv);
-
- ttm_bo_cleanup_memtype_use(bo);
- dma_resv_unlock(bo->base.resv);
- return;
- }
-
- ttm_bo_flush_all_fences(bo);
-
- /*
- * Make NO_EVICT bos immediately available to
- * shrinkers, now that they are queued for
- * destruction.
- */
- if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) {
- bo->mem.placement &= ~TTM_PL_FLAG_NO_EVICT;
- ttm_bo_move_to_lru_tail(bo, NULL);
- }
-
- dma_resv_unlock(bo->base.resv);
- }
- if (bo->base.resv != &bo->base._resv) {
- ttm_bo_flush_all_fences(bo);
- dma_resv_unlock(&bo->base._resv);
- }
-
-error:
- kref_get(&bo->list_kref);
- list_add_tail(&bo->ddestroy, &bdev->ddestroy);
- spin_unlock(&ttm_bo_glob.lru_lock);
-
- schedule_delayed_work(&bdev->wq,
- ((HZ / 100) < 1) ? 1 : HZ / 100);
-}
-
/**
* function ttm_bo_cleanup_refs
- * If bo idle, remove from delayed- and lru lists, and unref.
- * If not idle, do nothing.
+ * If bo idle, remove from lru lists, and unref.
+ * If not idle, block if possible.
*
* Must be called with lru_lock and reservation held, this function
* will drop the lru lock and optionally the reservation lock before returning.
@@ -576,14 +488,14 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
ttm_bo_del_from_lru(bo);
list_del_init(&bo->ddestroy);
- kref_put(&bo->list_kref, ttm_bo_ref_bug);
-
spin_unlock(&ttm_bo_glob.lru_lock);
ttm_bo_cleanup_memtype_use(bo);
if (unlock_resv)
dma_resv_unlock(bo->base.resv);
+ ttm_bo_put(bo);
+
return 0;
}
@@ -605,8 +517,9 @@ static bool ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
bo = list_first_entry(&bdev->ddestroy, struct ttm_buffer_object,
ddestroy);
- kref_get(&bo->list_kref);
list_move_tail(&bo->ddestroy, &removed);
+ if (!ttm_bo_get_unless_zero(bo))
+ continue;
if (remove_all || bo->base.resv != &bo->base._resv) {
spin_unlock(&glob->lru_lock);
@@ -621,7 +534,7 @@ static bool ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
spin_unlock(&glob->lru_lock);
}
- kref_put(&bo->list_kref, ttm_bo_release_list);
+ ttm_bo_put(bo);
spin_lock(&glob->lru_lock);
}
list_splice_tail(&removed, &bdev->ddestroy);
@@ -647,16 +560,68 @@ static void ttm_bo_release(struct kref *kref)
container_of(kref, struct ttm_buffer_object, kref);
struct ttm_bo_device *bdev = bo->bdev;
struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type];
+ size_t acc_size = bo->acc_size;
+ int ret;
- if (bo->bdev->driver->release_notify)
- bo->bdev->driver->release_notify(bo);
+ if (!bo->deleted) {
+ if (bo->bdev->driver->release_notify)
+ bo->bdev->driver->release_notify(bo);
- drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
- ttm_mem_io_lock(man, false);
- ttm_mem_io_free_vm(bo);
- ttm_mem_io_unlock(man);
- ttm_bo_cleanup_refs_or_queue(bo);
- kref_put(&bo->list_kref, ttm_bo_release_list);
+ drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
+ ttm_mem_io_lock(man, false);
+ ttm_mem_io_free_vm(bo);
+ ttm_mem_io_unlock(man);
+
+ ret = ttm_bo_individualize_resv(bo);
+ if (ret) {
+ /* Last resort, if we fail to allocate memory for the
+ * fences block for the BO to become idle
+ */
+ dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
+ 30 * HZ);
+ }
+ }
+
+ if (!dma_resv_test_signaled_rcu(bo->base.resv, true)) {
+ /* The BO is not idle, resurrect it for delayed destroy */
+ ttm_bo_flush_all_fences(bo);
+ bo->deleted = true;
+
+ spin_lock(&ttm_bo_glob.lru_lock);
+
+ /*
+ * Make NO_EVICT bos immediately available to
+ * shrinkers, now that they are queued for
+ * destruction.
+ */
+ if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) {
+ bo->mem.placement &= ~TTM_PL_FLAG_NO_EVICT;
+ ttm_bo_move_to_lru_tail(bo, NULL);
+ }
+
+ kref_init(&bo->kref);
+ list_add_tail(&bo->ddestroy, &bdev->ddestroy);
+ spin_unlock(&ttm_bo_glob.lru_lock);
+
+ schedule_delayed_work(&bdev->wq,
+ ((HZ / 100) < 1) ? 1 : HZ / 100);
+ return;
+ }
+
+ spin_lock(&ttm_bo_glob.lru_lock);
+ ttm_bo_del_from_lru(bo);
+ list_del(&bo->ddestroy);
+ spin_unlock(&ttm_bo_glob.lru_lock);
+
+ ttm_bo_cleanup_memtype_use(bo);
+
+ BUG_ON(bo->mem.mm_node != NULL);
+ atomic_dec(&ttm_bo_glob.bo_count);
+ dma_fence_put(bo->moving);
+ if (!ttm_bo_uses_embedded_gem_object(bo))
+ dma_resv_fini(&bo->base._resv);
+ bo->destroy(bo);
+ ttm_mem_global_free(&ttm_mem_glob, acc_size);
}
void ttm_bo_put(struct ttm_buffer_object *bo)
@@ -759,8 +724,7 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
if (bo->base.resv == ctx->resv) {
dma_resv_assert_held(bo->base.resv);
- if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
- || !list_empty(&bo->ddestroy))
+ if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT || bo->deleted)
ret = true;
*locked = false;
if (busy)
@@ -841,6 +805,11 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
dma_resv_unlock(bo->base.resv);
continue;
}
+ if (!ttm_bo_get_unless_zero(bo)) {
+ if (locked)
+ dma_resv_unlock(bo->base.resv);
+ continue;
+ }
break;
}
@@ -852,21 +821,19 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
}
if (!bo) {
- if (busy_bo)
- kref_get(&busy_bo->list_kref);
+ if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
+ busy_bo = NULL;
spin_unlock(&ttm_bo_glob.lru_lock);
ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
if (busy_bo)
- kref_put(&busy_bo->list_kref, ttm_bo_release_list);
+ ttm_bo_put(busy_bo);
return ret;
}
- kref_get(&bo->list_kref);
-
- if (!list_empty(&bo->ddestroy)) {
+ if (bo->deleted) {
ret = ttm_bo_cleanup_refs(bo, ctx->interruptible,
ctx->no_wait_gpu, locked);
- kref_put(&bo->list_kref, ttm_bo_release_list);
+ ttm_bo_put(bo);
return ret;
}
@@ -876,7 +843,7 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
if (locked)
ttm_bo_unreserve(bo);
- kref_put(&bo->list_kref, ttm_bo_release_list);
+ ttm_bo_put(bo);
return ret;
}
@@ -1288,7 +1255,6 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
bo->destroy = destroy ? destroy : ttm_bo_default_destroy;
kref_init(&bo->kref);
- kref_init(&bo->list_kref);
INIT_LIST_HEAD(&bo->lru);
INIT_LIST_HEAD(&bo->ddestroy);
INIT_LIST_HEAD(&bo->swap);
@@ -1808,11 +1774,18 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
spin_lock(&glob->lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
list_for_each_entry(bo, &glob->swap_lru[i], swap) {
- if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
- NULL)) {
- ret = 0;
- break;
+ if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
+ NULL))
+ continue;
+
+ if (!ttm_bo_get_unless_zero(bo)) {
+ if (locked)
+ dma_resv_unlock(bo->base.resv);
+ continue;
}
+
+ ret = 0;
+ break;
}
if (!ret)
break;
@@ -1823,11 +1796,9 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
return ret;
}
- kref_get(&bo->list_kref);
-
- if (!list_empty(&bo->ddestroy)) {
+ if (bo->deleted) {
ret = ttm_bo_cleanup_refs(bo, false, false, locked);
- kref_put(&bo->list_kref, ttm_bo_release_list);
+ ttm_bo_put(bo);
return ret;
}
@@ -1881,7 +1852,7 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
*/
if (locked)
dma_resv_unlock(bo->base.resv);
- kref_put(&bo->list_kref, ttm_bo_release_list);
+ ttm_bo_put(bo);
return ret;
}
EXPORT_SYMBOL(ttm_bo_swapout);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 2b0e5a088da0..0b1a43ba7af4 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -507,7 +507,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
fbo->base.moving = NULL;
drm_vma_node_reset(&fbo->base.base.vma_node);
- kref_init(&fbo->base.list_kref);
kref_init(&fbo->base.kref);
fbo->base.destroy = &ttm_transfered_destroy;
fbo->base.acc_size = 0;
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 66ca49db9633..b9bc1b00142e 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -135,18 +135,14 @@ struct ttm_tt;
* @num_pages: Actual number of pages.
* @acc_size: Accounted size for this object.
* @kref: Reference count of this buffer object. When this refcount reaches
- * zero, the object is put on the delayed delete list.
- * @list_kref: List reference count of this buffer object. This member is
- * used to avoid destruction while the buffer object is still on a list.
- * Lru lists may keep one refcount, the delayed delete list, and kref != 0
- * keeps one refcount. When this refcount reaches zero,
- * the object is destroyed.
+ * zero, the object is destroyed or put on the delayed delete list.
* @mem: structure describing current placement.
* @persistent_swap_storage: Usually the swap storage is deleted for buffers
* pinned in physical memory. If this behaviour is not desired, this member
* holds a pointer to a persistent shmem object.
* @ttm: TTM structure holding system pages.
* @evicted: Whether the object was evicted without user-space knowing.
+ * @deleted: True if the object is only a zombie and already deleted.
* @lru: List head for the lru list.
* @ddestroy: List head for the delayed destroy list.
* @swap: List head for swap LRU list.
@@ -183,9 +179,7 @@ struct ttm_buffer_object {
/**
* Members not needing protection.
*/
-
struct kref kref;
- struct kref list_kref;
/**
* Members protected by the bo::resv::reserved lock.
@@ -195,6 +189,7 @@ struct ttm_buffer_object {
struct file *persistent_swap_storage;
struct ttm_tt *ttm;
bool evicted;
+ bool deleted;
/**
* Members protected by the bdev::lru_lock.
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH] drm/ttm: rework BO delayed delete. v2
2020-02-11 13:43 [PATCH] drm/ttm: rework BO delayed delete. v2 Christian König
@ 2020-02-11 14:10 ` Pan, Xinhui
0 siblings, 0 replies; 2+ messages in thread
From: Pan, Xinhui @ 2020-02-11 14:10 UTC (permalink / raw)
To: Christian König; +Cc: Pan, Xinhui, dri-devel, amd-gfx
Reviewed-by: xinhui pan <xinhui.pan@amd.com>
> 2020年2月11日 21:43,Christian König <ckoenig.leichtzumerken@gmail.com> 写道:
>
> This patch reworks the whole delayed deletion of BOs which aren't idle.
>
> Instead of having two counters for the BO structure we resurrect the BO
> when we find that a deleted BO is not idle yet.
>
> This has many advantages, especially that we don't need to
> increment/decrement the BOs reference counter any more when it
> moves on the LRUs.
>
> v2: remove duplicate ttm_tt_destroy, fix holde lock for LRU move
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/ttm/ttm_bo.c | 217 +++++++++++++-----------------
> drivers/gpu/drm/ttm/ttm_bo_util.c | 1 -
> include/drm/ttm/ttm_bo_api.h | 11 +-
> 3 files changed, 97 insertions(+), 132 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 1fbc36f05d89..bfc42a9e4fb4 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -145,26 +145,6 @@ static inline uint32_t ttm_bo_type_flags(unsigned type)
> return 1 << (type);
> }
>
> -static void ttm_bo_release_list(struct kref *list_kref)
> -{
> - struct ttm_buffer_object *bo =
> - container_of(list_kref, struct ttm_buffer_object, list_kref);
> - size_t acc_size = bo->acc_size;
> -
> - BUG_ON(kref_read(&bo->list_kref));
> - BUG_ON(kref_read(&bo->kref));
> - BUG_ON(bo->mem.mm_node != NULL);
> - BUG_ON(!list_empty(&bo->lru));
> - BUG_ON(!list_empty(&bo->ddestroy));
> - ttm_tt_destroy(bo->ttm);
> - atomic_dec(&ttm_bo_glob.bo_count);
> - dma_fence_put(bo->moving);
> - if (!ttm_bo_uses_embedded_gem_object(bo))
> - dma_resv_fini(&bo->base._resv);
> - bo->destroy(bo);
> - ttm_mem_global_free(&ttm_mem_glob, acc_size);
> -}
> -
> static void ttm_bo_add_mem_to_lru(struct ttm_buffer_object *bo,
> struct ttm_mem_reg *mem)
> {
> @@ -181,21 +161,14 @@ static void ttm_bo_add_mem_to_lru(struct ttm_buffer_object *bo,
>
> man = &bdev->man[mem->mem_type];
> list_add_tail(&bo->lru, &man->lru[bo->priority]);
> - kref_get(&bo->list_kref);
>
> if (!(man->flags & TTM_MEMTYPE_FLAG_FIXED) && bo->ttm &&
> !(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
> TTM_PAGE_FLAG_SWAPPED))) {
> list_add_tail(&bo->swap, &ttm_bo_glob.swap_lru[bo->priority]);
> - kref_get(&bo->list_kref);
> }
> }
>
> -static void ttm_bo_ref_bug(struct kref *list_kref)
> -{
> - BUG();
> -}
> -
> static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
> {
> struct ttm_bo_device *bdev = bo->bdev;
> @@ -203,12 +176,10 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
>
> if (!list_empty(&bo->swap)) {
> list_del_init(&bo->swap);
> - kref_put(&bo->list_kref, ttm_bo_ref_bug);
> notify = true;
> }
> if (!list_empty(&bo->lru)) {
> list_del_init(&bo->lru);
> - kref_put(&bo->list_kref, ttm_bo_ref_bug);
> notify = true;
> }
>
> @@ -421,8 +392,7 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
> BUG_ON(!dma_resv_trylock(&bo->base._resv));
>
> r = dma_resv_copy_fences(&bo->base._resv, bo->base.resv);
> - if (r)
> - dma_resv_unlock(&bo->base._resv);
> + dma_resv_unlock(&bo->base._resv);
>
> return r;
> }
> @@ -447,68 +417,10 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
> }
> }
>
> -static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
> -{
> - struct ttm_bo_device *bdev = bo->bdev;
> - int ret;
> -
> - ret = ttm_bo_individualize_resv(bo);
> - if (ret) {
> - /* Last resort, if we fail to allocate memory for the
> - * fences block for the BO to become idle
> - */
> - dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
> - 30 * HZ);
> - spin_lock(&ttm_bo_glob.lru_lock);
> - goto error;
> - }
> -
> - spin_lock(&ttm_bo_glob.lru_lock);
> - ret = dma_resv_trylock(bo->base.resv) ? 0 : -EBUSY;
> - if (!ret) {
> - if (dma_resv_test_signaled_rcu(&bo->base._resv, true)) {
> - ttm_bo_del_from_lru(bo);
> - spin_unlock(&ttm_bo_glob.lru_lock);
> - if (bo->base.resv != &bo->base._resv)
> - dma_resv_unlock(&bo->base._resv);
> -
> - ttm_bo_cleanup_memtype_use(bo);
> - dma_resv_unlock(bo->base.resv);
> - return;
> - }
> -
> - ttm_bo_flush_all_fences(bo);
> -
> - /*
> - * Make NO_EVICT bos immediately available to
> - * shrinkers, now that they are queued for
> - * destruction.
> - */
> - if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) {
> - bo->mem.placement &= ~TTM_PL_FLAG_NO_EVICT;
> - ttm_bo_move_to_lru_tail(bo, NULL);
> - }
> -
> - dma_resv_unlock(bo->base.resv);
> - }
> - if (bo->base.resv != &bo->base._resv) {
> - ttm_bo_flush_all_fences(bo);
> - dma_resv_unlock(&bo->base._resv);
> - }
> -
> -error:
> - kref_get(&bo->list_kref);
> - list_add_tail(&bo->ddestroy, &bdev->ddestroy);
> - spin_unlock(&ttm_bo_glob.lru_lock);
> -
> - schedule_delayed_work(&bdev->wq,
> - ((HZ / 100) < 1) ? 1 : HZ / 100);
> -}
> -
> /**
> * function ttm_bo_cleanup_refs
> - * If bo idle, remove from delayed- and lru lists, and unref.
> - * If not idle, do nothing.
> + * If bo idle, remove from lru lists, and unref.
> + * If not idle, block if possible.
> *
> * Must be called with lru_lock and reservation held, this function
> * will drop the lru lock and optionally the reservation lock before returning.
> @@ -576,14 +488,14 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>
> ttm_bo_del_from_lru(bo);
> list_del_init(&bo->ddestroy);
> - kref_put(&bo->list_kref, ttm_bo_ref_bug);
> -
> spin_unlock(&ttm_bo_glob.lru_lock);
> ttm_bo_cleanup_memtype_use(bo);
>
> if (unlock_resv)
> dma_resv_unlock(bo->base.resv);
>
> + ttm_bo_put(bo);
> +
> return 0;
> }
>
> @@ -605,8 +517,9 @@ static bool ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
>
> bo = list_first_entry(&bdev->ddestroy, struct ttm_buffer_object,
> ddestroy);
> - kref_get(&bo->list_kref);
> list_move_tail(&bo->ddestroy, &removed);
> + if (!ttm_bo_get_unless_zero(bo))
> + continue;
>
> if (remove_all || bo->base.resv != &bo->base._resv) {
> spin_unlock(&glob->lru_lock);
> @@ -621,7 +534,7 @@ static bool ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
> spin_unlock(&glob->lru_lock);
> }
>
> - kref_put(&bo->list_kref, ttm_bo_release_list);
> + ttm_bo_put(bo);
> spin_lock(&glob->lru_lock);
> }
> list_splice_tail(&removed, &bdev->ddestroy);
> @@ -647,16 +560,68 @@ static void ttm_bo_release(struct kref *kref)
> container_of(kref, struct ttm_buffer_object, kref);
> struct ttm_bo_device *bdev = bo->bdev;
> struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type];
> + size_t acc_size = bo->acc_size;
> + int ret;
>
> - if (bo->bdev->driver->release_notify)
> - bo->bdev->driver->release_notify(bo);
> + if (!bo->deleted) {
> + if (bo->bdev->driver->release_notify)
> + bo->bdev->driver->release_notify(bo);
>
> - drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
> - ttm_mem_io_lock(man, false);
> - ttm_mem_io_free_vm(bo);
> - ttm_mem_io_unlock(man);
> - ttm_bo_cleanup_refs_or_queue(bo);
> - kref_put(&bo->list_kref, ttm_bo_release_list);
> + drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
> + ttm_mem_io_lock(man, false);
> + ttm_mem_io_free_vm(bo);
> + ttm_mem_io_unlock(man);
> +
> + ret = ttm_bo_individualize_resv(bo);
> + if (ret) {
> + /* Last resort, if we fail to allocate memory for the
> + * fences block for the BO to become idle
> + */
> + dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
> + 30 * HZ);
> + }
> + }
> +
> + if (!dma_resv_test_signaled_rcu(bo->base.resv, true)) {
> + /* The BO is not idle, resurrect it for delayed destroy */
> + ttm_bo_flush_all_fences(bo);
> + bo->deleted = true;
> +
> + spin_lock(&ttm_bo_glob.lru_lock);
> +
> + /*
> + * Make NO_EVICT bos immediately available to
> + * shrinkers, now that they are queued for
> + * destruction.
> + */
> + if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) {
> + bo->mem.placement &= ~TTM_PL_FLAG_NO_EVICT;
> + ttm_bo_move_to_lru_tail(bo, NULL);
> + }
> +
> + kref_init(&bo->kref);
> + list_add_tail(&bo->ddestroy, &bdev->ddestroy);
> + spin_unlock(&ttm_bo_glob.lru_lock);
> +
> + schedule_delayed_work(&bdev->wq,
> + ((HZ / 100) < 1) ? 1 : HZ / 100);
> + return;
> + }
> +
> + spin_lock(&ttm_bo_glob.lru_lock);
> + ttm_bo_del_from_lru(bo);
> + list_del(&bo->ddestroy);
> + spin_unlock(&ttm_bo_glob.lru_lock);
> +
> + ttm_bo_cleanup_memtype_use(bo);
> +
> + BUG_ON(bo->mem.mm_node != NULL);
> + atomic_dec(&ttm_bo_glob.bo_count);
> + dma_fence_put(bo->moving);
> + if (!ttm_bo_uses_embedded_gem_object(bo))
> + dma_resv_fini(&bo->base._resv);
> + bo->destroy(bo);
> + ttm_mem_global_free(&ttm_mem_glob, acc_size);
> }
>
> void ttm_bo_put(struct ttm_buffer_object *bo)
> @@ -759,8 +724,7 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
>
> if (bo->base.resv == ctx->resv) {
> dma_resv_assert_held(bo->base.resv);
> - if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
> - || !list_empty(&bo->ddestroy))
> + if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT || bo->deleted)
> ret = true;
> *locked = false;
> if (busy)
> @@ -841,6 +805,11 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> dma_resv_unlock(bo->base.resv);
> continue;
> }
> + if (!ttm_bo_get_unless_zero(bo)) {
> + if (locked)
> + dma_resv_unlock(bo->base.resv);
> + continue;
> + }
> break;
> }
>
> @@ -852,21 +821,19 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> }
>
> if (!bo) {
> - if (busy_bo)
> - kref_get(&busy_bo->list_kref);
> + if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
> + busy_bo = NULL;
> spin_unlock(&ttm_bo_glob.lru_lock);
> ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
> if (busy_bo)
> - kref_put(&busy_bo->list_kref, ttm_bo_release_list);
> + ttm_bo_put(busy_bo);
> return ret;
> }
>
> - kref_get(&bo->list_kref);
> -
> - if (!list_empty(&bo->ddestroy)) {
> + if (bo->deleted) {
> ret = ttm_bo_cleanup_refs(bo, ctx->interruptible,
> ctx->no_wait_gpu, locked);
> - kref_put(&bo->list_kref, ttm_bo_release_list);
> + ttm_bo_put(bo);
> return ret;
> }
>
> @@ -876,7 +843,7 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> if (locked)
> ttm_bo_unreserve(bo);
>
> - kref_put(&bo->list_kref, ttm_bo_release_list);
> + ttm_bo_put(bo);
> return ret;
> }
>
> @@ -1288,7 +1255,6 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
> bo->destroy = destroy ? destroy : ttm_bo_default_destroy;
>
> kref_init(&bo->kref);
> - kref_init(&bo->list_kref);
> INIT_LIST_HEAD(&bo->lru);
> INIT_LIST_HEAD(&bo->ddestroy);
> INIT_LIST_HEAD(&bo->swap);
> @@ -1808,11 +1774,18 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
> spin_lock(&glob->lru_lock);
> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> list_for_each_entry(bo, &glob->swap_lru[i], swap) {
> - if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
> - NULL)) {
> - ret = 0;
> - break;
> + if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
> + NULL))
> + continue;
> +
> + if (!ttm_bo_get_unless_zero(bo)) {
> + if (locked)
> + dma_resv_unlock(bo->base.resv);
> + continue;
> }
> +
> + ret = 0;
> + break;
> }
> if (!ret)
> break;
> @@ -1823,11 +1796,9 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
> return ret;
> }
>
> - kref_get(&bo->list_kref);
> -
> - if (!list_empty(&bo->ddestroy)) {
> + if (bo->deleted) {
> ret = ttm_bo_cleanup_refs(bo, false, false, locked);
> - kref_put(&bo->list_kref, ttm_bo_release_list);
> + ttm_bo_put(bo);
> return ret;
> }
>
> @@ -1881,7 +1852,7 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
> */
> if (locked)
> dma_resv_unlock(bo->base.resv);
> - kref_put(&bo->list_kref, ttm_bo_release_list);
> + ttm_bo_put(bo);
> return ret;
> }
> EXPORT_SYMBOL(ttm_bo_swapout);
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 2b0e5a088da0..0b1a43ba7af4 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -507,7 +507,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
> fbo->base.moving = NULL;
> drm_vma_node_reset(&fbo->base.base.vma_node);
>
> - kref_init(&fbo->base.list_kref);
> kref_init(&fbo->base.kref);
> fbo->base.destroy = &ttm_transfered_destroy;
> fbo->base.acc_size = 0;
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index 66ca49db9633..b9bc1b00142e 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -135,18 +135,14 @@ struct ttm_tt;
> * @num_pages: Actual number of pages.
> * @acc_size: Accounted size for this object.
> * @kref: Reference count of this buffer object. When this refcount reaches
> - * zero, the object is put on the delayed delete list.
> - * @list_kref: List reference count of this buffer object. This member is
> - * used to avoid destruction while the buffer object is still on a list.
> - * Lru lists may keep one refcount, the delayed delete list, and kref != 0
> - * keeps one refcount. When this refcount reaches zero,
> - * the object is destroyed.
> + * zero, the object is destroyed or put on the delayed delete list.
> * @mem: structure describing current placement.
> * @persistent_swap_storage: Usually the swap storage is deleted for buffers
> * pinned in physical memory. If this behaviour is not desired, this member
> * holds a pointer to a persistent shmem object.
> * @ttm: TTM structure holding system pages.
> * @evicted: Whether the object was evicted without user-space knowing.
> + * @deleted: True if the object is only a zombie and already deleted.
> * @lru: List head for the lru list.
> * @ddestroy: List head for the delayed destroy list.
> * @swap: List head for swap LRU list.
> @@ -183,9 +179,7 @@ struct ttm_buffer_object {
> /**
> * Members not needing protection.
> */
> -
> struct kref kref;
> - struct kref list_kref;
>
> /**
> * Members protected by the bo::resv::reserved lock.
> @@ -195,6 +189,7 @@ struct ttm_buffer_object {
> struct file *persistent_swap_storage;
> struct ttm_tt *ttm;
> bool evicted;
> + bool deleted;
>
> /**
> * Members protected by the bdev::lru_lock.
> --
> 2.17.1
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2020-02-11 14:11 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-11 13:43 [PATCH] drm/ttm: rework BO delayed delete. v2 Christian König
2020-02-11 14:10 ` Pan, Xinhui
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).