From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: "Christian König" <ckoenig.leichtzumerken@gmail.com>,
	dri-devel@lists.freedesktop.org
Subject: Re: [PATCH 4/5] drm/ttm: move the LRU into resource handling
Date: Mon, 23 Aug 2021 10:10:42 +0200
Message-ID: <9bfdf5b8b9bb8890e1d7e0801b742196ffd1273e.camel@linux.intel.com>
In-Reply-To: <20210719115145.1260-4-christian.koenig@amd.com>

On Mon, 2021-07-19 at 13:51 +0200, Christian König wrote:
> This way we finally fix the problem that new resources are
> not immediately evictable after allocation.
> 
> That has caused numerous problems including OOM on GDS handling
> and not being able to use TTM as general resource manager.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |   8 +-
>  drivers/gpu/drm/i915/gem/i915_gem_ttm.c |   2 +-
>  drivers/gpu/drm/ttm/ttm_bo.c            | 101 ++-----------------
>  drivers/gpu/drm/ttm/ttm_bo_util.c       |   1 -
>  drivers/gpu/drm/ttm/ttm_device.c        |   4 +-
>  drivers/gpu/drm/ttm/ttm_resource.c      | 127 ++++++++++++++++++++++++
>  include/drm/ttm/ttm_bo_api.h            |  16 ---
>  include/drm/ttm/ttm_bo_driver.h         |  29 +-----
>  include/drm/ttm/ttm_resource.h          |  35 +++++++
>  9 files changed, 177 insertions(+), 146 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 18246b5b6ee3..4b178a74b4e0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -643,12 +643,12 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>  
>         if (vm->bulk_moveable) {
>                 spin_lock(&adev->mman.bdev.lru_lock);
> -               ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
> +               ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
>                 spin_unlock(&adev->mman.bdev.lru_lock);
>                 return;
>         }
>  
> -       memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
> +       ttm_lru_bulk_move_init(&vm->lru_bulk_move);
>  
>         spin_lock(&adev->mman.bdev.lru_lock);
>         list_for_each_entry(bo_base, &vm->idle, vm_status) {
> @@ -658,11 +658,9 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>                 if (!bo->parent)
>                         continue;
>  
> -               ttm_bo_move_to_lru_tail(&bo->tbo, bo->tbo.resource,
> -                                       &vm->lru_bulk_move);
> +               ttm_bo_move_to_lru_tail(&bo->tbo, &vm->lru_bulk_move);
>                 if (shadow)
>                         ttm_bo_move_to_lru_tail(&shadow->tbo,
> -                                               shadow->tbo.resource,
>                                                 &vm->lru_bulk_move);
>         }
>         spin_unlock(&adev->mman.bdev.lru_lock);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> index bf33724bed5c..b38eef37f1c8 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> @@ -472,7 +472,7 @@ static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
>                         bo->priority = I915_TTM_PRIO_NO_PAGES;
>         }
>  
> -       ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
> +       ttm_bo_move_to_lru_tail(bo, NULL);
>         spin_unlock(&bo->bdev->lru_lock);
>  }
>  
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 5a2dc712c632..09a62ad06b9d 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -69,95 +69,15 @@ static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo,
>         }
>  }
>  
> -static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
> -{
> -       struct ttm_device *bdev = bo->bdev;
> -
> -       list_del_init(&bo->lru);
> -
> -       if (bdev->funcs->del_from_lru_notify)
> -               bdev->funcs->del_from_lru_notify(bo);
> -}
> -
> -static void ttm_bo_bulk_move_set_pos(struct ttm_lru_bulk_move_pos *pos,
> -                                    struct ttm_buffer_object *bo)
> -{
> -       if (!pos->first)
> -               pos->first = bo;
> -       pos->last = bo;
> -}
> -
>  void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
> -                            struct ttm_resource *mem,
>                              struct ttm_lru_bulk_move *bulk)
>  {
> -       struct ttm_device *bdev = bo->bdev;
> -       struct ttm_resource_manager *man;
> -
> -       if (!bo->deleted)
> -               dma_resv_assert_held(bo->base.resv);
> -
> -       if (bo->pin_count) {
> -               ttm_bo_del_from_lru(bo);
> -               return;
> -       }
> -
> -       man = ttm_manager_type(bdev, mem->mem_type);
> -       list_move_tail(&bo->lru, &man->lru[bo->priority]);
> -
> -       if (bdev->funcs->del_from_lru_notify)
> -               bdev->funcs->del_from_lru_notify(bo);
> -
> -       if (bulk && !bo->pin_count) {
> -               switch (bo->resource->mem_type) {
> -               case TTM_PL_TT:
> -                       ttm_bo_bulk_move_set_pos(&bulk->tt[bo->priority], bo);
> -                       break;
> +       dma_resv_assert_held(bo->base.resv);
>  
> -               case TTM_PL_VRAM:
> -                       ttm_bo_bulk_move_set_pos(&bulk->vram[bo->priority], bo);
> -                       break;
> -               }
> -       }
> +       ttm_resource_move_to_lru_tail(bo->resource, bulk);
>  }
>  EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
>  
> -void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
> -{
> -       unsigned i;
> -
> -       for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> -               struct ttm_lru_bulk_move_pos *pos = &bulk->tt[i];
> -               struct ttm_resource_manager *man;
> -
> -               if (!pos->first)
> -                       continue;
> -
> -               dma_resv_assert_held(pos->first->base.resv);
> -               dma_resv_assert_held(pos->last->base.resv);
> -
> -               man = ttm_manager_type(pos->first->bdev, TTM_PL_TT);
> -               list_bulk_move_tail(&man->lru[i], &pos->first->lru,
> -                                   &pos->last->lru);
> -       }
> -
> -       for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> -               struct ttm_lru_bulk_move_pos *pos = &bulk->vram[i];
> -               struct ttm_resource_manager *man;
> -
> -               if (!pos->first)
> -                       continue;
> -
> -               dma_resv_assert_held(pos->first->base.resv);
> -               dma_resv_assert_held(pos->last->base.resv);
> -
> -               man = ttm_manager_type(pos->first->bdev, TTM_PL_VRAM);
> -               list_bulk_move_tail(&man->lru[i], &pos->first->lru,
> -                                   &pos->last->lru);
> -       }
> -}
> -EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
> -
>  static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
>                                   struct ttm_resource *mem, bool evict,
>                                   struct ttm_operation_ctx *ctx,
> @@ -339,7 +259,6 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>                 return ret;
>         }
>  
> -       ttm_bo_del_from_lru(bo);
>         list_del_init(&bo->ddestroy);
>         spin_unlock(&bo->bdev->lru_lock);
>         ttm_bo_cleanup_memtype_use(bo);
> @@ -440,7 +359,7 @@ static void ttm_bo_release(struct kref *kref)
>                  */
>                 if (bo->pin_count) {
>                         bo->pin_count = 0;
> -                       ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
> +                       ttm_resource_move_to_lru_tail(bo->resource, NULL);
>                 }
>  
>                 kref_init(&bo->kref);
> @@ -453,7 +372,6 @@ static void ttm_bo_release(struct kref *kref)
>         }
>  
>         spin_lock(&bo->bdev->lru_lock);
> -       ttm_bo_del_from_lru(bo);
>         list_del(&bo->ddestroy);
>         spin_unlock(&bo->bdev->lru_lock);
>  
> @@ -667,15 +585,17 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>                         struct ww_acquire_ctx *ticket)
>  {
>         struct ttm_buffer_object *bo = NULL, *busy_bo = NULL;
> +       struct ttm_resource *res;
>         bool locked = false;
>         unsigned i;
>         int ret;
>  
>         spin_lock(&bdev->lru_lock);
>         for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> -               list_for_each_entry(bo, &man->lru[i], lru) {
> +               list_for_each_entry(res, &man->lru[i], lru) {
>                         bool busy;
>  
> +                       bo = res->bo;

Follow-up to the previous review: what happens here if someone now
reassigns @res->bo and then kills @bo? At least it's not immediately
clear what protects against that. Isn't a kref_get_unless_zero() on the
bo needed here, with res->bo assigned (and properly cleared on bo
destruction) under the lru_lock where needed?
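
Roughly what I have in mind for the loop above (untested sketch only;
it assumes res->bo is only ever assigned and cleared with the lru_lock
held):

        spin_lock(&bdev->lru_lock);
        for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
                list_for_each_entry(res, &man->lru[i], lru) {
                        bool busy;

                        bo = res->bo;

                        /* Assumes res->bo is assigned / cleared under
                         * the lru_lock, so bo can't be freed between
                         * the load above and the ref grab below.
                         */
                        if (!bo || !kref_get_unless_zero(&bo->kref))
                                continue;

                        /* ... existing trylock / busy handling here,
                         * with a ttm_bo_put() once the lru_lock has
                         * been dropped ...
                         */
                }
        }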

Admittedly, as you pointed out earlier, we can't kref_put() the bo under
the lru_lock, but (if all else fails) one could perhaps defer the put to
a worker, or move the bo to the LRU tail and drop the lru_lock only when
kref_put() might hit a zero refcount.
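
For the worker idea, a minimal sketch of what I mean (both
ttm_bo_release_deferred() and the delayed_delete work item are
hypothetical; the work handler would do what ttm_bo_release() does
today, just in process context and without the lru_lock held):

        /* Hypothetical: the final kref_put() only schedules the real
         * teardown instead of running it with the lru_lock held.
         */
        static void ttm_bo_release_deferred(struct kref *kref)
        {
                struct ttm_buffer_object *bo =
                        container_of(kref, struct ttm_buffer_object, kref);

                /* hypothetical work item on the bo */
                schedule_work(&bo->delayed_delete);
        }

        /* Safe to call with the lru_lock held. */
        static void ttm_bo_put_locked(struct ttm_buffer_object *bo)
        {
                kref_put(&bo->kref, ttm_bo_release_deferred);
        }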

/Thomas
