All of lore.kernel.org
 help / color / mirror / Atom feed
From: Daniel Vetter <daniel@ffwll.ch>
To: "Christian König" <ckoenig.leichtzumerken@gmail.com>
Cc: thomas.hellstrom@linux.intel.com, ray.huang@amd.com,
	dri-devel@lists.freedesktop.org
Subject: Re: [PATCH 05/12] drm/ttm: move the LRU into resource handling v2
Date: Tue, 25 Jan 2022 17:52:04 +0100	[thread overview]
Message-ID: <YfAqtI95aewAb10L@phenom.ffwll.local> (raw)
In-Reply-To: <20220124122514.1832-6-christian.koenig@amd.com>

On Mon, Jan 24, 2022 at 01:25:07PM +0100, Christian König wrote:
> This way we finally fix the problem that new resource are
> not immediately evict-able after allocation.
> 
> That has caused numerous problems including OOM on GDS handling
> and not being able to use TTM as general resource manager.
> 
> v2: stop assuming in ttm_resource_fini that res->bo is still valid.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |   8 +-
>  drivers/gpu/drm/i915/gem/i915_gem_ttm.c |   2 +-
>  drivers/gpu/drm/ttm/ttm_bo.c            | 115 ++--------------------
>  drivers/gpu/drm/ttm/ttm_bo_util.c       |   1 -
>  drivers/gpu/drm/ttm/ttm_device.c        |  64 ++++++-------
>  drivers/gpu/drm/ttm/ttm_resource.c      | 122 +++++++++++++++++++++++-
>  include/drm/ttm/ttm_bo_api.h            |  16 ----
>  include/drm/ttm/ttm_bo_driver.h         |  29 +-----
>  include/drm/ttm/ttm_resource.h          |  35 +++++++
>  9 files changed, 201 insertions(+), 191 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index a96ae4c0e040..cdb489cdf27b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -683,12 +683,12 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>  
>  	if (vm->bulk_moveable) {
>  		spin_lock(&adev->mman.bdev.lru_lock);
> -		ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
> +		ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
>  		spin_unlock(&adev->mman.bdev.lru_lock);
>  		return;
>  	}
>  
> -	memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
> +	ttm_lru_bulk_move_init(&vm->lru_bulk_move);
>  
>  	spin_lock(&adev->mman.bdev.lru_lock);
>  	list_for_each_entry(bo_base, &vm->idle, vm_status) {
> @@ -698,11 +698,9 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>  		if (!bo->parent)
>  			continue;
>  
> -		ttm_bo_move_to_lru_tail(&bo->tbo, bo->tbo.resource,
> -					&vm->lru_bulk_move);
> +		ttm_bo_move_to_lru_tail(&bo->tbo, &vm->lru_bulk_move);
>  		if (shadow)
>  			ttm_bo_move_to_lru_tail(&shadow->tbo,
> -						shadow->tbo.resource,
>  						&vm->lru_bulk_move);
>  	}
>  	spin_unlock(&adev->mman.bdev.lru_lock);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> index 218a9b3037c7..cdf1ff87c88d 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> @@ -835,7 +835,7 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
>  			bo->priority = I915_TTM_PRIO_NO_PAGES;
>  	}
>  
> -	ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
> +	ttm_bo_move_to_lru_tail(bo, NULL);
>  	spin_unlock(&bo->bdev->lru_lock);
>  }
>  
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index db3dc7ef5382..cb0fa932d495 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -69,108 +69,16 @@ static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo,
>  	}
>  }
>  
> -static inline void ttm_bo_move_to_pinned(struct ttm_buffer_object *bo)
> -{
> -	struct ttm_device *bdev = bo->bdev;
> -
> -	list_move_tail(&bo->lru, &bdev->pinned);
> -
> -	if (bdev->funcs->del_from_lru_notify)
> -		bdev->funcs->del_from_lru_notify(bo);
> -}
> -
> -static inline void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
> -{
> -	struct ttm_device *bdev = bo->bdev;
> -
> -	list_del_init(&bo->lru);
> -
> -	if (bdev->funcs->del_from_lru_notify)
> -		bdev->funcs->del_from_lru_notify(bo);
> -}
> -
> -static void ttm_bo_bulk_move_set_pos(struct ttm_lru_bulk_move_pos *pos,
> -				     struct ttm_buffer_object *bo)
> -{
> -	if (!pos->first)
> -		pos->first = bo;
> -	pos->last = bo;
> -}
> -
>  void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
> -			     struct ttm_resource *mem,
>  			     struct ttm_lru_bulk_move *bulk)
>  {
> -	struct ttm_device *bdev = bo->bdev;
> -	struct ttm_resource_manager *man;
> -
> -	if (!bo->deleted)
> -		dma_resv_assert_held(bo->base.resv);
> -
> -	if (bo->pin_count) {
> -		ttm_bo_move_to_pinned(bo);
> -		return;
> -	}
> -
> -	if (!mem)
> -		return;
> -
> -	man = ttm_manager_type(bdev, mem->mem_type);
> -	list_move_tail(&bo->lru, &man->lru[bo->priority]);
> -
> -	if (bdev->funcs->del_from_lru_notify)
> -		bdev->funcs->del_from_lru_notify(bo);
> -
> -	if (bulk && !bo->pin_count) {
> -		switch (bo->resource->mem_type) {
> -		case TTM_PL_TT:
> -			ttm_bo_bulk_move_set_pos(&bulk->tt[bo->priority], bo);
> -			break;
> +	dma_resv_assert_held(bo->base.resv);
>  
> -		case TTM_PL_VRAM:
> -			ttm_bo_bulk_move_set_pos(&bulk->vram[bo->priority], bo);
> -			break;
> -		}
> -	}
> +	if (bo->resource)
> +		ttm_resource_move_to_lru_tail(bo->resource, bulk);
>  }
>  EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
>  
> -void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
> -{
> -	unsigned i;
> -
> -	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> -		struct ttm_lru_bulk_move_pos *pos = &bulk->tt[i];
> -		struct ttm_resource_manager *man;
> -
> -		if (!pos->first)
> -			continue;
> -
> -		dma_resv_assert_held(pos->first->base.resv);
> -		dma_resv_assert_held(pos->last->base.resv);
> -
> -		man = ttm_manager_type(pos->first->bdev, TTM_PL_TT);
> -		list_bulk_move_tail(&man->lru[i], &pos->first->lru,
> -				    &pos->last->lru);
> -	}
> -
> -	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> -		struct ttm_lru_bulk_move_pos *pos = &bulk->vram[i];
> -		struct ttm_resource_manager *man;
> -
> -		if (!pos->first)
> -			continue;
> -
> -		dma_resv_assert_held(pos->first->base.resv);
> -		dma_resv_assert_held(pos->last->base.resv);
> -
> -		man = ttm_manager_type(pos->first->bdev, TTM_PL_VRAM);
> -		list_bulk_move_tail(&man->lru[i], &pos->first->lru,
> -				    &pos->last->lru);
> -	}
> -}
> -EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
> -
>  static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
>  				  struct ttm_resource *mem, bool evict,
>  				  struct ttm_operation_ctx *ctx,
> @@ -344,7 +252,6 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>  		return ret;
>  	}
>  
> -	ttm_bo_move_to_pinned(bo);
>  	list_del_init(&bo->ddestroy);
>  	spin_unlock(&bo->bdev->lru_lock);
>  	ttm_bo_cleanup_memtype_use(bo);
> @@ -445,7 +352,7 @@ static void ttm_bo_release(struct kref *kref)
>  		 */
>  		if (bo->pin_count) {
>  			bo->pin_count = 0;
> -			ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
> +			ttm_resource_move_to_lru_tail(bo->resource, NULL);
>  		}
>  
>  		kref_init(&bo->kref);
> @@ -458,7 +365,6 @@ static void ttm_bo_release(struct kref *kref)
>  	}
>  
>  	spin_lock(&bo->bdev->lru_lock);
> -	ttm_bo_del_from_lru(bo);
>  	list_del(&bo->ddestroy);
>  	spin_unlock(&bo->bdev->lru_lock);
>  
> @@ -673,15 +579,17 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>  			struct ww_acquire_ctx *ticket)
>  {
>  	struct ttm_buffer_object *bo = NULL, *busy_bo = NULL;
> +	struct ttm_resource *res;
>  	bool locked = false;
>  	unsigned i;
>  	int ret;
>  
>  	spin_lock(&bdev->lru_lock);
>  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> -		list_for_each_entry(bo, &man->lru[i], lru) {
> +		list_for_each_entry(res, &man->lru[i], lru) {
>  			bool busy;
>  
> +			bo = res->bo;
>  			if (!ttm_bo_evict_swapout_allowable(bo, ctx, place,
>  							    &locked, &busy)) {
>  				if (busy && !busy_bo && ticket !=
> @@ -699,7 +607,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>  		}
>  
>  		/* If the inner loop terminated early, we have our candidate */
> -		if (&bo->lru != &man->lru[i])
> +		if (&res->lru != &man->lru[i])
>  			break;
>  
>  		bo = NULL;
> @@ -875,9 +783,6 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
>  	}
>  
>  error:
> -	if (bo->resource->mem_type == TTM_PL_SYSTEM && !bo->pin_count)
> -		ttm_bo_move_to_lru_tail_unlocked(bo);
> -
>  	return ret;
>  }
>  EXPORT_SYMBOL(ttm_bo_mem_space);
> @@ -971,7 +876,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
>  	bo->destroy = destroy ? destroy : ttm_bo_default_destroy;
>  
>  	kref_init(&bo->kref);
> -	INIT_LIST_HEAD(&bo->lru);
>  	INIT_LIST_HEAD(&bo->ddestroy);
>  	bo->bdev = bdev;
>  	bo->type = type;
> @@ -1021,8 +925,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
>  		return ret;
>  	}
>  
> -	ttm_bo_move_to_lru_tail_unlocked(bo);
> -
>  	return ret;
>  }
>  EXPORT_SYMBOL(ttm_bo_init_reserved);
> @@ -1123,7 +1025,6 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>  		return ret == -EBUSY ? -ENOSPC : ret;
>  	}
>  
> -	ttm_bo_move_to_pinned(bo);
>  	/* TODO: Cleanup the locking */
>  	spin_unlock(&bo->bdev->lru_lock);
>  
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 544a84fa6589..0163e92b57af 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -231,7 +231,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
>  
>  	atomic_inc(&ttm_glob.bo_count);
>  	INIT_LIST_HEAD(&fbo->base.ddestroy);
> -	INIT_LIST_HEAD(&fbo->base.lru);
>  	fbo->base.moving = NULL;
>  	drm_vma_node_reset(&fbo->base.base.vma_node);
>  
> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> index be24bb6cefd0..ba35887147ba 100644
> --- a/drivers/gpu/drm/ttm/ttm_device.c
> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> @@ -144,6 +144,7 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>  {
>  	struct ttm_resource_manager *man;
>  	struct ttm_buffer_object *bo;
> +	struct ttm_resource *res;
>  	unsigned i, j;
>  	int ret;
>  
> @@ -154,8 +155,11 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>  			continue;
>  
>  		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
> -			list_for_each_entry(bo, &man->lru[j], lru) {
> -				uint32_t num_pages = PFN_UP(bo->base.size);
> +			list_for_each_entry(res, &man->lru[j], lru) {
> +				uint32_t num_pages;
> +
> +				bo = res->bo;
> +				num_pages = PFN_UP(bo->base.size);
>  
>  				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
>  				/* ttm_bo_swapout has dropped the lru_lock */
> @@ -259,49 +263,45 @@ void ttm_device_fini(struct ttm_device *bdev)
>  }
>  EXPORT_SYMBOL(ttm_device_fini);
>  
> -void ttm_device_clear_dma_mappings(struct ttm_device *bdev)
> +static void ttm_device_clear_lru_dma_mappings(struct ttm_device *bdev,
> +					      struct list_head *list)
>  {
> -	struct ttm_resource_manager *man;
> -	struct ttm_buffer_object *bo;
> -	unsigned int i, j;
> +	struct ttm_resource *res;
>  
>  	spin_lock(&bdev->lru_lock);
> -	while (!list_empty(&bdev->pinned)) {
> -		bo = list_first_entry(&bdev->pinned, struct ttm_buffer_object, lru);
> +	while ((res = list_first_entry_or_null(list, typeof(*res), lru))) {
> +		struct ttm_buffer_object *bo = res->bo;
> +
>  		/* Take ref against racing releases once lru_lock is unlocked */
> -		if (ttm_bo_get_unless_zero(bo)) {
> -			list_del_init(&bo->lru);
> -			spin_unlock(&bdev->lru_lock);
> +		if (!ttm_bo_get_unless_zero(bo))
> +			continue;
>  
> -			if (bo->ttm)
> -				ttm_tt_unpopulate(bo->bdev, bo->ttm);
> +		list_del_init(&res->lru);
> +		spin_unlock(&bdev->lru_lock);
>  
> -			ttm_bo_put(bo);
> -			spin_lock(&bdev->lru_lock);
> -		}
> +		if (bo->ttm)
> +			ttm_tt_unpopulate(bo->bdev, bo->ttm);
> +
> +		ttm_bo_put(bo);
> +		spin_lock(&bdev->lru_lock);
>  	}
> +	spin_unlock(&bdev->lru_lock);
> +}
> +
> +void ttm_device_clear_dma_mappings(struct ttm_device *bdev)
> +{
> +	struct ttm_resource_manager *man;
> +	unsigned int i, j;
> +
> +	ttm_device_clear_lru_dma_mappings(bdev, &bdev->pinned);
>  
>  	for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
>  		man = ttm_manager_type(bdev, i);
>  		if (!man || !man->use_tt)
>  			continue;
>  
> -		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
> -			while (!list_empty(&man->lru[j])) {
> -				bo = list_first_entry(&man->lru[j], struct ttm_buffer_object, lru);
> -				if (ttm_bo_get_unless_zero(bo)) {
> -					list_del_init(&bo->lru);
> -					spin_unlock(&bdev->lru_lock);
> -
> -					if (bo->ttm)
> -						ttm_tt_unpopulate(bo->bdev, bo->ttm);
> -
> -					ttm_bo_put(bo);
> -					spin_lock(&bdev->lru_lock);
> -				}
> -			}
> -		}
> +		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j)
> +			ttm_device_clear_lru_dma_mappings(bdev, &man->lru[j]);
>  	}
> -	spin_unlock(&bdev->lru_lock);
>  }
>  EXPORT_SYMBOL(ttm_device_clear_dma_mappings);
> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
> index b8362492980d..450e665c357b 100644
> --- a/drivers/gpu/drm/ttm/ttm_resource.c
> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> @@ -29,6 +29,106 @@
>  #include <drm/ttm/ttm_resource.h>
>  #include <drm/ttm/ttm_bo_driver.h>
>  
> +/**
> + * ttm_lru_bulk_move_init - initialize a bulk move structure
> + * @bulk: the structure to init
> + *
> + * For now just memset the structure to zero.
> + */
> +void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk)

I like the ttm_lru_bulk_move prefixe bikeshed.

> +{
> +	memset(bulk, 0, sizeof(*bulk));
> +}
> +EXPORT_SYMBOL(ttm_lru_bulk_move_init);
> +
> +/**
> + * ttm_lru_bulk_move_tail
> + *
> + * @bulk: bulk move structure
> + *
> + * Bulk move BOs to the LRU tail, only valid to use when driver makes sure that
> + * resource order never changes. Should be called with ttm_device::lru_lock held.

							  ^
This is a doxygen link or something, defo not sphinx.  Pls fix.

Also I think a bit more explaining how exactly this works (e.g. mention
that you need to guarantee that _all_ the bos in your bulk lru section
need to be locked) and other things would be good here. I know this is
just moved, but given how much fun you guys had to debug corner cases I
think it's worth to elaborate here a bit.

> + */
> +void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk)
> +{
> +	unsigned i;

Also assert_lockdep_held here for the lru lock, due to all the callers all
over.

> +
> +	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> +		struct ttm_lru_bulk_move_pos *pos = &bulk->tt[i];
> +		struct ttm_resource_manager *man;
> +
> +		if (!pos->first)
> +			continue;
> +
> +		dma_resv_assert_held(pos->first->bo->base.resv);

btw random idea for debug mode with lockdep enabled: Loop from first ->
last and check that they're _all_ locked? Also catches the case where
first/last aren't on the same list, and other screw-ups. And performance
with lockdep enabled is pretty bad anyway, so the full list walk here
shouldn't be a big disaster.

Or maybe only do that when we have the special dma_resv debug Kconfig
enabled.

> +		dma_resv_assert_held(pos->last->bo->base.resv);
> +
> +		man = ttm_manager_type(pos->first->bo->bdev, TTM_PL_TT);
> +		list_bulk_move_tail(&man->lru[i], &pos->first->lru,
> +				    &pos->last->lru);
> +	}
> +
> +	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> +		struct ttm_lru_bulk_move_pos *pos = &bulk->vram[i];
> +		struct ttm_resource_manager *man;
> +
> +		if (!pos->first)
> +			continue;
> +
> +		dma_resv_assert_held(pos->first->bo->base.resv);
> +		dma_resv_assert_held(pos->last->bo->base.resv);
> +
> +		man = ttm_manager_type(pos->first->bo->bdev, TTM_PL_VRAM);
> +		list_bulk_move_tail(&man->lru[i], &pos->first->lru,
> +				    &pos->last->lru);
> +	}
> +}
> +EXPORT_SYMBOL(ttm_lru_bulk_move_tail);
> +
> +/* Record a resource position in a bulk move structure */
> +static void ttm_lru_bulk_move_set_pos(struct ttm_lru_bulk_move_pos *pos,
> +				      struct ttm_resource *res)
> +{
> +	if (!pos->first)
> +		pos->first = res;
> +	pos->last = res;
> +}
> +
> +/* Move a resource to the LRU tail and track the bulk position */
> +void ttm_resource_move_to_lru_tail(struct ttm_resource *res,
> +				   struct ttm_lru_bulk_move *bulk)
> +{
> +	struct ttm_buffer_object *bo = res->bo;
> +	struct ttm_device *bdev = bo->bdev;
> +	struct ttm_resource_manager *man;

I think lockdep_assert_held here on the lru lock would be good, since
there's quite a few callers all over.

Also maybe double-check that the dma_resv is also held, like in the bulk
move. I get why the bulk move checks more since it's easier to screw that
up, but checking here is probably also good.

> +
> +	if (bo->pin_count) {
> +		list_move_tail(&res->lru, &bdev->pinned);
> +		if (bdev->funcs->del_from_lru_notify)
> +			bdev->funcs->del_from_lru_notify(res->bo);
> +		return;
> +	}
> +
> +	man = ttm_manager_type(bdev, res->mem_type);
> +	list_move_tail(&res->lru, &man->lru[bo->priority]);
> +
> +	if (bdev->funcs->del_from_lru_notify)
> +		bdev->funcs->del_from_lru_notify(bo);
> +
> +	if (!bulk)
> +		return;
> +
> +	switch (res->mem_type) {
> +	case TTM_PL_TT:
> +		ttm_lru_bulk_move_set_pos(&bulk->tt[bo->priority], res);
> +		break;
> +
> +	case TTM_PL_VRAM:
> +		ttm_lru_bulk_move_set_pos(&bulk->vram[bo->priority], res);
> +		break;
> +	}
> +}
> +
>  void ttm_resource_init(struct ttm_buffer_object *bo,
>                         const struct ttm_place *place,
>                         struct ttm_resource *res)
> @@ -44,16 +144,36 @@ void ttm_resource_init(struct ttm_buffer_object *bo,
>  	res->bus.is_iomem = false;
>  	res->bus.caching = ttm_cached;
>  	res->bo = bo;
> +	INIT_LIST_HEAD(&res->lru);
>  
>  	man = ttm_manager_type(bo->bdev, place->mem_type);
>  	atomic64_add(bo->base.size, &man->usage);
> +
> +	spin_lock(&bo->bdev->lru_lock);
> +	ttm_resource_move_to_lru_tail(res, NULL);
> +	spin_unlock(&bo->bdev->lru_lock);
>  }
>  EXPORT_SYMBOL(ttm_resource_init);
>  
> +/**
> + * ttm_resource_fini
> + *
> + * @res: the resource to clean up
> + *
> + * Make sure the resource is removed from the LRU before destruction.
> + */

Ah kerneldoc should probably be put into the earlier patch and explain
more when driver authors need to call this, and not so much what it
internally all needs to guaranteed.

>  void ttm_resource_fini(struct ttm_resource_manager *man,
>  		       struct ttm_resource *res)
>  {
> -	atomic64_sub(res->bo->base.size, &man->usage);
> +	struct ttm_device *bdev = man->bdev;
> +
> +	spin_lock(&bdev->lru_lock);
> +	list_del_init(&res->lru);
> +	if (res->bo && bdev->funcs->del_from_lru_notify)
> +		bdev->funcs->del_from_lru_notify(res->bo);
> +	spin_unlock(&bdev->lru_lock);
> +
> +	atomic64_sub(res->num_pages << PAGE_SHIFT, &man->usage);

Yeah more reasons that you really don't need an atomic here. I think if
you reorder the patches to first move the lru and then add account it will
be blantantly obvious that we don't need the resource tracking to be
atomic.


>  }
>  EXPORT_SYMBOL(ttm_resource_fini);
>  
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index c17b2df9178b..3da77fc54552 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -55,8 +55,6 @@ struct ttm_placement;
>  
>  struct ttm_place;
>  
> -struct ttm_lru_bulk_move;
> -
>  /**
>   * enum ttm_bo_type
>   *
> @@ -94,7 +92,6 @@ struct ttm_tt;
>   * @ttm: TTM structure holding system pages.
>   * @evicted: Whether the object was evicted without user-space knowing.
>   * @deleted: True if the object is only a zombie and already deleted.
> - * @lru: List head for the lru list.
>   * @ddestroy: List head for the delayed destroy list.
>   * @swap: List head for swap LRU list.
>   * @moving: Fence set when BO is moving
> @@ -143,7 +140,6 @@ struct ttm_buffer_object {
>  	 * Members protected by the bdev::lru_lock.
>  	 */
>  
> -	struct list_head lru;
>  	struct list_head ddestroy;
>  
>  	/**
> @@ -295,7 +291,6 @@ void ttm_bo_put(struct ttm_buffer_object *bo);
>   * ttm_bo_move_to_lru_tail
>   *
>   * @bo: The buffer object.
> - * @mem: Resource object.
>   * @bulk: optional bulk move structure to remember BO positions
>   *
>   * Move this BO to the tail of all lru lists used to lookup and reserve an
> @@ -303,19 +298,8 @@ void ttm_bo_put(struct ttm_buffer_object *bo);
>   * held, and is used to make a BO less likely to be considered for eviction.
>   */
>  void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
> -			     struct ttm_resource *mem,
>  			     struct ttm_lru_bulk_move *bulk);
>  
> -/**
> - * ttm_bo_bulk_move_lru_tail
> - *
> - * @bulk: bulk move structure
> - *
> - * Bulk move BOs to the LRU tail, only valid to use when driver makes sure that
> - * BO order never changes. Should be called with ttm_global::lru_lock held.
> - */
> -void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk);
> -
>  /**
>   * ttm_bo_lock_delayed_workqueue
>   *
> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> index 5f087575194b..6c7352e13708 100644
> --- a/include/drm/ttm/ttm_bo_driver.h
> +++ b/include/drm/ttm/ttm_bo_driver.h
> @@ -45,33 +45,6 @@
>  #include "ttm_tt.h"
>  #include "ttm_pool.h"
>  
> -/**
> - * struct ttm_lru_bulk_move_pos
> - *
> - * @first: first BO in the bulk move range
> - * @last: last BO in the bulk move range
> - *
> - * Positions for a lru bulk move.
> - */
> -struct ttm_lru_bulk_move_pos {
> -	struct ttm_buffer_object *first;
> -	struct ttm_buffer_object *last;
> -};
> -
> -/**
> - * struct ttm_lru_bulk_move
> - *
> - * @tt: first/last lru entry for BOs in the TT domain
> - * @vram: first/last lru entry for BOs in the VRAM domain
> - * @swap: first/last lru entry for BOs on the swap list
> - *
> - * Helper structure for bulk moves on the LRU list.
> - */
> -struct ttm_lru_bulk_move {
> -	struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
> -	struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
> -};
> -
>  /*
>   * ttm_bo.c
>   */
> @@ -182,7 +155,7 @@ static inline void
>  ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
>  {
>  	spin_lock(&bo->bdev->lru_lock);
> -	ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
> +	ttm_bo_move_to_lru_tail(bo, NULL);
>  	spin_unlock(&bo->bdev->lru_lock);
>  }
>  
> diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
> index 3d391279b33f..a54d52517a30 100644
> --- a/include/drm/ttm/ttm_resource.h
> +++ b/include/drm/ttm/ttm_resource.h
> @@ -26,10 +26,12 @@
>  #define _TTM_RESOURCE_H_
>  
>  #include <linux/types.h>
> +#include <linux/list.h>
>  #include <linux/mutex.h>
>  #include <linux/atomic.h>
>  #include <linux/dma-buf-map.h>
>  #include <linux/dma-fence.h>
> +
>  #include <drm/drm_print.h>
>  #include <drm/ttm/ttm_caching.h>
>  #include <drm/ttm/ttm_kmap_iter.h>
> @@ -178,6 +180,33 @@ struct ttm_resource {
>  	uint32_t placement;
>  	struct ttm_bus_placement bus;
>  	struct ttm_buffer_object *bo;

Kerneldoc missing here. Please use an in-line one which explains:
- locking
- refcounting
- anything else that ever matters really, like that we guarantee that
  statistics gathered are consistent when you hold the lru lock
- Maybe link to the bulk lru movement for added doc goodness?

> +	struct list_head lru;
> +};
> +
> +/**
> + * struct ttm_lru_bulk_move_pos
> + *
> + * @first: first res in the bulk move range
> + * @last: last res in the bulk move range
> + *
> + * Positions for a lru bulk move.
> + */
> +struct ttm_lru_bulk_move_pos {
> +	struct ttm_resource *first;
> +	struct ttm_resource *last;
> +};
> +
> +/**
> + * struct ttm_lru_bulk_move
> + *
> + * @tt: first/last lru entry for resources in the TT domain
> + * @vram: first/last lru entry for resources in the VRAM domain
> + *
> + * Helper structure for bulk moves on the LRU list.

I know it's just moved, but please also link to the bulk lru functions
here.

> + */
> +struct ttm_lru_bulk_move {
> +	struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
> +	struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
>  };
>  
>  /**
> @@ -279,6 +308,12 @@ ttm_resource_manager_usage(struct ttm_resource_manager *man)
>  	return atomic64_read(&man->usage);
>  }
>  
> +void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk);
> +void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk);
> +
> +void ttm_resource_move_to_lru_tail(struct ttm_resource *res,
> +				   struct ttm_lru_bulk_move *bulk);
> +
>  void ttm_resource_init(struct ttm_buffer_object *bo,
>                         const struct ttm_place *place,
>                         struct ttm_resource *res);

I didn't check all the code movements in detail, but looks good with a few
spot checks. With the bikesheds addressed somehow:

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

  reply	other threads:[~2022-01-25 16:52 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-24 12:25 drm/ttm: moving the LRU into the resource Christian König
2022-01-24 12:25 ` [PATCH 01/12] drm/ttm: add ttm_resource_fini Christian König
2022-01-25 16:26   ` Daniel Vetter
2022-01-24 12:25 ` [PATCH 02/12] drm/ttm: add back a reference to the bdev to the res manager Christian König
2022-01-25 16:30   ` Daniel Vetter
2022-01-24 12:25 ` [PATCH 03/12] drm/ttm: add a weak BO reference to the resource v3 Christian König
2022-01-24 12:25 ` [PATCH 04/12] drm/ttm: add common accounting to the resource mgr v2 Christian König
2022-01-25 16:37   ` Daniel Vetter
2022-01-26 14:42     ` Christian König
2022-01-27  8:48       ` Daniel Vetter
2022-01-24 12:25 ` [PATCH 05/12] drm/ttm: move the LRU into resource handling v2 Christian König
2022-01-25 16:52   ` Daniel Vetter [this message]
2022-01-24 12:25 ` [PATCH 06/12] drm/ttm: add resource iterator Christian König
2022-01-25 16:56   ` Daniel Vetter
2022-01-24 12:25 ` [PATCH 07/12] drm/radeon: remove resource accounting Christian König
2022-01-24 12:25 ` [PATCH 08/12] drm/amdgpu: remove GTT accounting Christian König
2022-01-24 12:25 ` [PATCH 09/12] drm/amdgpu: remove VRAM accounting Christian König
2022-01-24 12:25 ` [PATCH 10/12] drm/amdgpu: drop amdgpu_gtt_node Christian König
2022-01-24 12:25 ` [PATCH 11/12] drm/ttm: allow bulk moves for all domains Christian König
2022-01-25 17:16   ` Daniel Vetter
2022-01-24 12:25 ` [PATCH 12/12] drm/ttm: rework bulk move handling Christian König
2022-01-25 17:12   ` Daniel Vetter
  -- strict thread matches above, loose matches on Subject: below --
2021-11-24 12:44 drm/ttm: moving the LRU into the resource Christian König
2021-11-24 12:44 ` [PATCH 05/12] drm/ttm: move the LRU into resource handling v2 Christian König

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YfAqtI95aewAb10L@phenom.ffwll.local \
    --to=daniel@ffwll.ch \
    --cc=ckoenig.leichtzumerken@gmail.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=ray.huang@amd.com \
    --cc=thomas.hellstrom@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.