All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Christian König" <ckoenig.leichtzumerken@gmail.com>
To: Chunming Zhou <david1.zhou@amd.com>,
	Christian.Koenig@amd.com, Prike.Liang@amd.com,
	dri-devel@lists.freedesktop.org
Subject: Re: [PATCH] drm/ttm: fix busy memory to fail other user v5
Date: Tue, 7 May 2019 11:03:42 +0200	[thread overview]
Message-ID: <3ea80c26-d500-2ccf-440c-bdec05bca68b@gmail.com> (raw)
In-Reply-To: <6f501d5f-1ac1-42e9-88af-c699d6fe44fe@gmail.com>

Ping!

Marek is going to need this for its GDS patches as well.

Christina.

Am 30.04.19 um 11:10 schrieb Christian König:
> Am 30.04.19 um 09:01 schrieb Chunming Zhou:
>> heavy gpu job could occupy memory long time, which lead other user 
>> fail to get memory.
>>
>> basically pick up Christian idea:
>>
>> 1. Reserve the BO in DC using a ww_mutex ticket (trivial).
>> 2. If we then run into this EBUSY condition in TTM check if the BO we 
>> need memory for (or rather the ww_mutex of its reservation object) 
>> has a ticket assigned.
>> 3. If we have a ticket we grab a reference to the first BO on the 
>> LRU, drop the LRU lock and try to grab the reservation lock with the 
>> ticket.
>> 4. If getting the reservation lock with the ticket succeeded we check 
>> if the BO is still the first one on the LRU in question (the BO could 
>> have moved).
>> 5. If the BO is still the first one on the LRU in question we try to 
>> evict it as we would evict any other BO.
>> 6. If any of the "If's" above fail we just back off and return -EBUSY.
>>
>> v2: fix some minor check
>> v3: address Christian v2 comments.
>> v4: fix some missing
>> v5: handle first_bo unlock and bo_get/put
>>
>> Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
>> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    |  7 +-
>>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 22 +++--
>>   drivers/gpu/drm/ttm/ttm_bo.c                  | 81 +++++++++++++++++--
>>   3 files changed, 99 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index affde72b44db..523773e85284 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -811,7 +811,12 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo 
>> *bo, u32 domain,
>>                    u64 min_offset, u64 max_offset)
>>   {
>>       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>> -    struct ttm_operation_ctx ctx = { false, false };
>> +    struct ttm_operation_ctx ctx = {
>> +        .interruptible = false,
>> +        .no_wait_gpu = false,
>> +        .resv = bo->tbo.resv,
>> +        .flags = 0
>> +    };
>>       int r, i;
>>         if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> index a5cacf846e1b..cc3677c4a4c2 100644
>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> @@ -4101,6 +4101,9 @@ static int dm_plane_helper_prepare_fb(struct 
>> drm_plane *plane,
>>       struct amdgpu_device *adev;
>>       struct amdgpu_bo *rbo;
>>       struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old;
>> +    struct list_head list, duplicates;
>> +    struct ttm_validate_buffer tv;
>> +    struct ww_acquire_ctx ticket;
>>       uint64_t tiling_flags;
>>       uint32_t domain;
>>       int r;
>> @@ -4117,9 +4120,18 @@ static int dm_plane_helper_prepare_fb(struct 
>> drm_plane *plane,
>>       obj = new_state->fb->obj[0];
>>       rbo = gem_to_amdgpu_bo(obj);
>>       adev = amdgpu_ttm_adev(rbo->tbo.bdev);
>> -    r = amdgpu_bo_reserve(rbo, false);
>> -    if (unlikely(r != 0))
>> +    INIT_LIST_HEAD(&list);
>> +    INIT_LIST_HEAD(&duplicates);
>> +
>> +    tv.bo = &rbo->tbo;
>> +    tv.num_shared = 1;
>> +    list_add(&tv.head, &list);
>> +
>> +    r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>> +    if (r) {
>> +        dev_err(adev->dev, "fail to reserve bo (%d)\n", r);
>>           return r;
>> +    }
>>         if (plane->type != DRM_PLANE_TYPE_CURSOR)
>>           domain = amdgpu_display_supported_domains(adev);
>> @@ -4130,21 +4142,21 @@ static int dm_plane_helper_prepare_fb(struct 
>> drm_plane *plane,
>>       if (unlikely(r != 0)) {
>>           if (r != -ERESTARTSYS)
>>               DRM_ERROR("Failed to pin framebuffer with error %d\n", r);
>> -        amdgpu_bo_unreserve(rbo);
>> +        ttm_eu_backoff_reservation(&ticket, &list);
>>           return r;
>>       }
>>         r = amdgpu_ttm_alloc_gart(&rbo->tbo);
>>       if (unlikely(r != 0)) {
>>           amdgpu_bo_unpin(rbo);
>> -        amdgpu_bo_unreserve(rbo);
>> +        ttm_eu_backoff_reservation(&ticket, &list);
>>           DRM_ERROR("%p bind failed\n", rbo);
>>           return r;
>>       }
>>         amdgpu_bo_get_tiling_flags(rbo, &tiling_flags);
>>   -    amdgpu_bo_unreserve(rbo);
>> +    ttm_eu_backoff_reservation(&ticket, &list);
>
> Well I can only repeat myself, please put that into a separate patch!
>
>>         afb->address = amdgpu_bo_gpu_offset(rbo);
>>   diff --git a/drivers/gpu/drm/ttm/ttm_bo.c 
>> b/drivers/gpu/drm/ttm/ttm_bo.c
>> index 8502b3ed2d88..2c4963e105d9 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -766,11 +766,13 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>>    * b. Otherwise, trylock it.
>>    */
>>   static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object 
>> *bo,
>> -            struct ttm_operation_ctx *ctx, bool *locked)
>> +            struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
>>   {
>>       bool ret = false;
>>         *locked = false;
>> +    if (busy)
>> +        *busy = false;
>>       if (bo->resv == ctx->resv) {
>>           reservation_object_assert_held(bo->resv);
>>           if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
>> @@ -779,6 +781,8 @@ static bool ttm_bo_evict_swapout_allowable(struct 
>> ttm_buffer_object *bo,
>>       } else {
>>           *locked = reservation_object_trylock(bo->resv);
>>           ret = *locked;
>> +        if (!ret && busy)
>> +            *busy = true;
>>       }
>>         return ret;
>> @@ -791,7 +795,7 @@ static int ttm_mem_evict_first(struct 
>> ttm_bo_device *bdev,
>>   {
>>       struct ttm_bo_global *glob = bdev->glob;
>>       struct ttm_mem_type_manager *man = &bdev->man[mem_type];
>> -    struct ttm_buffer_object *bo = NULL;
>> +    struct ttm_buffer_object *bo = NULL, *first_bo = NULL;
>>       bool locked = false;
>>       unsigned i;
>>       int ret;
>> @@ -799,8 +803,15 @@ static int ttm_mem_evict_first(struct 
>> ttm_bo_device *bdev,
>>       spin_lock(&glob->lru_lock);
>>       for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>           list_for_each_entry(bo, &man->lru[i], lru) {
>> -            if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
>> +            bool busy = false;
>> +            if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
>> +                                &busy)) {
>> +                if (!first_bo && busy) {
>> +                    ttm_bo_get(bo);
>> +                    first_bo = bo;
>> +                }
>>                   continue;
>> +            }
>>                 if (place && !bdev->driver->eviction_valuable(bo,
>>                                         place)) {
>> @@ -808,6 +819,7 @@ static int ttm_mem_evict_first(struct 
>> ttm_bo_device *bdev,
>>                       reservation_object_unlock(bo->resv);
>>                   continue;
>>               }
>> +
>>               break;
>>           }
>>   @@ -820,7 +832,65 @@ static int ttm_mem_evict_first(struct 
>> ttm_bo_device *bdev,
>>         if (!bo) {
>>           spin_unlock(&glob->lru_lock);
>> -        return -EBUSY;
>> +        /* check if other user occupy memory too long time */
>> +        if (!first_bo || !ctx || !ctx->resv || !ctx->resv->lock.ctx) {
>> +            if (first_bo)
>> +                ttm_bo_put(first_bo);
>> +            return -EBUSY;
>> +        }
>> +        if (ctx->interruptible)
>> +            ret = ww_mutex_lock_interruptible(&first_bo->resv->lock,
>> +                              ctx->resv->lock.ctx);
>> +        else
>> +            ret = ww_mutex_lock(&first_bo->resv->lock, 
>> ctx->resv->lock.ctx);
>> +        if (ret) {
>> +            ttm_bo_put(first_bo);
>> +            return ret;
>> +        }
>> +        spin_lock(&glob->lru_lock);
>> +        for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>> +            /* previous busy resv lock is held by above, idle now,
>> +             * so let them evictable.
>> +             */
>> +            struct ttm_operation_ctx busy_ctx = {
>> +                .interruptible = ctx->interruptible,
>> +                .no_wait_gpu   = ctx->no_wait_gpu,
>> +                .resv           = first_bo->resv,
>> +                .flags           = TTM_OPT_FLAG_ALLOW_RES_EVICT
>> +            };
>> +            list_for_each_entry(bo, &man->lru[i], lru) {
>> +                if (!ttm_bo_evict_swapout_allowable(bo,
>> +                                    &busy_ctx,
>> +                                    &locked,
>> +                                    NULL))
>> +                    continue;
>> +
>> +                if (place && !bdev->driver->eviction_valuable(bo,
>> +                                      place)) {
>> +                    if (locked)
>> +                        reservation_object_unlock(bo->resv);
>> +                    continue;
>> +                }
>> +                break;
>> +            }
>> +            /* If the inner loop terminated early, we have our 
>> candidate */
>> +            if (&bo->lru != &man->lru[i])
>> +                break;
>> +            bo = NULL;
>> +        }
>
> So we are now searching for any BO again which matches the criteria? 
> Good idea.
>
> Can we factor those two loops out into a separate function? I mean 
> they are mostly identical now to the one above, aren't they?
>
>> +        if (bo && (bo->resv == first_bo->resv))
>> +            locked = true;
>> +        else if (bo)
>> +            ww_mutex_unlock(&first_bo->resv->lock);
>> +        ttm_bo_put(first_bo);
>> +        first_bo = NULL;
>> +        if (!bo) {
>> +            spin_unlock(&glob->lru_lock);
>> +            return -EBUSY;
>> +        }
>> +    } else {
>> +        if (first_bo)
>> +            ttm_bo_put(first_bo);
>
> This must come a bit later, since we can't call ttm_bo_put() while 
> holding the spinlock.
>
> Regards,
> Christian.
>
>>       }
>>         kref_get(&bo->list_kref);
>> @@ -1784,7 +1854,8 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, 
>> struct ttm_operation_ctx *ctx)
>>       spin_lock(&glob->lru_lock);
>>       for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>           list_for_each_entry(bo, &glob->swap_lru[i], swap) {
>> -            if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked)) {
>> +            if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
>> +                               NULL)) {
>>                   ret = 0;
>>                   break;
>>               }
>

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

  reply	other threads:[~2019-05-07  9:03 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-30  7:01 [PATCH] drm/ttm: fix busy memory to fail other user v5 Chunming Zhou
2019-04-30  9:10 ` Christian König
2019-05-07  9:03   ` Christian König [this message]
2019-05-07  9:38     ` zhoucm1
  -- strict thread matches above, loose matches on Subject: below --
2019-04-26 10:20 Chunming Zhou

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3ea80c26-d500-2ccf-440c-bdec05bca68b@gmail.com \
    --to=ckoenig.leichtzumerken@gmail.com \
    --cc=Christian.Koenig@amd.com \
    --cc=Prike.Liang@amd.com \
    --cc=david1.zhou@amd.com \
    --cc=dri-devel@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.