All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Marek Olšák" <maraeo-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
To: "Christian König" <deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
Cc: zhoucm1 <david1.zhou-5C7GfCeVMHo@public.gmane.org>,
	"Olsak, Marek" <Marek.Olsak-5C7GfCeVMHo@public.gmane.org>,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Subject: Re: [PATCH 9/9] drm/amdgpu: WIP add IOCTL interface for per VM BOs
Date: Fri, 25 Aug 2017 18:22:12 +0200	[thread overview]
Message-ID: <CAAxE2A5dSR-PY+zZ3VeaT7iiCmj5jfty0hv7XZjz4HgOrApQHw@mail.gmail.com> (raw)
In-Reply-To: <9304342a-def2-187e-4e9c-d872c58cdc17-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>

On Fri, Aug 25, 2017 at 3:00 PM, Christian König
<deathsimple@vodafone.de> wrote:
> Am 25.08.2017 um 12:32 schrieb zhoucm1:
>>
>>
>>
>> On 2017年08月25日 17:38, Christian König wrote:
>>>
>>> From: Christian König <christian.koenig@amd.com>
>>>
>>> Add the IOCTL interface so that applications can allocate per VM BOs.
>>>
>>> Still WIP since not all corner cases are tested yet, but this reduces
>>> average
>>> CS overhead for 10K BOs from 21ms down to 48us.
>>
>> Wow, cheers! You finally got the per-VM BOs to share the same reservation
>> object as the PD/PTs, which indeed saves a lot of BO list handling.
>
>
> Don't cheer too loud yet; that is a completely constructed test case.
>
> So far I wasn't able to achieve any improvements in any real game with
> this on Mesa.
>
> BTW: Marek can you take a look with some CPU bound tests? I can provide a
> kernel branch if necessary.

Do you have a branch that works on Raven? This patch series doesn't,
and I didn't investigate why.

Marek

>
> Regards,
> Christian.
>
>
>> Overall this looks good; I will review it in detail tomorrow.
>>
>> Regards,
>> David Zhou
>>>
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  7 ++--
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  2 +-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 59
>>> ++++++++++++++++++++++---------
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  3 +-
>>>   include/uapi/drm/amdgpu_drm.h             |  2 ++
>>>   5 files changed, 51 insertions(+), 22 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index b1e817c..21cab36 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -457,9 +457,10 @@ struct amdgpu_sa_bo {
>>>    */
>>>   void amdgpu_gem_force_release(struct amdgpu_device *adev);
>>>   int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long
>>> size,
>>> -                int alignment, u32 initial_domain,
>>> -                u64 flags, bool kernel,
>>> -                struct drm_gem_object **obj);
>>> +                 int alignment, u32 initial_domain,
>>> +                 u64 flags, bool kernel,
>>> +                 struct reservation_object *resv,
>>> +                 struct drm_gem_object **obj);
>>>     int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>>>                   struct drm_device *dev,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>> index 0e907ea..7256f83 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>> @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct
>>> amdgpu_fbdev *rfbdev,
>>>                          AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>>                          AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>>>                          AMDGPU_GEM_CREATE_VRAM_CLEARED,
>>> -                       true, &gobj);
>>> +                       true, NULL, &gobj);
>>>       if (ret) {
>>>           pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
>>>           return -ENOMEM;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> index d028806..b8e8d67 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object
>>> *gobj)
>>>   }
>>>     int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned
>>> long size,
>>> -                int alignment, u32 initial_domain,
>>> -                u64 flags, bool kernel,
>>> -                struct drm_gem_object **obj)
>>> +                 int alignment, u32 initial_domain,
>>> +                 u64 flags, bool kernel,
>>> +                 struct reservation_object *resv,
>>> +                 struct drm_gem_object **obj)
>>>   {
>>> -    struct amdgpu_bo *robj;
>>> +    struct amdgpu_bo *bo;
>>>       int r;
>>>         *obj = NULL;
>>> @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device
>>> *adev, unsigned long size,
>>>     retry:
>>>       r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
>>> -                 flags, NULL, NULL, 0, &robj);
>>> +                 flags, NULL, resv, 0, &bo);
>>>       if (r) {
>>>           if (r != -ERESTARTSYS) {
>>>               if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
>>> @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device
>>> *adev, unsigned long size,
>>>           }
>>>           return r;
>>>       }
>>> -    *obj = &robj->gem_base;
>>> +    *obj = &bo->gem_base;
>>>         return 0;
>>>   }
>>> @@ -136,13 +137,14 @@ void amdgpu_gem_object_close(struct drm_gem_object
>>> *obj,
>>>       struct amdgpu_vm *vm = &fpriv->vm;
>>>         struct amdgpu_bo_list_entry vm_pd;
>>> -    struct list_head list;
>>> +    struct list_head list, duplicates;
>>>       struct ttm_validate_buffer tv;
>>>       struct ww_acquire_ctx ticket;
>>>       struct amdgpu_bo_va *bo_va;
>>>       int r;
>>>         INIT_LIST_HEAD(&list);
>>> +    INIT_LIST_HEAD(&duplicates);
>>>         tv.bo = &bo->tbo;
>>>       tv.shared = true;
>>> @@ -150,7 +152,7 @@ void amdgpu_gem_object_close(struct drm_gem_object
>>> *obj,
>>>         amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
>>>   -    r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
>>> +    r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>>>       if (r) {
>>>           dev_err(adev->dev, "leaking bo va because "
>>>               "we fail to reserve bo (%d)\n", r);
>>> @@ -185,9 +187,12 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev,
>>> void *data,
>>>                   struct drm_file *filp)
>>>   {
>>>       struct amdgpu_device *adev = dev->dev_private;
>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>> +    struct amdgpu_vm *vm = &fpriv->vm;
>>>       union drm_amdgpu_gem_create *args = data;
>>>       uint64_t flags = args->in.domain_flags;
>>>       uint64_t size = args->in.bo_size;
>>> +    struct reservation_object *resv = NULL;
>>>       struct drm_gem_object *gobj;
>>>       uint32_t handle;
>>>       int r;
>>> @@ -196,7 +201,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev,
>>> void *data,
>>>       if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>>                 AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>>>                 AMDGPU_GEM_CREATE_CPU_GTT_USWC |
>>> -              AMDGPU_GEM_CREATE_VRAM_CLEARED))
>>> +              AMDGPU_GEM_CREATE_VRAM_CLEARED |
>>> +              AMDGPU_GEM_CREATE_LOCAL))
>>>           return -EINVAL;
>>>         /* reject invalid gem domains */
>>> @@ -223,9 +229,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev,
>>> void *data,
>>>       }
>>>       size = roundup(size, PAGE_SIZE);
>>>   +    if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>>> +        r = amdgpu_bo_reserve(vm->root.base.bo, false);
>>> +        if (r)
>>> +            return r;
>>> +
>>> +        resv = vm->root.base.bo->tbo.resv;
>>> +    }
>>> +
>>>       r = amdgpu_gem_object_create(adev, size, args->in.alignment,
>>>                        (u32)(0xffffffff & args->in.domains),
>>> -                     flags, false, &gobj);
>>> +                     flags, false, resv, &gobj);
>>> +    if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>>> +        if (!r) {
>>> +            struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
>>> +
>>> +            abo->parent = amdgpu_bo_ref(vm->root.base.bo);
>>> +        }
>>> +        amdgpu_bo_unreserve(vm->root.base.bo);
>>> +    }
>>>       if (r)
>>>           return r;
>>>   @@ -267,9 +289,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device
>>> *dev, void *data,
>>>       }
>>>         /* create a gem object to contain this object in */
>>> -    r = amdgpu_gem_object_create(adev, args->size, 0,
>>> -                     AMDGPU_GEM_DOMAIN_CPU, 0,
>>> -                     0, &gobj);
>>> +    r = amdgpu_gem_object_create(adev, args->size, 0,
>>> AMDGPU_GEM_DOMAIN_CPU,
>>> +                     0, 0, NULL, &gobj);
>>>       if (r)
>>>           return r;
>>>   @@ -521,7 +542,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>> void *data,
>>>       struct amdgpu_bo_list_entry vm_pd;
>>>       struct ttm_validate_buffer tv;
>>>       struct ww_acquire_ctx ticket;
>>> -    struct list_head list;
>>> +    struct list_head list, duplicates;
>>>       uint64_t va_flags;
>>>       int r = 0;
>>>   @@ -557,6 +578,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>> void *data,
>>>       }
>>>         INIT_LIST_HEAD(&list);
>>> +    INIT_LIST_HEAD(&duplicates);
>>>       if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
>>>           !(args->flags & AMDGPU_VM_PAGE_PRT)) {
>>>           gobj = drm_gem_object_lookup(filp, args->handle);
>>> @@ -573,7 +595,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void
>>> *data,
>>>         amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
>>>   -    r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
>>> +    r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
>>>       if (r)
>>>           goto error_unref;
>>>   @@ -639,6 +661,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>> void *data,
>>>   int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>>>               struct drm_file *filp)
>>>   {
>>> +    struct amdgpu_device *adev = dev->dev_private;
>>>       struct drm_amdgpu_gem_op *args = data;
>>>       struct drm_gem_object *gobj;
>>>       struct amdgpu_bo *robj;
>>> @@ -686,6 +709,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void
>>> *data,
>>>           if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
>>>               robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
>>>   +        if (robj->flags & AMDGPU_GEM_CREATE_LOCAL)
>>> +            amdgpu_vm_bo_invalidate(adev, robj, true);
>>> +
>>>           amdgpu_bo_unreserve(robj);
>>>           break;
>>>       default:
>>> @@ -715,8 +741,7 @@ int amdgpu_mode_dumb_create(struct drm_file
>>> *file_priv,
>>>       r = amdgpu_gem_object_create(adev, args->size, 0,
>>>                        AMDGPU_GEM_DOMAIN_VRAM,
>>>                        AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
>>> -                     ttm_bo_type_device,
>>> -                     &gobj);
>>> +                     false, NULL, &gobj);
>>>       if (r)
>>>           return -ENOMEM;
>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>> index 5b3f928..f407499 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct
>>> drm_device *dev,
>>>   {
>>>       struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>>>   -    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>>> +        bo->flags & AMDGPU_GEM_CREATE_LOCAL)
>>>           return ERR_PTR(-EPERM);
>>>         return drm_gem_prime_export(dev, gobj, flags);
>>> diff --git a/include/uapi/drm/amdgpu_drm.h
>>> b/include/uapi/drm/amdgpu_drm.h
>>> index d0ee739..05241a6 100644
>>> --- a/include/uapi/drm/amdgpu_drm.h
>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>> @@ -89,6 +89,8 @@ extern "C" {
>>>   #define AMDGPU_GEM_CREATE_SHADOW        (1 << 4)
>>>   /* Flag that allocating the BO should use linear VRAM */
>>>   #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS    (1 << 5)
>>> +/* Flag that BO is local in the VM */
>>> +#define AMDGPU_GEM_CREATE_LOCAL            (1 << 6)
>>>     struct drm_amdgpu_gem_create_in  {
>>>       /** the requested memory size */
>>
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

  parent reply	other threads:[~2017-08-25 16:22 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-25  9:38 [PATCH 1/9] drm/amdgpu: fix amdgpu_vm_bo_map trace point Christian König
     [not found] ` <1503653899-1781-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-25  9:38   ` [PATCH 2/9] drm/amdgpu: fix and cleanup VM ready check Christian König
     [not found]     ` <1503653899-1781-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-28  4:09       ` zhoucm1
2017-08-25  9:38   ` [PATCH 3/9] drm/amdgpu: cleanup GWS, GDS and OA allocation Christian König
     [not found]     ` <1503653899-1781-3-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-28  4:10       ` zhoucm1
2017-08-25  9:38   ` [PATCH 4/9] drm/amdgpu: add bo_va cleared flag again Christian König
     [not found]     ` <1503653899-1781-4-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-28  4:12       ` zhoucm1
     [not found]         ` <d076aae6-5620-dd91-f504-982abb2292dd-5C7GfCeVMHo@public.gmane.org>
2017-08-28 11:57           ` Christian König
2017-08-25  9:38   ` [PATCH 5/9] drm/amdgpu: rework moved handling in the VM Christian König
     [not found]     ` <1503653899-1781-5-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-28  6:51       ` zhoucm1
2017-08-25  9:38   ` [PATCH 6/9] drm/amdgpu: track evicted page tables v2 Christian König
     [not found]     ` <1503653899-1781-6-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-28  6:58       ` zhoucm1
2017-08-25  9:38   ` [PATCH 7/9] drm/amdgpu: rework page directory filling v2 Christian König
     [not found]     ` <1503653899-1781-7-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-28  7:06       ` zhoucm1
2017-08-25  9:38   ` [PATCH 8/9] drm/amdgpu: add support for per VM BOs Christian König
2017-08-25  9:38   ` [PATCH 9/9] drm/amdgpu: WIP add IOCTL interface " Christian König
     [not found]     ` <1503653899-1781-9-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-25 10:32       ` zhoucm1
     [not found]         ` <19c04fac-1fdd-1436-e85c-95dd4ac02b1b-5C7GfCeVMHo@public.gmane.org>
2017-08-25 13:00           ` Christian König
     [not found]             ` <9304342a-def2-187e-4e9c-d872c58cdc17-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-25 16:22               ` Marek Olšák [this message]
     [not found]                 ` <CAAxE2A5dSR-PY+zZ3VeaT7iiCmj5jfty0hv7XZjz4HgOrApQHw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-08-25 19:19                   ` Christian König
     [not found]                     ` <e7d5f9d9-ed3e-2654-9acd-c7339976006f-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-27 10:03                       ` Christian König
     [not found]                         ` <fba30bfa-aa7c-d342-b4b6-85058f5db5bf-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-28  4:21                           ` zhoucm1
     [not found]                             ` <a897738d-ab23-8b43-9b9e-c64f7da5e065-5C7GfCeVMHo@public.gmane.org>
2017-08-28 11:55                               ` Christian König
     [not found]                                 ` <0006623b-f042-dda0-b6a2-425dc568ff03-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-28 12:59                                   ` Zhou, David(ChunMing)
     [not found]                                     ` <MWHPR1201MB0206D4E64E86D7AADA159A47B49E0-3iK1xFAIwjrUF/YbdlDdgWrFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
2017-08-29 13:59                                       ` Christian König
     [not found]                                         ` <8e7b93cf-033b-ac3a-4c81-446db00186f5-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-30  0:27                                           ` Marek Olšák
     [not found]                                             ` <CAAxE2A7bS9e34U-t==udoPMn-YYuP3auY4Ca+dpaboL+ob5J+g-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-08-30 14:58                                               ` Christian König
2017-08-25 21:31       ` Felix Kuehling
     [not found]         ` <3039e134-7ee0-792a-b8ad-f01f86bc1164-5C7GfCeVMHo@public.gmane.org>
2017-08-26 13:20           ` Christian König
2017-08-28  4:08   ` [PATCH 1/9] drm/amdgpu: fix amdgpu_vm_bo_map trace point zhoucm1

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAAxE2A5dSR-PY+zZ3VeaT7iiCmj5jfty0hv7XZjz4HgOrApQHw@mail.gmail.com \
    --to=maraeo-re5jqeeqqe8avxtiumwx3w@public.gmane.org \
    --cc=Marek.Olsak-5C7GfCeVMHo@public.gmane.org \
    --cc=amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org \
    --cc=david1.zhou-5C7GfCeVMHo@public.gmane.org \
    --cc=deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.