From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 665BDC433EF for ; Mon, 23 May 2022 15:52:30 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S238268AbiEWPw3 (ORCPT ); Mon, 23 May 2022 11:52:29 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:53698 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S238159AbiEWPw1 (ORCPT ); Mon, 23 May 2022 11:52:27 -0400 Received: from bhuna.collabora.co.uk (bhuna.collabora.co.uk [46.235.227.227]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C216222B2C for ; Mon, 23 May 2022 08:52:24 -0700 (PDT) Received: from [127.0.0.1] (localhost [127.0.0.1]) (Authenticated sender: bbeckett) with ESMTPSA id 4B3311F43999 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=collabora.com; s=mail; t=1653321143; bh=DA/mN+1BU4Nd9tCudbkLz/qbQo49NA3cNG9K0ROzBek=; h=Date:Subject:To:Cc:References:From:In-Reply-To:From; b=PmHIqMTFfkEfEjXHQhKBoKfR/Bed1U7zykhrRhdueC2AqwcYG4OASMJbFwiX/06mf qitPcNGvzBgqUC1noiRGz6btaGdEAfVjdUC4ZoOHw4MZrI/r+6NGSz+iJylStaFcuM EsSPt3/FGiAI+drcfTA68KRJ0AjTTlNY1mQbU8i3LCde4mpsB0HwKcZP2olVkmSiEb kL9F8YSDitcajaQpC62d/QfNV8nJ67f4ydkJht9xu2NJPN6EyvHRCu6MSca6+dszTJ CbSnlVRWz72TQbiKJ+1DgOVNzem3UtPdT4BOGf4VFtLi/1+uSyE7gx1gAA5E276vdZ p+s5msBEfzXqg== Message-ID: <74e101c2-39c7-a169-f7c4-8947517dde8b@collabora.com> Date: Mon, 23 May 2022 16:52:21 +0100 MIME-Version: 1.0 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Thunderbird/91.8.1 Subject: Re: [PATCH 4/4] drm/i915: internal buffers use ttm backend Content-Language: en-US To: =?UTF-8?Q?Thomas_Hellstr=c3=b6m?= , dri-devel@lists.freedesktop.org, intel-gfx@lists.freedesktop.org, Jani Nikula , Joonas Lahtinen , Rodrigo Vivi , Tvrtko Ursulin , David Airlie , Daniel Vetter Cc: Matthew Auld , linux-kernel@vger.kernel.org References: <20220503191316.1145124-1-bob.beckett@collabora.com> <20220503191316.1145124-5-bob.beckett@collabora.com> <4b4e59cf422819cd9dd18c7c73b7869b99ea4c65.camel@linux.intel.com> From: Robert Beckett In-Reply-To: <4b4e59cf422819cd9dd18c7c73b7869b99ea4c65.camel@linux.intel.com> Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 8bit Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On 11/05/2022 15:14, Thomas Hellström wrote: > On Tue, 2022-05-03 at 19:13 +0000, Robert Beckett wrote: >> refactor internal buffer backend to allocate volatile pages via >> ttm pool allocator >> >> Signed-off-by: Robert Beckett >> --- >>  drivers/gpu/drm/i915/gem/i915_gem_internal.c | 264 ++++++++--------- >> -- >>  drivers/gpu/drm/i915/gem/i915_gem_internal.h |   5 - >>  drivers/gpu/drm/i915/gem/i915_gem_ttm.c      |  12 +- >>  drivers/gpu/drm/i915/gem/i915_gem_ttm.h      |  12 +- >>  4 files changed, 125 insertions(+), 168 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c >> b/drivers/gpu/drm/i915/gem/i915_gem_internal.c >> index c698f95af15f..815ec9466cc0 100644 >> --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c >> @@ -4,156 +4,119 @@ >>   * Copyright © 2014-2016 Intel Corporation >>   */ >> >> -#include >> -#include >> -#include >> - >> +#include >> +#include >> +#include "drm/ttm/ttm_bo_api.h" >> +#include "gem/i915_gem_internal.h" >> +#include 
"gem/i915_gem_region.h" >> +#include "gem/i915_gem_ttm.h" >>  #include "i915_drv.h" >> -#include "i915_gem.h" >> -#include "i915_gem_internal.h" >> -#include "i915_gem_object.h" >> -#include "i915_scatterlist.h" >> -#include "i915_utils.h" >> - >> -#define QUIET (__GFP_NORETRY | __GFP_NOWARN) >> -#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN) >> - >> -static void internal_free_pages(struct sg_table *st) >> -{ >> -       struct scatterlist *sg; >> - >> -       for (sg = st->sgl; sg; sg = __sg_next(sg)) { >> -               if (sg_page(sg)) >> -                       __free_pages(sg_page(sg), get_order(sg- >>> length)); >> -       } >> - >> -       sg_free_table(st); >> -       kfree(st); >> -} >> >> -static int i915_gem_object_get_pages_internal(struct >> drm_i915_gem_object *obj) >> +static int i915_internal_get_pages(struct drm_i915_gem_object *obj) >>  { >> -       struct drm_i915_private *i915 = to_i915(obj->base.dev); >> -       struct sg_table *st; >> -       struct scatterlist *sg; >> -       unsigned int sg_page_sizes; >> -       unsigned int npages; >> -       int max_order; >> -       gfp_t gfp; >> - >> -       max_order = MAX_ORDER; >> -#ifdef CONFIG_SWIOTLB >> -       if (is_swiotlb_active(obj->base.dev->dev)) { >> -               unsigned int max_segment; >> - >> -               max_segment = swiotlb_max_segment(); >> -               if (max_segment) { >> -                       max_segment = max_t(unsigned int, >> max_segment, >> -                                           PAGE_SIZE) >> PAGE_SHIFT; >> -                       max_order = min(max_order, >> ilog2(max_segment)); >> -               } >> +       struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); >> +       struct ttm_operation_ctx ctx = { >> +               .interruptible = true, >> +               .no_wait_gpu = false, >> +       }; >> +       struct ttm_place place = { >> +               .fpfn = 0, >> +               .lpfn = 0, >> +               .mem_type = I915_PL_SYSTEM, >> +               .flags = 0, >> +       }; >> +       struct ttm_placement placement = { >> +               .num_placement = 1, >> +               .placement = &place, >> +               .num_busy_placement = 0, >> +               .busy_placement = NULL, >> +       }; >> +       int ret; >> + >> +       ret = ttm_bo_validate(bo, &placement, &ctx); >> +       if (ret) { >> +               ret = i915_ttm_err_to_gem(ret); >> +               return ret; >>         } >> -#endif >> >> -       gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; >> -       if (IS_I965GM(i915) || IS_I965G(i915)) { >> -               /* 965gm cannot relocate objects above 4GiB. */ >> -               gfp &= ~__GFP_HIGHMEM; >> -               gfp |= __GFP_DMA32; > > > It looks like we're losing this restriction? > > There is a flag to ttm_device_init() to make TTM only do __GFP_DMA32 > allocations. agreed. 
will fix for v2 > >> +       if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) { >> +               ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx); >> +               if (ret) >> +                       return ret; >>         } >> >> -create_st: >> -       st = kmalloc(sizeof(*st), GFP_KERNEL); >> -       if (!st) >> -               return -ENOMEM; >> +       if (!i915_gem_object_has_pages(obj)) { >> +               struct i915_refct_sgt *rsgt = >> +                       i915_ttm_resource_get_st(obj, bo->resource); >> >> -       npages = obj->base.size / PAGE_SIZE; >> -       if (sg_alloc_table(st, npages, GFP_KERNEL)) { >> -               kfree(st); >> -               return -ENOMEM; >> -       } >> +               if (IS_ERR(rsgt)) >> +                       return PTR_ERR(rsgt); >> >> -       sg = st->sgl; >> -       st->nents = 0; >> -       sg_page_sizes = 0; >> - >> -       do { >> -               int order = min(fls(npages) - 1, max_order); >> -               struct page *page; >> - >> -               do { >> -                       page = alloc_pages(gfp | (order ? QUIET : >> MAYFAIL), >> -                                          order); >> -                       if (page) >> -                               break; >> -                       if (!order--) >> -                               goto err; >> - >> -                       /* Limit subsequent allocations as well */ >> -                       max_order = order; >> -               } while (1); >> - >> -               sg_set_page(sg, page, PAGE_SIZE << order, 0); >> -               sg_page_sizes |= PAGE_SIZE << order; >> -               st->nents++; >> - >> -               npages -= 1 << order; >> -               if (!npages) { >> -                       sg_mark_end(sg); >> -                       break; >> -               } >> - >> -               sg = __sg_next(sg); >> -       } while (1); >> - >> -       if (i915_gem_gtt_prepare_pages(obj, st)) { >> -               /* Failed to dma-map try again with single page sg >> segments */ >> -               if (get_order(st->sgl->length)) { >> -                       internal_free_pages(st); >> -                       max_order = 0; >> -                       goto create_st; >> -               } >> -               goto err; >> +               GEM_BUG_ON(obj->mm.rsgt); >> +               obj->mm.rsgt = rsgt; >> +               __i915_gem_object_set_pages(obj, &rsgt->table, >> +                                           i915_sg_dma_sizes(rsgt- >>> table.sgl)); >>         } >> >> -       __i915_gem_object_set_pages(obj, st, sg_page_sizes); >> +       GEM_BUG_ON(bo->ttm && ((obj->base.size >> PAGE_SHIFT) < bo- >>> ttm->num_pages)); >> +       i915_ttm_adjust_lru(obj); >> >>         return 0; >> +} >> >> -err: >> -       sg_set_page(sg, NULL, 0, 0); >> -       sg_mark_end(sg); >> -       internal_free_pages(st); >> +static const struct drm_i915_gem_object_ops >> i915_gem_object_internal_ops = { >> +       .name = "i915_gem_object_ttm", >> +       .flags = I915_GEM_OBJECT_IS_SHRINKABLE, >> >> -       return -ENOMEM; >> -} >> +       .get_pages = i915_internal_get_pages, >> +       .put_pages = i915_ttm_put_pages, >> +       .adjust_lru = i915_ttm_adjust_lru, >> +       .delayed_free = i915_ttm_delayed_free, >> +}; >> >> -static void i915_gem_object_put_pages_internal(struct >> drm_i915_gem_object *obj, >> -                                              struct sg_table >> *pages) >> +void i915_ttm_internal_bo_destroy(struct ttm_buffer_object *bo) >>  { >> -       
i915_gem_gtt_finish_pages(obj, pages); >> -       internal_free_pages(pages); >> +       struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); >> >> -       obj->mm.dirty = false; >> +       mutex_destroy(&obj->ttm.get_io_page.lock); >> >> -       __start_cpu_write(obj); >> -} >> +       if (obj->ttm.created) { >> +               /* This releases all gem object bindings to the >> backend. */ >> +               __i915_gem_free_object(obj); >> >> -static const struct drm_i915_gem_object_ops >> i915_gem_object_internal_ops = { >> -       .name = "i915_gem_object_internal", >> -       .flags = I915_GEM_OBJECT_IS_SHRINKABLE, >> -       .get_pages = i915_gem_object_get_pages_internal, >> -       .put_pages = i915_gem_object_put_pages_internal, >> -}; >> +               call_rcu(&obj->rcu, __i915_gem_free_object_rcu); >> +       } else { >> +               __i915_gem_object_fini(obj); >> +       } >> +} >> >> +/** >> + * i915_gem_object_create_internal: create an object with volatile >> pages >> + * @i915: the i915 device >> + * @size: the size in bytes of backing storage to allocate for the >> object >> + * >> + * Creates a new object that wraps some internal memory for private >> use. >> + * This object is not backed by swappable storage, and as such its >> contents >> + * are volatile and only valid whilst pinned. If the object is >> reaped by the >> + * shrinker, its pages and data will be discarded. Equally, it is >> not a full >> + * GEM object and so not valid for access from userspace. This makes >> it useful >> + * for hardware interfaces like ringbuffers (which are pinned from >> the time >> + * the request is written to the time the hardware stops accessing >> it), but >> + * not for contexts (which need to be preserved when not active for >> later >> + * reuse). Note that it is not cleared upon allocation. >> + */ >>  struct drm_i915_gem_object * >> -__i915_gem_object_create_internal(struct drm_i915_private *i915, >> -                                 const struct >> drm_i915_gem_object_ops *ops, >> -                                 phys_addr_t size) >> +i915_gem_object_create_internal(struct drm_i915_private *i915, >> +                               phys_addr_t size) >>  { >>         static struct lock_class_key lock_class; >>         struct drm_i915_gem_object *obj; >>         unsigned int cache_level; >> +       struct ttm_operation_ctx ctx = { >> +               .interruptible = true, >> +               .no_wait_gpu = false, >> +       }; >> +       int ret; >> >>         GEM_BUG_ON(!size); >>         GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); >> @@ -166,45 +129,34 @@ __i915_gem_object_create_internal(struct >> drm_i915_private *i915, >>                 return ERR_PTR(-ENOMEM); >> >>         drm_gem_private_object_init(&i915->drm, &obj->base, size); >> -       i915_gem_object_init(obj, ops, &lock_class, 0); >> -       obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; >> +       i915_gem_object_init(obj, &i915_gem_object_internal_ops, >> &lock_class, >> +                            I915_BO_ALLOC_VOLATILE); >> + >> +       INIT_LIST_HEAD(&obj->mm.region_link); >> + >> +       INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | >> __GFP_NOWARN); >> +       mutex_init(&obj->ttm.get_io_page.lock); >> >> -       /* >> -        * Mark the object as volatile, such that the pages are >> marked as >> -        * dontneed whilst they are still pinned. 
As soon as they are >> unpinned >> -        * they are allowed to be reaped by the shrinker, and the >> caller is >> -        * expected to repopulate - the contents of this object are >> only valid >> -        * whilst active and pinned. >> -        */ >> -       i915_gem_object_set_volatile(obj); >> +       obj->base.vma_node.driver_private = i915_gem_to_ttm(obj); >> >> +       ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), >> size, >> +                                  ttm_bo_type_kernel, >> i915_ttm_sys_placement(), >> +                                  0, &ctx, NULL, NULL, >> i915_ttm_internal_bo_destroy); >> +       if (ret) { >> +               ret = i915_ttm_err_to_gem(ret); >> +               i915_gem_object_free(obj); >> +               return ERR_PTR(ret); >> +       } >> + >> +       obj->ttm.created = true; >>         obj->read_domains = I915_GEM_DOMAIN_CPU; >>         obj->write_domain = I915_GEM_DOMAIN_CPU; >> - >> +       obj->mem_flags &= ~I915_BO_FLAG_IOMEM; >> +       obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; >>         cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : >> I915_CACHE_NONE; >>         i915_gem_object_set_cache_coherency(obj, cache_level); >> +       i915_gem_object_unlock(obj); >> >>         return obj; >>  } >> >> -/** >> - * i915_gem_object_create_internal: create an object with volatile >> pages >> - * @i915: the i915 device >> - * @size: the size in bytes of backing storage to allocate for the >> object >> - * >> - * Creates a new object that wraps some internal memory for private >> use. >> - * This object is not backed by swappable storage, and as such its >> contents >> - * are volatile and only valid whilst pinned. If the object is >> reaped by the >> - * shrinker, its pages and data will be discarded. Equally, it is >> not a full >> - * GEM object and so not valid for access from userspace. This makes >> it useful >> - * for hardware interfaces like ringbuffers (which are pinned from >> the time >> - * the request is written to the time the hardware stops accessing >> it), but >> - * not for contexts (which need to be preserved when not active for >> later >> - * reuse). Note that it is not cleared upon allocation. >> - */ >> -struct drm_i915_gem_object * >> -i915_gem_object_create_internal(struct drm_i915_private *i915, >> -                               phys_addr_t size) >> -{ >> -       return __i915_gem_object_create_internal(i915, >> &i915_gem_object_internal_ops, size); > > While we don't have a TTM shmem backend ready yet for internal, > > Did you consider setting up just yet another region, > INTEL_REGION_INTERNAL, > .class = INTEL_MEMORY_SYSTEM and > .instance = 1, > > And make it create a TTM system region on integrated, and use > same region as INTEL_REGION_SMEM on dgfx. > > I think ttm should automatically map that to I915_PL_SYSTEM and the > backwards mapping in i915_ttm_region() should never get called since > the object is never moved. > > Then I figure it should suffice to just call > __i915_gem_ttm_object_init() and we could drop a lot of code. > I briefly considered using a new fake region, but with the current precedent of mapping memory regions to real, segmented memory areas, I considered it an abuse of the semantics of memory regions. If we are happy to have fake regions, I can revert to a previous design that uses the system region for discrete and adds a fake region setup for integrated. Would this be preferred over the current design? 
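
(Rough sketch only of the fake-region variant being discussed; INTEL_REGION_INTERNAL, the table entry and the create path below are assumptions for illustration, not an agreed interface.)

/* intel_memory_region.c: alias system memory with a second instance */
static const struct {
	u16 class;
	u16 instance;
} intel_region_map[] = {
	[INTEL_REGION_SMEM] = {
		.class = INTEL_MEMORY_SYSTEM,
		.instance = 0,
	},
	[INTEL_REGION_INTERNAL] = {	/* hypothetical: volatile internal objects */
		.class = INTEL_MEMORY_SYSTEM,
		.instance = 1,
	},
	/* ... LMEM/STOLEN entries unchanged ... */
};

struct drm_i915_gem_object *
i915_gem_object_create_internal(struct drm_i915_private *i915, phys_addr_t size)
{
	/* on dgfx the internal region would simply alias INTEL_REGION_SMEM */
	enum intel_region_id id = IS_DGFX(i915) ? INTEL_REGION_SMEM :
						  INTEL_REGION_INTERNAL;

	/* flags are an assumption: volatile pages via the common TTM object init */
	return i915_gem_object_create_region(i915->mm.regions[id], size, 0,
					     I915_BO_ALLOC_VOLATILE);
}

That would keep internal objects on the common __i915_gem_ttm_object_init() path and let most of the dedicated internal ops table above go away.
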
> /Thomas > > > > >> -} >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.h >> b/drivers/gpu/drm/i915/gem/i915_gem_internal.h >> index 6664e06112fc..524e1042b20f 100644 >> --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.h >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.h >> @@ -15,9 +15,4 @@ struct drm_i915_private; >>  struct drm_i915_gem_object * >>  i915_gem_object_create_internal(struct drm_i915_private *i915, >>                                 phys_addr_t size); >> -struct drm_i915_gem_object * >> -__i915_gem_object_create_internal(struct drm_i915_private *i915, >> -                                 const struct >> drm_i915_gem_object_ops *ops, >> -                                 phys_addr_t size); >> - >>  #endif /* __I915_GEM_INTERNAL_H__ */ >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c >> b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c >> index fdb3a1c18cb6..92195ead8c11 100644 >> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c >> @@ -83,7 +83,7 @@ struct ttm_placement *i915_ttm_sys_placement(void) >>         return &i915_sys_placement; >>  } >> >> -static int i915_ttm_err_to_gem(int err) >> +int i915_ttm_err_to_gem(int err) >>  { >>         /* Fastpath */ >>         if (likely(!err)) >> @@ -745,8 +745,8 @@ struct ttm_device_funcs *i915_ttm_driver(void) >>         return &i915_ttm_bo_driver; >>  } >> >> -static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, >> -                               struct ttm_placement *placement) >> +int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, >> +                        struct ttm_placement *placement) >>  { >>         struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); >>         struct ttm_operation_ctx ctx = { >> @@ -871,8 +871,8 @@ static int i915_ttm_migrate(struct >> drm_i915_gem_object *obj, >>         return __i915_ttm_migrate(obj, mr, obj->flags); >>  } >> >> -static void i915_ttm_put_pages(struct drm_i915_gem_object *obj, >> -                              struct sg_table *st) >> +void i915_ttm_put_pages(struct drm_i915_gem_object *obj, >> +                       struct sg_table *st) >>  { >>         /* >>          * We're currently not called from a shrinker, so put_pages() >> @@ -995,7 +995,7 @@ void i915_ttm_adjust_lru(struct >> drm_i915_gem_object *obj) >>   * it's not idle, and using the TTM destroyed list handling could >> help us >>   * benefit from that. >>   */ >> -static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) >> +void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) >>  { >>         GEM_BUG_ON(!obj->ttm.created); >> >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h >> b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h >> index 73e371aa3850..06701c46d8e2 100644 >> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h >> @@ -26,6 +26,7 @@ i915_gem_to_ttm(struct drm_i915_gem_object *obj) >>   * i915 ttm gem object destructor. Internal use only. 
>>   */ >>  void i915_ttm_bo_destroy(struct ttm_buffer_object *bo); >> +void i915_ttm_internal_bo_destroy(struct ttm_buffer_object *bo); >> >>  /** >>   * i915_ttm_to_gem - Convert a struct ttm_buffer_object to an >> embedding >> @@ -37,8 +38,10 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object >> *bo); >>  static inline struct drm_i915_gem_object * >>  i915_ttm_to_gem(struct ttm_buffer_object *bo) >>  { >> -       if (bo->destroy != i915_ttm_bo_destroy) >> +       if (bo->destroy != i915_ttm_bo_destroy && >> +           bo->destroy != i915_ttm_internal_bo_destroy) { >>                 return NULL; >> +       } >> >>         return container_of(bo, struct drm_i915_gem_object, >> __do_not_access); >>  } >> @@ -66,6 +69,7 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object >> *obj, >>                          struct ttm_resource *res); >> >>  void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); >> +void i915_ttm_delayed_free(struct drm_i915_gem_object *obj); >> >>  int i915_ttm_purge(struct drm_i915_gem_object *obj); >> >> @@ -92,4 +96,10 @@ static inline bool i915_ttm_cpu_maps_iomem(struct >> ttm_resource *mem) >>         /* Once / if we support GGTT, this is also false for cached >> ttm_tts */ >>         return mem->mem_type != I915_PL_SYSTEM; >>  } >> + >> +int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, >> +                        struct ttm_placement *placement); >> +void i915_ttm_put_pages(struct drm_i915_gem_object *obj, struct >> sg_table *st); >> +int i915_ttm_err_to_gem(int err); >> + >>  #endif > >