From: Matthew Auld <matthew.william.auld@gmail.com>
To: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
Cc: "Intel Graphics Development" <intel-gfx@lists.freedesktop.org>,
	"amd-gfx list" <amd-gfx@lists.freedesktop.org>,
	"ML dri-devel" <dri-devel@lists.freedesktop.org>,
	tzimmermann@suse.de, alexander.deucher@amd.com,
	"Christian König" <christian.koenig@amd.com>,
	"Matthew Auld" <matthew.auld@intel.com>
Subject: Re: [PATCH v11 5/5] drm/amdgpu: add drm buddy support to amdgpu
Date: Fri, 28 Jan 2022 14:18:38 +0000
Message-ID: <CAM0jSHOdMDQvWxGyHfW01UAe-x_eefFQSJnzU43=t6qL_Ec77g@mail.gmail.com>
In-Reply-To: <20220127141107.173692-5-Arunpravin.PaneerSelvam@amd.com>

On Thu, 27 Jan 2022 at 14:11, Arunpravin
<Arunpravin.PaneerSelvam@amd.com> wrote:
>
> - Remove drm_mm references and replace with drm buddy functionalities
> - Add res cursor support for drm buddy
>
> v2(Matthew Auld):
>   - replace spinlock with mutex as we call kmem_cache_zalloc
>     (..., GFP_KERNEL) in the drm_buddy_alloc() function, which may sleep
>
>   - lock the drm_buddy_block_trim() function as the mark_free/mark_split
>     helpers it calls operate on globally visible state
>
> v3(Matthew Auld):
>   - remove the trim method error handling as the failure case is
>     handled inside drm_buddy_block_trim()
>
> v4:
>   - fix warnings reported by kernel test robot <lkp@intel.com>
>
> v5:
>   - fix merge conflict issue
>
> v6:
>   - fix warnings reported by kernel test robot <lkp@intel.com>
>
> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
> ---
>  drivers/gpu/drm/Kconfig                       |   1 +
>  .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    |  97 +++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   7 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 259 ++++++++++--------
>  4 files changed, 231 insertions(+), 133 deletions(-)

<snip>

>
> -/**
> - * amdgpu_vram_mgr_virt_start - update virtual start address
> - *
> - * @mem: ttm_resource to update
> - * @node: just allocated node
> - *
> - * Calculate a virtual BO start address to easily check if everything is CPU
> - * accessible.
> - */
> -static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
> -                                      struct drm_mm_node *node)
> -{
> -       unsigned long start;
> -
> -       start = node->start + node->size;
> -       if (start > mem->num_pages)
> -               start -= mem->num_pages;
> -       else
> -               start = 0;
> -       mem->start = max(mem->start, start);
> -}
> -
>  /**
>   * amdgpu_vram_mgr_new - allocate new ranges
>   *
> @@ -366,13 +357,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>                                const struct ttm_place *place,
>                                struct ttm_resource **res)
>  {
> -       unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
> +       unsigned long lpfn, pages_per_node, pages_left, pages, n_pages;
> +       u64 vis_usage = 0, mem_bytes, max_bytes, min_page_size;
>         struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>         struct amdgpu_device *adev = to_amdgpu_device(mgr);
> -       uint64_t vis_usage = 0, mem_bytes, max_bytes;
> -       struct ttm_range_mgr_node *node;
> -       struct drm_mm *mm = &mgr->mm;
> -       enum drm_mm_insert_mode mode;
> +       struct amdgpu_vram_mgr_node *node;
> +       struct drm_buddy *mm = &mgr->mm;
> +       struct drm_buddy_block *block;
>         unsigned i;
>         int r;
>
> @@ -391,10 +382,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>                 goto error_sub;
>         }
>
> -       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>                 pages_per_node = ~0ul;
> -               num_nodes = 1;
> -       } else {
> +       else {
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>                 pages_per_node = HPAGE_PMD_NR;
>  #else
> @@ -403,11 +393,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>  #endif
>                 pages_per_node = max_t(uint32_t, pages_per_node,
>                                        tbo->page_alignment);
> -               num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
>         }
>
> -       node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
> -                       GFP_KERNEL | __GFP_ZERO);
> +       node = kzalloc(sizeof(*node), GFP_KERNEL);
>         if (!node) {
>                 r = -ENOMEM;
>                 goto error_sub;
> @@ -415,9 +403,17 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>
>         ttm_resource_init(tbo, place, &node->base);
>
> -       mode = DRM_MM_INSERT_BEST;
> +       INIT_LIST_HEAD(&node->blocks);
> +
>         if (place->flags & TTM_PL_FLAG_TOPDOWN)
> -               mode = DRM_MM_INSERT_HIGH;
> +               node->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
> +
> +       if (place->fpfn || lpfn != man->size)
> +               /* Allocate blocks in desired range */
> +               node->flags |= DRM_BUDDY_RANGE_ALLOCATION;
> +
> +       min_page_size = mgr->default_page_size;
> +       BUG_ON(min_page_size < mm->chunk_size);
>
>         pages_left = node->base.num_pages;
>
> @@ -425,36 +421,61 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>         pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
>
>         i = 0;
> -       spin_lock(&mgr->lock);
>         while (pages_left) {
> -               uint32_t alignment = tbo->page_alignment;
> -
>                 if (pages >= pages_per_node)
> -                       alignment = pages_per_node;
> -
> -               r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
> -                                               alignment, 0, place->fpfn,
> -                                               lpfn, mode);
> -               if (unlikely(r)) {
> -                       if (pages > pages_per_node) {
> -                               if (is_power_of_2(pages))
> -                                       pages = pages / 2;
> -                               else
> -                                       pages = rounddown_pow_of_two(pages);
> -                               continue;
> -                       }
> -                       goto error_free;
> +                       pages = pages_per_node;
> +
> +               n_pages = pages;
> +
> +               if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
> +                       n_pages = roundup_pow_of_two(n_pages);
> +                       min_page_size = (u64)n_pages << PAGE_SHIFT;
> +
> +                       if (n_pages > lpfn)
> +                               lpfn = n_pages;
>                 }
>
> -               vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]);
> -               amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]);
> +               mutex_lock(&mgr->lock);
> +               r = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
> +                                          (u64)lpfn << PAGE_SHIFT,
> +                                          (u64)n_pages << PAGE_SHIFT,
> +                                          min_page_size,
> +                                          &node->blocks,
> +                                          node->flags);
> +               mutex_unlock(&mgr->lock);
> +               if (unlikely(r))
> +                       goto error_free_blocks;
> +
>                 pages_left -= pages;
>                 ++i;
>
>                 if (pages > pages_left)
>                         pages = pages_left;
>         }
> -       spin_unlock(&mgr->lock);
> +
> +       /* Free unused pages for contiguous allocation */
> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
> +               u64 actual_size = (u64)node->base.num_pages << PAGE_SHIFT;
> +
> +               mutex_lock(&mgr->lock);
> +               drm_buddy_block_trim(mm,
> +                                    actual_size,
> +                                    &node->blocks);
> +               mutex_unlock(&mgr->lock);
> +       }
> +
> +       list_for_each_entry(block, &node->blocks, link)
> +               vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
> +
> +       block = list_first_entry_or_null(&node->blocks,
> +                                        struct drm_buddy_block,
> +                                        link);
> +       if (!block) {
> +               r = -ENOENT;
> +               goto error_free_res;
> +       }
> +
> +       node->base.start = amdgpu_node_start(block) >> PAGE_SHIFT;

Hmm, does this work? It looks like there are various places checking
that res->start + res->num_pages <= visible_size, which IIUC should
only return true when the entire object is placed in the mappable
portion. i915 is doing something similar. Also it looks like
ttm_resource_compat() is potentially relying on this, like when moving
something from non-mappable -> mappable in
amdgpu_bo_fault_reserve_notify()?
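
For reference, the check I have in mind looks roughly like this (from
amdgpu_bo_fault_reserve_notify(), paraphrased from memory rather than
quoted verbatim):

    size = bo->resource->num_pages << PAGE_SHIFT;
    offset = bo->resource->start << PAGE_SHIFT;
    /* only true if the whole BO fits inside CPU-visible VRAM */
    if ((offset + size) <= adev->gmc.visible_vram_size)
        return 0;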

Perhaps something like (with vis_usage in bytes and start in pages):

if (vis_usage == (u64)node->base.num_pages << PAGE_SHIFT)
    node->base.start = 0;
else
    node->base.start = adev->gmc.visible_vram_size >> PAGE_SHIFT;

Otherwise I guess just keep amdgpu_vram_mgr_virt_start()?
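
i.e. keep the same trick, just applied per buddy block (untested sketch,
assuming the amdgpu_node_start()/amdgpu_node_size() helpers from patch
4/5):

    static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
                                           struct drm_buddy_block *block)
    {
            u64 start = amdgpu_node_start(block) + amdgpu_node_size(block);

            /* same logic as the old drm_mm version, converted to pages */
            start >>= PAGE_SHIFT;
            if (start > mem->num_pages)
                    start -= mem->num_pages;
            else
                    start = 0;
            mem->start = max_t(unsigned long, mem->start, start);
    }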


Thread overview: 34+ messages
2022-01-27 14:11 [PATCH v11 1/5] drm: improve drm_buddy_alloc function Arunpravin
2022-01-27 14:11 ` [PATCH v11 2/5] drm: implement top-down allocation method Arunpravin
2022-01-27 14:11 ` [PATCH v11 3/5] drm: implement a method to free unused pages Arunpravin
2022-01-27 14:11 ` [PATCH v11 4/5] drm/amdgpu: move vram inline functions into a header Arunpravin
2022-01-27 14:11 ` [PATCH v11 5/5] drm/amdgpu: add drm buddy support to amdgpu Arunpravin
2022-01-28 14:18   ` Matthew Auld [this message]
2022-02-04 11:22     ` Arunpravin
2022-02-04 13:23       ` Christian König
2022-02-08 11:20         ` Arunpravin
2022-02-10 17:52           ` Matthew Auld
2022-01-27 15:52 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [v11,1/5] drm: improve drm_buddy_alloc function Patchwork
2022-01-27 16:23 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2022-01-27 21:45 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
2022-02-10 16:10 ` [PATCH v11 1/5] " Matthew Auld
2022-02-11  9:27   ` Arunpravin
