* [PATCH v11 1/5] drm: improve drm_buddy_alloc function
@ 2022-01-27 14:11 ` Arunpravin
  0 siblings, 0 replies; 34+ messages in thread
From: Arunpravin @ 2022-01-27 14:11 UTC (permalink / raw)
  To: dri-devel, intel-gfx, amd-gfx
  Cc: Arunpravin, matthew.auld, tzimmermann, alexander.deucher,
	christian.koenig

- Make drm_buddy_alloc() a single function that handles both
  range allocation and non-range allocation requests (see the
  usage sketch below)

- Implemented a new function alloc_range() which allocates
  the requested power-of-two block, complying with the range
  limitations

- Moved the order computation and memory alignment logic from
  the i915 driver to drm buddy
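
For illustration, a minimal caller sketch of the unified interface
(hypothetical driver code, not part of this patch; the size and
flag values are assumptions):

	/*
	 * Allocate 8 MiB with no range restriction (flags = 0), so
	 * the allocator takes the freelist path rather than the
	 * range path, then free the blocks again. Error handling
	 * is trimmed for brevity.
	 */
	LIST_HEAD(blocks);
	int err;

	err = drm_buddy_alloc_blocks(mm,
				     0, mm->size,	/* start, end */
				     8 << 20,		/* size */
				     mm->chunk_size,	/* min_page_size */
				     &blocks,
				     0);
	if (!err)
		drm_buddy_free_list(mm, &blocks);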

v2:
  merged the changes below to keep the build unbroken
   - drm_buddy_alloc_range() becomes obsolete and may be removed
   - enable ttm range allocation (fpfn / lpfn) support in the i915 driver
   - apply the enhanced drm_buddy_alloc() function to the i915 driver

v3(Matthew Auld):
  - Fix alignment issues and remove an unnecessary list_empty check
  - add more validation checks for the input arguments
  - make alloc_range() allocate blocks bottom-up
  - optimize the order computation logic
  - replace uint64_t with u64, which is preferred in the kernel

v4(Matthew Auld):
  - keep the drm_buddy_alloc_range() implementation for generic
    actual-range allocations
  - keep the alloc_range() implementation for end-bias allocations

v5(Matthew Auld):
  - modify drm_buddy_alloc() to pass place->lpfn to the lpfn
    argument, as place->lpfn will currently always be zero for i915

v6(Matthew Auld):
  - fixup potential uaf - if we are unlucky and can't allocate
    enough memory when splitting blocks, we temporarily end up
    with the given block and its buddy on the respective free
    list; we then need to ensure we delete both blocks, and
    not just the buddy, before potentially freeing them

  - fix warnings reported by kernel test robot <lkp@intel.com>

v7(Matthew Auld):
  - revert the potential-uaf fixup
  - keep __alloc_range()'s add-node-to-the-list logic the same as
    drm_buddy_alloc_blocks() by using a temporary list variable
  - in drm_buddy_alloc_blocks(), keep the i915 range_overflows
    macro and add a new check for the end variable

v8:
  - fix warnings reported by kernel test robot <lkp@intel.com>

Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
---
 drivers/gpu/drm/drm_buddy.c                   | 315 +++++++++++++-----
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |  67 ++--
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.h |   2 +
 include/drm/drm_buddy.h                       |  13 +-
 4 files changed, 280 insertions(+), 117 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index d60878bc9c20..cfc160a1ef1a 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -282,23 +282,97 @@ void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects)
 }
 EXPORT_SYMBOL(drm_buddy_free_list);
 
-/**
- * drm_buddy_alloc_blocks - allocate power-of-two blocks
- *
- * @mm: DRM buddy manager to allocate from
- * @order: size of the allocation
- *
- * The order value here translates to:
- *
- * 0 = 2^0 * mm->chunk_size
- * 1 = 2^1 * mm->chunk_size
- * 2 = 2^2 * mm->chunk_size
- *
- * Returns:
- * allocated ptr to the &drm_buddy_block on success
- */
-struct drm_buddy_block *
-drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order)
+static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
+{
+	return s1 <= e2 && e1 >= s2;
+}
+
+static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
+{
+	return s1 <= s2 && e1 >= e2;
+}
+
+static struct drm_buddy_block *
+alloc_range_bias(struct drm_buddy *mm,
+		 u64 start, u64 end,
+		 unsigned int order)
+{
+	struct drm_buddy_block *block;
+	struct drm_buddy_block *buddy;
+	LIST_HEAD(dfs);
+	int err;
+	int i;
+
+	end = end - 1;
+
+	for (i = 0; i < mm->n_roots; ++i)
+		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
+
+	do {
+		u64 block_start;
+		u64 block_end;
+
+		block = list_first_entry_or_null(&dfs,
+						 struct drm_buddy_block,
+						 tmp_link);
+		if (!block)
+			break;
+
+		list_del(&block->tmp_link);
+
+		if (drm_buddy_block_order(block) < order)
+			continue;
+
+		block_start = drm_buddy_block_offset(block);
+		block_end = block_start + drm_buddy_block_size(mm, block) - 1;
+
+		if (!overlaps(start, end, block_start, block_end))
+			continue;
+
+		if (drm_buddy_block_is_allocated(block))
+			continue;
+
+		if (contains(start, end, block_start, block_end) &&
+		    order == drm_buddy_block_order(block)) {
+			/*
+			 * Find the free block within the range.
+			 */
+			if (drm_buddy_block_is_free(block))
+				return block;
+
+			continue;
+		}
+
+		if (!drm_buddy_block_is_split(block)) {
+			err = split_block(mm, block);
+			if (unlikely(err))
+				goto err_undo;
+		}
+
+		list_add(&block->right->tmp_link, &dfs);
+		list_add(&block->left->tmp_link, &dfs);
+	} while (1);
+
+	return ERR_PTR(-ENOSPC);
+
+err_undo:
+	/*
+	 * We really don't want to leave around a bunch of split blocks, since
+	 * bigger is better, so make sure we merge everything back before we
+	 * free the allocated blocks.
+	 */
+	buddy = get_buddy(block);
+	if (buddy &&
+	    (drm_buddy_block_is_free(block) &&
+	     drm_buddy_block_is_free(buddy)))
+		__drm_buddy_free(mm, block);
+	return ERR_PTR(err);
+}
+
+static struct drm_buddy_block *
+alloc_from_freelist(struct drm_buddy *mm,
+		    unsigned int order,
+		    unsigned long flags)
 {
 	struct drm_buddy_block *block = NULL;
 	unsigned int i;
@@ -320,78 +394,29 @@ drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order)
 	while (i != order) {
 		err = split_block(mm, block);
 		if (unlikely(err))
-			goto out_free;
+			goto err_undo;
 
-		/* Go low */
-		block = block->left;
+		block = block->right;
 		i--;
 	}
-
-	mark_allocated(block);
-	mm->avail -= drm_buddy_block_size(mm, block);
-	kmemleak_update_trace(block);
 	return block;
 
-out_free:
+err_undo:
 	if (i != order)
 		__drm_buddy_free(mm, block);
 	return ERR_PTR(err);
 }
-EXPORT_SYMBOL(drm_buddy_alloc_blocks);
-
-static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
-{
-	return s1 <= e2 && e1 >= s2;
-}
 
-static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
-{
-	return s1 <= s2 && e1 >= e2;
-}
-
-/**
- * drm_buddy_alloc_range - allocate range
- *
- * @mm: DRM buddy manager to allocate from
- * @blocks: output list head to add allocated blocks
- * @start: start of the allowed range for this block
- * @size: size of the allocation
- *
- * Intended for pre-allocating portions of the address space, for example to
- * reserve a block for the initial framebuffer or similar, hence the expectation
- * here is that drm_buddy_alloc_blocks() is still the main vehicle for
- * allocations, so if that's not the case then the drm_mm range allocator is
- * probably a much better fit, and so you should probably go use that instead.
- *
- * Note that it's safe to chain together multiple alloc_ranges
- * with the same blocks list
- *
- * Returns:
- * 0 on success, error code on failure.
- */
-int drm_buddy_alloc_range(struct drm_buddy *mm,
-			  struct list_head *blocks,
-			  u64 start, u64 size)
+static int __alloc_range(struct drm_buddy *mm,
+			 struct list_head *dfs,
+			 u64 start, u64 size,
+			 struct list_head *blocks)
 {
 	struct drm_buddy_block *block;
 	struct drm_buddy_block *buddy;
 	LIST_HEAD(allocated);
-	LIST_HEAD(dfs);
 	u64 end;
 	int err;
-	int i;
-
-	if (size < mm->chunk_size)
-		return -EINVAL;
-
-	if (!IS_ALIGNED(size | start, mm->chunk_size))
-		return -EINVAL;
-
-	if (range_overflows(start, size, mm->size))
-		return -EINVAL;
-
-	for (i = 0; i < mm->n_roots; ++i)
-		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
 
 	end = start + size - 1;
 
@@ -399,7 +424,7 @@ int drm_buddy_alloc_range(struct drm_buddy *mm,
 		u64 block_start;
 		u64 block_end;
 
-		block = list_first_entry_or_null(&dfs,
+		block = list_first_entry_or_null(dfs,
 						 struct drm_buddy_block,
 						 tmp_link);
 		if (!block)
@@ -436,8 +461,8 @@ int drm_buddy_alloc_range(struct drm_buddy *mm,
 				goto err_undo;
 		}
 
-		list_add(&block->right->tmp_link, &dfs);
-		list_add(&block->left->tmp_link, &dfs);
+		list_add(&block->right->tmp_link, dfs);
+		list_add(&block->left->tmp_link, dfs);
 	} while (1);
 
 	list_splice_tail(&allocated, blocks);
@@ -459,7 +484,143 @@ int drm_buddy_alloc_range(struct drm_buddy *mm,
 	drm_buddy_free_list(mm, &allocated);
 	return err;
 }
-EXPORT_SYMBOL(drm_buddy_alloc_range);
+
+/**
+ * __drm_buddy_alloc_range - actual range allocation
+ *
+ * @mm: DRM buddy manager to allocate from
+ * @start: start of the allowed range for this block
+ * @size: size of the allocation
+ * @blocks: output list head to add allocated blocks
+ *
+ * Intended for pre-allocating portions of the address space, for example to
+ * reserve a block for the initial framebuffer or similar
+ *
+ * Note that it's safe to chain together multiple alloc_ranges
+ * with the same blocks list
+ *
+ * Returns:
+ * 0 on success, error code on failure.
+ */
+static int __drm_buddy_alloc_range(struct drm_buddy *mm,
+				   u64 start,
+				   u64 size,
+				   struct list_head *blocks)
+{
+	LIST_HEAD(dfs);
+	int i;
+
+	for (i = 0; i < mm->n_roots; ++i)
+		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
+
+	return __alloc_range(mm, &dfs, start, size, blocks);
+}
+
+/**
+ * drm_buddy_alloc_blocks - allocate power-of-two blocks
+ *
+ * @mm: DRM buddy manager to allocate from
+ * @start: start of the allowed range for this block
+ * @end: end of the allowed range for this block
+ * @size: size of the allocation
+ * @min_page_size: alignment of the allocation
+ * @blocks: output list head to add allocated blocks
+ * @flags: DRM_BUDDY_*_ALLOCATION flags
+ *
+ * alloc_range_bias() called on range limitations, which traverses
+ * the tree and returns the desired block.
+ *
+ * alloc_from_freelist() called when *no* range restrictions
+ * are enforced, which picks the block from the freelist.
+ *
+ * blocks are allocated in order, the order value here translates to:
+ *
+ * 0 = 2^0 * mm->chunk_size
+ * 1 = 2^1 * mm->chunk_size
+ * 2 = 2^2 * mm->chunk_size
+ *
+ * Returns:
+ * 0 on success, error code on failure.
+ */
+int drm_buddy_alloc_blocks(struct drm_buddy *mm,
+			   u64 start, u64 end, u64 size,
+			   u64 min_page_size,
+			   struct list_head *blocks,
+			   unsigned long flags)
+{
+	struct drm_buddy_block *block = NULL;
+	unsigned int min_order, order;
+	unsigned long pages;
+	LIST_HEAD(allocated);
+	int err;
+
+	if (size < mm->chunk_size)
+		return -EINVAL;
+
+	if (min_page_size < mm->chunk_size)
+		return -EINVAL;
+
+	if (!is_power_of_2(min_page_size))
+		return -EINVAL;
+
+	if (!IS_ALIGNED(start | end | size, mm->chunk_size))
+		return -EINVAL;
+
+	if (end > mm->size)
+		return -EINVAL;
+
+	if (range_overflows(start, size, mm->size))
+		return -EINVAL;
+
+	/* Actual range allocation */
+	if (start + size == end)
+		return __drm_buddy_alloc_range(mm, start, size, blocks);
+
+	pages = size >> ilog2(mm->chunk_size);
+	order = fls(pages) - 1;
+	min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
+
+	do {
+		order = min(order, (unsigned int)fls(pages) - 1);
+		BUG_ON(order > mm->max_order);
+		BUG_ON(order < min_order);
+
+		do {
+			if (flags & DRM_BUDDY_RANGE_ALLOCATION)
+				/* Allocate traversing within the range */
+				block = alloc_range_bias(mm, start, end, order);
+			else
+				/* Allocate from freelist */
+				block = alloc_from_freelist(mm, order, flags);
+
+			if (!IS_ERR(block))
+				break;
+
+			if (order-- == min_order) {
+				err = -ENOSPC;
+				goto err_free;
+			}
+		} while (1);
+
+		mark_allocated(block);
+		mm->avail -= drm_buddy_block_size(mm, block);
+		kmemleak_update_trace(block);
+		list_add_tail(&block->link, &allocated);
+
+		pages -= BIT(order);
+
+		if (!pages)
+			break;
+	} while (1);
+
+	list_splice_tail(&allocated, blocks);
+	return 0;
+
+err_free:
+	drm_buddy_free_list(mm, &allocated);
+	return err;
+}
+EXPORT_SYMBOL(drm_buddy_alloc_blocks);
 
 /**
  * drm_buddy_block_print - print block information
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index 247714bab044..a328a38fab07 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -36,13 +36,14 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
 	struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
 	struct i915_ttm_buddy_resource *bman_res;
 	struct drm_buddy *mm = &bman->mm;
-	unsigned long n_pages;
-	unsigned int min_order;
+	unsigned long n_pages, lpfn;
 	u64 min_page_size;
 	u64 size;
 	int err;
 
-	GEM_BUG_ON(place->fpfn || place->lpfn);
+	lpfn = place->lpfn;
+	if (!lpfn)
+		lpfn = man->size;
 
 	bman_res = kzalloc(sizeof(*bman_res), GFP_KERNEL);
 	if (!bman_res)
@@ -52,6 +53,9 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
 	INIT_LIST_HEAD(&bman_res->blocks);
 	bman_res->mm = mm;
 
+	if (place->fpfn || lpfn != man->size)
+		bman_res->flags |= DRM_BUDDY_RANGE_ALLOCATION;
+
 	GEM_BUG_ON(!bman_res->base.num_pages);
 	size = bman_res->base.num_pages << PAGE_SHIFT;
 
@@ -60,10 +64,16 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
 		min_page_size = bo->page_alignment << PAGE_SHIFT;
 
 	GEM_BUG_ON(min_page_size < mm->chunk_size);
-	min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
+
 	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+		unsigned long pages;
+
 		size = roundup_pow_of_two(size);
-		min_order = ilog2(size) - ilog2(mm->chunk_size);
+		min_page_size = size;
+
+		pages = size >> ilog2(mm->chunk_size);
+		if (pages > lpfn)
+			lpfn = pages;
 	}
 
 	if (size > mm->size) {
@@ -73,34 +83,16 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
 
 	n_pages = size >> ilog2(mm->chunk_size);
 
-	do {
-		struct drm_buddy_block *block;
-		unsigned int order;
-
-		order = fls(n_pages) - 1;
-		GEM_BUG_ON(order > mm->max_order);
-		GEM_BUG_ON(order < min_order);
-
-		do {
-			mutex_lock(&bman->lock);
-			block = drm_buddy_alloc_blocks(mm, order);
-			mutex_unlock(&bman->lock);
-			if (!IS_ERR(block))
-				break;
-
-			if (order-- == min_order) {
-				err = -ENOSPC;
-				goto err_free_blocks;
-			}
-		} while (1);
-
-		n_pages -= BIT(order);
-
-		list_add_tail(&block->link, &bman_res->blocks);
-
-		if (!n_pages)
-			break;
-	} while (1);
+	mutex_lock(&bman->lock);
+	err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
+				     (u64)lpfn << PAGE_SHIFT,
+				     (u64)n_pages << PAGE_SHIFT,
+				     min_page_size,
+				     &bman_res->blocks,
+				     bman_res->flags);
+	mutex_unlock(&bman->lock);
+	if (unlikely(err))
+		goto err_free_blocks;
 
 	*res = &bman_res->base;
 	return 0;
@@ -268,10 +260,17 @@ int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man,
 {
 	struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
 	struct drm_buddy *mm = &bman->mm;
+	unsigned long flags = 0;
 	int ret;
 
+	flags |= DRM_BUDDY_RANGE_ALLOCATION;
+
 	mutex_lock(&bman->lock);
-	ret = drm_buddy_alloc_range(mm, &bman->reserved, start, size);
+	ret = drm_buddy_alloc_blocks(mm, start,
+				     start + size,
+				     size, mm->chunk_size,
+				     &bman->reserved,
+				     flags);
 	mutex_unlock(&bman->lock);
 
 	return ret;
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
index 312077941411..72c90b432e87 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
@@ -20,6 +20,7 @@ struct drm_buddy;
  *
  * @base: struct ttm_resource base class we extend
  * @blocks: the list of struct i915_buddy_block for this resource/allocation
+ * @flags: DRM_BUDDY_*_ALLOCATION flags
  * @mm: the struct i915_buddy_mm for this resource
  *
  * Extends the struct ttm_resource to manage an address space allocation with
@@ -28,6 +29,7 @@ struct drm_buddy;
 struct i915_ttm_buddy_resource {
 	struct ttm_resource base;
 	struct list_head blocks;
+	unsigned long flags;
 	struct drm_buddy *mm;
 };
 
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index f524db152413..54f25a372f27 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -22,6 +22,8 @@
 	start__ >= max__ || size__ > max__ - start__; \
 })
 
+#define DRM_BUDDY_RANGE_ALLOCATION (1 << 0)
+
 struct drm_buddy_block {
 #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
 #define DRM_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
@@ -131,12 +133,11 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size);
 
 void drm_buddy_fini(struct drm_buddy *mm);
 
-struct drm_buddy_block *
-drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order);
-
-int drm_buddy_alloc_range(struct drm_buddy *mm,
-			  struct list_head *blocks,
-			  u64 start, u64 size);
+int drm_buddy_alloc_blocks(struct drm_buddy *mm,
+			   u64 start, u64 end, u64 size,
+			   u64 min_page_size,
+			   struct list_head *blocks,
+			   unsigned long flags);
 
 void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block);
 

base-commit: 627e2885b51ab503a98aa89f0a0bd68416c731fc
-- 
2.25.1


* [PATCH v11 2/5] drm: implement top-down allocation method
@ 2022-01-27 14:11   ` Arunpravin
  0 siblings, 0 replies; 34+ messages in thread
From: Arunpravin @ 2022-01-27 14:11 UTC (permalink / raw)
  To: dri-devel, intel-gfx, amd-gfx
  Cc: Arunpravin, matthew.auld, tzimmermann, alexander.deucher,
	christian.koenig

Implemented a function which walks through the order list,
compares the offsets and returns the block with the maximum
offset. This method is unpredictable in obtaining high range
address blocks, since the outcome depends on the allocation
and deallocation history. For instance, if a driver requests
an address in a specific low range, the allocator traverses
from the root block and splits the larger blocks until it
reaches the specific block; in the process of splitting, the
lower orders in the freelist are occupied with low range
address blocks, so for a subsequent TOPDOWN memory request
we may return low range blocks. To overcome this issue, we
could go with the approach below instead.

The other approach is to keep the entries of each order list
sorted in ascending order, compare the last entry of each
order list in the freelist and return the max block. This
creates sorting overhead on every drm_buddy_free() request,
plus the splitting of larger blocks for a single page request.
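
To make the chosen scheme concrete, here is a minimal sketch of
the max-offset scan (it mirrors the get_maxblock() helper added
in the diff below; the name max_offset_block is hypothetical,
while the types and accessors are the existing drm_buddy ones):

	/* Return the free block with the highest start offset, or NULL. */
	static struct drm_buddy_block *max_offset_block(struct list_head *head)
	{
		struct drm_buddy_block *best = NULL, *node;

		list_for_each_entry(node, head, link) {
			if (!best || drm_buddy_block_offset(node) >
				     drm_buddy_block_offset(best))
				best = node;
		}

		return best;
	}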

v2:
  - Fix alignment issues (Matthew Auld)
  - Remove unnecessary list_empty check (Matthew Auld)
  - merged the below patch to see the feature in action
     - add top-down alloc support to i915 driver

Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/drm_buddy.c                   | 36 ++++++++++++++++---
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |  3 ++
 include/drm/drm_buddy.h                       |  1 +
 3 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index cfc160a1ef1a..30cad939a112 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -369,6 +369,26 @@ alloc_range_bias(struct drm_buddy *mm,
 	return ERR_PTR(err);
 }
 
+static struct drm_buddy_block *
+get_maxblock(struct list_head *head)
+{
+	struct drm_buddy_block *max_block = NULL, *node;
+
+	max_block = list_first_entry_or_null(head,
+					     struct drm_buddy_block,
+					     link);
+	if (!max_block)
+		return NULL;
+
+	list_for_each_entry(node, head, link) {
+		if (drm_buddy_block_offset(node) >
+		    drm_buddy_block_offset(max_block))
+			max_block = node;
+	}
+
+	return max_block;
+}
+
 static struct drm_buddy_block *
 alloc_from_freelist(struct drm_buddy *mm,
 		    unsigned int order,
@@ -379,11 +399,17 @@ alloc_from_freelist(struct drm_buddy *mm,
 	int err;
 
 	for (i = order; i <= mm->max_order; ++i) {
-		block = list_first_entry_or_null(&mm->free_list[i],
-						 struct drm_buddy_block,
-						 link);
-		if (block)
-			break;
+		if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
+			block = get_maxblock(&mm->free_list[i]);
+			if (block)
+				break;
+		} else {
+			block = list_first_entry_or_null(&mm->free_list[i],
+							 struct drm_buddy_block,
+							 link);
+			if (block)
+				break;
+		}
 	}
 
 	if (!block)
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index a328a38fab07..7f553cceb6c0 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -53,6 +53,9 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
 	INIT_LIST_HEAD(&bman_res->blocks);
 	bman_res->mm = mm;
 
+	if (place->flags & TTM_PL_FLAG_TOPDOWN)
+		bman_res->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+
 	if (place->fpfn || lpfn != man->size)
 		bman_res->flags |= DRM_BUDDY_RANGE_ALLOCATION;
 
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index 54f25a372f27..f0378fb48d06 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -23,6 +23,7 @@
 })
 
 #define DRM_BUDDY_RANGE_ALLOCATION (1 << 0)
+#define DRM_BUDDY_TOPDOWN_ALLOCATION (1 << 1)
 
 struct drm_buddy_block {
 #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH v11 3/5] drm: implement a method to free unused pages
  2022-01-27 14:11 ` [Intel-gfx] " Arunpravin
  (?)
@ 2022-01-27 14:11   ` Arunpravin
  -1 siblings, 0 replies; 34+ messages in thread
From: Arunpravin @ 2022-01-27 14:11 UTC (permalink / raw)
  To: dri-devel, intel-gfx, amd-gfx
  Cc: Arunpravin, matthew.auld, tzimmermann, alexander.deucher,
	christian.koenig

On contiguous allocation, we round up the size
to the *next* power of 2; implement a function
to free the unused pages after the newly allocated
block. A short usage sketch follows the patch.

v2(Matthew Auld):
  - replace function name 'drm_buddy_free_unused_pages' with
    drm_buddy_block_trim
  - replace input argument name 'actual_size' with 'new_size'
  - add more validation checks for input arguments
  - add overlaps check to avoid needless searching and splitting
  - merged the below patch to see the feature in action
     - add free unused pages support to i915 driver
  - lock around drm_buddy_block_trim() as the mark_free()/mark_split()
    it calls are globally visible

v3(Matthew Auld):
  - remove trim method error handling, as the failure case is addressed
    inside the drm_buddy_block_trim() function

v4:
  - in the trim case, the __alloc_range() split_block failure path
    marks the block as free and removes it from the original list,
    potentially also freeing it; to overcome this problem, we turn
    the drm_buddy_block_trim() input node into a temporary node to
    prevent it from recursively freeing itself, while still retaining
    the un-splitting/freeing of the other nodes (Matthew Auld)

  - modify the drm_buddy_block_trim() function return type

v5(Matthew Auld):
  - revert drm_buddy_block_trim() function return type changes in v4
  - modify the drm_buddy_block_trim() passing argument from n_pages to
    original_size, as n_pages has already been rounded up to the next
    power of two and passing n_pages would result in a no-op

v6:
  - fix warnings reported by kernel test robot <lkp@intel.com>

v7:
  - modify drm_buddy_block_trim() function doc description
  - at drm_buddy_block_trim(), handle a non-allocated block as
    a serious programmer error
  - fix a typo

Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/drm_buddy.c                   | 69 +++++++++++++++++++
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 10 +++
 include/drm/drm_buddy.h                       |  4 ++
 3 files changed, 83 insertions(+)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 30cad939a112..4845ef784b5e 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -542,6 +542,75 @@ static int __drm_buddy_alloc_range(struct drm_buddy *mm,
 	return __alloc_range(mm, &dfs, start, size, blocks);
 }
 
+/**
+ * drm_buddy_block_trim - free unused pages
+ *
+ * @mm: DRM buddy manager
+ * @new_size: original size requested
+ * @blocks: Input and output list of allocated blocks.
+ * MUST contain single block as input to be trimmed.
+ * On success will contain the newly allocated blocks
+ * making up the @new_size. Blocks always appear in
+ * ascending order
+ *
+ * For contiguous allocation, we round up the size to the nearest
+ * power-of-two value; drivers consume the *actual* size, so the
+ * remaining portions are unused and can optionally be freed with this function
+ *
+ * Returns:
+ * 0 on success, error code on failure.
+ */
+int drm_buddy_block_trim(struct drm_buddy *mm,
+			 u64 new_size,
+			 struct list_head *blocks)
+{
+	struct drm_buddy_block *parent;
+	struct drm_buddy_block *block;
+	LIST_HEAD(dfs);
+	u64 new_start;
+	int err;
+
+	if (!list_is_singular(blocks))
+		return -EINVAL;
+
+	block = list_first_entry(blocks,
+				 struct drm_buddy_block,
+				 link);
+
+	if (WARN_ON(!drm_buddy_block_is_allocated(block)))
+		return -EINVAL;
+
+	if (new_size > drm_buddy_block_size(mm, block))
+		return -EINVAL;
+
+	if (!new_size || !IS_ALIGNED(new_size, mm->chunk_size))
+		return -EINVAL;
+
+	if (new_size == drm_buddy_block_size(mm, block))
+		return 0;
+
+	list_del(&block->link);
+	mark_free(mm, block);
+	mm->avail += drm_buddy_block_size(mm, block);
+
+	/* Prevent recursively freeing this node */
+	parent = block->parent;
+	block->parent = NULL;
+
+	new_start = drm_buddy_block_offset(block);
+	list_add(&block->tmp_link, &dfs);
+	err =  __alloc_range(mm, &dfs, new_start, new_size, blocks);
+	if (err) {
+		mark_allocated(block);
+		mm->avail -= drm_buddy_block_size(mm, block);
+		list_add(&block->link, blocks);
+	}
+
+	block->parent = parent;
+	return err;
+}
+EXPORT_SYMBOL(drm_buddy_block_trim);
+
 /**
  * drm_buddy_alloc_blocks - allocate power-of-two blocks
  *
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index 7f553cceb6c0..76d5211c25eb 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -97,6 +97,16 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
 	if (unlikely(err))
 		goto err_free_blocks;
 
+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+		u64 original_size = (u64)bman_res->base.num_pages << PAGE_SHIFT;
+
+		mutex_lock(&bman->lock);
+		drm_buddy_block_trim(mm,
+				     original_size,
+				     &bman_res->blocks);
+		mutex_unlock(&bman->lock);
+	}
+
 	*res = &bman_res->base;
 	return 0;
 
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index f0378fb48d06..bd21f9dfd15e 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -140,6 +140,10 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 			   struct list_head *blocks,
 			   unsigned long flags);
 
+int drm_buddy_block_trim(struct drm_buddy *mm,
+			 u64 new_size,
+			 struct list_head *blocks);
+
 void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block);
 
 void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects);
-- 
2.25.1
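
A rough caller-side illustration of what the trim buys us (again not
part of the patch; the 700 KiB request, the mm handle, a 4 KiB
chunk_size and the omitted locking are assumptions):

	/* Hypothetical sketch: a contiguous 700 KiB request. Rounding up
	 * to the next power of two yields a single 1 MiB block; trimming
	 * returns the unused 324 KiB tail to the free lists.
	 */
	u64 size = 700ULL << 10;
	struct list_head blocks;
	int err;

	INIT_LIST_HEAD(&blocks);

	err = drm_buddy_alloc_blocks(mm, 0, mm->size,
				     roundup_pow_of_two(size),
				     roundup_pow_of_two(size),
				     &blocks, 0);
	if (err)
		return err;

	/* the list must hold exactly one allocated block for trim */
	err = drm_buddy_block_trim(mm, size, &blocks);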


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH v11 4/5] drm/amdgpu: move vram inline functions into a header
  2022-01-27 14:11 ` [Intel-gfx] " Arunpravin
  (?)
@ 2022-01-27 14:11   ` Arunpravin
  -1 siblings, 0 replies; 34+ messages in thread
From: Arunpravin @ 2022-01-27 14:11 UTC (permalink / raw)
  To: dri-devel, intel-gfx, amd-gfx
  Cc: Arunpravin, matthew.auld, tzimmermann, alexander.deucher,
	christian.koenig

Move the shared vram inline helpers and structs
into a header file. A small usage sketch follows the patch.

Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h | 51 ++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
new file mode 100644
index 000000000000..59983464cce5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_VRAM_MGR_H__
+#define __AMDGPU_VRAM_MGR_H__
+
+#include <drm/drm_buddy.h>
+
+struct amdgpu_vram_mgr_node {
+	struct ttm_resource base;
+	struct list_head blocks;
+	unsigned long flags;
+};
+
+static inline u64 amdgpu_node_start(struct drm_buddy_block *block)
+{
+	return drm_buddy_block_offset(block);
+}
+
+static inline u64 amdgpu_node_size(struct drm_buddy_block *block)
+{
+	return PAGE_SIZE << drm_buddy_block_order(block);
+}
+
+static inline struct amdgpu_vram_mgr_node *
+to_amdgpu_vram_mgr_node(struct ttm_resource *res)
+{
+	return container_of(res, struct amdgpu_vram_mgr_node, base);
+}
+
+#endif
-- 
2.25.1
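
A small sketch of how the new helpers compose (node is an assumed,
already populated struct amdgpu_vram_mgr_node *):

	struct drm_buddy_block *block;
	u64 total = 0;

	/* Sum the sizes of all buddy blocks backing the resource */
	list_for_each_entry(block, &node->blocks, link)
		total += amdgpu_node_size(block);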


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH v11 5/5] drm/amdgpu: add drm buddy support to amdgpu
  2022-01-27 14:11 ` [Intel-gfx] " Arunpravin
  (?)
@ 2022-01-27 14:11   ` Arunpravin
  -1 siblings, 0 replies; 34+ messages in thread
From: Arunpravin @ 2022-01-27 14:11 UTC (permalink / raw)
  To: dri-devel, intel-gfx, amd-gfx
  Cc: Arunpravin, matthew.auld, tzimmermann, alexander.deucher,
	christian.koenig

- Remove drm_mm references and replace them with drm buddy functionality
- Add res cursor support for drm buddy (a walker sketch follows the patch)

v2(Matthew Auld):
  - replace the spinlock with a mutex, as we call kmem_cache_zalloc
    (..., GFP_KERNEL) in the drm_buddy_alloc() function

  - lock around drm_buddy_block_trim() as the mark_free()/
    mark_split() it calls are globally visible

v3(Matthew Auld):
  - remove trim method error handling, as the failure case is addressed
    inside the drm_buddy_block_trim() function

v4:
  - fix warnings reported by kernel test robot <lkp@intel.com>

v5:
  - fix merge conflict issue

v6:
  - fix warnings reported by kernel test robot <lkp@intel.com>

Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
---
 drivers/gpu/drm/Kconfig                       |   1 +
 .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    |  97 +++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   7 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 259 ++++++++++--------
 4 files changed, 231 insertions(+), 133 deletions(-)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index dfdd3ec5f793..eb5a57ae3c5c 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -279,6 +279,7 @@ config DRM_AMDGPU
 	select HWMON
 	select BACKLIGHT_CLASS_DEVICE
 	select INTERVAL_TREE
+	select DRM_BUDDY
 	help
 	  Choose this option if you have a recent AMD Radeon graphics card.
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index acfa207cf970..da12b4ff2e45 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -30,12 +30,15 @@
 #include <drm/ttm/ttm_resource.h>
 #include <drm/ttm/ttm_range_manager.h>
 
+#include "amdgpu_vram_mgr.h"
+
 /* state back for walking over vram_mgr and gtt_mgr allocations */
 struct amdgpu_res_cursor {
 	uint64_t		start;
 	uint64_t		size;
 	uint64_t		remaining;
-	struct drm_mm_node	*node;
+	void			*node;
+	uint32_t		mem_type;
 };
 
 /**
@@ -52,27 +55,63 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
 				    uint64_t start, uint64_t size,
 				    struct amdgpu_res_cursor *cur)
 {
+	struct drm_buddy_block *block;
+	struct list_head *head, *next;
 	struct drm_mm_node *node;
 
-	if (!res || res->mem_type == TTM_PL_SYSTEM) {
-		cur->start = start;
-		cur->size = size;
-		cur->remaining = size;
-		cur->node = NULL;
-		WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT);
-		return;
-	}
+	if (!res)
+		goto err_out;
 
 	BUG_ON(start + size > res->num_pages << PAGE_SHIFT);
 
-	node = to_ttm_range_mgr_node(res)->mm_nodes;
-	while (start >= node->size << PAGE_SHIFT)
-		start -= node++->size << PAGE_SHIFT;
+	cur->mem_type = res->mem_type;
+
+	switch (cur->mem_type) {
+	case TTM_PL_VRAM:
+		head = &to_amdgpu_vram_mgr_node(res)->blocks;
+
+		block = list_first_entry_or_null(head,
+						 struct drm_buddy_block,
+						 link);
+		if (!block)
+			goto err_out;
+
+		while (start >= amdgpu_node_size(block)) {
+			start -= amdgpu_node_size(block);
+
+			next = block->link.next;
+			if (next != head)
+				block = list_entry(next, struct drm_buddy_block, link);
+		}
+
+		cur->start = amdgpu_node_start(block) + start;
+		cur->size = min(amdgpu_node_size(block) - start, size);
+		cur->remaining = size;
+		cur->node = block;
+		break;
+	case TTM_PL_TT:
+		node = to_ttm_range_mgr_node(res)->mm_nodes;
+		while (start >= node->size << PAGE_SHIFT)
+			start -= node++->size << PAGE_SHIFT;
+
+		cur->start = (node->start << PAGE_SHIFT) + start;
+		cur->size = min((node->size << PAGE_SHIFT) - start, size);
+		cur->remaining = size;
+		cur->node = node;
+		break;
+	default:
+		goto err_out;
+	}
 
-	cur->start = (node->start << PAGE_SHIFT) + start;
-	cur->size = min((node->size << PAGE_SHIFT) - start, size);
+	return;
+
+err_out:
+	cur->start = start;
+	cur->size = size;
 	cur->remaining = size;
-	cur->node = node;
+	cur->node = NULL;
+	WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT);
+	return;
 }
 
 /**
@@ -85,7 +124,9 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
  */
 static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
 {
-	struct drm_mm_node *node = cur->node;
+	struct drm_buddy_block *block;
+	struct drm_mm_node *node;
+	struct list_head *next;
 
 	BUG_ON(size > cur->remaining);
 
@@ -99,9 +140,27 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
 		return;
 	}
 
-	cur->node = ++node;
-	cur->start = node->start << PAGE_SHIFT;
-	cur->size = min(node->size << PAGE_SHIFT, cur->remaining);
+	switch (cur->mem_type) {
+	case TTM_PL_VRAM:
+		block = cur->node;
+
+		next = block->link.next;
+		block = list_entry(next, struct drm_buddy_block, link);
+
+		cur->node = block;
+		cur->start = amdgpu_node_start(block);
+		cur->size = min(amdgpu_node_size(block), cur->remaining);
+		break;
+	case TTM_PL_TT:
+		node = cur->node;
+
+		cur->node = ++node;
+		cur->start = node->start << PAGE_SHIFT;
+		cur->size = min(node->size << PAGE_SHIFT, cur->remaining);
+		break;
+	default:
+		return;
+	}
 }
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index f8f48be16d80..1a5cbd5ef738 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -26,6 +26,7 @@
 
 #include <linux/dma-direction.h>
 #include <drm/gpu_scheduler.h>
+#include <drm/drm_buddy.h>
 #include "amdgpu.h"
 
 #define AMDGPU_PL_GDS		(TTM_PL_PRIV + 0)
@@ -40,12 +41,14 @@
 
 struct amdgpu_vram_mgr {
 	struct ttm_resource_manager manager;
-	struct drm_mm mm;
-	spinlock_t lock;
+	struct drm_buddy mm;
+	/* protects access to buffer objects */
+	struct mutex lock;
 	struct list_head reservations_pending;
 	struct list_head reserved_pages;
 	atomic64_t usage;
 	atomic64_t vis_usage;
+	u64 default_page_size;
 };
 
 struct amdgpu_gtt_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 7442095f089c..64ce442c9119 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -32,8 +32,11 @@
 #include "atom.h"
 
 struct amdgpu_vram_reservation {
+	u64 start;
+	u64 size;
+	unsigned long flags;
+	struct list_head block;
 	struct list_head node;
-	struct drm_mm_node mm_node;
 };
 
 static inline struct amdgpu_vram_mgr *
@@ -194,10 +197,10 @@ const struct attribute_group amdgpu_vram_mgr_attr_group = {
  * Calculate how many bytes of the MM node are inside visible VRAM
  */
 static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
-				    struct drm_mm_node *node)
+				    struct drm_buddy_block *block)
 {
-	uint64_t start = node->start << PAGE_SHIFT;
-	uint64_t end = (node->size + node->start) << PAGE_SHIFT;
+	u64 start = amdgpu_node_start(block);
+	u64 end = start + amdgpu_node_size(block);
 
 	if (start >= adev->gmc.visible_vram_size)
 		return 0;
@@ -218,9 +221,9 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	struct ttm_resource *res = bo->tbo.resource;
-	unsigned pages = res->num_pages;
-	struct drm_mm_node *mm;
-	u64 usage;
+	struct amdgpu_vram_mgr_node *node = to_amdgpu_vram_mgr_node(res);
+	struct drm_buddy_block *block;
+	u64 usage = 0;
 
 	if (amdgpu_gmc_vram_full_visible(&adev->gmc))
 		return amdgpu_bo_size(bo);
@@ -228,9 +231,8 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
 	if (res->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT)
 		return 0;
 
-	mm = &container_of(res, struct ttm_range_mgr_node, base)->mm_nodes[0];
-	for (usage = 0; pages; pages -= mm->size, mm++)
-		usage += amdgpu_vram_mgr_vis_size(adev, mm);
+	list_for_each_entry(block, &node->blocks, link)
+		usage += amdgpu_vram_mgr_vis_size(adev, block);
 
 	return usage;
 }
@@ -240,21 +242,29 @@ static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man)
 {
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
 	struct amdgpu_device *adev = to_amdgpu_device(mgr);
-	struct drm_mm *mm = &mgr->mm;
+	struct drm_buddy *mm = &mgr->mm;
 	struct amdgpu_vram_reservation *rsv, *temp;
+	struct drm_buddy_block *block;
 	uint64_t vis_usage;
 
 	list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) {
-		if (drm_mm_reserve_node(mm, &rsv->mm_node))
+		if (drm_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size,
+					   rsv->size, mm->chunk_size, &rsv->block,
+					   rsv->flags))
 			continue;
 
-		dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n",
-			rsv->mm_node.start, rsv->mm_node.size);
-
-		vis_usage = amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node);
-		atomic64_add(vis_usage, &mgr->vis_usage);
-		atomic64_add(rsv->mm_node.size << PAGE_SHIFT, &mgr->usage);
-		list_move(&rsv->node, &mgr->reserved_pages);
+		block = list_first_entry_or_null(&rsv->block,
+						 struct drm_buddy_block,
+						 link);
+		if (block) {
+			dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n",
+				rsv->start, rsv->size);
+
+			vis_usage = amdgpu_vram_mgr_vis_size(adev, block);
+			atomic64_add(vis_usage, &mgr->vis_usage);
+			atomic64_add(rsv->size, &mgr->usage);
+			list_move(&rsv->node, &mgr->reserved_pages);
+		}
 	}
 }
 
@@ -277,13 +287,16 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&rsv->node);
-	rsv->mm_node.start = start >> PAGE_SHIFT;
-	rsv->mm_node.size = size >> PAGE_SHIFT;
+	INIT_LIST_HEAD(&rsv->block);
+
+	rsv->start = start;
+	rsv->size = size;
+	rsv->flags |= DRM_BUDDY_RANGE_ALLOCATION;
 
-	spin_lock(&mgr->lock);
-	list_add_tail(&mgr->reservations_pending, &rsv->node);
+	mutex_lock(&mgr->lock);
+	list_add_tail(&rsv->node, &mgr->reservations_pending);
 	amdgpu_vram_mgr_do_reserve(&mgr->manager);
-	spin_unlock(&mgr->lock);
+	mutex_unlock(&mgr->lock);
 
 	return 0;
 }
@@ -305,19 +318,19 @@ int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
 	struct amdgpu_vram_reservation *rsv;
 	int ret;
 
-	spin_lock(&mgr->lock);
+	mutex_lock(&mgr->lock);
 
 	list_for_each_entry(rsv, &mgr->reservations_pending, node) {
-		if ((rsv->mm_node.start <= start) &&
-		    (start < (rsv->mm_node.start + rsv->mm_node.size))) {
+		if (rsv->start <= start &&
+		    (start < (rsv->start + rsv->size))) {
 			ret = -EBUSY;
 			goto out;
 		}
 	}
 
 	list_for_each_entry(rsv, &mgr->reserved_pages, node) {
-		if ((rsv->mm_node.start <= start) &&
-		    (start < (rsv->mm_node.start + rsv->mm_node.size))) {
+		if (rsv->start <= start &&
+		    (start < (rsv->start + rsv->size))) {
 			ret = 0;
 			goto out;
 		}
@@ -325,32 +338,10 @@ int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
 
 	ret = -ENOENT;
 out:
-	spin_unlock(&mgr->lock);
+	mutex_unlock(&mgr->lock);
 	return ret;
 }
 
-/**
- * amdgpu_vram_mgr_virt_start - update virtual start address
- *
- * @mem: ttm_resource to update
- * @node: just allocated node
- *
- * Calculate a virtual BO start address to easily check if everything is CPU
- * accessible.
- */
-static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
-				       struct drm_mm_node *node)
-{
-	unsigned long start;
-
-	start = node->start + node->size;
-	if (start > mem->num_pages)
-		start -= mem->num_pages;
-	else
-		start = 0;
-	mem->start = max(mem->start, start);
-}
-
 /**
  * amdgpu_vram_mgr_new - allocate new ranges
  *
@@ -366,13 +357,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 			       const struct ttm_place *place,
 			       struct ttm_resource **res)
 {
-	unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
+	unsigned long lpfn, pages_per_node, pages_left, pages, n_pages;
+	u64 vis_usage = 0, mem_bytes, max_bytes, min_page_size;
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
 	struct amdgpu_device *adev = to_amdgpu_device(mgr);
-	uint64_t vis_usage = 0, mem_bytes, max_bytes;
-	struct ttm_range_mgr_node *node;
-	struct drm_mm *mm = &mgr->mm;
-	enum drm_mm_insert_mode mode;
+	struct amdgpu_vram_mgr_node *node;
+	struct drm_buddy *mm = &mgr->mm;
+	struct drm_buddy_block *block;
 	unsigned i;
 	int r;
 
@@ -391,10 +382,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 		goto error_sub;
 	}
 
-	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
 		pages_per_node = ~0ul;
-		num_nodes = 1;
-	} else {
+	else {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 		pages_per_node = HPAGE_PMD_NR;
 #else
@@ -403,11 +393,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 #endif
 		pages_per_node = max_t(uint32_t, pages_per_node,
 				       tbo->page_alignment);
-		num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
 	}
 
-	node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
-			GFP_KERNEL | __GFP_ZERO);
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
 	if (!node) {
 		r = -ENOMEM;
 		goto error_sub;
@@ -415,9 +403,17 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 
 	ttm_resource_init(tbo, place, &node->base);
 
-	mode = DRM_MM_INSERT_BEST;
+	INIT_LIST_HEAD(&node->blocks);
+
 	if (place->flags & TTM_PL_FLAG_TOPDOWN)
-		mode = DRM_MM_INSERT_HIGH;
+		node->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+
+	if (place->fpfn || lpfn != man->size)
+		/* Allocate blocks in desired range */
+		node->flags |= DRM_BUDDY_RANGE_ALLOCATION;
+
+	min_page_size = mgr->default_page_size;
+	BUG_ON(min_page_size < mm->chunk_size);
 
 	pages_left = node->base.num_pages;
 
@@ -425,36 +421,61 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
 
 	i = 0;
-	spin_lock(&mgr->lock);
 	while (pages_left) {
-		uint32_t alignment = tbo->page_alignment;
-
 		if (pages >= pages_per_node)
-			alignment = pages_per_node;
-
-		r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
-						alignment, 0, place->fpfn,
-						lpfn, mode);
-		if (unlikely(r)) {
-			if (pages > pages_per_node) {
-				if (is_power_of_2(pages))
-					pages = pages / 2;
-				else
-					pages = rounddown_pow_of_two(pages);
-				continue;
-			}
-			goto error_free;
+			pages = pages_per_node;
+
+		n_pages = pages;
+
+		if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+			n_pages = roundup_pow_of_two(n_pages);
+			min_page_size = (u64)n_pages << PAGE_SHIFT;
+
+			if (n_pages > lpfn)
+				lpfn = n_pages;
 		}
 
-		vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]);
-		amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]);
+		mutex_lock(&mgr->lock);
+		r = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
+					   (u64)lpfn << PAGE_SHIFT,
+					   (u64)n_pages << PAGE_SHIFT,
+					   min_page_size,
+					   &node->blocks,
+					   node->flags);
+		mutex_unlock(&mgr->lock);
+		if (unlikely(r))
+			goto error_free_blocks;
+
 		pages_left -= pages;
 		++i;
 
 		if (pages > pages_left)
 			pages = pages_left;
 	}
-	spin_unlock(&mgr->lock);
+
+	/* Free unused pages for contiguous allocation */
+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+		u64 actual_size = (u64)node->base.num_pages << PAGE_SHIFT;
+
+		mutex_lock(&mgr->lock);
+		drm_buddy_block_trim(mm,
+				     actual_size,
+				     &node->blocks);
+		mutex_unlock(&mgr->lock);
+	}
+
+	list_for_each_entry(block, &node->blocks, link)
+		vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
+
+	block = list_first_entry_or_null(&node->blocks,
+					 struct drm_buddy_block,
+					 link);
+	if (!block) {
+		r = -ENOENT;
+		goto error_free_res;
+	}
+
+	node->base.start = amdgpu_node_start(block) >> PAGE_SHIFT;
 
 	if (i == 1)
 		node->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
@@ -468,13 +489,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	*res = &node->base;
 	return 0;
 
-error_free:
-	while (i--)
-		drm_mm_remove_node(&node->mm_nodes[i]);
-	spin_unlock(&mgr->lock);
+error_free_blocks:
+	mutex_lock(&mgr->lock);
+	drm_buddy_free_list(mm, &node->blocks);
+	mutex_unlock(&mgr->lock);
+error_free_res:
 	ttm_resource_fini(man, &node->base);
-	kvfree(node);
-
+	kfree(node);
 error_sub:
 	atomic64_sub(mem_bytes, &mgr->usage);
 	return r;
@@ -491,29 +512,29 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
 				struct ttm_resource *res)
 {
-	struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
+	struct amdgpu_vram_mgr_node *node = to_amdgpu_vram_mgr_node(res);
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
 	struct amdgpu_device *adev = to_amdgpu_device(mgr);
+	struct drm_buddy *mm = &mgr->mm;
+	struct drm_buddy_block *block;
 	uint64_t usage = 0, vis_usage = 0;
-	unsigned i, pages;
 
-	spin_lock(&mgr->lock);
-	for (i = 0, pages = res->num_pages; pages;
-	     pages -= node->mm_nodes[i].size, ++i) {
-		struct drm_mm_node *mm = &node->mm_nodes[i];
-
-		drm_mm_remove_node(mm);
-		usage += mm->size << PAGE_SHIFT;
-		vis_usage += amdgpu_vram_mgr_vis_size(adev, mm);
+	mutex_lock(&mgr->lock);
+	list_for_each_entry(block, &node->blocks, link) {
+		usage += amdgpu_node_size(block);
+		vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
 	}
+
 	amdgpu_vram_mgr_do_reserve(man);
-	spin_unlock(&mgr->lock);
+
+	drm_buddy_free_list(mm, &node->blocks);
+	mutex_unlock(&mgr->lock);
 
 	atomic64_sub(usage, &mgr->usage);
 	atomic64_sub(vis_usage, &mgr->vis_usage);
 
 	ttm_resource_fini(man, res);
-	kvfree(node);
+	kfree(node);
 }
 
 /**
@@ -663,10 +684,19 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
 				  struct drm_printer *printer)
 {
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+	struct drm_buddy *mm = &mgr->mm;
+	struct drm_buddy_block *block;
+
+	mutex_lock(&mgr->lock);
+	drm_printf(printer, "default_page_size: %lluKiB\n",
+		   mgr->default_page_size >> 10);
 
-	spin_lock(&mgr->lock);
-	drm_mm_print(&mgr->mm, printer);
-	spin_unlock(&mgr->lock);
+	drm_buddy_print(mm, printer);
+
+	drm_printf(printer, "reserved:\n");
+	list_for_each_entry(block, &mgr->reserved_pages, link)
+		drm_buddy_block_print(mm, block, printer);
+	mutex_unlock(&mgr->lock);
 
 	drm_printf(printer, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
 		   man->size, amdgpu_vram_mgr_usage(mgr) >> 20,
@@ -690,16 +720,21 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
 {
 	struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
 	struct ttm_resource_manager *man = &mgr->manager;
+	int err;
 
 	ttm_resource_manager_init(man, &adev->mman.bdev,
 				  adev->gmc.real_vram_size >> PAGE_SHIFT);
 
 	man->func = &amdgpu_vram_mgr_func;
 
-	drm_mm_init(&mgr->mm, 0, man->size);
-	spin_lock_init(&mgr->lock);
+	err = drm_buddy_init(&mgr->mm, man->size << PAGE_SHIFT, PAGE_SIZE);
+	if (err)
+		return err;
+
+	mutex_init(&mgr->lock);
 	INIT_LIST_HEAD(&mgr->reservations_pending);
 	INIT_LIST_HEAD(&mgr->reserved_pages);
+	mgr->default_page_size = PAGE_SIZE;
 
 	ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
 	ttm_resource_manager_set_used(man, true);
@@ -727,16 +762,16 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
 	if (ret)
 		return;
 
-	spin_lock(&mgr->lock);
+	mutex_lock(&mgr->lock);
 	list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
 		kfree(rsv);
 
 	list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
-		drm_mm_remove_node(&rsv->mm_node);
+		drm_buddy_free_list(&mgr->mm, &rsv->block);
 		kfree(rsv);
 	}
-	drm_mm_takedown(&mgr->mm);
-	spin_unlock(&mgr->lock);
+	drm_buddy_fini(&mgr->mm);
+	mutex_unlock(&mgr->lock);
 
 	ttm_resource_manager_cleanup(man);
 	ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL);
-- 
2.25.1
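
For context, a sketch of the consumer pattern the reworked cursor keeps
intact; res and handle_span() are assumptions for illustration. The same
loop now walks buddy blocks for VRAM and drm_mm nodes for GTT:

	struct amdgpu_res_cursor cursor;

	/* Visit every physically contiguous span of the resource */
	amdgpu_res_first(res, 0, res->num_pages << PAGE_SHIFT, &cursor);
	while (cursor.remaining) {
		handle_span(cursor.start, cursor.size); /* hypothetical */
		amdgpu_res_next(&cursor, cursor.size);
	}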


^ permalink raw reply related	[flat|nested] 34+ messages in thread

 	struct list_head node;
-	struct drm_mm_node mm_node;
 };
 
 static inline struct amdgpu_vram_mgr *
@@ -194,10 +197,10 @@ const struct attribute_group amdgpu_vram_mgr_attr_group = {
  * Calculate how many bytes of the MM node are inside visible VRAM
  */
 static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
-				    struct drm_mm_node *node)
+				    struct drm_buddy_block *block)
 {
-	uint64_t start = node->start << PAGE_SHIFT;
-	uint64_t end = (node->size + node->start) << PAGE_SHIFT;
+	u64 start = amdgpu_node_start(block);
+	u64 end = start + amdgpu_node_size(block);
 
 	if (start >= adev->gmc.visible_vram_size)
 		return 0;
@@ -218,9 +221,9 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	struct ttm_resource *res = bo->tbo.resource;
-	unsigned pages = res->num_pages;
-	struct drm_mm_node *mm;
-	u64 usage;
+	struct amdgpu_vram_mgr_node *node = to_amdgpu_vram_mgr_node(res);
+	struct drm_buddy_block *block;
+	u64 usage = 0;
 
 	if (amdgpu_gmc_vram_full_visible(&adev->gmc))
 		return amdgpu_bo_size(bo);
@@ -228,9 +231,8 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
 	if (res->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT)
 		return 0;
 
-	mm = &container_of(res, struct ttm_range_mgr_node, base)->mm_nodes[0];
-	for (usage = 0; pages; pages -= mm->size, mm++)
-		usage += amdgpu_vram_mgr_vis_size(adev, mm);
+	list_for_each_entry(block, &node->blocks, link)
+		usage += amdgpu_vram_mgr_vis_size(adev, block);
 
 	return usage;
 }
@@ -240,21 +242,29 @@ static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man)
 {
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
 	struct amdgpu_device *adev = to_amdgpu_device(mgr);
-	struct drm_mm *mm = &mgr->mm;
+	struct drm_buddy *mm = &mgr->mm;
 	struct amdgpu_vram_reservation *rsv, *temp;
+	struct drm_buddy_block *block;
 	uint64_t vis_usage;
 
 	list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) {
-		if (drm_mm_reserve_node(mm, &rsv->mm_node))
+		if (drm_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size,
+					   rsv->size, mm->chunk_size, &rsv->block,
+					   rsv->flags))
 			continue;
 
-		dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n",
-			rsv->mm_node.start, rsv->mm_node.size);
-
-		vis_usage = amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node);
-		atomic64_add(vis_usage, &mgr->vis_usage);
-		atomic64_add(rsv->mm_node.size << PAGE_SHIFT, &mgr->usage);
-		list_move(&rsv->node, &mgr->reserved_pages);
+		block = list_first_entry_or_null(&rsv->block,
+						 struct drm_buddy_block,
+						 link);
+		if (block) {
+			dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n",
+				rsv->start, rsv->size);
+
+			vis_usage = amdgpu_vram_mgr_vis_size(adev, block);
+			atomic64_add(vis_usage, &mgr->vis_usage);
+			atomic64_add(rsv->size, &mgr->usage);
+			list_move(&rsv->node, &mgr->reserved_pages);
+		}
 	}
 }
 
@@ -277,13 +287,16 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&rsv->node);
-	rsv->mm_node.start = start >> PAGE_SHIFT;
-	rsv->mm_node.size = size >> PAGE_SHIFT;
+	INIT_LIST_HEAD(&rsv->block);
+
+	rsv->start = start;
+	rsv->size = size;
+	rsv->flags |= DRM_BUDDY_RANGE_ALLOCATION;
 
-	spin_lock(&mgr->lock);
-	list_add_tail(&mgr->reservations_pending, &rsv->node);
+	mutex_lock(&mgr->lock);
+	list_add_tail(&rsv->node, &mgr->reservations_pending);
 	amdgpu_vram_mgr_do_reserve(&mgr->manager);
-	spin_unlock(&mgr->lock);
+	mutex_unlock(&mgr->lock);
 
 	return 0;
 }
@@ -305,19 +318,19 @@ int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
 	struct amdgpu_vram_reservation *rsv;
 	int ret;
 
-	spin_lock(&mgr->lock);
+	mutex_lock(&mgr->lock);
 
 	list_for_each_entry(rsv, &mgr->reservations_pending, node) {
-		if ((rsv->mm_node.start <= start) &&
-		    (start < (rsv->mm_node.start + rsv->mm_node.size))) {
+		if (rsv->start <= start &&
+		    (start < (rsv->start + rsv->size))) {
 			ret = -EBUSY;
 			goto out;
 		}
 	}
 
 	list_for_each_entry(rsv, &mgr->reserved_pages, node) {
-		if ((rsv->mm_node.start <= start) &&
-		    (start < (rsv->mm_node.start + rsv->mm_node.size))) {
+		if (rsv->start <= start &&
+		    (start < (rsv->start + rsv->size))) {
 			ret = 0;
 			goto out;
 		}
@@ -325,32 +338,10 @@ int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
 
 	ret = -ENOENT;
 out:
-	spin_unlock(&mgr->lock);
+	mutex_unlock(&mgr->lock);
 	return ret;
 }
 
-/**
- * amdgpu_vram_mgr_virt_start - update virtual start address
- *
- * @mem: ttm_resource to update
- * @node: just allocated node
- *
- * Calculate a virtual BO start address to easily check if everything is CPU
- * accessible.
- */
-static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
-				       struct drm_mm_node *node)
-{
-	unsigned long start;
-
-	start = node->start + node->size;
-	if (start > mem->num_pages)
-		start -= mem->num_pages;
-	else
-		start = 0;
-	mem->start = max(mem->start, start);
-}
-
 /**
  * amdgpu_vram_mgr_new - allocate new ranges
  *
@@ -366,13 +357,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 			       const struct ttm_place *place,
 			       struct ttm_resource **res)
 {
-	unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
+	unsigned long lpfn, pages_per_node, pages_left, pages, n_pages;
+	u64 vis_usage = 0, mem_bytes, max_bytes, min_page_size;
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
 	struct amdgpu_device *adev = to_amdgpu_device(mgr);
-	uint64_t vis_usage = 0, mem_bytes, max_bytes;
-	struct ttm_range_mgr_node *node;
-	struct drm_mm *mm = &mgr->mm;
-	enum drm_mm_insert_mode mode;
+	struct amdgpu_vram_mgr_node *node;
+	struct drm_buddy *mm = &mgr->mm;
+	struct drm_buddy_block *block;
 	unsigned i;
 	int r;
 
@@ -391,10 +382,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 		goto error_sub;
 	}
 
-	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
 		pages_per_node = ~0ul;
-		num_nodes = 1;
-	} else {
+	else {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 		pages_per_node = HPAGE_PMD_NR;
 #else
@@ -403,11 +393,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 #endif
 		pages_per_node = max_t(uint32_t, pages_per_node,
 				       tbo->page_alignment);
-		num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
 	}
 
-	node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
-			GFP_KERNEL | __GFP_ZERO);
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
 	if (!node) {
 		r = -ENOMEM;
 		goto error_sub;
@@ -415,9 +403,17 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 
 	ttm_resource_init(tbo, place, &node->base);
 
-	mode = DRM_MM_INSERT_BEST;
+	INIT_LIST_HEAD(&node->blocks);
+
 	if (place->flags & TTM_PL_FLAG_TOPDOWN)
-		mode = DRM_MM_INSERT_HIGH;
+		node->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+
+	if (place->fpfn || lpfn != man->size)
+		/* Allocate blocks in desired range */
+		node->flags |= DRM_BUDDY_RANGE_ALLOCATION;
+
+	min_page_size = mgr->default_page_size;
+	BUG_ON(min_page_size < mm->chunk_size);
 
 	pages_left = node->base.num_pages;
 
@@ -425,36 +421,61 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
 
 	i = 0;
-	spin_lock(&mgr->lock);
 	while (pages_left) {
-		uint32_t alignment = tbo->page_alignment;
-
 		if (pages >= pages_per_node)
-			alignment = pages_per_node;
-
-		r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
-						alignment, 0, place->fpfn,
-						lpfn, mode);
-		if (unlikely(r)) {
-			if (pages > pages_per_node) {
-				if (is_power_of_2(pages))
-					pages = pages / 2;
-				else
-					pages = rounddown_pow_of_two(pages);
-				continue;
-			}
-			goto error_free;
+			pages = pages_per_node;
+
+		n_pages = pages;
+
+		if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+			n_pages = roundup_pow_of_two(n_pages);
+			min_page_size = (u64)n_pages << PAGE_SHIFT;
+
+			if (n_pages > lpfn)
+				lpfn = n_pages;
 		}
 
-		vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]);
-		amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]);
+		mutex_lock(&mgr->lock);
+		r = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
+					   (u64)lpfn << PAGE_SHIFT,
+					   (u64)n_pages << PAGE_SHIFT,
+					   min_page_size,
+					   &node->blocks,
+					   node->flags);
+		mutex_unlock(&mgr->lock);
+		if (unlikely(r))
+			goto error_free_blocks;
+
 		pages_left -= pages;
 		++i;
 
 		if (pages > pages_left)
 			pages = pages_left;
 	}
-	spin_unlock(&mgr->lock);
+
+	/* Free unused pages for contiguous allocation */
+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+		u64 actual_size = (u64)node->base.num_pages << PAGE_SHIFT;
+
+		mutex_lock(&mgr->lock);
+		drm_buddy_block_trim(mm,
+				     actual_size,
+				     &node->blocks);
+		mutex_unlock(&mgr->lock);
+	}
+
+	list_for_each_entry(block, &node->blocks, link)
+		vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
+
+	block = list_first_entry_or_null(&node->blocks,
+					 struct drm_buddy_block,
+					 link);
+	if (!block) {
+		r = -ENOENT;
+		goto error_free_res;
+	}
+
+	node->base.start = amdgpu_node_start(block) >> PAGE_SHIFT;
 
 	if (i == 1)
 		node->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
@@ -468,13 +489,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	*res = &node->base;
 	return 0;
 
-error_free:
-	while (i--)
-		drm_mm_remove_node(&node->mm_nodes[i]);
-	spin_unlock(&mgr->lock);
+error_free_blocks:
+	mutex_lock(&mgr->lock);
+	drm_buddy_free_list(mm, &node->blocks);
+	mutex_unlock(&mgr->lock);
+error_free_res:
 	ttm_resource_fini(man, &node->base);
-	kvfree(node);
-
+	kfree(node);
 error_sub:
 	atomic64_sub(mem_bytes, &mgr->usage);
 	return r;
@@ -491,29 +512,29 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
 				struct ttm_resource *res)
 {
-	struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
+	struct amdgpu_vram_mgr_node *node = to_amdgpu_vram_mgr_node(res);
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
 	struct amdgpu_device *adev = to_amdgpu_device(mgr);
+	struct drm_buddy *mm = &mgr->mm;
+	struct drm_buddy_block *block;
 	uint64_t usage = 0, vis_usage = 0;
-	unsigned i, pages;
 
-	spin_lock(&mgr->lock);
-	for (i = 0, pages = res->num_pages; pages;
-	     pages -= node->mm_nodes[i].size, ++i) {
-		struct drm_mm_node *mm = &node->mm_nodes[i];
-
-		drm_mm_remove_node(mm);
-		usage += mm->size << PAGE_SHIFT;
-		vis_usage += amdgpu_vram_mgr_vis_size(adev, mm);
+	mutex_lock(&mgr->lock);
+	list_for_each_entry(block, &node->blocks, link) {
+		usage += amdgpu_node_size(block);
+		vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
 	}
+
 	amdgpu_vram_mgr_do_reserve(man);
-	spin_unlock(&mgr->lock);
+
+	drm_buddy_free_list(mm, &node->blocks);
+	mutex_unlock(&mgr->lock);
 
 	atomic64_sub(usage, &mgr->usage);
 	atomic64_sub(vis_usage, &mgr->vis_usage);
 
 	ttm_resource_fini(man, res);
-	kvfree(node);
+	kfree(node);
 }
 
 /**
@@ -663,10 +684,19 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
 				  struct drm_printer *printer)
 {
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+	struct drm_buddy *mm = &mgr->mm;
+	struct drm_buddy_block *block;
+
+	mutex_lock(&mgr->lock);
+	drm_printf(printer, "default_page_size: %lluKiB\n",
+		   mgr->default_page_size >> 10);
 
-	spin_lock(&mgr->lock);
-	drm_mm_print(&mgr->mm, printer);
-	spin_unlock(&mgr->lock);
+	drm_buddy_print(mm, printer);
+
+	drm_printf(printer, "reserved:\n");
+	list_for_each_entry(block, &mgr->reserved_pages, link)
+		drm_buddy_block_print(mm, block, printer);
+	mutex_unlock(&mgr->lock);
 
 	drm_printf(printer, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
 		   man->size, amdgpu_vram_mgr_usage(mgr) >> 20,
@@ -690,16 +720,21 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
 {
 	struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
 	struct ttm_resource_manager *man = &mgr->manager;
+	int err;
 
 	ttm_resource_manager_init(man, &adev->mman.bdev,
 				  adev->gmc.real_vram_size >> PAGE_SHIFT);
 
 	man->func = &amdgpu_vram_mgr_func;
 
-	drm_mm_init(&mgr->mm, 0, man->size);
-	spin_lock_init(&mgr->lock);
+	err = drm_buddy_init(&mgr->mm, man->size << PAGE_SHIFT, PAGE_SIZE);
+	if (err)
+		return err;
+
+	mutex_init(&mgr->lock);
 	INIT_LIST_HEAD(&mgr->reservations_pending);
 	INIT_LIST_HEAD(&mgr->reserved_pages);
+	mgr->default_page_size = PAGE_SIZE;
 
 	ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
 	ttm_resource_manager_set_used(man, true);
@@ -727,16 +762,16 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
 	if (ret)
 		return;
 
-	spin_lock(&mgr->lock);
+	mutex_lock(&mgr->lock);
 	list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
 		kfree(rsv);
 
 	list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
-		drm_mm_remove_node(&rsv->mm_node);
+		drm_buddy_free_list(&mgr->mm, &rsv->block);
 		kfree(rsv);
 	}
-	drm_mm_takedown(&mgr->mm);
-	spin_unlock(&mgr->lock);
+	drm_buddy_fini(&mgr->mm);
+	mutex_unlock(&mgr->lock);
 
 	ttm_resource_manager_cleanup(man);
 	ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL);
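
(For reference, a hypothetical usage sketch of the reworked res cursor,
not part of the patch: the function name is made up, while the cursor
API is the one added in amdgpu_res_cursor.h above. It walks a VRAM
resource in bounded chunks.)

/* Hypothetical usage sketch: iterate a resource piece by piece */
static void example_walk(struct ttm_resource *res)
{
	struct amdgpu_res_cursor cursor;

	amdgpu_res_first(res, 0, res->num_pages << PAGE_SHIFT, &cursor);
	while (cursor.remaining) {
		/* cursor.start is the VRAM offset of this piece */
		u64 chunk = min_t(u64, cursor.size, SZ_2M);

		amdgpu_res_next(&cursor, chunk);
	}
}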
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [v11,1/5] drm: improve drm_buddy_alloc function
  2022-01-27 14:11 ` [Intel-gfx] " Arunpravin
@ 2022-01-27 15:52 ` Patchwork
  -1 siblings, 0 replies; 34+ messages in thread
From: Patchwork @ 2022-01-27 15:52 UTC (permalink / raw)
  To: Arunpravin; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v11,1/5] drm: improve drm_buddy_alloc function
URL   : https://patchwork.freedesktop.org/series/99430/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
00470383ca08 drm: improve drm_buddy_alloc function
-:386: WARNING:AVOID_BUG: Avoid crashing the kernel - try using WARN_ON & recovery code rather than BUG() or BUG_ON()
#386: FILE: drivers/gpu/drm/drm_buddy.c:585:
+		BUG_ON(order > mm->max_order);

-:387: WARNING:AVOID_BUG: Avoid crashing the kernel - try using WARN_ON & recovery code rather than BUG() or BUG_ON()
#387: FILE: drivers/gpu/drm/drm_buddy.c:586:
+		BUG_ON(order < min_order);

total: 0 errors, 2 warnings, 0 checks, 506 lines checked
50185f5f7e57 drm: implement top-down allocation method
957142d57613 drm: implement a method to free unused pages
f26779496cc3 drm/amdgpu: move vram inline functions into a header
-:12: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#12: 
new file mode 100644

total: 0 errors, 1 warnings, 0 checks, 51 lines checked
a629aca723cd drm/amdgpu: add drm buddy support to amdgpu
-:60: CHECK:PREFER_KERNEL_TYPES: Prefer kernel type 'u32' over 'uint32_t'
#60: FILE: drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h:41:
+	uint32_t		mem_type;

-:417: CHECK:BRACES: Unbalanced braces around else statement
#417: FILE: drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c:387:
+	else {

-:450: WARNING:AVOID_BUG: Avoid crashing the kernel - try using WARN_ON & recovery code rather than BUG() or BUG_ON()
#450: FILE: drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c:416:
+	BUG_ON(min_page_size < mm->chunk_size);

total: 0 errors, 1 warnings, 2 checks, 594 lines checked
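
(For context, a hypothetical sketch of the recovery style the AVOID_BUG
warnings above suggest; this is not code from the series, just the
WARN_ON-and-unwind pattern in place of a check such as
BUG_ON(min_page_size < mm->chunk_size).)

/* Hypothetical sketch: report and unwind instead of crashing */
static int check_min_page_size(struct drm_buddy *mm, u64 min_page_size)
{
	if (WARN_ON(min_page_size < mm->chunk_size))
		return -EINVAL;

	return 0;
}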



^ permalink raw reply	[flat|nested] 34+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [v11,1/5] drm: improve drm_buddy_alloc function
  2022-01-27 14:11 ` [Intel-gfx] " Arunpravin
@ 2022-01-27 16:23 ` Patchwork
  -1 siblings, 0 replies; 34+ messages in thread
From: Patchwork @ 2022-01-27 16:23 UTC (permalink / raw)
  To: Arunpravin; +Cc: intel-gfx


== Series Details ==

Series: series starting with [v11,1/5] drm: improve drm_buddy_alloc function
URL   : https://patchwork.freedesktop.org/series/99430/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_11151 -> Patchwork_22128
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/index.html

Participating hosts (46 -> 39)
------------------------------

  Missing    (7): bat-adls-5 fi-hsw-4200u fi-bsw-cyan fi-ctg-p8600 fi-kbl-8809g fi-pnv-d510 fi-bdw-samus 

Known issues
------------

  Here are the changes found in Patchwork_22128 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@kms_psr@primary_page_flip:
    - fi-skl-6600u:       [PASS][1] -> [FAIL][2] ([i915#4547])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/fi-skl-6600u/igt@kms_psr@primary_page_flip.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/fi-skl-6600u/igt@kms_psr@primary_page_flip.html

  * igt@runner@aborted:
    - fi-skl-6600u:       NOTRUN -> [FAIL][3] ([i915#4312])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/fi-skl-6600u/igt@runner@aborted.html

  
#### Warnings ####

  * igt@i915_selftest@live@hangcheck:
    - bat-dg1-5:          [DMESG-FAIL][4] ([i915#4957]) -> [DMESG-FAIL][5] ([i915#4494] / [i915#4957])
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/bat-dg1-5/igt@i915_selftest@live@hangcheck.html
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/bat-dg1-5/igt@i915_selftest@live@hangcheck.html

  
  [i915#4312]: https://gitlab.freedesktop.org/drm/intel/issues/4312
  [i915#4494]: https://gitlab.freedesktop.org/drm/intel/issues/4494
  [i915#4547]: https://gitlab.freedesktop.org/drm/intel/issues/4547
  [i915#4957]: https://gitlab.freedesktop.org/drm/intel/issues/4957


Build changes
-------------

  * Linux: CI_DRM_11151 -> Patchwork_22128

  CI-20190529: 20190529
  CI_DRM_11151: 627e2885b51ab503a98aa89f0a0bd68416c731fc @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_6336: ae2eb9e18bc58a4c45f28cfd80962938198dec3c @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_22128: a629aca723cdab39652e834b30f0513757f78441 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

a629aca723cd drm/amdgpu: add drm buddy support to amdgpu
f26779496cc3 drm/amdgpu: move vram inline functions into a header
957142d57613 drm: implement a method to free unused pages
50185f5f7e57 drm: implement top-down allocation method
00470383ca08 drm: improve drm_buddy_alloc function

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/index.html


^ permalink raw reply	[flat|nested] 34+ messages in thread

* [Intel-gfx] ✓ Fi.CI.IGT: success for series starting with [v11,1/5] drm: improve drm_buddy_alloc function
  2022-01-27 14:11 ` [Intel-gfx] " Arunpravin
@ 2022-01-27 21:45 ` Patchwork
  -1 siblings, 0 replies; 34+ messages in thread
From: Patchwork @ 2022-01-27 21:45 UTC (permalink / raw)
  To: Arunpravin; +Cc: intel-gfx


== Series Details ==

Series: series starting with [v11,1/5] drm: improve drm_buddy_alloc function
URL   : https://patchwork.freedesktop.org/series/99430/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_11151_full -> Patchwork_22128_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  

Participating hosts (10 -> 10)
------------------------------

  No changes in participating hosts

Known issues
------------

  Here are the changes found in Patchwork_22128_full that come from known issues:

### CI changes ###

#### Possible fixes ####

  * boot:
    - shard-apl:          ([PASS][1], [FAIL][2], [PASS][3], [PASS][4], [PASS][5], [PASS][6], [PASS][7], [PASS][8], [PASS][9], [PASS][10], [PASS][11], [PASS][12], [PASS][13], [PASS][14], [PASS][15], [PASS][16], [PASS][17], [PASS][18], [PASS][19], [PASS][20], [PASS][21], [PASS][22], [PASS][23], [PASS][24], [PASS][25]) ([i915#4386]) -> ([PASS][26], [PASS][27], [PASS][28], [PASS][29], [PASS][30], [PASS][31], [PASS][32], [PASS][33], [PASS][34], [PASS][35], [PASS][36], [PASS][37], [PASS][38], [PASS][39], [PASS][40], [PASS][41], [PASS][42], [PASS][43], [PASS][44], [PASS][45], [PASS][46], [PASS][47], [PASS][48], [PASS][49], [PASS][50])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl3/boot.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl1/boot.html
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl1/boot.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl1/boot.html
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl8/boot.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl8/boot.html
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl8/boot.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl8/boot.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl7/boot.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl7/boot.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl7/boot.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl6/boot.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl2/boot.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl6/boot.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl6/boot.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl6/boot.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl4/boot.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl4/boot.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl4/boot.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl3/boot.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl2/boot.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl2/boot.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl3/boot.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl2/boot.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl1/boot.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl8/boot.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl8/boot.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl8/boot.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl8/boot.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl7/boot.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl7/boot.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl7/boot.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl6/boot.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl6/boot.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl6/boot.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl4/boot.html
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl4/boot.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl4/boot.html
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl4/boot.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl3/boot.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl3/boot.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl3/boot.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl3/boot.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl2/boot.html
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl2/boot.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl2/boot.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl1/boot.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl1/boot.html
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl1/boot.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl1/boot.html

  

### IGT changes ###

#### Issues hit ####

  * igt@gem_ctx_isolation@preservation-s3@bcs0:
    - shard-apl:          [PASS][51] -> [DMESG-WARN][52] ([i915#180]) +1 similar issue
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl1/igt@gem_ctx_isolation@preservation-s3@bcs0.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl8/igt@gem_ctx_isolation@preservation-s3@bcs0.html

  * igt@gem_ctx_persistence@many-contexts:
    - shard-tglb:         [PASS][53] -> [FAIL][54] ([i915#2410])
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-tglb7/igt@gem_ctx_persistence@many-contexts.html
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-tglb3/igt@gem_ctx_persistence@many-contexts.html

  * igt@gem_ctx_sseu@invalid-args:
    - shard-tglb:         NOTRUN -> [SKIP][55] ([i915#280])
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-tglb1/igt@gem_ctx_sseu@invalid-args.html

  * igt@gem_exec_balancer@parallel-out-fence:
    - shard-iclb:         [PASS][56] -> [SKIP][57] ([i915#4525]) +1 similar issue
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-iclb1/igt@gem_exec_balancer@parallel-out-fence.html
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-iclb5/igt@gem_exec_balancer@parallel-out-fence.html

  * igt@gem_exec_fair@basic-none-share@rcs0:
    - shard-iclb:         [PASS][58] -> [FAIL][59] ([i915#2842])
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-iclb8/igt@gem_exec_fair@basic-none-share@rcs0.html
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-iclb3/igt@gem_exec_fair@basic-none-share@rcs0.html

  * igt@gem_exec_fair@basic-none-solo@rcs0:
    - shard-glk:          NOTRUN -> [FAIL][60] ([i915#2842])
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-glk2/igt@gem_exec_fair@basic-none-solo@rcs0.html

  * igt@gem_exec_fair@basic-pace-solo@rcs0:
    - shard-tglb:         NOTRUN -> [FAIL][61] ([i915#2842])
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-tglb1/igt@gem_exec_fair@basic-pace-solo@rcs0.html

  * igt@gem_exec_fair@basic-pace@rcs0:
    - shard-glk:          [PASS][62] -> [FAIL][63] ([i915#2842])
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-glk6/igt@gem_exec_fair@basic-pace@rcs0.html
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-glk4/igt@gem_exec_fair@basic-pace@rcs0.html

  * igt@gem_exec_fair@basic-pace@vcs1:
    - shard-iclb:         NOTRUN -> [FAIL][64] ([i915#2842])
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-iclb2/igt@gem_exec_fair@basic-pace@vcs1.html

  * igt@gem_exec_fair@basic-pace@vecs0:
    - shard-kbl:          [PASS][65] -> [FAIL][66] ([i915#2842])
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-kbl3/igt@gem_exec_fair@basic-pace@vecs0.html
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-kbl7/igt@gem_exec_fair@basic-pace@vecs0.html

  * igt@gem_exec_fair@basic-throttle@rcs0:
    - shard-iclb:         [PASS][67] -> [FAIL][68] ([i915#2849])
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-iclb7/igt@gem_exec_fair@basic-throttle@rcs0.html
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-iclb8/igt@gem_exec_fair@basic-throttle@rcs0.html

  * igt@gem_huc_copy@huc-copy:
    - shard-apl:          NOTRUN -> [SKIP][69] ([fdo#109271] / [i915#2190])
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl8/igt@gem_huc_copy@huc-copy.html

  * igt@gem_lmem_swapping@heavy-random:
    - shard-skl:          NOTRUN -> [SKIP][70] ([fdo#109271] / [i915#4613])
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-skl7/igt@gem_lmem_swapping@heavy-random.html

  * igt@gem_lmem_swapping@random:
    - shard-apl:          NOTRUN -> [SKIP][71] ([fdo#109271] / [i915#4613])
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl1/igt@gem_lmem_swapping@random.html

  * igt@gem_pread@exhaustion:
    - shard-skl:          NOTRUN -> [WARN][72] ([i915#2658])
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-skl7/igt@gem_pread@exhaustion.html

  * igt@gem_userptr_blits@input-checking:
    - shard-skl:          NOTRUN -> [DMESG-WARN][73] ([i915#4990])
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-skl1/igt@gem_userptr_blits@input-checking.html
    - shard-apl:          NOTRUN -> [DMESG-WARN][74] ([i915#4990])
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl8/igt@gem_userptr_blits@input-checking.html

  * igt@kms_big_fb@linear-16bpp-rotate-90:
    - shard-tglb:         NOTRUN -> [SKIP][75] ([fdo#111614])
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-tglb1/igt@kms_big_fb@linear-16bpp-rotate-90.html

  * igt@kms_big_fb@y-tiled-32bpp-rotate-0:
    - shard-glk:          [PASS][76] -> [DMESG-WARN][77] ([i915#118])
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-glk7/igt@kms_big_fb@y-tiled-32bpp-rotate-0.html
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-glk9/igt@kms_big_fb@y-tiled-32bpp-rotate-0.html

  * igt@kms_big_fb@y-tiled-max-hw-stride-32bpp-rotate-0-hflip:
    - shard-apl:          NOTRUN -> [SKIP][78] ([fdo#109271] / [i915#3777])
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl1/igt@kms_big_fb@y-tiled-max-hw-stride-32bpp-rotate-0-hflip.html

  * igt@kms_big_fb@yf-tiled-64bpp-rotate-270:
    - shard-tglb:         NOTRUN -> [SKIP][79] ([fdo#111615])
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-tglb1/igt@kms_big_fb@yf-tiled-64bpp-rotate-270.html

  * igt@kms_big_fb@yf-tiled-max-hw-stride-64bpp-rotate-0:
    - shard-apl:          NOTRUN -> [SKIP][80] ([fdo#109271]) +82 similar issues
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl1/igt@kms_big_fb@yf-tiled-max-hw-stride-64bpp-rotate-0.html

  * igt@kms_ccs@pipe-a-crc-primary-rotation-180-y_tiled_gen12_rc_ccs:
    - shard-glk:          NOTRUN -> [SKIP][81] ([fdo#109271]) +16 similar issues
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-glk2/igt@kms_ccs@pipe-a-crc-primary-rotation-180-y_tiled_gen12_rc_ccs.html

  * igt@kms_ccs@pipe-b-crc-primary-basic-y_tiled_gen12_mc_ccs:
    - shard-skl:          NOTRUN -> [SKIP][82] ([fdo#109271] / [i915#3886]) +3 similar issues
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-skl7/igt@kms_ccs@pipe-b-crc-primary-basic-y_tiled_gen12_mc_ccs.html

  * igt@kms_ccs@pipe-b-missing-ccs-buffer-y_tiled_gen12_mc_ccs:
    - shard-glk:          NOTRUN -> [SKIP][83] ([fdo#109271] / [i915#3886])
   [83]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-glk2/igt@kms_ccs@pipe-b-missing-ccs-buffer-y_tiled_gen12_mc_ccs.html

  * igt@kms_ccs@pipe-c-ccs-on-another-bo-y_tiled_gen12_mc_ccs:
    - shard-apl:          NOTRUN -> [SKIP][84] ([fdo#109271] / [i915#3886]) +1 similar issue
   [84]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl8/igt@kms_ccs@pipe-c-ccs-on-another-bo-y_tiled_gen12_mc_ccs.html

  * igt@kms_chamelium@dp-crc-multiple:
    - shard-apl:          NOTRUN -> [SKIP][85] ([fdo#109271] / [fdo#111827]) +5 similar issues
   [85]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl1/igt@kms_chamelium@dp-crc-multiple.html

  * igt@kms_chamelium@hdmi-crc-multiple:
    - shard-tglb:         NOTRUN -> [SKIP][86] ([fdo#109284] / [fdo#111827])
   [86]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-tglb1/igt@kms_chamelium@hdmi-crc-multiple.html

  * igt@kms_color_chamelium@pipe-b-ctm-0-25:
    - shard-skl:          NOTRUN -> [SKIP][87] ([fdo#109271] / [fdo#111827]) +6 similar issues
   [87]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-skl1/igt@kms_color_chamelium@pipe-b-ctm-0-25.html

  * igt@kms_content_protection@lic:
    - shard-tglb:         NOTRUN -> [SKIP][88] ([i915#1063])
   [88]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-tglb1/igt@kms_content_protection@lic.html

  * igt@kms_content_protection@srm:
    - shard-apl:          NOTRUN -> [TIMEOUT][89] ([i915#1319]) +1 similar issue
   [89]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl8/igt@kms_content_protection@srm.html

  * igt@kms_cursor_crc@pipe-a-cursor-max-size-offscreen:
    - shard-tglb:         NOTRUN -> [SKIP][90] ([i915#3359]) +1 similar issue
   [90]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-tglb1/igt@kms_cursor_crc@pipe-a-cursor-max-size-offscreen.html

  * igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic:
    - shard-tglb:         NOTRUN -> [SKIP][91] ([fdo#109274] / [fdo#111825])
   [91]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-tglb1/igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible@a-edp1:
    - shard-skl:          [PASS][92] -> [FAIL][93] ([i915#79]) +1 similar issue
   [92]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-skl8/igt@kms_flip@flip-vs-expired-vblank-interruptible@a-edp1.html
   [93]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-skl9/igt@kms_flip@flip-vs-expired-vblank-interruptible@a-edp1.html

  * igt@kms_flip@flip-vs-expired-vblank@c-edp1:
    - shard-skl:          [PASS][94] -> [FAIL][95] ([i915#2122])
   [94]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-skl1/igt@kms_flip@flip-vs-expired-vblank@c-edp1.html
   [95]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-skl8/igt@kms_flip@flip-vs-expired-vblank@c-edp1.html

  * igt@kms_flip@flip-vs-suspend-interruptible@d-edp1:
    - shard-tglb:         [PASS][96] -> [DMESG-WARN][97] ([i915#2411] / [i915#2867])
   [96]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-tglb8/igt@kms_flip@flip-vs-suspend-interruptible@d-edp1.html
   [97]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-tglb2/igt@kms_flip@flip-vs-suspend-interruptible@d-edp1.html

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-pri-shrfb-draw-mmap-wc:
    - shard-glk:          [PASS][98] -> [FAIL][99] ([i915#2546])
   [98]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-glk7/igt@kms_frontbuffer_tracking@fbc-2p-primscrn-pri-shrfb-draw-mmap-wc.html
   [99]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-glk9/igt@kms_frontbuffer_tracking@fbc-2p-primscrn-pri-shrfb-draw-mmap-wc.html

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-pri-shrfb-draw-render:
    - shard-tglb:         NOTRUN -> [SKIP][100] ([fdo#109280] / [fdo#111825]) +1 similar issue
   [100]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-tglb1/igt@kms_frontbuffer_tracking@fbc-2p-primscrn-pri-shrfb-draw-render.html

  * igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-pri-indfb-draw-blt:
    - shard-skl:          NOTRUN -> [SKIP][101] ([fdo#109271]) +76 similar issues
   [101]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-skl1/igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-pri-indfb-draw-blt.html

  * igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d:
    - shard-skl:          NOTRUN -> [SKIP][102] ([fdo#109271] / [i915#533])
   [102]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-skl7/igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d.html

  * igt@kms_pipe_crc_basic@nonblocking-crc-pipe-b:
    - shard-iclb:         [PASS][103] -> [FAIL][104] ([i915#1888])
   [103]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-iclb7/igt@kms_pipe_crc_basic@nonblocking-crc-pipe-b.html
   [104]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-iclb3/igt@kms_pipe_crc_basic@nonblocking-crc-pipe-b.html

  * igt@kms_plane_alpha_blend@pipe-b-alpha-opaque-fb:
    - shard-apl:          NOTRUN -> [FAIL][105] ([fdo#108145] / [i915#265]) +1 similar issue
   [105]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl8/igt@kms_plane_alpha_blend@pipe-b-alpha-opaque-fb.html

  * igt@kms_plane_alpha_blend@pipe-c-alpha-opaque-fb:
    - shard-skl:          NOTRUN -> [FAIL][106] ([fdo#108145] / [i915#265]) +1 similar issue
   [106]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-skl7/igt@kms_plane_alpha_blend@pipe-c-alpha-opaque-fb.html

  * igt@kms_psr@psr2_sprite_blt:
    - shard-iclb:         [PASS][107] -> [SKIP][108] ([fdo#109441]) +1 similar issue
   [107]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-iclb2/igt@kms_psr@psr2_sprite_blt.html
   [108]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-iclb7/igt@kms_psr@psr2_sprite_blt.html

  * igt@kms_setmode@basic:
    - shard-glk:          [PASS][109] -> [FAIL][110] ([i915#31])
   [109]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-glk6/igt@kms_setmode@basic.html
   [110]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-glk4/igt@kms_setmode@basic.html

  * igt@kms_vblank@pipe-a-ts-continuation-dpms-suspend:
    - shard-skl:          [PASS][111] -> [INCOMPLETE][112] ([i915#4939])
   [111]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-skl9/igt@kms_vblank@pipe-a-ts-continuation-dpms-suspend.html
   [112]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-skl4/igt@kms_vblank@pipe-a-ts-continuation-dpms-suspend.html

  * igt@kms_vblank@pipe-b-wait-busy-hang:
    - shard-kbl:          [PASS][113] -> [INCOMPLETE][114] ([i915#794])
   [113]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-kbl4/igt@kms_vblank@pipe-b-wait-busy-hang.html
   [114]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-kbl6/igt@kms_vblank@pipe-b-wait-busy-hang.html

  * igt@kms_vblank@pipe-d-wait-idle:
    - shard-apl:          NOTRUN -> [SKIP][115] ([fdo#109271] / [i915#533])
   [115]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl1/igt@kms_vblank@pipe-d-wait-idle.html

  * igt@perf@per-context-mode-unprivileged:
    - shard-tglb:         NOTRUN -> [SKIP][116] ([fdo#109289])
   [116]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-tglb1/igt@perf@per-context-mode-unprivileged.html

  * igt@prime_nv_pcopy@test3_3:
    - shard-tglb:         NOTRUN -> [SKIP][117] ([fdo#109291])
   [117]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-tglb1/igt@prime_nv_pcopy@test3_3.html

  * igt@sysfs_clients@recycle-many:
    - shard-apl:          NOTRUN -> [SKIP][118] ([fdo#109271] / [i915#2994])
   [118]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl1/igt@sysfs_clients@recycle-many.html

  * igt@sysfs_clients@split-10:
    - shard-skl:          NOTRUN -> [SKIP][119] ([fdo#109271] / [i915#2994])
   [119]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-skl7/igt@sysfs_clients@split-10.html

  
#### Possible fixes ####

  * igt@gem_exec_balancer@parallel-bb-first:
    - shard-iclb:         [SKIP][120] ([i915#4525]) -> [PASS][121]
   [120]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-iclb6/igt@gem_exec_balancer@parallel-bb-first.html
   [121]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-iclb2/igt@gem_exec_balancer@parallel-bb-first.html

  * igt@gem_exec_endless@dispatch@vcs0:
    - shard-tglb:         [INCOMPLETE][122] ([i915#3778]) -> [PASS][123]
   [122]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-tglb5/igt@gem_exec_endless@dispatch@vcs0.html
   [123]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-tglb2/igt@gem_exec_endless@dispatch@vcs0.html

  * igt@gem_exec_fair@basic-none@vcs0:
    - shard-kbl:          [FAIL][124] ([i915#2842]) -> [PASS][125] +1 similar issue
   [124]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-kbl7/igt@gem_exec_fair@basic-none@vcs0.html
   [125]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-kbl1/igt@gem_exec_fair@basic-none@vcs0.html
    - shard-apl:          [FAIL][126] ([i915#2842]) -> [PASS][127]
   [126]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl6/igt@gem_exec_fair@basic-none@vcs0.html
   [127]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl4/igt@gem_exec_fair@basic-none@vcs0.html

  * igt@gem_exec_fair@basic-pace-share@rcs0:
    - shard-glk:          [FAIL][128] ([i915#2842]) -> [PASS][129]
   [128]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-glk9/igt@gem_exec_fair@basic-pace-share@rcs0.html
   [129]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-glk3/igt@gem_exec_fair@basic-pace-share@rcs0.html

  * igt@gem_exec_fair@basic-pace@vecs0:
    - shard-iclb:         [FAIL][130] ([i915#2842]) -> [PASS][131]
   [130]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-iclb5/igt@gem_exec_fair@basic-pace@vecs0.html
   [131]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-iclb2/igt@gem_exec_fair@basic-pace@vecs0.html

  * igt@gem_exec_schedule@smoketest@vecs0:
    - shard-skl:          [DMESG-WARN][132] ([i915#1982]) -> [PASS][133]
   [132]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-skl1/igt@gem_exec_schedule@smoketest@vecs0.html
   [133]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-skl8/igt@gem_exec_schedule@smoketest@vecs0.html

  * igt@gem_ppgtt@flink-and-close-vma-leak:
    - shard-apl:          [FAIL][134] ([i915#644]) -> [PASS][135]
   [134]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl4/igt@gem_ppgtt@flink-and-close-vma-leak.html
   [135]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl6/igt@gem_ppgtt@flink-and-close-vma-leak.html

  * igt@gen9_exec_parse@allowed-all:
    - shard-glk:          [DMESG-WARN][136] ([i915#1436] / [i915#716]) -> [PASS][137]
   [136]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-glk2/igt@gen9_exec_parse@allowed-all.html
   [137]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-glk2/igt@gen9_exec_parse@allowed-all.html

  * igt@i915_suspend@fence-restore-untiled:
    - shard-apl:          [DMESG-WARN][138] ([i915#180]) -> [PASS][139] +1 similar issue
   [138]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-apl3/igt@i915_suspend@fence-restore-untiled.html
   [139]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-apl8/igt@i915_suspend@fence-restore-untiled.html

  * igt@kms_cursor_crc@pipe-a-cursor-suspend:
    - shard-skl:          [INCOMPLETE][140] ([i915#2828] / [i915#300]) -> [PASS][141]
   [140]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-skl1/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
   [141]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-skl1/igt@kms_cursor_crc@pipe-a-cursor-suspend.html

  * igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-64bpp-ytile-downscaling:
    - shard-iclb:         [SKIP][142] ([i915#3701]) -> [PASS][143] +1 similar issue
   [142]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-iclb2/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-64bpp-ytile-downscaling.html
   [143]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-iclb7/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-64bpp-ytile-downscaling.html

  * igt@kms_hdr@bpc-switch-suspend:
    - shard-skl:          [FAIL][144] ([i915#1188]) -> [PASS][145]
   [144]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-skl9/igt@kms_hdr@bpc-switch-suspend.html
   [145]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-skl7/igt@kms_hdr@bpc-switch-suspend.html

  * igt@kms_psr@psr2_cursor_plane_onoff:
    - shard-iclb:         [SKIP][146] ([fdo#109441]) -> [PASS][147]
   [146]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-iclb5/igt@kms_psr@psr2_cursor_plane_onoff.html
   [147]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-iclb2/igt@kms_psr@psr2_cursor_plane_onoff.html

  * igt@kms_rmfb@close-fd:
    - shard-glk:          [DMESG-WARN][148] ([i915#118]) -> [PASS][149]
   [148]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-glk8/igt@kms_rmfb@close-fd.html
   [149]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-glk8/igt@kms_rmfb@close-fd.html

  * igt@perf@polling-small-buf:
    - shard-skl:          [FAIL][150] ([i915#1722]) -> [PASS][151]
   [150]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-skl6/igt@perf@polling-small-buf.html
   [151]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-skl10/igt@perf@polling-small-buf.html

  
#### Warnings ####

  * igt@gem_eio@unwedge-stress:
    - shard-tglb:         [FAIL][152] ([i915#232]) -> [TIMEOUT][153] ([i915#3063] / [i915#3648])
   [152]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-tglb1/igt@gem_eio@unwedge-stress.html
   [153]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-tglb7/igt@gem_eio@unwedge-stress.html

  * igt@i915_pm_dc@dc3co-vpb-simulation:
    - shard-iclb:         [SKIP][154] ([i915#588]) -> [SKIP][155] ([i915#658])
   [154]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-iclb2/igt@i915_pm_dc@dc3co-vpb-simulation.html
   [155]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-iclb7/igt@i915_pm_dc@dc3co-vpb-simulation.html

  * igt@i915_pm_rc6_residency@rc6-idle:
    - shard-iclb:         [WARN][156] ([i915#1804] / [i915#2684]) -> [WARN][157] ([i915#2684])
   [156]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-iclb4/igt@i915_pm_rc6_residency@rc6-idle.html
   [157]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-iclb1/igt@i915_pm_rc6_residency@rc6-idle.html

  * igt@kms_psr2_sf@overlay-plane-update-continuous-sf:
    - shard-iclb:         [SKIP][158] ([fdo#111068] / [i915#658]) -> [SKIP][159] ([i915#2920])
   [158]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-iclb6/igt@kms_psr2_sf@overlay-plane-update-continuous-sf.html
   [159]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-iclb2/igt@kms_psr2_sf@overlay-plane-update-continuous-sf.html

  * igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area:
    - shard-iclb:         [SKIP][160] ([i915#2920]) -> [SKIP][161] ([fdo#111068] / [i915#658])
   [160]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-iclb2/igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area.html
   [161]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-iclb7/igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area.html

  * igt@kms_psr2_su@page_flip-p010:
    - shard-iclb:         [SKIP][162] ([fdo#109642] / [fdo#111068] / [i915#658]) -> [FAIL][163] ([i915#4148])
   [162]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-iclb5/igt@kms_psr2_su@page_flip-p010.html
   [163]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-iclb2/igt@kms_psr2_su@page_flip-p010.html

  * igt@kms_psr2_su@page_flip-xrgb8888:
    - shard-iclb:         [FAIL][164] ([i915#4148]) -> [SKIP][165] ([fdo#109642] / [fdo#111068] / [i915#658])
   [164]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11151/shard-iclb2/igt@kms_psr2_su@page_flip-xrgb8888.html
   [165]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/shard-iclb7/igt@kms_psr2_su@page_flip-xrgb8888.html

  
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109274]: https://bugs.freedesktop.org/show_bug.cgi?id=109274
  [fdo#109280]: https://bugs.freedesktop.org/show_bug.cgi?id=109280
  [fdo#109284]: https://bugs.freedesktop.org/show_bug.cgi?id=109284
  [fdo#109289]: https://bugs.freedesktop.org/show_bug.cgi?id=109289
  [fdo#109291]: https://bugs.freedesktop.org/show_bug.cgi?id=109291
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#109642]: https://bugs.freedesktop.org/show_bug.cgi?id=109642
  [fdo#111068]: https://bugs.freedesktop.org/show_bug.cgi?id=111068
  [fdo#111614]: https://bugs.freedesktop.org/show_bug.cgi?id=111614
  [fdo#111615]: https://bugs.freedesktop.org/show_bug.cgi?id=111615
  [fdo#111825]: https://bugs.freedesktop.org/show_bug.cgi?id=111825
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [i915#1063]: https://gitlab.freedesktop.org/drm/intel/issues/1063
  [i915#118]: https://gitlab.freedesktop.org/drm/intel/issues/118
  [i915#1188]: https://gitlab.freedesktop.org/drm/intel/issues/1188
  [i915#1319]: https://gitlab.freedesktop.org/drm/intel/issues/1319
  [i915#1436]: https://gitlab.freedesktop.org/drm/intel/issues/1436
  [i915#1722]: https://gitlab.freedesktop.org/drm/intel/issues/1722
  [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
  [i915#1804]: https://gitlab.freedesktop.org/drm/intel/issues/1804
  [i915#1888]: https://gitlab.freedesktop.org/drm/intel/issues/1888
  [i915#1982]: https://gitlab.freedesktop.org/drm/intel/issues/1982
  [i915#2122]: https://gitlab.freedesktop.org/drm/intel/issues/2122
  [i915#232]: https://gitlab.freedesktop.org/drm/intel/issues/232
  [i915#2411]: https://gitlab.freedesktop.org/drm/intel/issues/2411
  [i915#2546]: https://gitlab.freedesktop.org/drm/intel/issues/2546
  [i915#265]: https://gitlab.freedesktop.org/drm/intel/issues/265
  [i915#2684]: https://gitlab.freedesktop.org/drm/intel/issues/2684
  [i915#2828]: https://gitlab.freedesktop.org/drm/intel/issues/2828
  [i915#2842]: https://gitlab.freedesktop.org/drm/intel/issues/2842
  [i915#2867]: https://gitlab.freedesktop.org/drm/intel/issues/2867
  [i915#2920]: https://gitlab.freedesktop.org/drm/intel/issues/2920
  [i915#2994]: https://gitlab.freedesktop.org/drm/intel/issues/2994
  [i915#300]: https://gitlab.freedesktop.org/drm/intel/issues/300
  [i915#3063]: https://gitlab.freedesktop.org/drm/intel/issues/3063
  [i915#31]: https://gitlab.freedesktop.org/drm/intel/issues/31
  [i915#3359]: https://gitlab.freedesktop.org/drm/intel/issues/3359
  [i915#3648]: https://gitlab.freedesktop.org/drm/intel/issues/3648
  [i915#3701]: https://gitlab.freedesktop.org/drm/intel/issues/3701
  [i915#3777]: https://gitlab.freedesktop.org/drm/intel/issues/3777
  [i915#3778]: https://gitlab.freedesktop.org/drm/intel/issues/3778
  [i915#3886]: https://gitlab.freedesktop.org/drm/intel/issues/3886
  [i915#4148]: https://gitlab.freedesktop.org/drm/intel/issues/4148
  [i915#4525]: https://gitlab.freedesktop.org/drm/intel/issues/4525
  [i915#4939]: https://gitlab.freedesktop.org/drm/intel/issues/4939
  [i915#533]: https://gitlab.freedesktop.org/drm/intel/issues/533
  [i915#588]: https://gitlab.freedesktop.org/drm/intel/issues/588
  [i915#644]: https://gitlab.freedesktop.org/drm/intel/issues/644
  [i915#658]: https://gitlab.freedesktop.org/drm/intel/issues/658
  [i915#716]: https://gitlab.freedesktop.org/drm/intel/issues/716
  [i915#79]: https://gitlab.freedesktop.org/drm/intel/issues/79
  [i915#794]: https://gitlab.freedesktop.org/drm/intel/issues/794

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22128/index.html


* Re: [PATCH v11 5/5] drm/amdgpu: add drm buddy support to amdgpu
  2022-01-27 14:11   ` [Intel-gfx] " Arunpravin
@ 2022-01-28 14:18     ` Matthew Auld
  -1 siblings, 0 replies; 34+ messages in thread
From: Matthew Auld @ 2022-01-28 14:18 UTC (permalink / raw)
  To: Arunpravin
  Cc: Intel Graphics Development, amd-gfx list, ML dri-devel,
	tzimmermann, alexander.deucher, Christian König,
	Matthew Auld

On Thu, 27 Jan 2022 at 14:11, Arunpravin
<Arunpravin.PaneerSelvam@amd.com> wrote:
>
> - Remove drm_mm references and replace with drm buddy functionalities
> - Add res cursor support for drm buddy
>
> v2(Matthew Auld):
>   - replace spinlock with mutex as we call kmem_cache_zalloc
>     (..., GFP_KERNEL) in drm_buddy_alloc() function
>
>   - lock drm_buddy_block_trim() function as it calls
>     mark_free/mark_split are all globally visible
>
> v3(Matthew Auld):
>   - remove trim method error handling as we address the failure case
>     at drm_buddy_block_trim() function
>
> v4:
>   - fix warnings reported by kernel test robot <lkp@intel.com>
>
> v5:
>   - fix merge conflict issue
>
> v6:
>   - fix warnings reported by kernel test robot <lkp@intel.com>
>
> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
> ---
>  drivers/gpu/drm/Kconfig                       |   1 +
>  .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    |  97 +++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   7 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 259 ++++++++++--------
>  4 files changed, 231 insertions(+), 133 deletions(-)

<snip>

>
> -/**
> - * amdgpu_vram_mgr_virt_start - update virtual start address
> - *
> - * @mem: ttm_resource to update
> - * @node: just allocated node
> - *
> - * Calculate a virtual BO start address to easily check if everything is CPU
> - * accessible.
> - */
> -static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
> -                                      struct drm_mm_node *node)
> -{
> -       unsigned long start;
> -
> -       start = node->start + node->size;
> -       if (start > mem->num_pages)
> -               start -= mem->num_pages;
> -       else
> -               start = 0;
> -       mem->start = max(mem->start, start);
> -}
> -
>  /**
>   * amdgpu_vram_mgr_new - allocate new ranges
>   *
> @@ -366,13 +357,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>                                const struct ttm_place *place,
>                                struct ttm_resource **res)
>  {
> -       unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
> +       unsigned long lpfn, pages_per_node, pages_left, pages, n_pages;
> +       u64 vis_usage = 0, mem_bytes, max_bytes, min_page_size;
>         struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>         struct amdgpu_device *adev = to_amdgpu_device(mgr);
> -       uint64_t vis_usage = 0, mem_bytes, max_bytes;
> -       struct ttm_range_mgr_node *node;
> -       struct drm_mm *mm = &mgr->mm;
> -       enum drm_mm_insert_mode mode;
> +       struct amdgpu_vram_mgr_node *node;
> +       struct drm_buddy *mm = &mgr->mm;
> +       struct drm_buddy_block *block;
>         unsigned i;
>         int r;
>
> @@ -391,10 +382,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>                 goto error_sub;
>         }
>
> -       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>                 pages_per_node = ~0ul;
> -               num_nodes = 1;
> -       } else {
> +       else {
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>                 pages_per_node = HPAGE_PMD_NR;
>  #else
> @@ -403,11 +393,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>  #endif
>                 pages_per_node = max_t(uint32_t, pages_per_node,
>                                        tbo->page_alignment);
> -               num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
>         }
>
> -       node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
> -                       GFP_KERNEL | __GFP_ZERO);
> +       node = kzalloc(sizeof(*node), GFP_KERNEL);
>         if (!node) {
>                 r = -ENOMEM;
>                 goto error_sub;
> @@ -415,9 +403,17 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>
>         ttm_resource_init(tbo, place, &node->base);
>
> -       mode = DRM_MM_INSERT_BEST;
> +       INIT_LIST_HEAD(&node->blocks);
> +
>         if (place->flags & TTM_PL_FLAG_TOPDOWN)
> -               mode = DRM_MM_INSERT_HIGH;
> +               node->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
> +
> +       if (place->fpfn || lpfn != man->size)
> +               /* Allocate blocks in desired range */
> +               node->flags |= DRM_BUDDY_RANGE_ALLOCATION;
> +
> +       min_page_size = mgr->default_page_size;
> +       BUG_ON(min_page_size < mm->chunk_size);
>
>         pages_left = node->base.num_pages;
>
> @@ -425,36 +421,61 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>         pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
>
>         i = 0;
> -       spin_lock(&mgr->lock);
>         while (pages_left) {
> -               uint32_t alignment = tbo->page_alignment;
> -
>                 if (pages >= pages_per_node)
> -                       alignment = pages_per_node;
> -
> -               r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
> -                                               alignment, 0, place->fpfn,
> -                                               lpfn, mode);
> -               if (unlikely(r)) {
> -                       if (pages > pages_per_node) {
> -                               if (is_power_of_2(pages))
> -                                       pages = pages / 2;
> -                               else
> -                                       pages = rounddown_pow_of_two(pages);
> -                               continue;
> -                       }
> -                       goto error_free;
> +                       pages = pages_per_node;
> +
> +               n_pages = pages;
> +
> +               if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
> +                       n_pages = roundup_pow_of_two(n_pages);
> +                       min_page_size = (u64)n_pages << PAGE_SHIFT;
> +
> +                       if (n_pages > lpfn)
> +                               lpfn = n_pages;
>                 }
>
> -               vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]);
> -               amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]);
> +               mutex_lock(&mgr->lock);
> +               r = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
> +                                          (u64)lpfn << PAGE_SHIFT,
> +                                          (u64)n_pages << PAGE_SHIFT,
> +                                          min_page_size,
> +                                          &node->blocks,
> +                                          node->flags);
> +               mutex_unlock(&mgr->lock);
> +               if (unlikely(r))
> +                       goto error_free_blocks;
> +
>                 pages_left -= pages;
>                 ++i;
>
>                 if (pages > pages_left)
>                         pages = pages_left;
>         }
> -       spin_unlock(&mgr->lock);
> +
> +       /* Free unused pages for contiguous allocation */
> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
> +               u64 actual_size = (u64)node->base.num_pages << PAGE_SHIFT;
> +
> +               mutex_lock(&mgr->lock);
> +               drm_buddy_block_trim(mm,
> +                                    actual_size,
> +                                    &node->blocks);
> +               mutex_unlock(&mgr->lock);
> +       }
> +
> +       list_for_each_entry(block, &node->blocks, link)
> +               vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
> +
> +       block = list_first_entry_or_null(&node->blocks,
> +                                        struct drm_buddy_block,
> +                                        link);
> +       if (!block) {
> +               r = -ENOENT;
> +               goto error_free_res;
> +       }
> +
> +       node->base.start = amdgpu_node_start(block) >> PAGE_SHIFT;

Hmm, does this work? It looks like there are various places checking
that res->start + res->num_pages <= visible_size, which IIUC should
only return true when the entire object is placed in the mappable
portion. i915 is doing something similar. Also it looks like
ttm_resource_compat() is potentially relying on this, like when moving
something from non-mappable -> mappable in
amdgpu_bo_fault_reserve_notify()?

Perhaps something like:

if (vis_usage == num_pages)
    base.start = 0;
else
    base.start = visible_size;

Otherwise I guess just keep amdgpu_vram_mgr_virt_start()?
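
For reference, the pattern in question is roughly the below; a sketch
only, with a hypothetical helper name, taking adev->gmc.visible_vram_size
as the mappable limit in bytes:

static bool res_fits_in_mappable(struct amdgpu_device *adev,
                                 struct ttm_resource *res)
{
        /* true only when the whole object sits below the mappable limit,
         * comparing in units of pages
         */
        return res->start + res->num_pages <=
               (adev->gmc.visible_vram_size >> PAGE_SHIFT);
}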


* Re: [PATCH v11 5/5] drm/amdgpu: add drm buddy support to amdgpu
  2022-01-28 14:18     ` [Intel-gfx] " Matthew Auld
@ 2022-02-04 11:22       ` Arunpravin
  -1 siblings, 0 replies; 34+ messages in thread
From: Arunpravin @ 2022-02-04 11:22 UTC (permalink / raw)
  To: Matthew Auld
  Cc: Intel Graphics Development, amd-gfx list, ML dri-devel,
	tzimmermann, alexander.deucher, Christian König,
	Matthew Auld



On 28/01/22 7:48 pm, Matthew Auld wrote:
> On Thu, 27 Jan 2022 at 14:11, Arunpravin
> <Arunpravin.PaneerSelvam@amd.com> wrote:
>>
>> - Remove drm_mm references and replace with drm buddy functionalities
>> - Add res cursor support for drm buddy
>>
>> v2(Matthew Auld):
>>   - replace spinlock with mutex as we call kmem_cache_zalloc
>>     (..., GFP_KERNEL) in drm_buddy_alloc() function
>>
>>   - lock drm_buddy_block_trim() function as it calls
>>     mark_free/mark_split are all globally visible
>>
>> v3(Matthew Auld):
>>   - remove trim method error handling as we address the failure case
>>     at drm_buddy_block_trim() function
>>
>> v4:
>>   - fix warnings reported by kernel test robot <lkp@intel.com>
>>
>> v5:
>>   - fix merge conflict issue
>>
>> v6:
>>   - fix warnings reported by kernel test robot <lkp@intel.com>
>>
>> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
>> ---
>>  drivers/gpu/drm/Kconfig                       |   1 +
>>  .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    |  97 +++++--
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   7 +-
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 259 ++++++++++--------
>>  4 files changed, 231 insertions(+), 133 deletions(-)
> 
> <snip>
> 
>>
>> -/**
>> - * amdgpu_vram_mgr_virt_start - update virtual start address
>> - *
>> - * @mem: ttm_resource to update
>> - * @node: just allocated node
>> - *
>> - * Calculate a virtual BO start address to easily check if everything is CPU
>> - * accessible.
>> - */
>> -static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
>> -                                      struct drm_mm_node *node)
>> -{
>> -       unsigned long start;
>> -
>> -       start = node->start + node->size;
>> -       if (start > mem->num_pages)
>> -               start -= mem->num_pages;
>> -       else
>> -               start = 0;
>> -       mem->start = max(mem->start, start);
>> -}
>> -
>>  /**
>>   * amdgpu_vram_mgr_new - allocate new ranges
>>   *
>> @@ -366,13 +357,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>                                const struct ttm_place *place,
>>                                struct ttm_resource **res)
>>  {
>> -       unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
>> +       unsigned long lpfn, pages_per_node, pages_left, pages, n_pages;
>> +       u64 vis_usage = 0, mem_bytes, max_bytes, min_page_size;
>>         struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>>         struct amdgpu_device *adev = to_amdgpu_device(mgr);
>> -       uint64_t vis_usage = 0, mem_bytes, max_bytes;
>> -       struct ttm_range_mgr_node *node;
>> -       struct drm_mm *mm = &mgr->mm;
>> -       enum drm_mm_insert_mode mode;
>> +       struct amdgpu_vram_mgr_node *node;
>> +       struct drm_buddy *mm = &mgr->mm;
>> +       struct drm_buddy_block *block;
>>         unsigned i;
>>         int r;
>>
>> @@ -391,10 +382,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>                 goto error_sub;
>>         }
>>
>> -       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>>                 pages_per_node = ~0ul;
>> -               num_nodes = 1;
>> -       } else {
>> +       else {
>>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>                 pages_per_node = HPAGE_PMD_NR;
>>  #else
>> @@ -403,11 +393,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>  #endif
>>                 pages_per_node = max_t(uint32_t, pages_per_node,
>>                                        tbo->page_alignment);
>> -               num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
>>         }
>>
>> -       node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
>> -                       GFP_KERNEL | __GFP_ZERO);
>> +       node = kzalloc(sizeof(*node), GFP_KERNEL);
>>         if (!node) {
>>                 r = -ENOMEM;
>>                 goto error_sub;
>> @@ -415,9 +403,17 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>
>>         ttm_resource_init(tbo, place, &node->base);
>>
>> -       mode = DRM_MM_INSERT_BEST;
>> +       INIT_LIST_HEAD(&node->blocks);
>> +
>>         if (place->flags & TTM_PL_FLAG_TOPDOWN)
>> -               mode = DRM_MM_INSERT_HIGH;
>> +               node->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
>> +
>> +       if (place->fpfn || lpfn != man->size)
>> +               /* Allocate blocks in desired range */
>> +               node->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>> +
>> +       min_page_size = mgr->default_page_size;
>> +       BUG_ON(min_page_size < mm->chunk_size);
>>
>>         pages_left = node->base.num_pages;
>>
>> @@ -425,36 +421,61 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>         pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
>>
>>         i = 0;
>> -       spin_lock(&mgr->lock);
>>         while (pages_left) {
>> -               uint32_t alignment = tbo->page_alignment;
>> -
>>                 if (pages >= pages_per_node)
>> -                       alignment = pages_per_node;
>> -
>> -               r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
>> -                                               alignment, 0, place->fpfn,
>> -                                               lpfn, mode);
>> -               if (unlikely(r)) {
>> -                       if (pages > pages_per_node) {
>> -                               if (is_power_of_2(pages))
>> -                                       pages = pages / 2;
>> -                               else
>> -                                       pages = rounddown_pow_of_two(pages);
>> -                               continue;
>> -                       }
>> -                       goto error_free;
>> +                       pages = pages_per_node;
>> +
>> +               n_pages = pages;
>> +
>> +               if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>> +                       n_pages = roundup_pow_of_two(n_pages);
>> +                       min_page_size = (u64)n_pages << PAGE_SHIFT;
>> +
>> +                       if (n_pages > lpfn)
>> +                               lpfn = n_pages;
>>                 }
>>
>> -               vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]);
>> -               amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]);
>> +               mutex_lock(&mgr->lock);
>> +               r = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
>> +                                          (u64)lpfn << PAGE_SHIFT,
>> +                                          (u64)n_pages << PAGE_SHIFT,
>> +                                          min_page_size,
>> +                                          &node->blocks,
>> +                                          node->flags);
>> +               mutex_unlock(&mgr->lock);
>> +               if (unlikely(r))
>> +                       goto error_free_blocks;
>> +
>>                 pages_left -= pages;
>>                 ++i;
>>
>>                 if (pages > pages_left)
>>                         pages = pages_left;
>>         }
>> -       spin_unlock(&mgr->lock);
>> +
>> +       /* Free unused pages for contiguous allocation */
>> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>> +               u64 actual_size = (u64)node->base.num_pages << PAGE_SHIFT;
>> +
>> +               mutex_lock(&mgr->lock);
>> +               drm_buddy_block_trim(mm,
>> +                                    actual_size,
>> +                                    &node->blocks);
>> +               mutex_unlock(&mgr->lock);
>> +       }
>> +
>> +       list_for_each_entry(block, &node->blocks, link)
>> +               vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
>> +
>> +       block = list_first_entry_or_null(&node->blocks,
>> +                                        struct drm_buddy_block,
>> +                                        link);
>> +       if (!block) {
>> +               r = -ENOENT;
>> +               goto error_free_res;
>> +       }
>> +
>> +       node->base.start = amdgpu_node_start(block) >> PAGE_SHIFT;
> 
> Hmm, does this work? It looks like there are various places checking
> that res->start + res->num_pages <= visible_size, which IIUC should
> only return true when the entire object is placed in the mappable
> portion. i915 is doing something similar. Also it looks like
> ttm_resource_compat() is potentially relying on this, like when moving
> something from non-mappable -> mappable in
> amdgpu_bo_fault_reserve_notify()?
> 
> Perhaps something like:
> 
> if (vis_usage == num_pages)
>     base.start = 0;
> else
>     base.start = visible_size;
> 
> Otherwise I guess just keep amdgpu_vram_mgr_virt_start()?
> 

hmm, I wonder how it works; maybe we never hit the corner case
where res->start + res->num_pages > visible_size.

In amdgpu, if the AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED flag is set, we
set ttm_place.lpfn = visible_pfn in amdgpu_bo_placement_from_domain().
Hence, in amdgpu_vram_mgr_new() the DRM_BUDDY_RANGE_ALLOCATION flag
gets enabled, which routes the allocation through alloc_range_bias()
in drm_buddy.c.

Here we get blocks chained together in random order, all complying with
the visible_pfn range. Say, for instance, num_pages = 13;
we may get:
Block 1 addr - 500 (order-3)
Block 2 addr - 400 (order-2)
Block 3 addr - 600 (order-0)

Currently base.start is taken from Block 1, the first entry fetched
from the list, so the address 500 is assigned to it. That is fine for
resource access, since we reach the blocks through the list links.

But for the check res->start + res->num_pages <= visible_size in a few
places, this doesn't work. AFAIK, keeping amdgpu_vram_mgr_virt_start()
doesn't work either, since that function assumes nodes at contiguous
addresses when calculating the start address. Likewise, assigning the
lowest block's start address (so that 400 + num_pages <= visible_size)
would be misleading in our case, since the blocks are only linked
through the list.

How about replacing the check with a bool-returning function which
verifies, for each block, that block start address + block size <=
visible_size? (Sketched below.)
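
Roughly something like this; just a sketch of the idea, assuming an
amdgpu_node_size() helper returning the block size in bytes alongside
the amdgpu_node_start() helper from this patch, and a hypothetical
function name:

static bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
                                   struct amdgpu_vram_mgr_node *node)
{
        struct drm_buddy_block *block;

        /* every block must end at or below the visible VRAM limit */
        list_for_each_entry(block, &node->blocks, link) {
                if (amdgpu_node_start(block) + amdgpu_node_size(block) >
                    adev->gmc.visible_vram_size)
                        return false;
        }

        return true;
}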


* Re: [PATCH v11 5/5] drm/amdgpu: add drm buddy support to amdgpu
  2022-02-04 11:22       ` [Intel-gfx] " Arunpravin
@ 2022-02-04 13:23         ` Christian König
  -1 siblings, 0 replies; 34+ messages in thread
From: Christian König @ 2022-02-04 13:23 UTC (permalink / raw)
  To: Arunpravin, Matthew Auld
  Cc: Intel Graphics Development, ML dri-devel, amd-gfx list,
	tzimmermann, alexander.deucher, Christian König,
	Matthew Auld

On 04.02.22 at 12:22, Arunpravin wrote:
> On 28/01/22 7:48 pm, Matthew Auld wrote:
>> On Thu, 27 Jan 2022 at 14:11, Arunpravin
>> <Arunpravin.PaneerSelvam@amd.com> wrote:
>>> - Remove drm_mm references and replace with drm buddy functionalities
>>> - Add res cursor support for drm buddy
>>>
>>> v2(Matthew Auld):
>>>    - replace spinlock with mutex as we call kmem_cache_zalloc
>>>      (..., GFP_KERNEL) in drm_buddy_alloc() function
>>>
>>>    - lock drm_buddy_block_trim() function as it calls
>>>      mark_free/mark_split are all globally visible
>>>
>>> v3(Matthew Auld):
>>>    - remove trim method error handling as we address the failure case
>>>      at drm_buddy_block_trim() function
>>>
>>> v4:
>>>    - fix warnings reported by kernel test robot <lkp@intel.com>
>>>
>>> v5:
>>>    - fix merge conflict issue
>>>
>>> v6:
>>>    - fix warnings reported by kernel test robot <lkp@intel.com>
>>>
>>> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
>>> ---
>>>   drivers/gpu/drm/Kconfig                       |   1 +
>>>   .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    |  97 +++++--
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   7 +-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 259 ++++++++++--------
>>>   4 files changed, 231 insertions(+), 133 deletions(-)
>> <snip>
>>
>>> -/**
>>> - * amdgpu_vram_mgr_virt_start - update virtual start address
>>> - *
>>> - * @mem: ttm_resource to update
>>> - * @node: just allocated node
>>> - *
>>> - * Calculate a virtual BO start address to easily check if everything is CPU
>>> - * accessible.
>>> - */
>>> -static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
>>> -                                      struct drm_mm_node *node)
>>> -{
>>> -       unsigned long start;
>>> -
>>> -       start = node->start + node->size;
>>> -       if (start > mem->num_pages)
>>> -               start -= mem->num_pages;
>>> -       else
>>> -               start = 0;
>>> -       mem->start = max(mem->start, start);
>>> -}
>>> -
>>>   /**
>>>    * amdgpu_vram_mgr_new - allocate new ranges
>>>    *
>>> @@ -366,13 +357,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>                                 const struct ttm_place *place,
>>>                                 struct ttm_resource **res)
>>>   {
>>> -       unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
>>> +       unsigned long lpfn, pages_per_node, pages_left, pages, n_pages;
>>> +       u64 vis_usage = 0, mem_bytes, max_bytes, min_page_size;
>>>          struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>>>          struct amdgpu_device *adev = to_amdgpu_device(mgr);
>>> -       uint64_t vis_usage = 0, mem_bytes, max_bytes;
>>> -       struct ttm_range_mgr_node *node;
>>> -       struct drm_mm *mm = &mgr->mm;
>>> -       enum drm_mm_insert_mode mode;
>>> +       struct amdgpu_vram_mgr_node *node;
>>> +       struct drm_buddy *mm = &mgr->mm;
>>> +       struct drm_buddy_block *block;
>>>          unsigned i;
>>>          int r;
>>>
>>> @@ -391,10 +382,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>                  goto error_sub;
>>>          }
>>>
>>> -       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>>>                  pages_per_node = ~0ul;
>>> -               num_nodes = 1;
>>> -       } else {
>>> +       else {
>>>   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>>                  pages_per_node = HPAGE_PMD_NR;
>>>   #else
>>> @@ -403,11 +393,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>   #endif
>>>                  pages_per_node = max_t(uint32_t, pages_per_node,
>>>                                         tbo->page_alignment);
>>> -               num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
>>>          }
>>>
>>> -       node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
>>> -                       GFP_KERNEL | __GFP_ZERO);
>>> +       node = kzalloc(sizeof(*node), GFP_KERNEL);
>>>          if (!node) {
>>>                  r = -ENOMEM;
>>>                  goto error_sub;
>>> @@ -415,9 +403,17 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>
>>>          ttm_resource_init(tbo, place, &node->base);
>>>
>>> -       mode = DRM_MM_INSERT_BEST;
>>> +       INIT_LIST_HEAD(&node->blocks);
>>> +
>>>          if (place->flags & TTM_PL_FLAG_TOPDOWN)
>>> -               mode = DRM_MM_INSERT_HIGH;
>>> +               node->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
>>> +
>>> +       if (place->fpfn || lpfn != man->size)
>>> +               /* Allocate blocks in desired range */
>>> +               node->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>>> +
>>> +       min_page_size = mgr->default_page_size;
>>> +       BUG_ON(min_page_size < mm->chunk_size);
>>>
>>>          pages_left = node->base.num_pages;
>>>
>>> @@ -425,36 +421,61 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>          pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
>>>
>>>          i = 0;
>>> -       spin_lock(&mgr->lock);
>>>          while (pages_left) {
>>> -               uint32_t alignment = tbo->page_alignment;
>>> -
>>>                  if (pages >= pages_per_node)
>>> -                       alignment = pages_per_node;
>>> -
>>> -               r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
>>> -                                               alignment, 0, place->fpfn,
>>> -                                               lpfn, mode);
>>> -               if (unlikely(r)) {
>>> -                       if (pages > pages_per_node) {
>>> -                               if (is_power_of_2(pages))
>>> -                                       pages = pages / 2;
>>> -                               else
>>> -                                       pages = rounddown_pow_of_two(pages);
>>> -                               continue;
>>> -                       }
>>> -                       goto error_free;
>>> +                       pages = pages_per_node;
>>> +
>>> +               n_pages = pages;
>>> +
>>> +               if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>> +                       n_pages = roundup_pow_of_two(n_pages);
>>> +                       min_page_size = (u64)n_pages << PAGE_SHIFT;
>>> +
>>> +                       if (n_pages > lpfn)
>>> +                               lpfn = n_pages;
>>>                  }
>>>
>>> -               vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]);
>>> -               amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]);
>>> +               mutex_lock(&mgr->lock);
>>> +               r = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
>>> +                                          (u64)lpfn << PAGE_SHIFT,
>>> +                                          (u64)n_pages << PAGE_SHIFT,
>>> +                                          min_page_size,
>>> +                                          &node->blocks,
>>> +                                          node->flags);
>>> +               mutex_unlock(&mgr->lock);
>>> +               if (unlikely(r))
>>> +                       goto error_free_blocks;
>>> +
>>>                  pages_left -= pages;
>>>                  ++i;
>>>
>>>                  if (pages > pages_left)
>>>                          pages = pages_left;
>>>          }
>>> -       spin_unlock(&mgr->lock);
>>> +
>>> +       /* Free unused pages for contiguous allocation */
>>> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>> +               u64 actual_size = (u64)node->base.num_pages << PAGE_SHIFT;
>>> +
>>> +               mutex_lock(&mgr->lock);
>>> +               drm_buddy_block_trim(mm,
>>> +                                    actual_size,
>>> +                                    &node->blocks);
>>> +               mutex_unlock(&mgr->lock);
>>> +       }
>>> +
>>> +       list_for_each_entry(block, &node->blocks, link)
>>> +               vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
>>> +
>>> +       block = list_first_entry_or_null(&node->blocks,
>>> +                                        struct drm_buddy_block,
>>> +                                        link);
>>> +       if (!block) {
>>> +               r = -ENOENT;
>>> +               goto error_free_res;
>>> +       }
>>> +
>>> +       node->base.start = amdgpu_node_start(block) >> PAGE_SHIFT;
>> Hmm, does this work? It looks like there are various places checking
>> that res->start + res->num_pages <= visible_size, which IIUC should
>> only return true when the entire object is placed in the mappable
>> portion. i915 is doing something similar. Also it looks like
>> ttm_resource_compat() is potentially relying on this, like when moving
>> something from non-mappable -> mappable in
>> amdgpu_bo_fault_reserve_notify()?
>>
>> Perhaps something like:
>>
>> if (vis_usage == num_pages)
>>      base.start = 0;
>> else
>>      base.start = visible_size;
>>
>> Otherwise I guess just keep amdgpu_vram_mgr_virt_start()?
>>
> hmm, I wonder how it works; maybe we just never hit the corner case
> where res->start + res->num_pages > visible_size
>
> in amdgpu, if the AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED flag is set, we
> set ttm_place.lpfn = visible_pfn at
> amdgpu_bo_placement_from_domain(). Hence, in the amdgpu_vram_mgr_new()
> function the DRM_BUDDY_RANGE_ALLOCATION flag is enabled, which calls
> alloc_range_bias() in drm_buddy.c.
>
> Here we get blocks chained together in random order while complying
> with the visible_pfn range. Say for instance num_pages = 13,
> we may get:
> Block 1 addr - 500 (order-3)
> Block 2 addr - 400 (order-2)
> Block 3 addr - 600 (order-0)
>
> Currently base.start is assigned Block 1's start address fetched from
> the list, i.e. the address 500, which is fine for resource access
> since we reach the blocks through the list links.
>
> But the check res->start + res->num_pages <= visible_size in a few
> places doesn't work with this. AFAIK, keeping
> amdgpu_vram_mgr_virt_start() doesn't work either, since that function
> expects nodes at contiguous addresses to compute the start address.
> AFAIK, assigning the lowest start address (so that 400 +
> num_pages <= visible_size holds) is misleading in our case since the
> blocks are only linked by the list.
>
> how about replacing the check with a bool-returning function which
> checks, for each block, start address + block size <= visible_size?

Yeah, we already have that in the TTM code. It's just not used 
everywhere IIRC.

The node->start can just be set to the invalid offset for now and should 
be removed as soon as we don't need it any more.
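
For example, assuming the AMDGPU_BO_INVALID_OFFSET sentinel from
amdgpu_object.h, a minimal sketch would be:

	/* Poison res->start so any leftover start + num_pages
	 * arithmetic fails visibly instead of silently passing. */
	node->base.start = AMDGPU_BO_INVALID_OFFSET;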

Regards,
Christian.


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v11 5/5] drm/amdgpu: add drm buddy support to amdgpu
  2022-02-04 13:23         ` [Intel-gfx] " Christian König
@ 2022-02-08 11:20           ` Arunpravin
  -1 siblings, 0 replies; 34+ messages in thread
From: Arunpravin @ 2022-02-08 11:20 UTC (permalink / raw)
  To: Christian König, Matthew Auld
  Cc: Intel Graphics Development, ML dri-devel, amd-gfx list,
	tzimmermann, alexander.deucher, Christian König,
	Matthew Auld



On 04/02/22 6:53 pm, Christian König wrote:
> On 04.02.22 at 12:22, Arunpravin wrote:
>> On 28/01/22 7:48 pm, Matthew Auld wrote:
>>> On Thu, 27 Jan 2022 at 14:11, Arunpravin
>>> <Arunpravin.PaneerSelvam@amd.com> wrote:
>>>> - Remove drm_mm references and replace with drm buddy functionalities
>>>> - Add res cursor support for drm buddy
>>>>
>>>> v2(Matthew Auld):
>>>>    - replace spinlock with mutex as we call kmem_cache_zalloc
>>>>      (..., GFP_KERNEL) in drm_buddy_alloc() function
>>>>
>>>>    - lock drm_buddy_block_trim() function as it calls
>>>>      mark_free/mark_split are all globally visible
>>>>
>>>> v3(Matthew Auld):
>>>>    - remove trim method error handling as we address the failure case
>>>>      at drm_buddy_block_trim() function
>>>>
>>>> v4:
>>>>    - fix warnings reported by kernel test robot <lkp@intel.com>
>>>>
>>>> v5:
>>>>    - fix merge conflict issue
>>>>
>>>> v6:
>>>>    - fix warnings reported by kernel test robot <lkp@intel.com>
>>>>
>>>> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
>>>> ---
>>>>   drivers/gpu/drm/Kconfig                       |   1 +
>>>>   .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    |  97 +++++--
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   7 +-
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 259 ++++++++++--------
>>>>   4 files changed, 231 insertions(+), 133 deletions(-)
>>> <snip>
>>>
>>>> -/**
>>>> - * amdgpu_vram_mgr_virt_start - update virtual start address
>>>> - *
>>>> - * @mem: ttm_resource to update
>>>> - * @node: just allocated node
>>>> - *
>>>> - * Calculate a virtual BO start address to easily check if everything is CPU
>>>> - * accessible.
>>>> - */
>>>> -static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
>>>> -                                      struct drm_mm_node *node)
>>>> -{
>>>> -       unsigned long start;
>>>> -
>>>> -       start = node->start + node->size;
>>>> -       if (start > mem->num_pages)
>>>> -               start -= mem->num_pages;
>>>> -       else
>>>> -               start = 0;
>>>> -       mem->start = max(mem->start, start);
>>>> -}
>>>> -
>>>>   /**
>>>>    * amdgpu_vram_mgr_new - allocate new ranges
>>>>    *
>>>> @@ -366,13 +357,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>                                 const struct ttm_place *place,
>>>>                                 struct ttm_resource **res)
>>>>   {
>>>> -       unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
>>>> +       unsigned long lpfn, pages_per_node, pages_left, pages, n_pages;
>>>> +       u64 vis_usage = 0, mem_bytes, max_bytes, min_page_size;
>>>>          struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>>>>          struct amdgpu_device *adev = to_amdgpu_device(mgr);
>>>> -       uint64_t vis_usage = 0, mem_bytes, max_bytes;
>>>> -       struct ttm_range_mgr_node *node;
>>>> -       struct drm_mm *mm = &mgr->mm;
>>>> -       enum drm_mm_insert_mode mode;
>>>> +       struct amdgpu_vram_mgr_node *node;
>>>> +       struct drm_buddy *mm = &mgr->mm;
>>>> +       struct drm_buddy_block *block;
>>>>          unsigned i;
>>>>          int r;
>>>>
>>>> @@ -391,10 +382,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>                  goto error_sub;
>>>>          }
>>>>
>>>> -       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>>> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>>>>                  pages_per_node = ~0ul;
>>>> -               num_nodes = 1;
>>>> -       } else {
>>>> +       else {
>>>>   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>>>                  pages_per_node = HPAGE_PMD_NR;
>>>>   #else
>>>> @@ -403,11 +393,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>   #endif
>>>>                  pages_per_node = max_t(uint32_t, pages_per_node,
>>>>                                         tbo->page_alignment);
>>>> -               num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
>>>>          }
>>>>
>>>> -       node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
>>>> -                       GFP_KERNEL | __GFP_ZERO);
>>>> +       node = kzalloc(sizeof(*node), GFP_KERNEL);
>>>>          if (!node) {
>>>>                  r = -ENOMEM;
>>>>                  goto error_sub;
>>>> @@ -415,9 +403,17 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>
>>>>          ttm_resource_init(tbo, place, &node->base);
>>>>
>>>> -       mode = DRM_MM_INSERT_BEST;
>>>> +       INIT_LIST_HEAD(&node->blocks);
>>>> +
>>>>          if (place->flags & TTM_PL_FLAG_TOPDOWN)
>>>> -               mode = DRM_MM_INSERT_HIGH;
>>>> +               node->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
>>>> +
>>>> +       if (place->fpfn || lpfn != man->size)
>>>> +               /* Allocate blocks in desired range */
>>>> +               node->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>>>> +
>>>> +       min_page_size = mgr->default_page_size;
>>>> +       BUG_ON(min_page_size < mm->chunk_size);
>>>>
>>>>          pages_left = node->base.num_pages;
>>>>
>>>> @@ -425,36 +421,61 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>          pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
>>>>
>>>>          i = 0;
>>>> -       spin_lock(&mgr->lock);
>>>>          while (pages_left) {
>>>> -               uint32_t alignment = tbo->page_alignment;
>>>> -
>>>>                  if (pages >= pages_per_node)
>>>> -                       alignment = pages_per_node;
>>>> -
>>>> -               r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
>>>> -                                               alignment, 0, place->fpfn,
>>>> -                                               lpfn, mode);
>>>> -               if (unlikely(r)) {
>>>> -                       if (pages > pages_per_node) {
>>>> -                               if (is_power_of_2(pages))
>>>> -                                       pages = pages / 2;
>>>> -                               else
>>>> -                                       pages = rounddown_pow_of_two(pages);
>>>> -                               continue;
>>>> -                       }
>>>> -                       goto error_free;
>>>> +                       pages = pages_per_node;
>>>> +
>>>> +               n_pages = pages;
>>>> +
>>>> +               if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>>> +                       n_pages = roundup_pow_of_two(n_pages);
>>>> +                       min_page_size = (u64)n_pages << PAGE_SHIFT;
>>>> +
>>>> +                       if (n_pages > lpfn)
>>>> +                               lpfn = n_pages;
>>>>                  }
>>>>
>>>> -               vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]);
>>>> -               amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]);
>>>> +               mutex_lock(&mgr->lock);
>>>> +               r = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
>>>> +                                          (u64)lpfn << PAGE_SHIFT,
>>>> +                                          (u64)n_pages << PAGE_SHIFT,
>>>> +                                          min_page_size,
>>>> +                                          &node->blocks,
>>>> +                                          node->flags);
>>>> +               mutex_unlock(&mgr->lock);
>>>> +               if (unlikely(r))
>>>> +                       goto error_free_blocks;
>>>> +
>>>>                  pages_left -= pages;
>>>>                  ++i;
>>>>
>>>>                  if (pages > pages_left)
>>>>                          pages = pages_left;
>>>>          }
>>>> -       spin_unlock(&mgr->lock);
>>>> +
>>>> +       /* Free unused pages for contiguous allocation */
>>>> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>>> +               u64 actual_size = (u64)node->base.num_pages << PAGE_SHIFT;
>>>> +
>>>> +               mutex_lock(&mgr->lock);
>>>> +               drm_buddy_block_trim(mm,
>>>> +                                    actual_size,
>>>> +                                    &node->blocks);
>>>> +               mutex_unlock(&mgr->lock);
>>>> +       }
>>>> +
>>>> +       list_for_each_entry(block, &node->blocks, link)
>>>> +               vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
>>>> +
>>>> +       block = list_first_entry_or_null(&node->blocks,
>>>> +                                        struct drm_buddy_block,
>>>> +                                        link);
>>>> +       if (!block) {
>>>> +               r = -ENOENT;
>>>> +               goto error_free_res;
>>>> +       }
>>>> +
>>>> +       node->base.start = amdgpu_node_start(block) >> PAGE_SHIFT;
>>> Hmm, does this work? It looks like there are various places checking
>>> that res->start + res->num_pages <= visible_size, which IIUC should
>>> only return true when the entire object is placed in the mappable
>>> portion. i915 is doing something similar. Also it looks like
>>> ttm_resource_compat() is potentially relying on this, like when moving
>>> something from non-mappable -> mappable in
>>> amdgpu_bo_fault_reserve_notify()?
>>>
>>> Perhaps something like:
>>>
>>> if (vis_usage == num_pages)
>>>      base.start = 0;
>>> else
>>>      base.start = visible_size;
>>>
>>> Otherwise I guess just keep amdgpu_vram_mgr_virt_start()?
>>>
>> hmm, I wonder how it works; maybe we just never hit the corner case
>> where res->start + res->num_pages > visible_size
>>
>> in amdgpu, if the AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED flag is set, we
>> set ttm_place.lpfn = visible_pfn at
>> amdgpu_bo_placement_from_domain(). Hence, in the amdgpu_vram_mgr_new()
>> function the DRM_BUDDY_RANGE_ALLOCATION flag is enabled, which calls
>> alloc_range_bias() in drm_buddy.c.
>>
>> Here we get blocks chained together in random order while complying
>> with the visible_pfn range. Say for instance num_pages = 13,
>> we may get:
>> Block 1 addr - 500 (order-3)
>> Block 2 addr - 400 (order-2)
>> Block 3 addr - 600 (order-0)
>>
>> Currently base.start is assigned Block 1's start address fetched from
>> the list, i.e. the address 500, which is fine for resource access
>> since we reach the blocks through the list links.
>>
>> But the check res->start + res->num_pages <= visible_size in a few
>> places doesn't work with this. AFAIK, keeping
>> amdgpu_vram_mgr_virt_start() doesn't work either, since that function
>> expects nodes at contiguous addresses to compute the start address.
>> AFAIK, assigning the lowest start address (so that 400 +
>> num_pages <= visible_size holds) is misleading in our case since the
>> blocks are only linked by the list.
>>
>> how about replacing the check with a bool-returning function which
>> checks, for each block, start address + block size <= visible_size?
> 
> Yeah, we already have that in the TTM code. It's just not used 
> everywhere IIRC.

Hi Christian,
here we have a problem: in many places in ttm and amdgpu we use the
tbo->resource->start + tbo->resource->num_pages computation, and this
doesn't work in the drm buddy case since the blocks are allocated at
different locations and only chained together in a linked list.
> 
> The node->start can just be set to the invalid offset for now and should 
> be removed as soon as we don't need it any more.
Assigning the first block's start offset to resource->start doesn't
work. And if we set node->start to the invalid offset, won't we get an
incorrect value there?
> 
> Regards,
> Christian.
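
A sketch of what such callers could do instead, assuming the
amdgpu_res_cursor helpers touched by patch 5/5 (bo and adev in scope;
field and function names as in amdgpu_res_cursor.h):

	struct amdgpu_res_cursor cursor;
	bool visible = true;

	/* Walk every buddy block backing the BO rather than assuming
	 * one contiguous range starting at resource->start. */
	amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
	while (cursor.remaining) {
		if (cursor.start + cursor.size >
		    adev->gmc.visible_vram_size) {
			visible = false;
			break;
		}
		amdgpu_res_next(&cursor, cursor.size);
	}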

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v11 1/5] drm: improve drm_buddy_alloc function
  2022-01-27 14:11 ` [Intel-gfx] " Arunpravin
  (?)
@ 2022-02-10 16:10   ` Matthew Auld
  -1 siblings, 0 replies; 34+ messages in thread
From: Matthew Auld @ 2022-02-10 16:10 UTC (permalink / raw)
  To: Arunpravin, dri-devel, intel-gfx, amd-gfx
  Cc: alexander.deucher, tzimmermann, christian.koenig

On 27/01/2022 14:11, Arunpravin wrote:
> - Make drm_buddy_alloc a single function to handle
>    range allocation and non-range allocation demands
> 
> - Implemented a new function alloc_range() which allocates
>    the requested power-of-two block complying with range limitations
> 
> - Moved order computation and memory alignment logic from
>    i915 driver to drm buddy
> 
> v2:
>    merged below changes to keep the build unbroken
>     - drm_buddy_alloc_range() becomes obsolete and may be removed
>     - enable ttm range allocation (fpfn / lpfn) support in i915 driver
>     - apply enhanced drm_buddy_alloc() function to i915 driver
> 
> v3(Matthew Auld):
>    - Fix alignment issues and remove unnecessary list_empty check
>    - add more validation checks for input arguments
>    - make alloc_range() block allocations as bottom-up
>    - optimize order computation logic
>    - replace uint64_t with u64, which is preferred in the kernel
> 
> v4(Matthew Auld):
>    - keep drm_buddy_alloc_range() function implementation for generic
>      actual range allocations
>    - keep alloc_range() implementation for end bias allocations
> 
> v5(Matthew Auld):
>    - modify drm_buddy_alloc() passing argument place->lpfn to lpfn
>      as place->lpfn will currently always be zero for i915
> 
> v6(Matthew Auld):
>    - fixup potential uaf - If we are unlucky and can't allocate
>      enough memory when splitting blocks, where we temporarily
>      end up with the given block and its buddy on the respective
>      free list, then we need to ensure we delete both blocks,
>      and not just the buddy, before potentially freeing them
> 
>    - fix warnings reported by kernel test robot <lkp@intel.com>
> 
> v7(Matthew Auld):
>    - revert fixup potential uaf
>    - keep __alloc_range() add node to the list logic same as
>      drm_buddy_alloc_blocks() by having a temporary list variable
>    - at drm_buddy_alloc_blocks() keep i915 range_overflows macro
>      and add a new check for end variable
> 
> v8:
>    - fix warnings reported by kernel test robot <lkp@intel.com>
> 
> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
> ---
>   drivers/gpu/drm/drm_buddy.c                   | 315 +++++++++++++-----
>   drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |  67 ++--
>   drivers/gpu/drm/i915/i915_ttm_buddy_manager.h |   2 +
>   include/drm/drm_buddy.h                       |  13 +-
>   4 files changed, 280 insertions(+), 117 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index d60878bc9c20..cfc160a1ef1a 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -282,23 +282,97 @@ void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects)
>   }
>   EXPORT_SYMBOL(drm_buddy_free_list);
>   
> -/**
> - * drm_buddy_alloc_blocks - allocate power-of-two blocks
> - *
> - * @mm: DRM buddy manager to allocate from
> - * @order: size of the allocation
> - *
> - * The order value here translates to:
> - *
> - * 0 = 2^0 * mm->chunk_size
> - * 1 = 2^1 * mm->chunk_size
> - * 2 = 2^2 * mm->chunk_size
> - *
> - * Returns:
> - * allocated ptr to the &drm_buddy_block on success
> - */
> -struct drm_buddy_block *
> -drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order)
> +static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
> +{
> +	return s1 <= e2 && e1 >= s2;
> +}
> +
> +static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
> +{
> +	return s1 <= s2 && e1 >= e2;
> +}
> +
> +static struct drm_buddy_block *
> +alloc_range_bias(struct drm_buddy *mm,
> +		 u64 start, u64 end,
> +		 unsigned int order)
> +{
> +	struct drm_buddy_block *block;
> +	struct drm_buddy_block *buddy;
> +	LIST_HEAD(dfs);
> +	int err;
> +	int i;
> +
> +	end = end - 1;
> +
> +	for (i = 0; i < mm->n_roots; ++i)
> +		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
> +
> +	do {
> +		u64 block_start;
> +		u64 block_end;
> +
> +		block = list_first_entry_or_null(&dfs,
> +						 struct drm_buddy_block,
> +						 tmp_link);
> +		if (!block)
> +			break;
> +
> +		list_del(&block->tmp_link);
> +
> +		if (drm_buddy_block_order(block) < order)
> +			continue;
> +
> +		block_start = drm_buddy_block_offset(block);
> +		block_end = block_start + drm_buddy_block_size(mm, block) - 1;
> +
> +		if (!overlaps(start, end, block_start, block_end))
> +			continue;
> +
> +		if (drm_buddy_block_is_allocated(block))
> +			continue;
> +
> +		if (contains(start, end, block_start, block_end) &&
> +		    order == drm_buddy_block_order(block)) {
> +			/*
> +			 * Find the free block within the range.
> +			 */
> +			if (drm_buddy_block_is_free(block))
> +				return block;
> +
> +			continue;
> +		}
> +
> +		if (!drm_buddy_block_is_split(block)) {
> +			err = split_block(mm, block);
> +			if (unlikely(err))
> +				goto err_undo;
> +		}
> +
> +		list_add(&block->right->tmp_link, &dfs);
> +		list_add(&block->left->tmp_link, &dfs);
> +	} while (1);
> +
> +	return ERR_PTR(-ENOSPC);
> +
> +err_undo:
> +	/*
> +	 * We really don't want to leave around a bunch of split blocks, since
> +	 * bigger is better, so make sure we merge everything back before we
> +	 * free the allocated blocks.
> +	 */
> +	buddy = get_buddy(block);
> +	if (buddy &&
> +	    (drm_buddy_block_is_free(block) &&
> +	     drm_buddy_block_is_free(buddy)))
> +		__drm_buddy_free(mm, block);
> +	return ERR_PTR(err);
> +}
> +
> +static struct drm_buddy_block *
> +alloc_from_freelist(struct drm_buddy *mm,
> +		    unsigned int order,
> +		    unsigned long flags)
>   {
>   	struct drm_buddy_block *block = NULL;
>   	unsigned int i;
> @@ -320,78 +394,29 @@ drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order)
>   	while (i != order) {
>   		err = split_block(mm, block);
>   		if (unlikely(err))
> -			goto out_free;
> +			goto err_undo;
>   
> -		/* Go low */
> -		block = block->left;
> +		block = block->right;
>   		i--;
>   	}
> -
> -	mark_allocated(block);
> -	mm->avail -= drm_buddy_block_size(mm, block);
> -	kmemleak_update_trace(block);
>   	return block;
>   
> -out_free:
> +err_undo:
>   	if (i != order)
>   		__drm_buddy_free(mm, block);
>   	return ERR_PTR(err);
>   }
> -EXPORT_SYMBOL(drm_buddy_alloc_blocks);
> -
> -static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
> -{
> -	return s1 <= e2 && e1 >= s2;
> -}
>   
> -static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
> -{
> -	return s1 <= s2 && e1 >= e2;
> -}
> -
> -/**
> - * drm_buddy_alloc_range - allocate range
> - *
> - * @mm: DRM buddy manager to allocate from
> - * @blocks: output list head to add allocated blocks
> - * @start: start of the allowed range for this block
> - * @size: size of the allocation
> - *
> - * Intended for pre-allocating portions of the address space, for example to
> - * reserve a block for the initial framebuffer or similar, hence the expectation
> - * here is that drm_buddy_alloc_blocks() is still the main vehicle for
> - * allocations, so if that's not the case then the drm_mm range allocator is
> - * probably a much better fit, and so you should probably go use that instead.
> - *
> - * Note that it's safe to chain together multiple alloc_ranges
> - * with the same blocks list
> - *
> - * Returns:
> - * 0 on success, error code on failure.
> - */
> -int drm_buddy_alloc_range(struct drm_buddy *mm,
> -			  struct list_head *blocks,
> -			  u64 start, u64 size)
> +static int __alloc_range(struct drm_buddy *mm,
> +			 struct list_head *dfs,
> +			 u64 start, u64 size,
> +			 struct list_head *blocks)
>   {
>   	struct drm_buddy_block *block;
>   	struct drm_buddy_block *buddy;
>   	LIST_HEAD(allocated);
> -	LIST_HEAD(dfs);
>   	u64 end;
>   	int err;
> -	int i;
> -
> -	if (size < mm->chunk_size)
> -		return -EINVAL;
> -
> -	if (!IS_ALIGNED(size | start, mm->chunk_size))
> -		return -EINVAL;
> -
> -	if (range_overflows(start, size, mm->size))
> -		return -EINVAL;
> -
> -	for (i = 0; i < mm->n_roots; ++i)
> -		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
>   
>   	end = start + size - 1;
>   
> @@ -399,7 +424,7 @@ int drm_buddy_alloc_range(struct drm_buddy *mm,
>   		u64 block_start;
>   		u64 block_end;
>   
> -		block = list_first_entry_or_null(&dfs,
> +		block = list_first_entry_or_null(dfs,
>   						 struct drm_buddy_block,
>   						 tmp_link);
>   		if (!block)
> @@ -436,8 +461,8 @@ int drm_buddy_alloc_range(struct drm_buddy *mm,
>   				goto err_undo;
>   		}
>   
> -		list_add(&block->right->tmp_link, &dfs);
> -		list_add(&block->left->tmp_link, &dfs);
> +		list_add(&block->right->tmp_link, dfs);
> +		list_add(&block->left->tmp_link, dfs);
>   	} while (1);
>   
>   	list_splice_tail(&allocated, blocks);
> @@ -459,7 +484,143 @@ int drm_buddy_alloc_range(struct drm_buddy *mm,
>   	drm_buddy_free_list(mm, &allocated);
>   	return err;
>   }
> -EXPORT_SYMBOL(drm_buddy_alloc_range);
> +
> +/**
> + * __drm_buddy_alloc_range - actual range allocation
> + *
> + * @mm: DRM buddy manager to allocate from
> + * @start: start of the allowed range for this block
> + * @size: size of the allocation
> + * @blocks: output list head to add allocated blocks

I guess no need to add kernel-doc for static functions.

> + *
> + * Intended for pre-allocating portions of the address space, for example to
> + * reserve a block for the initial framebuffer or similar
> + *
> + * Note that it's safe to chain together multiple alloc_ranges
> + * with the same blocks list
> + *
> + * Returns:
> + * 0 on success, error code on failure.
> + */
> +static int __drm_buddy_alloc_range(struct drm_buddy *mm,
> +				   u64 start,
> +				   u64 size,
> +				   struct list_head *blocks)
> +{
> +	LIST_HEAD(dfs);
> +	int i;
> +
> +	for (i = 0; i < mm->n_roots; ++i)
> +		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
> +
> +	return __alloc_range(mm, &dfs, start, size, blocks);
> +}
> +
> +/**
> + * drm_buddy_alloc_blocks - allocate power-of-two blocks
> + *
> + * @mm: DRM buddy manager to allocate from
> + * @start: start of the allowed range for this block
> + * @end: end of the allowed range for this block
> + * @size: size of the allocation
> + * @min_page_size: alignment of the allocation
> + * @blocks: output list head to add allocated blocks
> + * @flags: DRM_BUDDY_*_ALLOCATION flags
> + *
> + * alloc_range_bias() called on range limitations, which traverses
> + * the tree and returns the desired block.
> + *
> + * alloc_from_freelist() called when *no* range restrictions
> + * are enforced, which picks the block from the freelist.
> + *
> + * blocks are allocated in order, the order value here translates to:
> + *
> + * 0 = 2^0 * mm->chunk_size
> + * 1 = 2^1 * mm->chunk_size
> + * 2 = 2^2 * mm->chunk_size

The order stuff here can be dropped I think, since this is now just 'size'.
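
As a worked example with mm->chunk_size = 4K: a request of size = 1M
gives pages = 256 and order = fls(256) - 1 = 8, i.e. one
2^8 * 4K = 1M block, and min_page_size = 64K gives
min_order = ilog2(64K) - ilog2(4K) = 4, the smallest order the
fallback loop may retry with.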

> + *
> + * Returns:
> + * 0 on success, error code on failure.
> + */
> +int drm_buddy_alloc_blocks(struct drm_buddy *mm,
> +			   u64 start, u64 end, u64 size,
> +			   u64 min_page_size,
> +			   struct list_head *blocks,
> +			   unsigned long flags)
> +{
> +	struct drm_buddy_block *block = NULL;
> +	unsigned int min_order, order;
> +	unsigned long pages;
> +	LIST_HEAD(allocated);
> +	int err;
> +
> +	if (size < mm->chunk_size)
> +		return -EINVAL;
> +
> +	if (min_page_size < mm->chunk_size)
> +		return -EINVAL;
> +
> +	if (!is_power_of_2(min_page_size))
> +		return -EINVAL;
> +
> +	if (!IS_ALIGNED(start | end | size, mm->chunk_size))
> +		return -EINVAL;
> +
> +	if (end > mm->size)
> +		return -EINVAL;
> +
> +	if (range_overflows(start, size, mm->size))
> +		return -EINVAL;
> +
> +	/* Actual range allocation */
> +	if (start + size == end)
> +		return __drm_buddy_alloc_range(mm, start, size, blocks);
> +
> +	pages = size >> ilog2(mm->chunk_size);
> +	order = fls(pages) - 1;
> +	min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
> +
> +	do {
> +		order = min(order, (unsigned int)fls(pages) - 1);
> +		BUG_ON(order > mm->max_order);
> +		BUG_ON(order < min_order);
> +
> +		do {
> +			if (flags & DRM_BUDDY_RANGE_ALLOCATION)

Do we need an explicit flag for this? Can we not just check:

if (start || end < size)

We already implicitly determine the "actual range allocation" above. So 
that would just leave the TOPDOWN flag?
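
Spelled out, that suggestion would look something like this (a sketch,
reading 'size' as mm->size, since an unrestricted allocation has
start == 0 and end == mm->size):

	/* Infer a restricted range from the arguments instead of
	 * requiring the caller to pass an explicit flag. */
	if (start || end < mm->size)
		block = alloc_range_bias(mm, start, end, order);
	else
		block = alloc_from_freelist(mm, order, flags);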

Otherwise,
Reviewed-by: Matthew Auld <matthew.auld@intel.com>

> +				/* Allocate traversing within the range */
> +				block = alloc_range_bias(mm, start, end, order);
> +			else
> +				/* Allocate from freelist */
> +				block = alloc_from_freelist(mm, order, flags);
> +
> +			if (!IS_ERR(block))
> +				break;
> +
> +			if (order-- == min_order) {
> +				err = -ENOSPC;
> +				goto err_free;
> +			}
> +		} while (1);
> +
> +		mark_allocated(block);
> +		mm->avail -= drm_buddy_block_size(mm, block);
> +		kmemleak_update_trace(block);
> +		list_add_tail(&block->link, &allocated);
> +
> +		pages -= BIT(order);
> +
> +		if (!pages)
> +			break;
> +	} while (1);
> +
> +	list_splice_tail(&allocated, blocks);
> +	return 0;
> +
> +err_free:
> +	drm_buddy_free_list(mm, &allocated);
> +	return err;
> +}
> +EXPORT_SYMBOL(drm_buddy_alloc_blocks);
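
As a quick before/after for readers of the thread, based on the i915
hunks below (locking and error handling elided; illustrative only, not
part of the patch):

	/* before: order-based, one power-of-two block per call */
	block = drm_buddy_alloc_blocks(mm, order);

	/* after: size-based, blocks collected on a caller-provided list */
	LIST_HEAD(blocks);

	err = drm_buddy_alloc_blocks(mm, start, end, size,
				     min_page_size, &blocks,
				     DRM_BUDDY_RANGE_ALLOCATION);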
>   
>   /**
>    * drm_buddy_block_print - print block information
> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> index 247714bab044..a328a38fab07 100644
> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> @@ -36,13 +36,14 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
>   	struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
>   	struct i915_ttm_buddy_resource *bman_res;
>   	struct drm_buddy *mm = &bman->mm;
> -	unsigned long n_pages;
> -	unsigned int min_order;
> +	unsigned long n_pages, lpfn;
>   	u64 min_page_size;
>   	u64 size;
>   	int err;
>   
> -	GEM_BUG_ON(place->fpfn || place->lpfn);
> +	lpfn = place->lpfn;
> +	if (!lpfn)
> +		lpfn = man->size;
>   
>   	bman_res = kzalloc(sizeof(*bman_res), GFP_KERNEL);
>   	if (!bman_res)
> @@ -52,6 +53,9 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
>   	INIT_LIST_HEAD(&bman_res->blocks);
>   	bman_res->mm = mm;
>   
> +	if (place->fpfn || lpfn != man->size)
> +		bman_res->flags |= DRM_BUDDY_RANGE_ALLOCATION;
> +
>   	GEM_BUG_ON(!bman_res->base.num_pages);
>   	size = bman_res->base.num_pages << PAGE_SHIFT;
>   
> @@ -60,10 +64,16 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
>   		min_page_size = bo->page_alignment << PAGE_SHIFT;
>   
>   	GEM_BUG_ON(min_page_size < mm->chunk_size);
> -	min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
> +
>   	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
> +		unsigned long pages;
> +
>   		size = roundup_pow_of_two(size);
> -		min_order = ilog2(size) - ilog2(mm->chunk_size);
> +		min_page_size = size;
> +
> +		pages = size >> ilog2(mm->chunk_size);
> +		if (pages > lpfn)
> +			lpfn = pages;
>   	}
>   
>   	if (size > mm->size) {
> @@ -73,34 +83,16 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
>   
>   	n_pages = size >> ilog2(mm->chunk_size);
>   
> -	do {
> -		struct drm_buddy_block *block;
> -		unsigned int order;
> -
> -		order = fls(n_pages) - 1;
> -		GEM_BUG_ON(order > mm->max_order);
> -		GEM_BUG_ON(order < min_order);
> -
> -		do {
> -			mutex_lock(&bman->lock);
> -			block = drm_buddy_alloc_blocks(mm, order);
> -			mutex_unlock(&bman->lock);
> -			if (!IS_ERR(block))
> -				break;
> -
> -			if (order-- == min_order) {
> -				err = -ENOSPC;
> -				goto err_free_blocks;
> -			}
> -		} while (1);
> -
> -		n_pages -= BIT(order);
> -
> -		list_add_tail(&block->link, &bman_res->blocks);
> -
> -		if (!n_pages)
> -			break;
> -	} while (1);
> +	mutex_lock(&bman->lock);
> +	err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
> +				     (u64)lpfn << PAGE_SHIFT,
> +				     (u64)n_pages << PAGE_SHIFT,
> +				     min_page_size,
> +				     &bman_res->blocks,
> +				     bman_res->flags);
> +	mutex_unlock(&bman->lock);
> +	if (unlikely(err))
> +		goto err_free_blocks;
>   
>   	*res = &bman_res->base;
>   	return 0;
> @@ -268,10 +260,17 @@ int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man,
>   {
>   	struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
>   	struct drm_buddy *mm = &bman->mm;
> +	unsigned long flags = 0;
>   	int ret;
>   
> +	flags |= DRM_BUDDY_RANGE_ALLOCATION;
> +
>   	mutex_lock(&bman->lock);
> -	ret = drm_buddy_alloc_range(mm, &bman->reserved, start, size);
> +	ret = drm_buddy_alloc_blocks(mm, start,
> +				     start + size,
> +				     size, mm->chunk_size,
> +				     &bman->reserved,
> +				     flags);
>   	mutex_unlock(&bman->lock);
>   
>   	return ret;
> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
> index 312077941411..72c90b432e87 100644
> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
> @@ -20,6 +20,7 @@ struct drm_buddy;
>    *
>    * @base: struct ttm_resource base class we extend
>    * @blocks: the list of struct i915_buddy_block for this resource/allocation
> + * @flags: DRM_BUDDY_*_ALLOCATION flags
>    * @mm: the struct i915_buddy_mm for this resource
>    *
>    * Extends the struct ttm_resource to manage an address space allocation with
> @@ -28,6 +29,7 @@ struct drm_buddy;
>   struct i915_ttm_buddy_resource {
>   	struct ttm_resource base;
>   	struct list_head blocks;
> +	unsigned long flags;
>   	struct drm_buddy *mm;
>   };
>   
> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
> index f524db152413..54f25a372f27 100644
> --- a/include/drm/drm_buddy.h
> +++ b/include/drm/drm_buddy.h
> @@ -22,6 +22,8 @@
>   	start__ >= max__ || size__ > max__ - start__; \
>   })
>   
> +#define DRM_BUDDY_RANGE_ALLOCATION (1 << 0)
> +
>   struct drm_buddy_block {
>   #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
>   #define DRM_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
> @@ -131,12 +133,11 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size);
>   
>   void drm_buddy_fini(struct drm_buddy *mm);
>   
> -struct drm_buddy_block *
> -drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order);
> -
> -int drm_buddy_alloc_range(struct drm_buddy *mm,
> -			  struct list_head *blocks,
> -			  u64 start, u64 size);
> +int drm_buddy_alloc_blocks(struct drm_buddy *mm,
> +			   u64 start, u64 end, u64 size,
> +			   u64 min_page_size,
> +			   struct list_head *blocks,
> +			   unsigned long flags);
>   
>   void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block);
>   
> 
> base-commit: 627e2885b51ab503a98aa89f0a0bd68416c731fc


* Re: [PATCH v11 5/5] drm/amdgpu: add drm buddy support to amdgpu
  2022-02-08 11:20           ` [Intel-gfx] " Arunpravin
@ 2022-02-10 17:52             ` Matthew Auld
  -1 siblings, 0 replies; 34+ messages in thread
From: Matthew Auld @ 2022-02-10 17:52 UTC (permalink / raw)
  To: Arunpravin, Christian König, Matthew Auld
  Cc: Intel Graphics Development, ML dri-devel, amd-gfx list,
	tzimmermann, alexander.deucher, Christian König

On 08/02/2022 11:20, Arunpravin wrote:
> 
> 
> On 04/02/22 6:53 pm, Christian König wrote:
>> Am 04.02.22 um 12:22 schrieb Arunpravin:
>>> On 28/01/22 7:48 pm, Matthew Auld wrote:
>>>> On Thu, 27 Jan 2022 at 14:11, Arunpravin
>>>> <Arunpravin.PaneerSelvam@amd.com> wrote:
>>>>> - Remove drm_mm references and replace with drm buddy functionalities
>>>>> - Add res cursor support for drm buddy
>>>>>
>>>>> v2(Matthew Auld):
>>>>>     - replace spinlock with mutex as we call kmem_cache_zalloc
>>>>>       (..., GFP_KERNEL) in drm_buddy_alloc() function
>>>>>
>>>>>     - lock drm_buddy_block_trim() function as the functions it
>>>>>       calls, mark_free/mark_split, are all globally visible
>>>>>
>>>>> v3(Matthew Auld):
>>>>>     - remove trim method error handling as we address the failure case
>>>>>       at drm_buddy_block_trim() function
>>>>>
>>>>> v4:
>>>>>     - fix warnings reported by kernel test robot <lkp@intel.com>
>>>>>
>>>>> v5:
>>>>>     - fix merge conflict issue
>>>>>
>>>>> v6:
>>>>>     - fix warnings reported by kernel test robot <lkp@intel.com>
>>>>>
>>>>> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
>>>>> ---
>>>>>    drivers/gpu/drm/Kconfig                       |   1 +
>>>>>    .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    |  97 +++++--
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   7 +-
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 259 ++++++++++--------
>>>>>    4 files changed, 231 insertions(+), 133 deletions(-)
>>>> <snip>
>>>>
>>>>> -/**
>>>>> - * amdgpu_vram_mgr_virt_start - update virtual start address
>>>>> - *
>>>>> - * @mem: ttm_resource to update
>>>>> - * @node: just allocated node
>>>>> - *
>>>>> - * Calculate a virtual BO start address to easily check if everything is CPU
>>>>> - * accessible.
>>>>> - */
>>>>> -static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
>>>>> -                                      struct drm_mm_node *node)
>>>>> -{
>>>>> -       unsigned long start;
>>>>> -
>>>>> -       start = node->start + node->size;
>>>>> -       if (start > mem->num_pages)
>>>>> -               start -= mem->num_pages;
>>>>> -       else
>>>>> -               start = 0;
>>>>> -       mem->start = max(mem->start, start);
>>>>> -}
>>>>> -
>>>>>    /**
>>>>>     * amdgpu_vram_mgr_new - allocate new ranges
>>>>>     *
>>>>> @@ -366,13 +357,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>>                                  const struct ttm_place *place,
>>>>>                                  struct ttm_resource **res)
>>>>>    {
>>>>> -       unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
>>>>> +       unsigned long lpfn, pages_per_node, pages_left, pages, n_pages;
>>>>> +       u64 vis_usage = 0, mem_bytes, max_bytes, min_page_size;
>>>>>           struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>>>>>           struct amdgpu_device *adev = to_amdgpu_device(mgr);
>>>>> -       uint64_t vis_usage = 0, mem_bytes, max_bytes;
>>>>> -       struct ttm_range_mgr_node *node;
>>>>> -       struct drm_mm *mm = &mgr->mm;
>>>>> -       enum drm_mm_insert_mode mode;
>>>>> +       struct amdgpu_vram_mgr_node *node;
>>>>> +       struct drm_buddy *mm = &mgr->mm;
>>>>> +       struct drm_buddy_block *block;
>>>>>           unsigned i;
>>>>>           int r;
>>>>>
>>>>> @@ -391,10 +382,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>>                   goto error_sub;
>>>>>           }
>>>>>
>>>>> -       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>>>> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>>>>>                   pages_per_node = ~0ul;
>>>>> -               num_nodes = 1;
>>>>> -       } else {
>>>>> +       else {
>>>>>    #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>>>>                   pages_per_node = HPAGE_PMD_NR;
>>>>>    #else
>>>>> @@ -403,11 +393,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>>    #endif
>>>>>                   pages_per_node = max_t(uint32_t, pages_per_node,
>>>>>                                          tbo->page_alignment);
>>>>> -               num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
>>>>>           }
>>>>>
>>>>> -       node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
>>>>> -                       GFP_KERNEL | __GFP_ZERO);
>>>>> +       node = kzalloc(sizeof(*node), GFP_KERNEL);
>>>>>           if (!node) {
>>>>>                   r = -ENOMEM;
>>>>>                   goto error_sub;
>>>>> @@ -415,9 +403,17 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>>
>>>>>           ttm_resource_init(tbo, place, &node->base);
>>>>>
>>>>> -       mode = DRM_MM_INSERT_BEST;
>>>>> +       INIT_LIST_HEAD(&node->blocks);
>>>>> +
>>>>>           if (place->flags & TTM_PL_FLAG_TOPDOWN)
>>>>> -               mode = DRM_MM_INSERT_HIGH;
>>>>> +               node->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
>>>>> +
>>>>> +       if (place->fpfn || lpfn != man->size)
>>>>> +               /* Allocate blocks in desired range */
>>>>> +               node->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>>>>> +
>>>>> +       min_page_size = mgr->default_page_size;
>>>>> +       BUG_ON(min_page_size < mm->chunk_size);
>>>>>
>>>>>           pages_left = node->base.num_pages;
>>>>>
>>>>> @@ -425,36 +421,61 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>>           pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
>>>>>
>>>>>           i = 0;
>>>>> -       spin_lock(&mgr->lock);
>>>>>           while (pages_left) {
>>>>> -               uint32_t alignment = tbo->page_alignment;
>>>>> -
>>>>>                   if (pages >= pages_per_node)
>>>>> -                       alignment = pages_per_node;
>>>>> -
>>>>> -               r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
>>>>> -                                               alignment, 0, place->fpfn,
>>>>> -                                               lpfn, mode);
>>>>> -               if (unlikely(r)) {
>>>>> -                       if (pages > pages_per_node) {
>>>>> -                               if (is_power_of_2(pages))
>>>>> -                                       pages = pages / 2;
>>>>> -                               else
>>>>> -                                       pages = rounddown_pow_of_two(pages);
>>>>> -                               continue;
>>>>> -                       }
>>>>> -                       goto error_free;
>>>>> +                       pages = pages_per_node;
>>>>> +
>>>>> +               n_pages = pages;
>>>>> +
>>>>> +               if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>>>> +                       n_pages = roundup_pow_of_two(n_pages);
>>>>> +                       min_page_size = (u64)n_pages << PAGE_SHIFT;
>>>>> +
>>>>> +                       if (n_pages > lpfn)
>>>>> +                               lpfn = n_pages;
>>>>>                   }
>>>>>
>>>>> -               vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]);
>>>>> -               amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]);
>>>>> +               mutex_lock(&mgr->lock);
>>>>> +               r = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
>>>>> +                                          (u64)lpfn << PAGE_SHIFT,
>>>>> +                                          (u64)n_pages << PAGE_SHIFT,
>>>>> +                                          min_page_size,
>>>>> +                                          &node->blocks,
>>>>> +                                          node->flags);
>>>>> +               mutex_unlock(&mgr->lock);
>>>>> +               if (unlikely(r))
>>>>> +                       goto error_free_blocks;
>>>>> +
>>>>>                   pages_left -= pages;
>>>>>                   ++i;
>>>>>
>>>>>                   if (pages > pages_left)
>>>>>                           pages = pages_left;
>>>>>           }
>>>>> -       spin_unlock(&mgr->lock);
>>>>> +
>>>>> +       /* Free unused pages for contiguous allocation */
>>>>> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>>>> +               u64 actual_size = (u64)node->base.num_pages << PAGE_SHIFT;
>>>>> +
>>>>> +               mutex_lock(&mgr->lock);
>>>>> +               drm_buddy_block_trim(mm,
>>>>> +                                    actual_size,
>>>>> +                                    &node->blocks);
>>>>> +               mutex_unlock(&mgr->lock);
>>>>> +       }
>>>>> +
>>>>> +       list_for_each_entry(block, &node->blocks, link)
>>>>> +               vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
>>>>> +
>>>>> +       block = list_first_entry_or_null(&node->blocks,
>>>>> +                                        struct drm_buddy_block,
>>>>> +                                        link);
>>>>> +       if (!block) {
>>>>> +               r = -ENOENT;
>>>>> +               goto error_free_res;
>>>>> +       }
>>>>> +
>>>>> +       node->base.start = amdgpu_node_start(block) >> PAGE_SHIFT;
>>>> Hmm, does this work? It looks like there are various places checking
>>>> that res->start + res->num_pages <= visible_size, which IIUC should
>>>> only return true when the entire object is placed in the mappable
>>>> portion. i915 is doing something similar. Also it looks like
>>>> ttm_resource_compat() is potentially relying on this, like when moving
>>>> something from non-mappable -> mappable in
>>>> amdgpu_bo_fault_reserve_notify()?
>>>>
>>>> Perhaps something like:
>>>>
>>>> if (vis_usage == num_pages)
>>>>       base.start = 0;
>>>> else
>>>>       base.start = visible_size;
>>>>
>>>> Otherwise I guess just keep amdgpu_vram_mgr_virt_start()?
>>>>
>>> hmm, I wonder how it works, maybe we didn't go through the corner case
>>> where res->start + res->num_pages > visible_size
>>>
>>> in amdgpu, if the AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED flag is enabled,
>>> we set ttm_place.lpfn = visible_pfn at
>>> amdgpu_bo_placement_from_domain(). Hence, in amdgpu_vram_mgr_new() the
>>> DRM_BUDDY_RANGE_ALLOCATION flag is enabled, which makes it call
>>> alloc_range_bias() in drm_buddy.c.
>>>
>>> Here we get blocks chained together in random order, complying with the
>>> visible_pfn range. Say, for instance, num_pages = 13;
>>> we may get:
>>> Block 1 addr - 500 (order-3)
>>> Block 2 addr - 400 (order-2)
>>> Block 3 addr - 600 (order-0)
>>>
>>> I think currently base.start is set to the Block 1 start address
>>> fetched from the list, i.e. the address 500 is assigned to it, which is
>>> fine for resource access since we access the blocks through the list links
>>>
>>> But for the check res->start + res->num_pages <= visible_size in a few
>>> places, this doesn't work. AFAIK, keeping amdgpu_vram_mgr_virt_start()
>>> doesn't work either, since that function expects nodes at contiguous
>>> addresses to calculate the start address. AFAIK, assigning the start
>>> address (400 + num_pages <= visible_size) is misleading in our case
>>> since we use a linked list
>>>
>>> how about replacing the check with a bool-returning function which
>>> checks that each block's start address + block size <= visible_size?
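
Something like this perhaps, as a rough sketch of that idea (the helper
name is made up; drm_buddy_block_offset()/drm_buddy_block_size() are the
accessors from drm_buddy.h):

	static bool blocks_cpu_visible(struct drm_buddy *mm,
				       struct list_head *blocks,
				       u64 visible_size)
	{
		struct drm_buddy_block *block;

		list_for_each_entry(block, blocks, link) {
			u64 start = drm_buddy_block_offset(block);

			if (start + drm_buddy_block_size(mm, block) > visible_size)
				return false;
		}

		return true;
	}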
>>
>> Yeah, we already have that in the TTM code. It's just not used
>> everywhere IIRC.
> 
> Hi Christian,
> here we have a problem: in many places in ttm and amdgpu we use the
> tbo->resource->start + tbo->resource->num_pages computation, and this
> doesn't work in the drm buddy case since it allocates blocks at
> different locations which are chained together in a linked list.

AFAICT that was already the case with the existing code, since it looks 
like you can get an array of discontinuous drm_mm blocks. 
amdgpu_vram_mgr_virt_start() looks to be handling that by creating a
fake/virtual res->start offset, which is effectively the maximum end pfn
over the range of drm_mm blocks (whilst also accounting for num_pages),
and yes if there are multiple blocks then the res->start might be more 
of a "virtual" offset. AFAIK that scheme should also work as-is with the 
buddy.
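
FWIW, a rough sketch of carrying that scheme over to buddy blocks
(hypothetical helper mirroring the removed amdgpu_vram_mgr_virt_start();
converting byte offsets to pages is an assumption here):

	static void vram_mgr_virt_start(struct ttm_resource *mem,
					struct drm_buddy *mm,
					struct drm_buddy_block *block)
	{
		unsigned long start;

		start = (drm_buddy_block_offset(block) +
			 drm_buddy_block_size(mm, block)) >> PAGE_SHIFT;
		if (start > mem->num_pages)
			start -= mem->num_pages;
		else
			start = 0;

		mem->start = max(mem->start, start);
	}

Called once per block on the list, so the usual res->start +
res->num_pages <= visible_size check only passes when every block ends
inside the visible window.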

>>
>> The node->start can just be set to the invalid offset for now and should
>> be removed as soon as we don't need it any more.
> Assigning the start block offset to resource->start doesn't work.
> If we set node->start to an invalid offset, we get an incorrect value?
>>
>> Regards,
>> Christian.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Intel-gfx] [PATCH v11 5/5] drm/amdgpu: add drm buddy support to amdgpu
@ 2022-02-10 17:52             ` Matthew Auld
  0 siblings, 0 replies; 34+ messages in thread
From: Matthew Auld @ 2022-02-10 17:52 UTC (permalink / raw)
  To: Arunpravin, Christian König, Matthew Auld
  Cc: Intel Graphics Development, ML dri-devel, amd-gfx list,
	tzimmermann, alexander.deucher, Christian König

On 08/02/2022 11:20, Arunpravin wrote:
> 
> 
> On 04/02/22 6:53 pm, Christian König wrote:
>> Am 04.02.22 um 12:22 schrieb Arunpravin:
>>> On 28/01/22 7:48 pm, Matthew Auld wrote:
>>>> On Thu, 27 Jan 2022 at 14:11, Arunpravin
>>>> <Arunpravin.PaneerSelvam@amd.com> wrote:
>>>>> - Remove drm_mm references and replace with drm buddy functionalities
>>>>> - Add res cursor support for drm buddy
>>>>>
>>>>> v2(Matthew Auld):
>>>>>     - replace spinlock with mutex as we call kmem_cache_zalloc
>>>>>       (..., GFP_KERNEL) in drm_buddy_alloc() function
>>>>>
>>>>>     - lock drm_buddy_block_trim() function as it calls
>>>>>       mark_free/mark_split are all globally visible
>>>>>
>>>>> v3(Matthew Auld):
>>>>>     - remove trim method error handling as we address the failure case
>>>>>       at drm_buddy_block_trim() function
>>>>>
>>>>> v4:
>>>>>     - fix warnings reported by kernel test robot <lkp@intel.com>
>>>>>
>>>>> v5:
>>>>>     - fix merge conflict issue
>>>>>
>>>>> v6:
>>>>>     - fix warnings reported by kernel test robot <lkp@intel.com>
>>>>>
>>>>> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
>>>>> ---
>>>>>    drivers/gpu/drm/Kconfig                       |   1 +
>>>>>    .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    |  97 +++++--
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   7 +-
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 259 ++++++++++--------
>>>>>    4 files changed, 231 insertions(+), 133 deletions(-)
>>>> <snip>
>>>>
>>>>> -/**
>>>>> - * amdgpu_vram_mgr_virt_start - update virtual start address
>>>>> - *
>>>>> - * @mem: ttm_resource to update
>>>>> - * @node: just allocated node
>>>>> - *
>>>>> - * Calculate a virtual BO start address to easily check if everything is CPU
>>>>> - * accessible.
>>>>> - */
>>>>> -static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
>>>>> -                                      struct drm_mm_node *node)
>>>>> -{
>>>>> -       unsigned long start;
>>>>> -
>>>>> -       start = node->start + node->size;
>>>>> -       if (start > mem->num_pages)
>>>>> -               start -= mem->num_pages;
>>>>> -       else
>>>>> -               start = 0;
>>>>> -       mem->start = max(mem->start, start);
>>>>> -}
>>>>> -
>>>>>    /**
>>>>>     * amdgpu_vram_mgr_new - allocate new ranges
>>>>>     *
>>>>> @@ -366,13 +357,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>>                                  const struct ttm_place *place,
>>>>>                                  struct ttm_resource **res)
>>>>>    {
>>>>> -       unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
>>>>> +       unsigned long lpfn, pages_per_node, pages_left, pages, n_pages;
>>>>> +       u64 vis_usage = 0, mem_bytes, max_bytes, min_page_size;
>>>>>           struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>>>>>           struct amdgpu_device *adev = to_amdgpu_device(mgr);
>>>>> -       uint64_t vis_usage = 0, mem_bytes, max_bytes;
>>>>> -       struct ttm_range_mgr_node *node;
>>>>> -       struct drm_mm *mm = &mgr->mm;
>>>>> -       enum drm_mm_insert_mode mode;
>>>>> +       struct amdgpu_vram_mgr_node *node;
>>>>> +       struct drm_buddy *mm = &mgr->mm;
>>>>> +       struct drm_buddy_block *block;
>>>>>           unsigned i;
>>>>>           int r;
>>>>>
>>>>> @@ -391,10 +382,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>>                   goto error_sub;
>>>>>           }
>>>>>
>>>>> -       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>>>> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>>>>>                   pages_per_node = ~0ul;
>>>>> -               num_nodes = 1;
>>>>> -       } else {
>>>>> +       else {
>>>>>    #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>>>>                   pages_per_node = HPAGE_PMD_NR;
>>>>>    #else
>>>>> @@ -403,11 +393,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>>    #endif
>>>>>                   pages_per_node = max_t(uint32_t, pages_per_node,
>>>>>                                          tbo->page_alignment);
>>>>> -               num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
>>>>>           }
>>>>>
>>>>> -       node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
>>>>> -                       GFP_KERNEL | __GFP_ZERO);
>>>>> +       node = kzalloc(sizeof(*node), GFP_KERNEL);
>>>>>           if (!node) {
>>>>>                   r = -ENOMEM;
>>>>>                   goto error_sub;
>>>>> @@ -415,9 +403,17 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>>
>>>>>           ttm_resource_init(tbo, place, &node->base);
>>>>>
>>>>> -       mode = DRM_MM_INSERT_BEST;
>>>>> +       INIT_LIST_HEAD(&node->blocks);
>>>>> +
>>>>>           if (place->flags & TTM_PL_FLAG_TOPDOWN)
>>>>> -               mode = DRM_MM_INSERT_HIGH;
>>>>> +               node->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
>>>>> +
>>>>> +       if (place->fpfn || lpfn != man->size)
>>>>> +               /* Allocate blocks in desired range */
>>>>> +               node->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>>>>> +
>>>>> +       min_page_size = mgr->default_page_size;
>>>>> +       BUG_ON(min_page_size < mm->chunk_size);
>>>>>
>>>>>           pages_left = node->base.num_pages;
>>>>>
>>>>> @@ -425,36 +421,61 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>>>           pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
>>>>>
>>>>>           i = 0;
>>>>> -       spin_lock(&mgr->lock);
>>>>>           while (pages_left) {
>>>>> -               uint32_t alignment = tbo->page_alignment;
>>>>> -
>>>>>                   if (pages >= pages_per_node)
>>>>> -                       alignment = pages_per_node;
>>>>> -
>>>>> -               r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
>>>>> -                                               alignment, 0, place->fpfn,
>>>>> -                                               lpfn, mode);
>>>>> -               if (unlikely(r)) {
>>>>> -                       if (pages > pages_per_node) {
>>>>> -                               if (is_power_of_2(pages))
>>>>> -                                       pages = pages / 2;
>>>>> -                               else
>>>>> -                                       pages = rounddown_pow_of_two(pages);
>>>>> -                               continue;
>>>>> -                       }
>>>>> -                       goto error_free;
>>>>> +                       pages = pages_per_node;
>>>>> +
>>>>> +               n_pages = pages;
>>>>> +
>>>>> +               if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>>>> +                       n_pages = roundup_pow_of_two(n_pages);
>>>>> +                       min_page_size = (u64)n_pages << PAGE_SHIFT;
>>>>> +
>>>>> +                       if (n_pages > lpfn)
>>>>> +                               lpfn = n_pages;
>>>>>                   }
>>>>>
>>>>> -               vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]);
>>>>> -               amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]);
>>>>> +               mutex_lock(&mgr->lock);
>>>>> +               r = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
>>>>> +                                          (u64)lpfn << PAGE_SHIFT,
>>>>> +                                          (u64)n_pages << PAGE_SHIFT,
>>>>> +                                          min_page_size,
>>>>> +                                          &node->blocks,
>>>>> +                                          node->flags);
>>>>> +               mutex_unlock(&mgr->lock);
>>>>> +               if (unlikely(r))
>>>>> +                       goto error_free_blocks;
>>>>> +
>>>>>                   pages_left -= pages;
>>>>>                   ++i;
>>>>>
>>>>>                   if (pages > pages_left)
>>>>>                           pages = pages_left;
>>>>>           }
>>>>> -       spin_unlock(&mgr->lock);
>>>>> +
>>>>> +       /* Free unused pages for contiguous allocation */
>>>>> +       if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>>>> +               u64 actual_size = (u64)node->base.num_pages << PAGE_SHIFT;
>>>>> +
>>>>> +               mutex_lock(&mgr->lock);
>>>>> +               drm_buddy_block_trim(mm,
>>>>> +                                    actual_size,
>>>>> +                                    &node->blocks);
>>>>> +               mutex_unlock(&mgr->lock);
>>>>> +       }
>>>>> +
>>>>> +       list_for_each_entry(block, &node->blocks, link)
>>>>> +               vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
>>>>> +
>>>>> +       block = list_first_entry_or_null(&node->blocks,
>>>>> +                                        struct drm_buddy_block,
>>>>> +                                        link);
>>>>> +       if (!block) {
>>>>> +               r = -ENOENT;
>>>>> +               goto error_free_res;
>>>>> +       }
>>>>> +
>>>>> +       node->base.start = amdgpu_node_start(block) >> PAGE_SHIFT;
>>>> Hmm, does this work? It looks like there are various places checking
>>>> that res->start + res->num_pages <= visible_size, which IIUC should
>>>> only return true when the entire object is placed in the mappable
>>>> portion. i915 is doing something similar. Also it looks like
>>>> ttm_resource_compat() is potentially relying on this, like when moving
>>>> something from non-mappable -> mappable in
>>>> amdgpu_bo_fault_reserve_notify()?
>>>>
>>>> Perhaps something like:
>>>>
>>>> if (vis_usage == num_pages)
>>>>       base.start = 0;
>>>> else
>>>>       base.start = visible_size;
>>>>
>>>> Otherwise I guess just keep amdgpu_vram_mgr_virt_start()?
>>>>
>>> hmm, I wonder how it works; maybe we didn't go through the corner case
>>> where res->start + res->num_pages > visible_size
>>>
>>> in amdgpu, if the AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED flag is set, we
>>> set ttm_place.lpfn = visible_pfn in
>>> amdgpu_bo_placement_from_domain(). Hence, in the amdgpu_vram_mgr_new()
>>> function the DRM_BUDDY_RANGE_ALLOCATION flag is enabled, which calls
>>> alloc_range_bias() in drm_buddy.c.
>>>
>>> Here we get blocks chained together in random order, complying with the
>>> visible_pfn range. Say, for instance, num_pages = 13;
>>> we may get:
>>> Block 1 addr - 500 (order-3)
>>> Block 2 addr - 400 (order-2)
>>> Block 3 addr - 600 (order-0)
>>>
>>> I think currently base.start = Block 1's start address fetched from the
>>> list, i.e. the address 500 is assigned to it, which is fine for resource
>>> access since we reach the blocks through the list link.
>>>
>>> But for the res->start + res->num_pages <= visible_size check in a few
>>> places, this doesn't work. AFAIK, keeping amdgpu_vram_mgr_virt_start()
>>> doesn't work either, since that function expects nodes at contiguous
>>> addresses to calculate the start address. AFAIK, assigning the start
>>> address (400 + num_pages <= visible_size) is misleading in our case
>>> since we use a linked list.
>>>
>>> How about replacing the check with a bool-returning function which
>>> verifies, for each block, that start address + block size <= visible_size?
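
A minimal sketch of such a bool-returning check, assuming the list-based
amdgpu_vram_mgr_node from this series (the helper name is illustrative;
amdgpu_node_start() is the byte-offset accessor this patch already uses,
and visible_size is the CPU-visible VRAM size in bytes):

static bool amdgpu_res_cpu_visible(struct drm_buddy *mm,
				   struct amdgpu_vram_mgr_node *node,
				   u64 visible_size)
{
	struct drm_buddy_block *block;

	/* Every block must individually end below the visible window. */
	list_for_each_entry(block, &node->blocks, link) {
		if (amdgpu_node_start(block) +
		    drm_buddy_block_size(mm, block) > visible_size)
			return false;
	}

	return true;
}

Unlike the res->start + res->num_pages test, this walks the block list
directly, so it stays correct even when the blocks are discontiguous.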
>>
>> Yeah, we already have that in the TTM code. It's just not used
>> everywhere IIRC.
> 
> Hi Christian,
> here we have a problem: in many places in ttm and amdgpu we use the
> tbo->resource->start + tbo->resource->num_pages computation, which
> doesn't work in the drm buddy case since it allocates blocks at
> different locations that are chained together in a linked list.

AFAICT that was already the case with the existing code, since it looks
like you can get an array of discontinuous drm_mm blocks.
amdgpu_vram_mgr_virt_start() looks to be handling that by creating a
fake/virtual res->start offset, which is effectively the maximum end pfn
over the range of drm_mm blocks (whilst also accounting for num_pages),
and yes, if there are multiple blocks then the res->start might be more
of a "virtual" offset. AFAIK that scheme should also work as-is with the
buddy.
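
For illustration, a sketch of that scheme carried over to buddy blocks
(the adaptation is hypothetical; amdgpu_node_start() returns the block's
byte offset as elsewhere in this series):

static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
				       struct drm_buddy *mm,
				       struct drm_buddy_block *block)
{
	u64 start;

	/* End pfn of this block. */
	start = amdgpu_node_start(block) + drm_buddy_block_size(mm, block);
	start >>= PAGE_SHIFT;

	/* Rebase so that mem->start + mem->num_pages tracks the
	 * maximum end pfn over all blocks seen so far.
	 */
	if (start > mem->num_pages)
		start -= mem->num_pages;
	else
		start = 0;
	mem->start = max(mem->start, (unsigned long)start);
}

Called once per allocated block, this leaves res->start + res->num_pages
equal to the highest block end pfn, so the existing
res->start + res->num_pages <= visible_size checks keep working unchanged.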

>>
>> The node->start can just be set to the invalid offset for now and should
>> be removed as soon as we don't need it any more.
> Assigning the start block offset to resource->start doesn't work.
> If we set node->start to an invalid offset, won't we get an incorrect value?
>>
>> Regards,
>> Christian.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH v11 1/5] drm: improve drm_buddy_alloc function
  2022-02-10 16:10   ` [Intel-gfx] " Matthew Auld
  (?)
@ 2022-02-11  9:27     ` Arunpravin
  -1 siblings, 0 replies; 34+ messages in thread
From: Arunpravin @ 2022-02-11  9:27 UTC (permalink / raw)
  To: Matthew Auld, dri-devel, intel-gfx, amd-gfx
  Cc: alexander.deucher, tzimmermann, christian.koenig



On 10/02/22 9:40 pm, Matthew Auld wrote:
> On 27/01/2022 14:11, Arunpravin wrote:
>> - Make drm_buddy_alloc a single function to handle
>>    range allocation and non-range allocation demands
>>
>> - Implemented a new function alloc_range() which allocates
>>    the requested power-of-two blocks complying with range limitations
>>
>> - Moved order computation and memory alignment logic from
>>    i915 driver to drm buddy
>>
>> v2:
>>    merged below changes to keep the build unbroken
>>     - drm_buddy_alloc_range() becomes obsolete and may be removed
>>     - enable ttm range allocation (fpfn / lpfn) support in i915 driver
>>     - apply enhanced drm_buddy_alloc() function to i915 driver
>>
>> v3(Matthew Auld):
>>    - Fix alignment issues and remove unnecessary list_empty check
>>    - add more validation checks for input arguments
>>    - make alloc_range() block allocations as bottom-up
>>    - optimize order computation logic
>>    - replace uint64_t with u64, which is preferred in the kernel
>>
>> v4(Matthew Auld):
>>    - keep drm_buddy_alloc_range() function implementation for generic
>>      actual range allocations
>>    - keep alloc_range() implementation for end bias allocations
>>
>> v5(Matthew Auld):
>>    - modify drm_buddy_alloc() passing argument place->lpfn to lpfn
>>      as place->lpfn will currently always be zero for i915
>>
>> v6(Matthew Auld):
>>    - fixup potential uaf - If we are unlucky and can't allocate
>>      enough memory when splitting blocks, where we temporarily
>>      end up with the given block and its buddy on the respective
>>      free list, then we need to ensure we delete both blocks,
>>      and not just the buddy, before potentially freeing them
>>
>>    - fix warnings reported by kernel test robot <lkp@intel.com>
>>
>> v7(Matthew Auld):
>>    - revert fixup potential uaf
>>    - keep __alloc_range() add node to the list logic same as
>>      drm_buddy_alloc_blocks() by having a temporary list variable
>>    - at drm_buddy_alloc_blocks() keep i915 range_overflows macro
>>      and add a new check for end variable
>>
>> v8:
>>    - fix warnings reported by kernel test robot <lkp@intel.com>
>>
>> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
>> ---
>>   drivers/gpu/drm/drm_buddy.c                   | 315 +++++++++++++-----
>>   drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |  67 ++--
>>   drivers/gpu/drm/i915/i915_ttm_buddy_manager.h |   2 +
>>   include/drm/drm_buddy.h                       |  13 +-
>>   4 files changed, 280 insertions(+), 117 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>> index d60878bc9c20..cfc160a1ef1a 100644
>> --- a/drivers/gpu/drm/drm_buddy.c
>> +++ b/drivers/gpu/drm/drm_buddy.c
>> @@ -282,23 +282,97 @@ void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects)
>>   }
>>   EXPORT_SYMBOL(drm_buddy_free_list);
>>   
>> -/**
>> - * drm_buddy_alloc_blocks - allocate power-of-two blocks
>> - *
>> - * @mm: DRM buddy manager to allocate from
>> - * @order: size of the allocation
>> - *
>> - * The order value here translates to:
>> - *
>> - * 0 = 2^0 * mm->chunk_size
>> - * 1 = 2^1 * mm->chunk_size
>> - * 2 = 2^2 * mm->chunk_size
>> - *
>> - * Returns:
>> - * allocated ptr to the &drm_buddy_block on success
>> - */
>> -struct drm_buddy_block *
>> -drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order)
>> +static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
>> +{
>> +	return s1 <= e2 && e1 >= s2;
>> +}
>> +
>> +static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
>> +{
>> +	return s1 <= s2 && e1 >= e2;
>> +}
>> +
>> +static struct drm_buddy_block *
>> +alloc_range_bias(struct drm_buddy *mm,
>> +		 u64 start, u64 end,
>> +		 unsigned int order)
>> +{
>> +	struct drm_buddy_block *block;
>> +	struct drm_buddy_block *buddy;
>> +	LIST_HEAD(dfs);
>> +	int err;
>> +	int i;
>> +
>> +	end = end - 1;
>> +
>> +	for (i = 0; i < mm->n_roots; ++i)
>> +		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
>> +
>> +	do {
>> +		u64 block_start;
>> +		u64 block_end;
>> +
>> +		block = list_first_entry_or_null(&dfs,
>> +						 struct drm_buddy_block,
>> +						 tmp_link);
>> +		if (!block)
>> +			break;
>> +
>> +		list_del(&block->tmp_link);
>> +
>> +		if (drm_buddy_block_order(block) < order)
>> +			continue;
>> +
>> +		block_start = drm_buddy_block_offset(block);
>> +		block_end = block_start + drm_buddy_block_size(mm, block) - 1;
>> +
>> +		if (!overlaps(start, end, block_start, block_end))
>> +			continue;
>> +
>> +		if (drm_buddy_block_is_allocated(block))
>> +			continue;
>> +
>> +		if (contains(start, end, block_start, block_end) &&
>> +		    order == drm_buddy_block_order(block)) {
>> +			/*
>> +			 * Find the free block within the range.
>> +			 */
>> +			if (drm_buddy_block_is_free(block))
>> +				return block;
>> +
>> +			continue;
>> +		}
>> +
>> +		if (!drm_buddy_block_is_split(block)) {
>> +			err = split_block(mm, block);
>> +			if (unlikely(err))
>> +				goto err_undo;
>> +		}
>> +
>> +		list_add(&block->right->tmp_link, &dfs);
>> +		list_add(&block->left->tmp_link, &dfs);
>> +	} while (1);
>> +
>> +	return ERR_PTR(-ENOSPC);
>> +
>> +err_undo:
>> +	/*
>> +	 * We really don't want to leave around a bunch of split blocks, since
>> +	 * bigger is better, so make sure we merge everything back before we
>> +	 * free the allocated blocks.
>> +	 */
>> +	buddy = get_buddy(block);
>> +	if (buddy &&
>> +	    (drm_buddy_block_is_free(block) &&
>> +	     drm_buddy_block_is_free(buddy)))
>> +		__drm_buddy_free(mm, block);
>> +	return ERR_PTR(err);
>> +}
>> +
>> +static struct drm_buddy_block *
>> +alloc_from_freelist(struct drm_buddy *mm,
>> +		    unsigned int order,
>> +		    unsigned long flags)
>>   {
>>   	struct drm_buddy_block *block = NULL;
>>   	unsigned int i;
>> @@ -320,78 +394,29 @@ drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order)
>>   	while (i != order) {
>>   		err = split_block(mm, block);
>>   		if (unlikely(err))
>> -			goto out_free;
>> +			goto err_undo;
>>   
>> -		/* Go low */
>> -		block = block->left;
>> +		block = block->right;
>>   		i--;
>>   	}
>> -
>> -	mark_allocated(block);
>> -	mm->avail -= drm_buddy_block_size(mm, block);
>> -	kmemleak_update_trace(block);
>>   	return block;
>>   
>> -out_free:
>> +err_undo:
>>   	if (i != order)
>>   		__drm_buddy_free(mm, block);
>>   	return ERR_PTR(err);
>>   }
>> -EXPORT_SYMBOL(drm_buddy_alloc_blocks);
>> -
>> -static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
>> -{
>> -	return s1 <= e2 && e1 >= s2;
>> -}
>>   
>> -static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
>> -{
>> -	return s1 <= s2 && e1 >= e2;
>> -}
>> -
>> -/**
>> - * drm_buddy_alloc_range - allocate range
>> - *
>> - * @mm: DRM buddy manager to allocate from
>> - * @blocks: output list head to add allocated blocks
>> - * @start: start of the allowed range for this block
>> - * @size: size of the allocation
>> - *
>> - * Intended for pre-allocating portions of the address space, for example to
>> - * reserve a block for the initial framebuffer or similar, hence the expectation
>> - * here is that drm_buddy_alloc_blocks() is still the main vehicle for
>> - * allocations, so if that's not the case then the drm_mm range allocator is
>> - * probably a much better fit, and so you should probably go use that instead.
>> - *
>> - * Note that it's safe to chain together multiple alloc_ranges
>> - * with the same blocks list
>> - *
>> - * Returns:
>> - * 0 on success, error code on failure.
>> - */
>> -int drm_buddy_alloc_range(struct drm_buddy *mm,
>> -			  struct list_head *blocks,
>> -			  u64 start, u64 size)
>> +static int __alloc_range(struct drm_buddy *mm,
>> +			 struct list_head *dfs,
>> +			 u64 start, u64 size,
>> +			 struct list_head *blocks)
>>   {
>>   	struct drm_buddy_block *block;
>>   	struct drm_buddy_block *buddy;
>>   	LIST_HEAD(allocated);
>> -	LIST_HEAD(dfs);
>>   	u64 end;
>>   	int err;
>> -	int i;
>> -
>> -	if (size < mm->chunk_size)
>> -		return -EINVAL;
>> -
>> -	if (!IS_ALIGNED(size | start, mm->chunk_size))
>> -		return -EINVAL;
>> -
>> -	if (range_overflows(start, size, mm->size))
>> -		return -EINVAL;
>> -
>> -	for (i = 0; i < mm->n_roots; ++i)
>> -		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
>>   
>>   	end = start + size - 1;
>>   
>> @@ -399,7 +424,7 @@ int drm_buddy_alloc_range(struct drm_buddy *mm,
>>   		u64 block_start;
>>   		u64 block_end;
>>   
>> -		block = list_first_entry_or_null(&dfs,
>> +		block = list_first_entry_or_null(dfs,
>>   						 struct drm_buddy_block,
>>   						 tmp_link);
>>   		if (!block)
>> @@ -436,8 +461,8 @@ int drm_buddy_alloc_range(struct drm_buddy *mm,
>>   				goto err_undo;
>>   		}
>>   
>> -		list_add(&block->right->tmp_link, &dfs);
>> -		list_add(&block->left->tmp_link, &dfs);
>> +		list_add(&block->right->tmp_link, dfs);
>> +		list_add(&block->left->tmp_link, dfs);
>>   	} while (1);
>>   
>>   	list_splice_tail(&allocated, blocks);
>> @@ -459,7 +484,143 @@ int drm_buddy_alloc_range(struct drm_buddy *mm,
>>   	drm_buddy_free_list(mm, &allocated);
>>   	return err;
>>   }
>> -EXPORT_SYMBOL(drm_buddy_alloc_range);
>> +
>> +/**
>> + * __drm_buddy_alloc_range - actual range allocation
>> + *
>> + * @mm: DRM buddy manager to allocate from
>> + * @start: start of the allowed range for this block
>> + * @size: size of the allocation
>> + * @blocks: output list head to add allocated blocks
> 
> I guess no need to add kernel-doc for static functions.
> 
>> + *
>> + * Intended for pre-allocating portions of the address space, for example to
>> + * reserve a block for the initial framebuffer or similar
>> + *
>> + * Note that it's safe to chain together multiple alloc_ranges
>> + * with the same blocks list
>> + *
>> + * Returns:
>> + * 0 on success, error code on failure.
>> + */
>> +static int __drm_buddy_alloc_range(struct drm_buddy *mm,
>> +				   u64 start,
>> +				   u64 size,
>> +				   struct list_head *blocks)
>> +{
>> +	LIST_HEAD(dfs);
>> +	int i;
>> +
>> +	for (i = 0; i < mm->n_roots; ++i)
>> +		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
>> +
>> +	return __alloc_range(mm, &dfs, start, size, blocks);
>> +}
>> +
>> +/**
>> + * drm_buddy_alloc_blocks - allocate power-of-two blocks
>> + *
>> + * @mm: DRM buddy manager to allocate from
>> + * @start: start of the allowed range for this block
>> + * @end: end of the allowed range for this block
>> + * @size: size of the allocation
>> + * @min_page_size: alignment of the allocation
>> + * @blocks: output list head to add allocated blocks
>> + * @flags: DRM_BUDDY_*_ALLOCATION flags
>> + *
>> + * alloc_range_bias() called on range limitations, which traverses
>> + * the tree and returns the desired block.
>> + *
>> + * alloc_from_freelist() called when *no* range restrictions
>> + * are enforced, which picks the block from the freelist.
>> + *
>> + * blocks are allocated in order, the order value here translates to:
>> + *
>> + * 0 = 2^0 * mm->chunk_size
>> + * 1 = 2^1 * mm->chunk_size
>> + * 2 = 2^2 * mm->chunk_size
> 
> The order stuff here can be dropped I think, since this is now just 'size'.
> 
>> + *
>> + * Returns:
>> + * 0 on success, error code on failure.
>> + */
>> +int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>> +			   u64 start, u64 end, u64 size,
>> +			   u64 min_page_size,
>> +			   struct list_head *blocks,
>> +			   unsigned long flags)
>> +{
>> +	struct drm_buddy_block *block = NULL;
>> +	unsigned int min_order, order;
>> +	unsigned long pages;
>> +	LIST_HEAD(allocated);
>> +	int err;
>> +
>> +	if (size < mm->chunk_size)
>> +		return -EINVAL;
>> +
>> +	if (min_page_size < mm->chunk_size)
>> +		return -EINVAL;
>> +
>> +	if (!is_power_of_2(min_page_size))
>> +		return -EINVAL;
>> +
>> +	if (!IS_ALIGNED(start | end | size, mm->chunk_size))
>> +		return -EINVAL;
>> +
>> +	if (end > mm->size)
>> +		return -EINVAL;
>> +
>> +	if (range_overflows(start, size, mm->size))
>> +		return -EINVAL;
>> +
>> +	/* Actual range allocation */
>> +	if (start + size == end)
>> +		return __drm_buddy_alloc_range(mm, start, size, blocks);
>> +
>> +	pages = size >> ilog2(mm->chunk_size);
>> +	order = fls(pages) - 1;
>> +	min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
>> +
>> +	do {
>> +		order = min(order, (unsigned int)fls(pages) - 1);
>> +		BUG_ON(order > mm->max_order);
>> +		BUG_ON(order < min_order);
>> +
>> +		do {
>> +			if (flags & DRM_BUDDY_RANGE_ALLOCATION)
> 
> Do we need an explicit flag for this? Can we not just check:
> 
> if (start || end < size)

amdgpu failed to load, as some requirements are not met with the above
condition;

if we keep (start || end != size), amdgpu loads successfully.
> 
> We already implicitly determine the "actual range allocation" above. So
> that would just leave the TOPDOWN flag

ok, now we have only the TOPDOWN flag
#define DRM_BUDDY_TOPDOWN_ALLOCATION (1 << 0)
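
For completeness, a minimal usage sketch against the v11 interface quoted
above (values illustrative; as agreed here, the next revision keeps only
the TOPDOWN flag and detects range restrictions implicitly):

	struct drm_buddy mm;
	LIST_HEAD(blocks);
	unsigned long flags = 0;
	int err;

	err = drm_buddy_init(&mm, SZ_4G, SZ_4K);
	if (err)
		return err;

	/* No restriction: blocks come straight from the freelist. */
	err = drm_buddy_alloc_blocks(&mm, 0, mm.size, SZ_8M, SZ_4K,
				     &blocks, flags);

	/* Restricted window (e.g. CPU-visible VRAM): tree traversal
	 * via alloc_range_bias().
	 */
	flags |= DRM_BUDDY_RANGE_ALLOCATION;
	err = drm_buddy_alloc_blocks(&mm, 0, SZ_256M, SZ_8M, SZ_4K,
				     &blocks, flags);

	drm_buddy_free_list(&mm, &blocks);
	drm_buddy_fini(&mm);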

> 
> Otherwise,
> Reviewed-by: Matthew Auld <matthew.auld@intel.com>
> 
>> +				/* Allocate traversing within the range */
>> +				block = alloc_range_bias(mm, start, end, order);
>> +			else
>> +				/* Allocate from freelist */
>> +				block = alloc_from_freelist(mm, order, flags);
>> +
>> +			if (!IS_ERR(block))
>> +				break;
>> +
>> +			if (order-- == min_order) {
>> +				err = -ENOSPC;
>> +				goto err_free;
>> +			}
>> +		} while (1);
>> +
>> +		mark_allocated(block);
>> +		mm->avail -= drm_buddy_block_size(mm, block);
>> +		kmemleak_update_trace(block);
>> +		list_add_tail(&block->link, &allocated);
>> +
>> +		pages -= BIT(order);
>> +
>> +		if (!pages)
>> +			break;
>> +	} while (1);
>> +
>> +	list_splice_tail(&allocated, blocks);
>> +	return 0;
>> +
>> +err_free:
>> +	drm_buddy_free_list(mm, &allocated);
>> +	return err;
>> +}
>> +EXPORT_SYMBOL(drm_buddy_alloc_blocks);
>>   
>>   /**
>>    * drm_buddy_block_print - print block information
>> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>> index 247714bab044..a328a38fab07 100644
>> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>> @@ -36,13 +36,14 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
>>   	struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
>>   	struct i915_ttm_buddy_resource *bman_res;
>>   	struct drm_buddy *mm = &bman->mm;
>> -	unsigned long n_pages;
>> -	unsigned int min_order;
>> +	unsigned long n_pages, lpfn;
>>   	u64 min_page_size;
>>   	u64 size;
>>   	int err;
>>   
>> -	GEM_BUG_ON(place->fpfn || place->lpfn);
>> +	lpfn = place->lpfn;
>> +	if (!lpfn)
>> +		lpfn = man->size;
>>   
>>   	bman_res = kzalloc(sizeof(*bman_res), GFP_KERNEL);
>>   	if (!bman_res)
>> @@ -52,6 +53,9 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
>>   	INIT_LIST_HEAD(&bman_res->blocks);
>>   	bman_res->mm = mm;
>>   
>> +	if (place->fpfn || lpfn != man->size)
>> +		bman_res->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>> +
>>   	GEM_BUG_ON(!bman_res->base.num_pages);
>>   	size = bman_res->base.num_pages << PAGE_SHIFT;
>>   
>> @@ -60,10 +64,16 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
>>   		min_page_size = bo->page_alignment << PAGE_SHIFT;
>>   
>>   	GEM_BUG_ON(min_page_size < mm->chunk_size);
>> -	min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
>> +
>>   	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>> +		unsigned long pages;
>> +
>>   		size = roundup_pow_of_two(size);
>> -		min_order = ilog2(size) - ilog2(mm->chunk_size);
>> +		min_page_size = size;
>> +
>> +		pages = size >> ilog2(mm->chunk_size);
>> +		if (pages > lpfn)
>> +			lpfn = pages;
>>   	}
>>   
>>   	if (size > mm->size) {
>> @@ -73,34 +83,16 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
>>   
>>   	n_pages = size >> ilog2(mm->chunk_size);
>>   
>> -	do {
>> -		struct drm_buddy_block *block;
>> -		unsigned int order;
>> -
>> -		order = fls(n_pages) - 1;
>> -		GEM_BUG_ON(order > mm->max_order);
>> -		GEM_BUG_ON(order < min_order);
>> -
>> -		do {
>> -			mutex_lock(&bman->lock);
>> -			block = drm_buddy_alloc_blocks(mm, order);
>> -			mutex_unlock(&bman->lock);
>> -			if (!IS_ERR(block))
>> -				break;
>> -
>> -			if (order-- == min_order) {
>> -				err = -ENOSPC;
>> -				goto err_free_blocks;
>> -			}
>> -		} while (1);
>> -
>> -		n_pages -= BIT(order);
>> -
>> -		list_add_tail(&block->link, &bman_res->blocks);
>> -
>> -		if (!n_pages)
>> -			break;
>> -	} while (1);
>> +	mutex_lock(&bman->lock);
>> +	err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
>> +				     (u64)lpfn << PAGE_SHIFT,
>> +				     (u64)n_pages << PAGE_SHIFT,
>> +				     min_page_size,
>> +				     &bman_res->blocks,
>> +				     bman_res->flags);
>> +	mutex_unlock(&bman->lock);
>> +	if (unlikely(err))
>> +		goto err_free_blocks;
>>   
>>   	*res = &bman_res->base;
>>   	return 0;
>> @@ -268,10 +260,17 @@ int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man,
>>   {
>>   	struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
>>   	struct drm_buddy *mm = &bman->mm;
>> +	unsigned long flags = 0;
>>   	int ret;
>>   
>> +	flags |= DRM_BUDDY_RANGE_ALLOCATION;
>> +
>>   	mutex_lock(&bman->lock);
>> -	ret = drm_buddy_alloc_range(mm, &bman->reserved, start, size);
>> +	ret = drm_buddy_alloc_blocks(mm, start,
>> +				     start + size,
>> +				     size, mm->chunk_size,
>> +				     &bman->reserved,
>> +				     flags);
>>   	mutex_unlock(&bman->lock);
>>   
>>   	return ret;
>> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
>> index 312077941411..72c90b432e87 100644
>> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
>> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
>> @@ -20,6 +20,7 @@ struct drm_buddy;
>>    *
>>    * @base: struct ttm_resource base class we extend
>>    * @blocks: the list of struct i915_buddy_block for this resource/allocation
>> + * @flags: DRM_BUDDY_*_ALLOCATION flags
>>    * @mm: the struct i915_buddy_mm for this resource
>>    *
>>    * Extends the struct ttm_resource to manage an address space allocation with
>> @@ -28,6 +29,7 @@ struct drm_buddy;
>>   struct i915_ttm_buddy_resource {
>>   	struct ttm_resource base;
>>   	struct list_head blocks;
>> +	unsigned long flags;
>>   	struct drm_buddy *mm;
>>   };
>>   
>> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>> index f524db152413..54f25a372f27 100644
>> --- a/include/drm/drm_buddy.h
>> +++ b/include/drm/drm_buddy.h
>> @@ -22,6 +22,8 @@
>>   	start__ >= max__ || size__ > max__ - start__; \
>>   })
>>   
>> +#define DRM_BUDDY_RANGE_ALLOCATION (1 << 0)
>> +
>>   struct drm_buddy_block {
>>   #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
>>   #define DRM_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
>> @@ -131,12 +133,11 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size);
>>   
>>   void drm_buddy_fini(struct drm_buddy *mm);
>>   
>> -struct drm_buddy_block *
>> -drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order);
>> -
>> -int drm_buddy_alloc_range(struct drm_buddy *mm,
>> -			  struct list_head *blocks,
>> -			  u64 start, u64 size);
>> +int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>> +			   u64 start, u64 end, u64 size,
>> +			   u64 min_page_size,
>> +			   struct list_head *blocks,
>> +			   unsigned long flags);
>>   
>>   void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block);
>>   
>>
>> base-commit: 627e2885b51ab503a98aa89f0a0bd68416c731fc

^ permalink raw reply	[flat|nested] 34+ messages in thread

>> - */
>> -int drm_buddy_alloc_range(struct drm_buddy *mm,
>> -			  struct list_head *blocks,
>> -			  u64 start, u64 size)
>> +static int __alloc_range(struct drm_buddy *mm,
>> +			 struct list_head *dfs,
>> +			 u64 start, u64 size,
>> +			 struct list_head *blocks)
>>   {
>>   	struct drm_buddy_block *block;
>>   	struct drm_buddy_block *buddy;
>>   	LIST_HEAD(allocated);
>> -	LIST_HEAD(dfs);
>>   	u64 end;
>>   	int err;
>> -	int i;
>> -
>> -	if (size < mm->chunk_size)
>> -		return -EINVAL;
>> -
>> -	if (!IS_ALIGNED(size | start, mm->chunk_size))
>> -		return -EINVAL;
>> -
>> -	if (range_overflows(start, size, mm->size))
>> -		return -EINVAL;
>> -
>> -	for (i = 0; i < mm->n_roots; ++i)
>> -		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
>>   
>>   	end = start + size - 1;
>>   
>> @@ -399,7 +424,7 @@ int drm_buddy_alloc_range(struct drm_buddy *mm,
>>   		u64 block_start;
>>   		u64 block_end;
>>   
>> -		block = list_first_entry_or_null(&dfs,
>> +		block = list_first_entry_or_null(dfs,
>>   						 struct drm_buddy_block,
>>   						 tmp_link);
>>   		if (!block)
>> @@ -436,8 +461,8 @@ int drm_buddy_alloc_range(struct drm_buddy *mm,
>>   				goto err_undo;
>>   		}
>>   
>> -		list_add(&block->right->tmp_link, &dfs);
>> -		list_add(&block->left->tmp_link, &dfs);
>> +		list_add(&block->right->tmp_link, dfs);
>> +		list_add(&block->left->tmp_link, dfs);
>>   	} while (1);
>>   
>>   	list_splice_tail(&allocated, blocks);
>> @@ -459,7 +484,143 @@ int drm_buddy_alloc_range(struct drm_buddy *mm,
>>   	drm_buddy_free_list(mm, &allocated);
>>   	return err;
>>   }
>> -EXPORT_SYMBOL(drm_buddy_alloc_range);
>> +
>> +/**
>> + * __drm_buddy_alloc_range - actual range allocation
>> + *
>> + * @mm: DRM buddy manager to allocate from
>> + * @start: start of the allowed range for this block
>> + * @size: size of the allocation
>> + * @blocks: output list head to add allocated blocks
> 
> I guess no need to add kernel-doc for static functions.
> 
>> + *
>> + * Intended for pre-allocating portions of the address space, for example to
>> + * reserve a block for the initial framebuffer or similar
>> + *
>> + * Note that it's safe to chain together multiple alloc_ranges
>> + * with the same blocks list
>> + *
>> + * Returns:
>> + * 0 on success, error code on failure.
>> + */
>> +static int __drm_buddy_alloc_range(struct drm_buddy *mm,
>> +				   u64 start,
>> +				   u64 size,
>> +				   struct list_head *blocks)
>> +{
>> +	LIST_HEAD(dfs);
>> +	int i;
>> +
>> +	for (i = 0; i < mm->n_roots; ++i)
>> +		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
>> +
>> +	return __alloc_range(mm, &dfs, start, size, blocks);
>> +}
>> +
>> +/**
>> + * drm_buddy_alloc_blocks - allocate power-of-two blocks
>> + *
>> + * @mm: DRM buddy manager to allocate from
>> + * @start: start of the allowed range for this block
>> + * @end: end of the allowed range for this block
>> + * @size: size of the allocation
>> + * @min_page_size: alignment of the allocation
>> + * @blocks: output list head to add allocated blocks
>> + * @flags: DRM_BUDDY_*_ALLOCATION flags
>> + *
>> + * alloc_range_bias() called on range limitations, which traverses
>> + * the tree and returns the desired block.
>> + *
>> + * alloc_from_freelist() called when *no* range restrictions
>> + * are enforced, which picks the block from the freelist.
>> + *
>> + * blocks are allocated in order, the order value here translates to:
>> + *
>> + * 0 = 2^0 * mm->chunk_size
>> + * 1 = 2^1 * mm->chunk_size
>> + * 2 = 2^2 * mm->chunk_size
> 
> The order stuff here can be dropped I think, since this is now just 'size'.
> 
>> + *
>> + * Returns:
>> + * 0 on success, error code on failure.
>> + */
>> +int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>> +			   u64 start, u64 end, u64 size,
>> +			   u64 min_page_size,
>> +			   struct list_head *blocks,
>> +			   unsigned long flags)
>> +{
>> +	struct drm_buddy_block *block = NULL;
>> +	unsigned int min_order, order;
>> +	unsigned long pages;
>> +	LIST_HEAD(allocated);
>> +	int err;
>> +
>> +	if (size < mm->chunk_size)
>> +		return -EINVAL;
>> +
>> +	if (min_page_size < mm->chunk_size)
>> +		return -EINVAL;
>> +
>> +	if (!is_power_of_2(min_page_size))
>> +		return -EINVAL;
>> +
>> +	if (!IS_ALIGNED(start | end | size, mm->chunk_size))
>> +		return -EINVAL;
>> +
>> +	if (end > mm->size)
>> +		return -EINVAL;
>> +
>> +	if (range_overflows(start, size, mm->size))
>> +		return -EINVAL;
>> +
>> +	/* Actual range allocation */
>> +	if (start + size == end)
>> +		return __drm_buddy_alloc_range(mm, start, size, blocks);
>> +
>> +	pages = size >> ilog2(mm->chunk_size);
>> +	order = fls(pages) - 1;
>> +	min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
>> +
>> +	do {
>> +		order = min(order, (unsigned int)fls(pages) - 1);
>> +		BUG_ON(order > mm->max_order);
>> +		BUG_ON(order < min_order);
>> +
>> +		do {
>> +			if (flags & DRM_BUDDY_RANGE_ALLOCATION)
> 
> Do we need an explicit flag for this? Can we not just check:
> 
> if (start || end < size)

amdgpu failed to load, as some of its requirements are not met by the
above condition;

if we instead use (start || end != size), amdgpu loads successfully.
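
For illustration, a minimal sketch of the two call styles as they stand
in this revision of the patch (mm, start, end and size are assumed to be
set up by the caller; this snippet is not part of the patch itself):

	LIST_HEAD(blocks);
	int err;

	/* restricted to [start, end) - traverses the tree via alloc_range_bias() */
	err = drm_buddy_alloc_blocks(mm, start, end, size,
				     mm->chunk_size, &blocks,
				     DRM_BUDDY_RANGE_ALLOCATION);

	/* no range restriction - picks blocks straight from the freelist */
	err = drm_buddy_alloc_blocks(mm, 0, mm->size, size,
				     mm->chunk_size, &blocks, 0);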
> 
> We already implicitly determine the "actual range allocation" above. So 
> that would just leave the TOPDOWN flag

ok, now we have only the TOPDOWN flag
#define DRM_BUDDY_TOPDOWN_ALLOCATION (1 << 0)
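
and, assuming that bit value, a caller asking for top-down placement
(added by patch 2/5 of this series) would look something like:

	/* hint the allocator to place blocks at the highest free addresses */
	err = drm_buddy_alloc_blocks(mm, 0, mm->size, size,
				     mm->chunk_size, &blocks,
				     DRM_BUDDY_TOPDOWN_ALLOCATION);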

> 
> Otherwise,
> Reviewed-by: Matthew Auld <matthew.auld@intel.com>
> 
>> +				/* Allocate traversing within the range */
>> +				block = alloc_range_bias(mm, start, end, order);
>> +			else
>> +				/* Allocate from freelist */
>> +				block = alloc_from_freelist(mm, order, flags);
>> +
>> +			if (!IS_ERR(block))
>> +				break;
>> +
>> +			if (order-- == min_order) {
>> +				err = -ENOSPC;
>> +				goto err_free;
>> +			}
>> +		} while (1);
>> +
>> +		mark_allocated(block);
>> +		mm->avail -= drm_buddy_block_size(mm, block);
>> +		kmemleak_update_trace(block);
>> +		list_add_tail(&block->link, &allocated);
>> +
>> +		pages -= BIT(order);
>> +
>> +		if (!pages)
>> +			break;
>> +	} while (1);
>> +
>> +	list_splice_tail(&allocated, blocks);
>> +	return 0;
>> +
>> +err_free:
>> +	drm_buddy_free_list(mm, &allocated);
>> +	return err;
>> +}
>> +EXPORT_SYMBOL(drm_buddy_alloc_blocks);
>>   
>>   /**
>>    * drm_buddy_block_print - print block information
>> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>> index 247714bab044..a328a38fab07 100644
>> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>> @@ -36,13 +36,14 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
>>   	struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
>>   	struct i915_ttm_buddy_resource *bman_res;
>>   	struct drm_buddy *mm = &bman->mm;
>> -	unsigned long n_pages;
>> -	unsigned int min_order;
>> +	unsigned long n_pages, lpfn;
>>   	u64 min_page_size;
>>   	u64 size;
>>   	int err;
>>   
>> -	GEM_BUG_ON(place->fpfn || place->lpfn);
>> +	lpfn = place->lpfn;
>> +	if (!lpfn)
>> +		lpfn = man->size;
>>   
>>   	bman_res = kzalloc(sizeof(*bman_res), GFP_KERNEL);
>>   	if (!bman_res)
>> @@ -52,6 +53,9 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
>>   	INIT_LIST_HEAD(&bman_res->blocks);
>>   	bman_res->mm = mm;
>>   
>> +	if (place->fpfn || lpfn != man->size)
>> +		bman_res->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>> +
>>   	GEM_BUG_ON(!bman_res->base.num_pages);
>>   	size = bman_res->base.num_pages << PAGE_SHIFT;
>>   
>> @@ -60,10 +64,16 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
>>   		min_page_size = bo->page_alignment << PAGE_SHIFT;
>>   
>>   	GEM_BUG_ON(min_page_size < mm->chunk_size);
>> -	min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
>> +
>>   	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>> +		unsigned long pages;
>> +
>>   		size = roundup_pow_of_two(size);
>> -		min_order = ilog2(size) - ilog2(mm->chunk_size);
>> +		min_page_size = size;
>> +
>> +		pages = size >> ilog2(mm->chunk_size);
>> +		if (pages > lpfn)
>> +			lpfn = pages;
>>   	}
>>   
>>   	if (size > mm->size) {
>> @@ -73,34 +83,16 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
>>   
>>   	n_pages = size >> ilog2(mm->chunk_size);
>>   
>> -	do {
>> -		struct drm_buddy_block *block;
>> -		unsigned int order;
>> -
>> -		order = fls(n_pages) - 1;
>> -		GEM_BUG_ON(order > mm->max_order);
>> -		GEM_BUG_ON(order < min_order);
>> -
>> -		do {
>> -			mutex_lock(&bman->lock);
>> -			block = drm_buddy_alloc_blocks(mm, order);
>> -			mutex_unlock(&bman->lock);
>> -			if (!IS_ERR(block))
>> -				break;
>> -
>> -			if (order-- == min_order) {
>> -				err = -ENOSPC;
>> -				goto err_free_blocks;
>> -			}
>> -		} while (1);
>> -
>> -		n_pages -= BIT(order);
>> -
>> -		list_add_tail(&block->link, &bman_res->blocks);
>> -
>> -		if (!n_pages)
>> -			break;
>> -	} while (1);
>> +	mutex_lock(&bman->lock);
>> +	err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
>> +				     (u64)lpfn << PAGE_SHIFT,
>> +				     (u64)n_pages << PAGE_SHIFT,
>> +				     min_page_size,
>> +				     &bman_res->blocks,
>> +				     bman_res->flags);
>> +	mutex_unlock(&bman->lock);
>> +	if (unlikely(err))
>> +		goto err_free_blocks;
>>   
>>   	*res = &bman_res->base;
>>   	return 0;
>> @@ -268,10 +260,17 @@ int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man,
>>   {
>>   	struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
>>   	struct drm_buddy *mm = &bman->mm;
>> +	unsigned long flags = 0;
>>   	int ret;
>>   
>> +	flags |= DRM_BUDDY_RANGE_ALLOCATION;
>> +
>>   	mutex_lock(&bman->lock);
>> -	ret = drm_buddy_alloc_range(mm, &bman->reserved, start, size);
>> +	ret = drm_buddy_alloc_blocks(mm, start,
>> +				     start + size,
>> +				     size, mm->chunk_size,
>> +				     &bman->reserved,
>> +				     flags);
>>   	mutex_unlock(&bman->lock);
>>   
>>   	return ret;
>> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
>> index 312077941411..72c90b432e87 100644
>> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
>> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
>> @@ -20,6 +20,7 @@ struct drm_buddy;
>>    *
>>    * @base: struct ttm_resource base class we extend
>>    * @blocks: the list of struct i915_buddy_block for this resource/allocation
>> + * @flags: DRM_BUDDY_*_ALLOCATION flags
>>    * @mm: the struct i915_buddy_mm for this resource
>>    *
>>    * Extends the struct ttm_resource to manage an address space allocation with
>> @@ -28,6 +29,7 @@ struct drm_buddy;
>>   struct i915_ttm_buddy_resource {
>>   	struct ttm_resource base;
>>   	struct list_head blocks;
>> +	unsigned long flags;
>>   	struct drm_buddy *mm;
>>   };
>>   
>> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>> index f524db152413..54f25a372f27 100644
>> --- a/include/drm/drm_buddy.h
>> +++ b/include/drm/drm_buddy.h
>> @@ -22,6 +22,8 @@
>>   	start__ >= max__ || size__ > max__ - start__; \
>>   })
>>   
>> +#define DRM_BUDDY_RANGE_ALLOCATION (1 << 0)
>> +
>>   struct drm_buddy_block {
>>   #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
>>   #define DRM_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
>> @@ -131,12 +133,11 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size);
>>   
>>   void drm_buddy_fini(struct drm_buddy *mm);
>>   
>> -struct drm_buddy_block *
>> -drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order);
>> -
>> -int drm_buddy_alloc_range(struct drm_buddy *mm,
>> -			  struct list_head *blocks,
>> -			  u64 start, u64 size);
>> +int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>> +			   u64 start, u64 end, u64 size,
>> +			   u64 min_page_size,
>> +			   struct list_head *blocks,
>> +			   unsigned long flags);
>>   
>>   void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block);
>>   
>>
>> base-commit: 627e2885b51ab503a98aa89f0a0bd68416c731fc

Thread overview: 34+ messages
2022-01-27 14:11 [PATCH v11 1/5] drm: improve drm_buddy_alloc function Arunpravin
2022-01-27 14:11 ` [PATCH v11 2/5] drm: implement top-down allocation method Arunpravin
2022-01-27 14:11 ` [PATCH v11 3/5] drm: implement a method to free unused pages Arunpravin
2022-01-27 14:11 ` [PATCH v11 4/5] drm/amdgpu: move vram inline functions into a header Arunpravin
2022-01-27 14:11 ` [PATCH v11 5/5] drm/amdgpu: add drm buddy support to amdgpu Arunpravin
2022-01-28 14:18   ` Matthew Auld
2022-02-04 11:22     ` Arunpravin
2022-02-04 13:23       ` Christian König
2022-02-08 11:20         ` Arunpravin
2022-02-10 17:52           ` Matthew Auld
2022-01-27 15:52 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [v11,1/5] drm: improve drm_buddy_alloc function Patchwork
2022-01-27 16:23 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2022-01-27 21:45 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
2022-02-10 16:10 ` [PATCH v11 1/5] " Matthew Auld
2022-02-11  9:27   ` Arunpravin