dri-devel.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v6 1/3] drm/buddy: Implement tracking clear page feature
@ 2024-02-08 15:49 Arunpravin Paneer Selvam
  2024-02-08 15:49 ` [PATCH v6 2/3] drm/amdgpu: Enable clear page functionality Arunpravin Paneer Selvam
                   ` (3 more replies)
  0 siblings, 4 replies; 14+ messages in thread
From: Arunpravin Paneer Selvam @ 2024-02-08 15:49 UTC (permalink / raw)
  To: dri-devel, amd-gfx, intel-gfx
  Cc: christian.koenig, alexander.deucher, matthew.auld,
	felix.kuehling, mario.limonciello, Arunpravin Paneer Selvam

- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
  successfully clears the blocks in the free path. On the otherhand,
  DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
  but fallback to uncleared if we can't find the cleared blocks.
  when driver requests uncleared memory we try to use uncleared but
  fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared,
  when there are buddies which are cleared as well we can merge them.
  Otherwise, we prefer to keep the blocks as separated.

v1: (Christian)
  - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
    cleared. Else, reset the clear flag for each block in the list.

  - For merging the 2 cleared blocks compare as below,
    drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

v2: (Matthew)
  - Add a wrapper drm_buddy_free_list_internal for the freeing of blocks
    operation within drm buddy.
  - Write a macro block_incompatible() to allocate the required blocks.
  - Update the xe driver for the drm_buddy_free_list change in arguments.

Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Suggested-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
 drivers/gpu/drm/drm_buddy.c                   | 192 ++++++++++++++----
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
 drivers/gpu/drm/tests/drm_buddy_test.c        |  10 +-
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c          |   4 +-
 include/drm/drm_buddy.h                       |  18 +-
 6 files changed, 187 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	return 0;
 
 error_free_blocks:
-	drm_buddy_free_list(mm, &vres->blocks);
+	drm_buddy_free_list(mm, &vres->blocks, 0);
 	mutex_unlock(&mgr->lock);
 error_fini:
 	ttm_resource_fini(man, &vres->base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
 
 	amdgpu_vram_mgr_do_reserve(man);
 
-	drm_buddy_free_list(mm, &vres->blocks);
+	drm_buddy_free_list(mm, &vres->blocks, 0);
 	mutex_unlock(&mgr->lock);
 
 	atomic64_sub(vis_usage, &mgr->vis_usage);
@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
 		kfree(rsv);
 
 	list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {
-		drm_buddy_free_list(&mgr->mm, &rsv->allocated);
+		drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
 		kfree(rsv);
 	}
 	if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..33ad0cfbd54c 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
 	__list_add(&block->link, node->link.prev, &node->link);
 }
 
+static void clear_reset(struct drm_buddy_block *block)
+{
+	block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+	block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
 static void mark_allocated(struct drm_buddy_block *block)
 {
 	block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
 	mark_free(mm, block->left);
 	mark_free(mm, block->right);
 
+	if (drm_buddy_block_is_clear(block)) {
+		mark_cleared(block->left);
+		mark_cleared(block->right);
+		clear_reset(block);
+	}
+
 	mark_split(block);
 
 	return 0;
@@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
 		if (!drm_buddy_block_is_free(buddy))
 			break;
 
+		if (drm_buddy_block_is_clear(block) !=
+		    drm_buddy_block_is_clear(buddy))
+			break;
+
+		if (drm_buddy_block_is_clear(block))
+			mark_cleared(parent);
+
 		list_del(&buddy->link);
 
 		drm_block_free(mm, block);
@@ -295,26 +318,61 @@ void drm_buddy_free_block(struct drm_buddy *mm,
 {
 	BUG_ON(!drm_buddy_block_is_allocated(block));
 	mm->avail += drm_buddy_block_size(mm, block);
+	if (drm_buddy_block_is_clear(block))
+		mm->clear_avail += drm_buddy_block_size(mm, block);
+
 	__drm_buddy_free(mm, block);
 }
 EXPORT_SYMBOL(drm_buddy_free_block);
 
-/**
- * drm_buddy_free_list - free blocks
- *
- * @mm: DRM buddy manager
- * @objects: input list head to free blocks
- */
-void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects)
+static void __drm_buddy_free_list(struct drm_buddy *mm,
+				  struct list_head *objects,
+				  bool mark_clear,
+				  bool mark_dirty)
 {
 	struct drm_buddy_block *block, *on;
 
+	WARN_ON(mark_dirty && mark_clear);
+
 	list_for_each_entry_safe(block, on, objects, link) {
+		if (mark_clear)
+			mark_cleared(block);
+		else if (mark_dirty)
+			clear_reset(block);
 		drm_buddy_free_block(mm, block);
 		cond_resched();
 	}
 	INIT_LIST_HEAD(objects);
 }
+
+static void drm_buddy_free_list_internal(struct drm_buddy *mm,
+					 struct list_head *objects)
+{
+	/*
+	 * Don't touch the clear/dirty bit, since allocation is still internal
+	 * at this point. For example we might have just failed part of the
+	 * allocation.
+	 */
+	__drm_buddy_free_list(mm, objects, false, false);
+}
+
+/**
+ * drm_buddy_free_list - free blocks
+ *
+ * @mm: DRM buddy manager
+ * @objects: input list head to free blocks
+ * @flags: optional flags like DRM_BUDDY_CLEARED
+ */
+void drm_buddy_free_list(struct drm_buddy *mm,
+			 struct list_head *objects,
+			 unsigned int flags)
+{
+	bool mark_clear = flags & DRM_BUDDY_CLEARED;
+
+	WARN_ON(flags & ~(DRM_BUDDY_CLEARED));
+
+	__drm_buddy_free_list(mm, objects, mark_clear, !mark_clear);
+}
 EXPORT_SYMBOL(drm_buddy_free_list);
 
 static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
@@ -327,10 +385,19 @@ static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
 	return s1 <= s2 && e1 >= e2;
 }
 
+static bool block_incompatible(struct drm_buddy_block *block, unsigned int flags)
+{
+	bool needs_clear = flags & DRM_BUDDY_CLEAR_ALLOCATION;
+
+	return needs_clear != drm_buddy_block_is_clear(block);
+}
+
 static struct drm_buddy_block *
-alloc_range_bias(struct drm_buddy *mm,
-		 u64 start, u64 end,
-		 unsigned int order)
+__alloc_range_bias(struct drm_buddy *mm,
+		   u64 start, u64 end,
+		   unsigned int order,
+		   unsigned long flags,
+		   bool fallback)
 {
 	struct drm_buddy_block *block;
 	struct drm_buddy_block *buddy;
@@ -369,6 +436,9 @@ alloc_range_bias(struct drm_buddy *mm,
 
 		if (contains(start, end, block_start, block_end) &&
 		    order == drm_buddy_block_order(block)) {
+			if (!fallback && block_incompatible(block, flags))
+				continue;
+
 			/*
 			 * Find the free block within the range.
 			 */
@@ -405,25 +475,52 @@ alloc_range_bias(struct drm_buddy *mm,
 }
 
 static struct drm_buddy_block *
-get_maxblock(struct drm_buddy *mm, unsigned int order)
+__drm_buddy_alloc_range_bias(struct drm_buddy *mm,
+			     u64 start, u64 end,
+			     unsigned int order,
+			     unsigned long flags)
 {
-	struct drm_buddy_block *max_block = NULL, *node;
+	struct drm_buddy_block *block;
+	bool fallback = 0;
+
+	block = __alloc_range_bias(mm, start, end, order,
+				   flags, fallback);
+	if (IS_ERR(block))
+		return __alloc_range_bias(mm, start, end, order,
+					  flags, !fallback);
+
+	return block;
+}
+
+static struct drm_buddy_block *
+get_maxblock(struct drm_buddy *mm, unsigned int order,
+	     unsigned long flags)
+{
+	struct drm_buddy_block *max_block = NULL, *block = NULL;
 	unsigned int i;
 
 	for (i = order; i <= mm->max_order; ++i) {
-		if (!list_empty(&mm->free_list[i])) {
-			node = list_last_entry(&mm->free_list[i],
-					       struct drm_buddy_block,
-					       link);
-			if (!max_block) {
-				max_block = node;
+		struct drm_buddy_block *tmp_block;
+
+		list_for_each_entry_reverse(tmp_block, &mm->free_list[i], link) {
+			if (block_incompatible(tmp_block, flags))
 				continue;
-			}
 
-			if (drm_buddy_block_offset(node) >
-			    drm_buddy_block_offset(max_block)) {
-				max_block = node;
-			}
+			block = tmp_block;
+			break;
+		}
+
+		if (!block)
+			continue;
+
+		if (!max_block) {
+			max_block = block;
+			continue;
+		}
+
+		if (drm_buddy_block_offset(block) >
+		    drm_buddy_block_offset(max_block)) {
+			max_block = block;
 		}
 	}
 
@@ -440,11 +537,29 @@ alloc_from_freelist(struct drm_buddy *mm,
 	int err;
 
 	if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
-		block = get_maxblock(mm, order);
+		block = get_maxblock(mm, order, flags);
 		if (block)
 			/* Store the obtained block order */
 			tmp = drm_buddy_block_order(block);
 	} else {
+		for (tmp = order; tmp <= mm->max_order; ++tmp) {
+			struct drm_buddy_block *tmp_block;
+
+			list_for_each_entry_reverse(tmp_block, &mm->free_list[tmp], link) {
+				if (block_incompatible(tmp_block, flags))
+					continue;
+
+				block = tmp_block;
+				break;
+			}
+
+			if (block)
+				break;
+		}
+	}
+
+	if (!block) {
+		/* Fallback method */
 		for (tmp = order; tmp <= mm->max_order; ++tmp) {
 			if (!list_empty(&mm->free_list[tmp])) {
 				block = list_last_entry(&mm->free_list[tmp],
@@ -454,10 +569,10 @@ alloc_from_freelist(struct drm_buddy *mm,
 					break;
 			}
 		}
-	}
 
-	if (!block)
-		return ERR_PTR(-ENOSPC);
+		if (!block)
+			return ERR_PTR(-ENOSPC);
+	}
 
 	BUG_ON(!drm_buddy_block_is_free(block));
 
@@ -524,6 +639,8 @@ static int __alloc_range(struct drm_buddy *mm,
 			mark_allocated(block);
 			total_allocated += drm_buddy_block_size(mm, block);
 			mm->avail -= drm_buddy_block_size(mm, block);
+			if (drm_buddy_block_is_clear(block))
+				mm->clear_avail -= drm_buddy_block_size(mm, block);
 			list_add_tail(&block->link, &allocated);
 			continue;
 		}
@@ -558,7 +675,7 @@ static int __alloc_range(struct drm_buddy *mm,
 		list_splice_tail(&allocated, blocks);
 		*total_allocated_on_err = total_allocated;
 	} else {
-		drm_buddy_free_list(mm, &allocated);
+		drm_buddy_free_list_internal(mm, &allocated);
 	}
 
 	return err;
@@ -624,11 +741,11 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
 			list_splice(&blocks_lhs, blocks);
 			return 0;
 		} else if (err != -ENOSPC) {
-			drm_buddy_free_list(mm, blocks);
+			drm_buddy_free_list_internal(mm, blocks);
 			return err;
 		}
 		/* Free blocks for the next iteration */
-		drm_buddy_free_list(mm, blocks);
+		drm_buddy_free_list_internal(mm, blocks);
 	}
 
 	return -ENOSPC;
@@ -684,6 +801,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
 	list_del(&block->link);
 	mark_free(mm, block);
 	mm->avail += drm_buddy_block_size(mm, block);
+	if (drm_buddy_block_is_clear(block))
+		mm->clear_avail += drm_buddy_block_size(mm, block);
 
 	/* Prevent recursively freeing this node */
 	parent = block->parent;
@@ -695,6 +814,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
 	if (err) {
 		mark_allocated(block);
 		mm->avail -= drm_buddy_block_size(mm, block);
+		if (drm_buddy_block_is_clear(block))
+			mm->clear_avail -= drm_buddy_block_size(mm, block);
 		list_add(&block->link, blocks);
 	}
 
@@ -782,7 +903,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 		do {
 			if (flags & DRM_BUDDY_RANGE_ALLOCATION)
 				/* Allocate traversing within the range */
-				block = alloc_range_bias(mm, start, end, order);
+				block = __drm_buddy_alloc_range_bias(mm, start, end,
+								     order, flags);
 			else
 				/* Allocate from freelist */
 				block = alloc_from_freelist(mm, order, flags);
@@ -808,6 +930,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 
 		mark_allocated(block);
 		mm->avail -= drm_buddy_block_size(mm, block);
+		if (drm_buddy_block_is_clear(block))
+			mm->clear_avail -= drm_buddy_block_size(mm, block);
 		kmemleak_update_trace(block);
 		list_add_tail(&block->link, &allocated);
 
@@ -846,7 +970,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 	return 0;
 
 err_free:
-	drm_buddy_free_list(mm, &allocated);
+	drm_buddy_free_list_internal(mm, &allocated);
 	return err;
 }
 EXPORT_SYMBOL(drm_buddy_alloc_blocks);
@@ -879,8 +1003,8 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
 {
 	int order;
 
-	drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB\n",
-		   mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20);
+	drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
+		   mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
 
 	for (order = mm->max_order; order >= 0; order--) {
 		struct drm_buddy_block *block;
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index 0d735d5c2b35..942345548bc3 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -126,7 +126,7 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
 	return 0;
 
 err_free_blocks:
-	drm_buddy_free_list(mm, &bman_res->blocks);
+	drm_buddy_free_list(mm, &bman_res->blocks, 0);
 	mutex_unlock(&bman->lock);
 err_free_res:
 	ttm_resource_fini(man, &bman_res->base);
@@ -141,7 +141,7 @@ static void i915_ttm_buddy_man_free(struct ttm_resource_manager *man,
 	struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
 
 	mutex_lock(&bman->lock);
-	drm_buddy_free_list(&bman->mm, &bman_res->blocks);
+	drm_buddy_free_list(&bman->mm, &bman_res->blocks, 0);
 	bman->visible_avail += bman_res->used_visible_size;
 	mutex_unlock(&bman->lock);
 
@@ -345,7 +345,7 @@ int i915_ttm_buddy_man_fini(struct ttm_device *bdev, unsigned int type)
 	ttm_set_driver_manager(bdev, type, NULL);
 
 	mutex_lock(&bman->lock);
-	drm_buddy_free_list(mm, &bman->reserved);
+	drm_buddy_free_list(mm, &bman->reserved, 0);
 	drm_buddy_fini(mm);
 	bman->visible_avail += bman->visible_reserved;
 	WARN_ON_ONCE(bman->visible_avail != bman->visible_size);
diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c
index ea2af6bd9abe..e0860fce9ebd 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -83,7 +83,7 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test)
 							  top, max_order);
 	}
 
-	drm_buddy_free_list(&mm, &holes);
+	drm_buddy_free_list(&mm, &holes, 0);
 
 	/* Nothing larger than blocks of chunk_size now available */
 	for (order = 1; order <= max_order; order++) {
@@ -95,7 +95,7 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test)
 	}
 
 	list_splice_tail(&holes, &blocks);
-	drm_buddy_free_list(&mm, &blocks);
+	drm_buddy_free_list(&mm, &blocks, 0);
 	drm_buddy_fini(&mm);
 }
 
@@ -190,7 +190,7 @@ static void drm_test_buddy_alloc_pessimistic(struct kunit *test)
 
 	list_del(&block->link);
 	drm_buddy_free_block(&mm, block);
-	drm_buddy_free_list(&mm, &blocks);
+	drm_buddy_free_list(&mm, &blocks, 0);
 	drm_buddy_fini(&mm);
 }
 
@@ -236,7 +236,7 @@ static void drm_test_buddy_alloc_optimistic(struct kunit *test)
 							   size, size, &tmp, flags),
 						  "buddy_alloc unexpectedly succeeded, it should be full!");
 
-	drm_buddy_free_list(&mm, &blocks);
+	drm_buddy_free_list(&mm, &blocks, 0);
 	drm_buddy_fini(&mm);
 }
 
@@ -271,7 +271,7 @@ static void drm_test_buddy_alloc_limit(struct kunit *test)
 						drm_buddy_block_size(&mm, block),
 						BIT_ULL(mm.max_order) * PAGE_SIZE);
 
-	drm_buddy_free_list(&mm, &allocated);
+	drm_buddy_free_list(&mm, &allocated, 0);
 	drm_buddy_fini(&mm);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index 115ec745e502..1ad678b62c4a 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -196,7 +196,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
 	return 0;
 
 error_free_blocks:
-	drm_buddy_free_list(mm, &vres->blocks);
+	drm_buddy_free_list(mm, &vres->blocks, 0);
 	mutex_unlock(&mgr->lock);
 error_fini:
 	ttm_resource_fini(man, &vres->base);
@@ -214,7 +214,7 @@ static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man,
 	struct drm_buddy *mm = &mgr->mm;
 
 	mutex_lock(&mgr->lock);
-	drm_buddy_free_list(mm, &vres->blocks);
+	drm_buddy_free_list(mm, &vres->blocks, 0);
 	mgr->visible_avail += vres->used_visible_size;
 	mutex_unlock(&mgr->lock);
 
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index a5b39fc01003..d81c596dfa38 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -6,6 +6,7 @@
 #ifndef __DRM_BUDDY_H__
 #define __DRM_BUDDY_H__
 
+#include <linux/bitfield.h>
 #include <linux/bitops.h>
 #include <linux/list.h>
 #include <linux/slab.h>
@@ -25,15 +26,19 @@
 #define DRM_BUDDY_RANGE_ALLOCATION		BIT(0)
 #define DRM_BUDDY_TOPDOWN_ALLOCATION		BIT(1)
 #define DRM_BUDDY_CONTIGUOUS_ALLOCATION		BIT(2)
+#define DRM_BUDDY_CLEAR_ALLOCATION		BIT(3)
+#define DRM_BUDDY_CLEARED			BIT(4)
 
 struct drm_buddy_block {
 #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
 #define DRM_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
+#define DRM_BUDDY_HEADER_CLEAR  GENMASK_ULL(9, 9)
+
 #define   DRM_BUDDY_ALLOCATED	   (1 << 10)
 #define   DRM_BUDDY_FREE	   (2 << 10)
 #define   DRM_BUDDY_SPLIT	   (3 << 10)
 /* Free to be used, if needed in the future */
-#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(9, 6)
+#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
 #define DRM_BUDDY_HEADER_ORDER  GENMASK_ULL(5, 0)
 	u64 header;
 
@@ -86,6 +91,7 @@ struct drm_buddy {
 	u64 chunk_size;
 	u64 size;
 	u64 avail;
+	u64 clear_avail;
 };
 
 static inline u64
@@ -112,6 +118,12 @@ drm_buddy_block_is_allocated(struct drm_buddy_block *block)
 	return drm_buddy_block_state(block) == DRM_BUDDY_ALLOCATED;
 }
 
+static inline bool
+drm_buddy_block_is_clear(struct drm_buddy_block *block)
+{
+	return block->header & DRM_BUDDY_HEADER_CLEAR;
+}
+
 static inline bool
 drm_buddy_block_is_free(struct drm_buddy_block *block)
 {
@@ -150,7 +162,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
 
 void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block);
 
-void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects);
+void drm_buddy_free_list(struct drm_buddy *mm,
+			 struct list_head *objects,
+			 unsigned int flags);
 
 void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
 void drm_buddy_block_print(struct drm_buddy *mm,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v6 2/3] drm/amdgpu: Enable clear page functionality
  2024-02-08 15:49 [PATCH v6 1/3] drm/buddy: Implement tracking clear page feature Arunpravin Paneer Selvam
@ 2024-02-08 15:49 ` Arunpravin Paneer Selvam
  2024-02-08 15:50 ` [PATCH v6 3/3] drm/buddy: Add defragmentation support Arunpravin Paneer Selvam
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 14+ messages in thread
From: Arunpravin Paneer Selvam @ 2024-02-08 15:49 UTC (permalink / raw)
  To: dri-devel, amd-gfx, intel-gfx
  Cc: christian.koenig, alexander.deucher, matthew.auld,
	felix.kuehling, mario.limonciello, Arunpravin Paneer Selvam

Add clear page support in vram memory region.

v1:(Christian)
  - Dont handle clear page as TTM flag since when moving the BO back
    in from GTT again we don't need that.
  - Make a specialized version of amdgpu_fill_buffer() which only
    clears the VRAM areas which are not already cleared
  - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
    amdgpu_object.c

v2:
  - Modify the function name amdgpu_ttm_* (Alex)
  - Drop the delayed parameter (Christian)
  - handle amdgpu_res_cleared(&cursor) just above the size
    calculation (Christian)
  - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
    in the free path to properly wait for fences etc.. (Christian)

v3:(Christian)
  - Remove buffer clear code in VRAM manager instead change the
    AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
    the DRM_BUDDY_CLEARED flag.
  - Remove ! from amdgpu_res_cleared(&cursor) check.

Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Suggested-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 22 ++++---
 .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    | 25 ++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 61 ++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
 6 files changed, 111 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index b671b0665492..c673e070199f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
 
 /**
  * DOC: amdgpu_object
@@ -595,8 +596,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 	if (!amdgpu_bo_support_uswc(bo->flags))
 		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 
-	if (adev->ras_enabled)
-		bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+	bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
 
 	bo->tbo.bdev = &adev->mman.bdev;
 	if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -626,15 +626,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 
 	if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
 	    bo->tbo.resource->mem_type == TTM_PL_VRAM) {
-		struct dma_fence *fence;
+		struct dma_fence *fence = NULL;
 
-		r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
+		r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
 		if (unlikely(r))
 			goto fail_unreserve;
 
-		dma_resv_add_fence(bo->tbo.base.resv, fence,
-				   DMA_RESV_USAGE_KERNEL);
-		dma_fence_put(fence);
+		if (fence) {
+			dma_resv_add_fence(bo->tbo.base.resv, fence,
+					   DMA_RESV_USAGE_KERNEL);
+			dma_fence_put(fence);
+		}
 	}
 	if (!bp->resv)
 		amdgpu_bo_unreserve(bo);
@@ -1348,8 +1350,12 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
 	if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
 		return;
 
-	r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
+	r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
 	if (!WARN_ON(r)) {
+		struct amdgpu_vram_mgr_resource *vres;
+
+		vres = to_amdgpu_vram_mgr_resource(bo->resource);
+		vres->flags |= DRM_BUDDY_CLEARED;
 		amdgpu_bo_fence(abo, fence, false);
 		dma_fence_put(fence);
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
 	}
 }
 
+/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+	struct drm_buddy_block *block;
+
+	switch (cur->mem_type) {
+	case TTM_PL_VRAM:
+		block = cur->node;
+
+		if (!amdgpu_vram_mgr_is_cleared(block))
+			return false;
+		break;
+	default:
+		return false;
+	}
+
+	return true;
+}
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 8722beba494e..bcbffe909b47 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -378,11 +378,15 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 	    (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
 		struct dma_fence *wipe_fence = NULL;
 
-		r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
-					false);
+		r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
+				       false);
 		if (r) {
 			goto error;
 		} else if (wipe_fence) {
+			struct amdgpu_vram_mgr_resource *vres;
+
+			vres = to_amdgpu_vram_mgr_resource(bo->resource);
+			vres->flags |= DRM_BUDDY_CLEARED;
 			dma_fence_put(fence);
 			fence = wipe_fence;
 		}
@@ -2214,6 +2218,59 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
 	return 0;
 }
 
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+			    struct dma_resv *resv,
+			    struct dma_fence **fence)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+	struct amdgpu_res_cursor cursor;
+	struct dma_fence *f = NULL;
+	u64 addr;
+	int r;
+
+	if (!adev->mman.buffer_funcs_enabled)
+		return -EINVAL;
+
+	amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
+
+	mutex_lock(&adev->mman.gtt_window_lock);
+	while (cursor.remaining) {
+		struct dma_fence *next = NULL;
+		u64 size;
+
+		if (amdgpu_res_cleared(&cursor)) {
+			amdgpu_res_next(&cursor, cursor.size);
+			continue;
+		}
+
+		/* Never clear more than 256MiB at once to avoid timeouts */
+		size = min(cursor.size, 256ULL << 20);
+
+		r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
+					  1, ring, false, &size, &addr);
+		if (r)
+			goto err;
+
+		r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
+					&next, true, true);
+		if (r)
+			goto err;
+
+		dma_fence_put(f);
+		f = next;
+
+		amdgpu_res_next(&cursor, size);
+	}
+err:
+	mutex_unlock(&adev->mman.gtt_window_lock);
+	if (fence)
+		*fence = dma_fence_get(f);
+	dma_fence_put(f);
+
+	return r;
+}
+
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 			uint32_t src_data,
 			struct dma_resv *resv,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 65ec82141a8e..b404d89d52e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -38,8 +38,6 @@
 #define AMDGPU_GTT_MAX_TRANSFER_SIZE	512
 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS	2
 
-#define AMDGPU_POISON	0xd0bed0be
-
 extern const struct attribute_group amdgpu_vram_mgr_attr_group;
 extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
 
@@ -155,6 +153,9 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 			       uint64_t size, bool tmz,
 			       struct dma_resv *resv,
 			       struct dma_fence **f);
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+			    struct dma_resv *resv,
+			    struct dma_fence **fence);
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 			uint32_t src_data,
 			struct dma_resv *resv,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index c0c851409241..e494f5bf136a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -450,6 +450,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 {
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
 	struct amdgpu_device *adev = to_amdgpu_device(mgr);
+	struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
 	u64 vis_usage = 0, max_bytes, min_block_size;
 	struct amdgpu_vram_mgr_resource *vres;
 	u64 size, remaining_size, lpfn, fpfn;
@@ -501,6 +502,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
 		vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
 
+	if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
+		vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
+
 	if (fpfn || lpfn != mgr->mm.size)
 		/* Allocate blocks in desired range */
 		vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
@@ -604,7 +608,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
 
 	amdgpu_vram_mgr_do_reserve(man);
 
-	drm_buddy_free_list(mm, &vres->blocks, 0);
+	drm_buddy_free_list(mm, &vres->blocks, vres->flags);
 	mutex_unlock(&mgr->lock);
 
 	atomic64_sub(vis_usage, &mgr->vis_usage);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
index 0e04e42cf809..8478522d7366 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
@@ -53,6 +53,11 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
 	return (u64)PAGE_SIZE << drm_buddy_block_order(block);
 }
 
+static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block)
+{
+	return drm_buddy_block_is_clear(block);
+}
+
 static inline struct amdgpu_vram_mgr_resource *
 to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
 {
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v6 3/3] drm/buddy: Add defragmentation support
  2024-02-08 15:49 [PATCH v6 1/3] drm/buddy: Implement tracking clear page feature Arunpravin Paneer Selvam
  2024-02-08 15:49 ` [PATCH v6 2/3] drm/amdgpu: Enable clear page functionality Arunpravin Paneer Selvam
@ 2024-02-08 15:50 ` Arunpravin Paneer Selvam
  2024-02-16 12:23   ` Matthew Auld
  2024-02-16 11:49 ` [PATCH v6 1/3] drm/buddy: Implement tracking clear page feature Arunpravin Paneer Selvam
  2024-02-16 12:03 ` Matthew Auld
  3 siblings, 1 reply; 14+ messages in thread
From: Arunpravin Paneer Selvam @ 2024-02-08 15:50 UTC (permalink / raw)
  To: dri-devel, amd-gfx, intel-gfx
  Cc: christian.koenig, alexander.deucher, matthew.auld,
	felix.kuehling, mario.limonciello, Arunpravin Paneer Selvam

Add a function to support defragmentation.

v1: Defragment the memory beginning from min_order
    till the required memory space is available.

Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Suggested-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/drm_buddy.c | 67 +++++++++++++++++++++++++++++++------
 include/drm/drm_buddy.h     |  3 ++
 2 files changed, 59 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 33ad0cfbd54c..fac423d2cb73 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -276,10 +276,12 @@ drm_get_buddy(struct drm_buddy_block *block)
 }
 EXPORT_SYMBOL(drm_get_buddy);
 
-static void __drm_buddy_free(struct drm_buddy *mm,
-			     struct drm_buddy_block *block)
+static unsigned int __drm_buddy_free(struct drm_buddy *mm,
+				     struct drm_buddy_block *block,
+				     bool defrag)
 {
 	struct drm_buddy_block *parent;
+	unsigned int order;
 
 	while ((parent = block->parent)) {
 		struct drm_buddy_block *buddy;
@@ -289,12 +291,14 @@ static void __drm_buddy_free(struct drm_buddy *mm,
 		if (!drm_buddy_block_is_free(buddy))
 			break;
 
-		if (drm_buddy_block_is_clear(block) !=
-		    drm_buddy_block_is_clear(buddy))
-			break;
+		if (!defrag) {
+			if (drm_buddy_block_is_clear(block) !=
+			    drm_buddy_block_is_clear(buddy))
+				break;
 
-		if (drm_buddy_block_is_clear(block))
-			mark_cleared(parent);
+			if (drm_buddy_block_is_clear(block))
+				mark_cleared(parent);
+		}
 
 		list_del(&buddy->link);
 
@@ -304,8 +308,49 @@ static void __drm_buddy_free(struct drm_buddy *mm,
 		block = parent;
 	}
 
+	order = drm_buddy_block_order(block);
 	mark_free(mm, block);
+
+	return order;
+}
+
+/**
+ * drm_buddy_defrag - Defragmentation routine
+ *
+ * @mm: DRM buddy manager
+ * @min_order: minimum order in the freelist to begin
+ * the defragmentation process
+ *
+ * Driver calls the defragmentation function when the
+ * requested memory allocation returns -ENOSPC.
+ */
+void drm_buddy_defrag(struct drm_buddy *mm,
+		      unsigned int min_order)
+{
+	struct drm_buddy_block *block;
+	struct list_head *list;
+	unsigned int order;
+	int i;
+
+	if (min_order > mm->max_order)
+		return;
+
+	for (i = min_order - 1; i >= 0; i--) {
+		list = &mm->free_list[i];
+		if (list_empty(list))
+			continue;
+
+		list_for_each_entry_reverse(block, list, link) {
+			if (!block->parent)
+				continue;
+
+			order = __drm_buddy_free(mm, block, 1);
+			if (order >= min_order)
+				return;
+		}
+	}
 }
+EXPORT_SYMBOL(drm_buddy_defrag);
 
 /**
  * drm_buddy_free_block - free a block
@@ -321,7 +366,7 @@ void drm_buddy_free_block(struct drm_buddy *mm,
 	if (drm_buddy_block_is_clear(block))
 		mm->clear_avail += drm_buddy_block_size(mm, block);
 
-	__drm_buddy_free(mm, block);
+	__drm_buddy_free(mm, block, 0);
 }
 EXPORT_SYMBOL(drm_buddy_free_block);
 
@@ -470,7 +515,7 @@ __alloc_range_bias(struct drm_buddy *mm,
 	if (buddy &&
 	    (drm_buddy_block_is_free(block) &&
 	     drm_buddy_block_is_free(buddy)))
-		__drm_buddy_free(mm, block);
+		__drm_buddy_free(mm, block, 0);
 	return ERR_PTR(err);
 }
 
@@ -588,7 +633,7 @@ alloc_from_freelist(struct drm_buddy *mm,
 
 err_undo:
 	if (tmp != order)
-		__drm_buddy_free(mm, block);
+		__drm_buddy_free(mm, block, 0);
 	return ERR_PTR(err);
 }
 
@@ -668,7 +713,7 @@ static int __alloc_range(struct drm_buddy *mm,
 	if (buddy &&
 	    (drm_buddy_block_is_free(block) &&
 	     drm_buddy_block_is_free(buddy)))
-		__drm_buddy_free(mm, block);
+		__drm_buddy_free(mm, block, 0);
 
 err_free:
 	if (err == -ENOSPC && total_allocated_on_err) {
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index d81c596dfa38..d0f63e7b5915 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -166,6 +166,9 @@ void drm_buddy_free_list(struct drm_buddy *mm,
 			 struct list_head *objects,
 			 unsigned int flags);
 
+void drm_buddy_defrag(struct drm_buddy *mm,
+		      unsigned int min_order);
+
 void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
 void drm_buddy_block_print(struct drm_buddy *mm,
 			   struct drm_buddy_block *block,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH v6 1/3] drm/buddy: Implement tracking clear page feature
  2024-02-08 15:49 [PATCH v6 1/3] drm/buddy: Implement tracking clear page feature Arunpravin Paneer Selvam
  2024-02-08 15:49 ` [PATCH v6 2/3] drm/amdgpu: Enable clear page functionality Arunpravin Paneer Selvam
  2024-02-08 15:50 ` [PATCH v6 3/3] drm/buddy: Add defragmentation support Arunpravin Paneer Selvam
@ 2024-02-16 11:49 ` Arunpravin Paneer Selvam
  2024-02-16 12:03 ` Matthew Auld
  3 siblings, 0 replies; 14+ messages in thread
From: Arunpravin Paneer Selvam @ 2024-02-16 11:49 UTC (permalink / raw)
  To: dri-devel, amd-gfx, intel-gfx
  Cc: christian.koenig, alexander.deucher, matthew.auld,
	felix.kuehling, mario.limonciello

Hi Matthew,

Could you review the v6?

Thanks,
Arun.

On 2/8/2024 9:19 PM, Arunpravin Paneer Selvam wrote:
> - Add tracking clear page feature.
>
> - Driver should enable the DRM_BUDDY_CLEARED flag if it
>    successfully clears the blocks in the free path. On the otherhand,
>    DRM buddy marks each block as cleared.
>
> - Track the available cleared pages size
>
> - If driver requests cleared memory we prefer cleared memory
>    but fallback to uncleared if we can't find the cleared blocks.
>    when driver requests uncleared memory we try to use uncleared but
>    fallback to cleared memory if necessary.
>
> - When a block gets freed we clear it and mark the freed block as cleared,
>    when there are buddies which are cleared as well we can merge them.
>    Otherwise, we prefer to keep the blocks as separated.
>
> v1: (Christian)
>    - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
>      cleared. Else, reset the clear flag for each block in the list.
>
>    - For merging the 2 cleared blocks compare as below,
>      drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)
>
> v2: (Matthew)
>    - Add a wrapper drm_buddy_free_list_internal for the freeing of blocks
>      operation within drm buddy.
>    - Write a macro block_incompatible() to allocate the required blocks.
>    - Update the xe driver for the drm_buddy_free_list change in arguments.
>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Suggested-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
>   drivers/gpu/drm/drm_buddy.c                   | 192 ++++++++++++++----
>   drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
>   drivers/gpu/drm/tests/drm_buddy_test.c        |  10 +-
>   drivers/gpu/drm/xe/xe_ttm_vram_mgr.c          |   4 +-
>   include/drm/drm_buddy.h                       |  18 +-
>   6 files changed, 187 insertions(+), 49 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> index 8db880244324..c0c851409241 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> @@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>   	return 0;
>   
>   error_free_blocks:
> -	drm_buddy_free_list(mm, &vres->blocks);
> +	drm_buddy_free_list(mm, &vres->blocks, 0);
>   	mutex_unlock(&mgr->lock);
>   error_fini:
>   	ttm_resource_fini(man, &vres->base);
> @@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
>   
>   	amdgpu_vram_mgr_do_reserve(man);
>   
> -	drm_buddy_free_list(mm, &vres->blocks);
> +	drm_buddy_free_list(mm, &vres->blocks, 0);
>   	mutex_unlock(&mgr->lock);
>   
>   	atomic64_sub(vis_usage, &mgr->vis_usage);
> @@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
>   		kfree(rsv);
>   
>   	list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {
> -		drm_buddy_free_list(&mgr->mm, &rsv->allocated);
> +		drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
>   		kfree(rsv);
>   	}
>   	if (!adev->gmc.is_app_apu)
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index f57e6d74fb0e..33ad0cfbd54c 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
>   	__list_add(&block->link, node->link.prev, &node->link);
>   }
>   
> +static void clear_reset(struct drm_buddy_block *block)
> +{
> +	block->header &= ~DRM_BUDDY_HEADER_CLEAR;
> +}
> +
> +static void mark_cleared(struct drm_buddy_block *block)
> +{
> +	block->header |= DRM_BUDDY_HEADER_CLEAR;
> +}
> +
>   static void mark_allocated(struct drm_buddy_block *block)
>   {
>   	block->header &= ~DRM_BUDDY_HEADER_STATE;
> @@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
>   	mark_free(mm, block->left);
>   	mark_free(mm, block->right);
>   
> +	if (drm_buddy_block_is_clear(block)) {
> +		mark_cleared(block->left);
> +		mark_cleared(block->right);
> +		clear_reset(block);
> +	}
> +
>   	mark_split(block);
>   
>   	return 0;
> @@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
>   		if (!drm_buddy_block_is_free(buddy))
>   			break;
>   
> +		if (drm_buddy_block_is_clear(block) !=
> +		    drm_buddy_block_is_clear(buddy))
> +			break;
> +
> +		if (drm_buddy_block_is_clear(block))
> +			mark_cleared(parent);
> +
>   		list_del(&buddy->link);
>   
>   		drm_block_free(mm, block);
> @@ -295,26 +318,61 @@ void drm_buddy_free_block(struct drm_buddy *mm,
>   {
>   	BUG_ON(!drm_buddy_block_is_allocated(block));
>   	mm->avail += drm_buddy_block_size(mm, block);
> +	if (drm_buddy_block_is_clear(block))
> +		mm->clear_avail += drm_buddy_block_size(mm, block);
> +
>   	__drm_buddy_free(mm, block);
>   }
>   EXPORT_SYMBOL(drm_buddy_free_block);
>   
> -/**
> - * drm_buddy_free_list - free blocks
> - *
> - * @mm: DRM buddy manager
> - * @objects: input list head to free blocks
> - */
> -void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects)
> +static void __drm_buddy_free_list(struct drm_buddy *mm,
> +				  struct list_head *objects,
> +				  bool mark_clear,
> +				  bool mark_dirty)
>   {
>   	struct drm_buddy_block *block, *on;
>   
> +	WARN_ON(mark_dirty && mark_clear);
> +
>   	list_for_each_entry_safe(block, on, objects, link) {
> +		if (mark_clear)
> +			mark_cleared(block);
> +		else if (mark_dirty)
> +			clear_reset(block);
>   		drm_buddy_free_block(mm, block);
>   		cond_resched();
>   	}
>   	INIT_LIST_HEAD(objects);
>   }
> +
> +static void drm_buddy_free_list_internal(struct drm_buddy *mm,
> +					 struct list_head *objects)
> +{
> +	/*
> +	 * Don't touch the clear/dirty bit, since allocation is still internal
> +	 * at this point. For example we might have just failed part of the
> +	 * allocation.
> +	 */
> +	__drm_buddy_free_list(mm, objects, false, false);
> +}
> +
> +/**
> + * drm_buddy_free_list - free blocks
> + *
> + * @mm: DRM buddy manager
> + * @objects: input list head to free blocks
> + * @flags: optional flags like DRM_BUDDY_CLEARED
> + */
> +void drm_buddy_free_list(struct drm_buddy *mm,
> +			 struct list_head *objects,
> +			 unsigned int flags)
> +{
> +	bool mark_clear = flags & DRM_BUDDY_CLEARED;
> +
> +	WARN_ON(flags & ~(DRM_BUDDY_CLEARED));
> +
> +	__drm_buddy_free_list(mm, objects, mark_clear, !mark_clear);
> +}
>   EXPORT_SYMBOL(drm_buddy_free_list);
>   
>   static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
> @@ -327,10 +385,19 @@ static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
>   	return s1 <= s2 && e1 >= e2;
>   }
>   
> +static bool block_incompatible(struct drm_buddy_block *block, unsigned int flags)
> +{
> +	bool needs_clear = flags & DRM_BUDDY_CLEAR_ALLOCATION;
> +
> +	return needs_clear != drm_buddy_block_is_clear(block);
> +}
> +
>   static struct drm_buddy_block *
> -alloc_range_bias(struct drm_buddy *mm,
> -		 u64 start, u64 end,
> -		 unsigned int order)
> +__alloc_range_bias(struct drm_buddy *mm,
> +		   u64 start, u64 end,
> +		   unsigned int order,
> +		   unsigned long flags,
> +		   bool fallback)
>   {
>   	struct drm_buddy_block *block;
>   	struct drm_buddy_block *buddy;
> @@ -369,6 +436,9 @@ alloc_range_bias(struct drm_buddy *mm,
>   
>   		if (contains(start, end, block_start, block_end) &&
>   		    order == drm_buddy_block_order(block)) {
> +			if (!fallback && block_incompatible(block, flags))
> +				continue;
> +
>   			/*
>   			 * Find the free block within the range.
>   			 */
> @@ -405,25 +475,52 @@ alloc_range_bias(struct drm_buddy *mm,
>   }
>   
>   static struct drm_buddy_block *
> -get_maxblock(struct drm_buddy *mm, unsigned int order)
> +__drm_buddy_alloc_range_bias(struct drm_buddy *mm,
> +			     u64 start, u64 end,
> +			     unsigned int order,
> +			     unsigned long flags)
>   {
> -	struct drm_buddy_block *max_block = NULL, *node;
> +	struct drm_buddy_block *block;
> +	bool fallback = 0;
> +
> +	block = __alloc_range_bias(mm, start, end, order,
> +				   flags, fallback);
> +	if (IS_ERR(block))
> +		return __alloc_range_bias(mm, start, end, order,
> +					  flags, !fallback);
> +
> +	return block;
> +}
> +
> +static struct drm_buddy_block *
> +get_maxblock(struct drm_buddy *mm, unsigned int order,
> +	     unsigned long flags)
> +{
> +	struct drm_buddy_block *max_block = NULL, *block = NULL;
>   	unsigned int i;
>   
>   	for (i = order; i <= mm->max_order; ++i) {
> -		if (!list_empty(&mm->free_list[i])) {
> -			node = list_last_entry(&mm->free_list[i],
> -					       struct drm_buddy_block,
> -					       link);
> -			if (!max_block) {
> -				max_block = node;
> +		struct drm_buddy_block *tmp_block;
> +
> +		list_for_each_entry_reverse(tmp_block, &mm->free_list[i], link) {
> +			if (block_incompatible(tmp_block, flags))
>   				continue;
> -			}
>   
> -			if (drm_buddy_block_offset(node) >
> -			    drm_buddy_block_offset(max_block)) {
> -				max_block = node;
> -			}
> +			block = tmp_block;
> +			break;
> +		}
> +
> +		if (!block)
> +			continue;
> +
> +		if (!max_block) {
> +			max_block = block;
> +			continue;
> +		}
> +
> +		if (drm_buddy_block_offset(block) >
> +		    drm_buddy_block_offset(max_block)) {
> +			max_block = block;
>   		}
>   	}
>   
> @@ -440,11 +537,29 @@ alloc_from_freelist(struct drm_buddy *mm,
>   	int err;
>   
>   	if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
> -		block = get_maxblock(mm, order);
> +		block = get_maxblock(mm, order, flags);
>   		if (block)
>   			/* Store the obtained block order */
>   			tmp = drm_buddy_block_order(block);
>   	} else {
> +		for (tmp = order; tmp <= mm->max_order; ++tmp) {
> +			struct drm_buddy_block *tmp_block;
> +
> +			list_for_each_entry_reverse(tmp_block, &mm->free_list[tmp], link) {
> +				if (block_incompatible(tmp_block, flags))
> +					continue;
> +
> +				block = tmp_block;
> +				break;
> +			}
> +
> +			if (block)
> +				break;
> +		}
> +	}
> +
> +	if (!block) {
> +		/* Fallback method */
>   		for (tmp = order; tmp <= mm->max_order; ++tmp) {
>   			if (!list_empty(&mm->free_list[tmp])) {
>   				block = list_last_entry(&mm->free_list[tmp],
> @@ -454,10 +569,10 @@ alloc_from_freelist(struct drm_buddy *mm,
>   					break;
>   			}
>   		}
> -	}
>   
> -	if (!block)
> -		return ERR_PTR(-ENOSPC);
> +		if (!block)
> +			return ERR_PTR(-ENOSPC);
> +	}
>   
>   	BUG_ON(!drm_buddy_block_is_free(block));
>   
> @@ -524,6 +639,8 @@ static int __alloc_range(struct drm_buddy *mm,
>   			mark_allocated(block);
>   			total_allocated += drm_buddy_block_size(mm, block);
>   			mm->avail -= drm_buddy_block_size(mm, block);
> +			if (drm_buddy_block_is_clear(block))
> +				mm->clear_avail -= drm_buddy_block_size(mm, block);
>   			list_add_tail(&block->link, &allocated);
>   			continue;
>   		}
> @@ -558,7 +675,7 @@ static int __alloc_range(struct drm_buddy *mm,
>   		list_splice_tail(&allocated, blocks);
>   		*total_allocated_on_err = total_allocated;
>   	} else {
> -		drm_buddy_free_list(mm, &allocated);
> +		drm_buddy_free_list_internal(mm, &allocated);
>   	}
>   
>   	return err;
> @@ -624,11 +741,11 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
>   			list_splice(&blocks_lhs, blocks);
>   			return 0;
>   		} else if (err != -ENOSPC) {
> -			drm_buddy_free_list(mm, blocks);
> +			drm_buddy_free_list_internal(mm, blocks);
>   			return err;
>   		}
>   		/* Free blocks for the next iteration */
> -		drm_buddy_free_list(mm, blocks);
> +		drm_buddy_free_list_internal(mm, blocks);
>   	}
>   
>   	return -ENOSPC;
> @@ -684,6 +801,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>   	list_del(&block->link);
>   	mark_free(mm, block);
>   	mm->avail += drm_buddy_block_size(mm, block);
> +	if (drm_buddy_block_is_clear(block))
> +		mm->clear_avail += drm_buddy_block_size(mm, block);
>   
>   	/* Prevent recursively freeing this node */
>   	parent = block->parent;
> @@ -695,6 +814,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>   	if (err) {
>   		mark_allocated(block);
>   		mm->avail -= drm_buddy_block_size(mm, block);
> +		if (drm_buddy_block_is_clear(block))
> +			mm->clear_avail -= drm_buddy_block_size(mm, block);
>   		list_add(&block->link, blocks);
>   	}
>   
> @@ -782,7 +903,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   		do {
>   			if (flags & DRM_BUDDY_RANGE_ALLOCATION)
>   				/* Allocate traversing within the range */
> -				block = alloc_range_bias(mm, start, end, order);
> +				block = __drm_buddy_alloc_range_bias(mm, start, end,
> +								     order, flags);
>   			else
>   				/* Allocate from freelist */
>   				block = alloc_from_freelist(mm, order, flags);
> @@ -808,6 +930,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   
>   		mark_allocated(block);
>   		mm->avail -= drm_buddy_block_size(mm, block);
> +		if (drm_buddy_block_is_clear(block))
> +			mm->clear_avail -= drm_buddy_block_size(mm, block);
>   		kmemleak_update_trace(block);
>   		list_add_tail(&block->link, &allocated);
>   
> @@ -846,7 +970,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   	return 0;
>   
>   err_free:
> -	drm_buddy_free_list(mm, &allocated);
> +	drm_buddy_free_list_internal(mm, &allocated);
>   	return err;
>   }
>   EXPORT_SYMBOL(drm_buddy_alloc_blocks);
> @@ -879,8 +1003,8 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
>   {
>   	int order;
>   
> -	drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB\n",
> -		   mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20);
> +	drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
> +		   mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
>   
>   	for (order = mm->max_order; order >= 0; order--) {
>   		struct drm_buddy_block *block;
> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> index 0d735d5c2b35..942345548bc3 100644
> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> @@ -126,7 +126,7 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
>   	return 0;
>   
>   err_free_blocks:
> -	drm_buddy_free_list(mm, &bman_res->blocks);
> +	drm_buddy_free_list(mm, &bman_res->blocks, 0);
>   	mutex_unlock(&bman->lock);
>   err_free_res:
>   	ttm_resource_fini(man, &bman_res->base);
> @@ -141,7 +141,7 @@ static void i915_ttm_buddy_man_free(struct ttm_resource_manager *man,
>   	struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
>   
>   	mutex_lock(&bman->lock);
> -	drm_buddy_free_list(&bman->mm, &bman_res->blocks);
> +	drm_buddy_free_list(&bman->mm, &bman_res->blocks, 0);
>   	bman->visible_avail += bman_res->used_visible_size;
>   	mutex_unlock(&bman->lock);
>   
> @@ -345,7 +345,7 @@ int i915_ttm_buddy_man_fini(struct ttm_device *bdev, unsigned int type)
>   	ttm_set_driver_manager(bdev, type, NULL);
>   
>   	mutex_lock(&bman->lock);
> -	drm_buddy_free_list(mm, &bman->reserved);
> +	drm_buddy_free_list(mm, &bman->reserved, 0);
>   	drm_buddy_fini(mm);
>   	bman->visible_avail += bman->visible_reserved;
>   	WARN_ON_ONCE(bman->visible_avail != bman->visible_size);
> diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c
> index ea2af6bd9abe..e0860fce9ebd 100644
> --- a/drivers/gpu/drm/tests/drm_buddy_test.c
> +++ b/drivers/gpu/drm/tests/drm_buddy_test.c
> @@ -83,7 +83,7 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test)
>   							  top, max_order);
>   	}
>   
> -	drm_buddy_free_list(&mm, &holes);
> +	drm_buddy_free_list(&mm, &holes, 0);
>   
>   	/* Nothing larger than blocks of chunk_size now available */
>   	for (order = 1; order <= max_order; order++) {
> @@ -95,7 +95,7 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test)
>   	}
>   
>   	list_splice_tail(&holes, &blocks);
> -	drm_buddy_free_list(&mm, &blocks);
> +	drm_buddy_free_list(&mm, &blocks, 0);
>   	drm_buddy_fini(&mm);
>   }
>   
> @@ -190,7 +190,7 @@ static void drm_test_buddy_alloc_pessimistic(struct kunit *test)
>   
>   	list_del(&block->link);
>   	drm_buddy_free_block(&mm, block);
> -	drm_buddy_free_list(&mm, &blocks);
> +	drm_buddy_free_list(&mm, &blocks, 0);
>   	drm_buddy_fini(&mm);
>   }
>   
> @@ -236,7 +236,7 @@ static void drm_test_buddy_alloc_optimistic(struct kunit *test)
>   							   size, size, &tmp, flags),
>   						  "buddy_alloc unexpectedly succeeded, it should be full!");
>   
> -	drm_buddy_free_list(&mm, &blocks);
> +	drm_buddy_free_list(&mm, &blocks, 0);
>   	drm_buddy_fini(&mm);
>   }
>   
> @@ -271,7 +271,7 @@ static void drm_test_buddy_alloc_limit(struct kunit *test)
>   						drm_buddy_block_size(&mm, block),
>   						BIT_ULL(mm.max_order) * PAGE_SIZE);
>   
> -	drm_buddy_free_list(&mm, &allocated);
> +	drm_buddy_free_list(&mm, &allocated, 0);
>   	drm_buddy_fini(&mm);
>   }
>   
> diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> index 115ec745e502..1ad678b62c4a 100644
> --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> @@ -196,7 +196,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
>   	return 0;
>   
>   error_free_blocks:
> -	drm_buddy_free_list(mm, &vres->blocks);
> +	drm_buddy_free_list(mm, &vres->blocks, 0);
>   	mutex_unlock(&mgr->lock);
>   error_fini:
>   	ttm_resource_fini(man, &vres->base);
> @@ -214,7 +214,7 @@ static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man,
>   	struct drm_buddy *mm = &mgr->mm;
>   
>   	mutex_lock(&mgr->lock);
> -	drm_buddy_free_list(mm, &vres->blocks);
> +	drm_buddy_free_list(mm, &vres->blocks, 0);
>   	mgr->visible_avail += vres->used_visible_size;
>   	mutex_unlock(&mgr->lock);
>   
> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
> index a5b39fc01003..d81c596dfa38 100644
> --- a/include/drm/drm_buddy.h
> +++ b/include/drm/drm_buddy.h
> @@ -6,6 +6,7 @@
>   #ifndef __DRM_BUDDY_H__
>   #define __DRM_BUDDY_H__
>   
> +#include <linux/bitfield.h>
>   #include <linux/bitops.h>
>   #include <linux/list.h>
>   #include <linux/slab.h>
> @@ -25,15 +26,19 @@
>   #define DRM_BUDDY_RANGE_ALLOCATION		BIT(0)
>   #define DRM_BUDDY_TOPDOWN_ALLOCATION		BIT(1)
>   #define DRM_BUDDY_CONTIGUOUS_ALLOCATION		BIT(2)
> +#define DRM_BUDDY_CLEAR_ALLOCATION		BIT(3)
> +#define DRM_BUDDY_CLEARED			BIT(4)
>   
>   struct drm_buddy_block {
>   #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
>   #define DRM_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
> +#define DRM_BUDDY_HEADER_CLEAR  GENMASK_ULL(9, 9)
> +
>   #define   DRM_BUDDY_ALLOCATED	   (1 << 10)
>   #define   DRM_BUDDY_FREE	   (2 << 10)
>   #define   DRM_BUDDY_SPLIT	   (3 << 10)
>   /* Free to be used, if needed in the future */
> -#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(9, 6)
> +#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
>   #define DRM_BUDDY_HEADER_ORDER  GENMASK_ULL(5, 0)
>   	u64 header;
>   
> @@ -86,6 +91,7 @@ struct drm_buddy {
>   	u64 chunk_size;
>   	u64 size;
>   	u64 avail;
> +	u64 clear_avail;
>   };
>   
>   static inline u64
> @@ -112,6 +118,12 @@ drm_buddy_block_is_allocated(struct drm_buddy_block *block)
>   	return drm_buddy_block_state(block) == DRM_BUDDY_ALLOCATED;
>   }
>   
> +static inline bool
> +drm_buddy_block_is_clear(struct drm_buddy_block *block)
> +{
> +	return block->header & DRM_BUDDY_HEADER_CLEAR;
> +}
> +
>   static inline bool
>   drm_buddy_block_is_free(struct drm_buddy_block *block)
>   {
> @@ -150,7 +162,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>   
>   void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block);
>   
> -void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects);
> +void drm_buddy_free_list(struct drm_buddy *mm,
> +			 struct list_head *objects,
> +			 unsigned int flags);
>   
>   void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
>   void drm_buddy_block_print(struct drm_buddy *mm,


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v6 1/3] drm/buddy: Implement tracking clear page feature
  2024-02-08 15:49 [PATCH v6 1/3] drm/buddy: Implement tracking clear page feature Arunpravin Paneer Selvam
                   ` (2 preceding siblings ...)
  2024-02-16 11:49 ` [PATCH v6 1/3] drm/buddy: Implement tracking clear page feature Arunpravin Paneer Selvam
@ 2024-02-16 12:03 ` Matthew Auld
  2024-02-21 12:23   ` Paneer Selvam, Arunpravin
  2024-02-21 12:40   ` Paneer Selvam, Arunpravin
  3 siblings, 2 replies; 14+ messages in thread
From: Matthew Auld @ 2024-02-16 12:03 UTC (permalink / raw)
  To: Arunpravin Paneer Selvam, dri-devel, amd-gfx, intel-gfx
  Cc: christian.koenig, alexander.deucher, felix.kuehling, mario.limonciello

On 08/02/2024 15:49, Arunpravin Paneer Selvam wrote:
> - Add tracking clear page feature.
> 
> - Driver should enable the DRM_BUDDY_CLEARED flag if it
>    successfully clears the blocks in the free path. On the otherhand,
>    DRM buddy marks each block as cleared.
> 
> - Track the available cleared pages size
> 
> - If driver requests cleared memory we prefer cleared memory
>    but fallback to uncleared if we can't find the cleared blocks.
>    when driver requests uncleared memory we try to use uncleared but
>    fallback to cleared memory if necessary.
> 
> - When a block gets freed we clear it and mark the freed block as cleared,
>    when there are buddies which are cleared as well we can merge them.
>    Otherwise, we prefer to keep the blocks as separated.
> 
> v1: (Christian)
>    - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
>      cleared. Else, reset the clear flag for each block in the list.
> 
>    - For merging the 2 cleared blocks compare as below,
>      drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)
> 
> v2: (Matthew)
>    - Add a wrapper drm_buddy_free_list_internal for the freeing of blocks
>      operation within drm buddy.
>    - Write a macro block_incompatible() to allocate the required blocks.
>    - Update the xe driver for the drm_buddy_free_list change in arguments.
> 
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Suggested-by: Christian König <christian.koenig@amd.com>

Probably needs a new unit test.

I think we are missing something to forcefully re-merge everything at 
fini()? In theory we can just call the defrag routine. Otherwise we 
might trigger various warnings since the root(s) might still be split.

Also one nit below. Otherwise I think looks good.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
>   drivers/gpu/drm/drm_buddy.c                   | 192 ++++++++++++++----
>   drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
>   drivers/gpu/drm/tests/drm_buddy_test.c        |  10 +-
>   drivers/gpu/drm/xe/xe_ttm_vram_mgr.c          |   4 +-
>   include/drm/drm_buddy.h                       |  18 +-
>   6 files changed, 187 insertions(+), 49 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> index 8db880244324..c0c851409241 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> @@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>   	return 0;
>   
>   error_free_blocks:
> -	drm_buddy_free_list(mm, &vres->blocks);
> +	drm_buddy_free_list(mm, &vres->blocks, 0);
>   	mutex_unlock(&mgr->lock);
>   error_fini:
>   	ttm_resource_fini(man, &vres->base);
> @@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
>   
>   	amdgpu_vram_mgr_do_reserve(man);
>   
> -	drm_buddy_free_list(mm, &vres->blocks);
> +	drm_buddy_free_list(mm, &vres->blocks, 0);
>   	mutex_unlock(&mgr->lock);
>   
>   	atomic64_sub(vis_usage, &mgr->vis_usage);
> @@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
>   		kfree(rsv);
>   
>   	list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {
> -		drm_buddy_free_list(&mgr->mm, &rsv->allocated);
> +		drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
>   		kfree(rsv);
>   	}
>   	if (!adev->gmc.is_app_apu)
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index f57e6d74fb0e..33ad0cfbd54c 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
>   	__list_add(&block->link, node->link.prev, &node->link);
>   }
>   
> +static void clear_reset(struct drm_buddy_block *block)
> +{
> +	block->header &= ~DRM_BUDDY_HEADER_CLEAR;
> +}
> +
> +static void mark_cleared(struct drm_buddy_block *block)
> +{
> +	block->header |= DRM_BUDDY_HEADER_CLEAR;
> +}
> +
>   static void mark_allocated(struct drm_buddy_block *block)
>   {
>   	block->header &= ~DRM_BUDDY_HEADER_STATE;
> @@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
>   	mark_free(mm, block->left);
>   	mark_free(mm, block->right);
>   
> +	if (drm_buddy_block_is_clear(block)) {
> +		mark_cleared(block->left);
> +		mark_cleared(block->right);
> +		clear_reset(block);
> +	}
> +
>   	mark_split(block);
>   
>   	return 0;
> @@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
>   		if (!drm_buddy_block_is_free(buddy))
>   			break;
>   
> +		if (drm_buddy_block_is_clear(block) !=
> +		    drm_buddy_block_is_clear(buddy))
> +			break;
> +
> +		if (drm_buddy_block_is_clear(block))
> +			mark_cleared(parent);
> +
>   		list_del(&buddy->link);
>   
>   		drm_block_free(mm, block);
> @@ -295,26 +318,61 @@ void drm_buddy_free_block(struct drm_buddy *mm,
>   {
>   	BUG_ON(!drm_buddy_block_is_allocated(block));
>   	mm->avail += drm_buddy_block_size(mm, block);
> +	if (drm_buddy_block_is_clear(block))
> +		mm->clear_avail += drm_buddy_block_size(mm, block);
> +
>   	__drm_buddy_free(mm, block);
>   }
>   EXPORT_SYMBOL(drm_buddy_free_block);
>   
> -/**
> - * drm_buddy_free_list - free blocks
> - *
> - * @mm: DRM buddy manager
> - * @objects: input list head to free blocks
> - */
> -void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects)
> +static void __drm_buddy_free_list(struct drm_buddy *mm,
> +				  struct list_head *objects,
> +				  bool mark_clear,
> +				  bool mark_dirty)
>   {
>   	struct drm_buddy_block *block, *on;
>   
> +	WARN_ON(mark_dirty && mark_clear);
> +
>   	list_for_each_entry_safe(block, on, objects, link) {
> +		if (mark_clear)
> +			mark_cleared(block);
> +		else if (mark_dirty)
> +			clear_reset(block);
>   		drm_buddy_free_block(mm, block);
>   		cond_resched();
>   	}
>   	INIT_LIST_HEAD(objects);
>   }
> +
> +static void drm_buddy_free_list_internal(struct drm_buddy *mm,
> +					 struct list_head *objects)
> +{
> +	/*
> +	 * Don't touch the clear/dirty bit, since allocation is still internal
> +	 * at this point. For example we might have just failed part of the
> +	 * allocation.
> +	 */
> +	__drm_buddy_free_list(mm, objects, false, false);
> +}
> +
> +/**
> + * drm_buddy_free_list - free blocks
> + *
> + * @mm: DRM buddy manager
> + * @objects: input list head to free blocks
> + * @flags: optional flags like DRM_BUDDY_CLEARED
> + */
> +void drm_buddy_free_list(struct drm_buddy *mm,
> +			 struct list_head *objects,
> +			 unsigned int flags)
> +{
> +	bool mark_clear = flags & DRM_BUDDY_CLEARED;
> +
> +	WARN_ON(flags & ~(DRM_BUDDY_CLEARED));
> +
> +	__drm_buddy_free_list(mm, objects, mark_clear, !mark_clear);
> +}
>   EXPORT_SYMBOL(drm_buddy_free_list);
>   
>   static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
> @@ -327,10 +385,19 @@ static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
>   	return s1 <= s2 && e1 >= e2;
>   }
>   
> +static bool block_incompatible(struct drm_buddy_block *block, unsigned int flags)
> +{
> +	bool needs_clear = flags & DRM_BUDDY_CLEAR_ALLOCATION;
> +
> +	return needs_clear != drm_buddy_block_is_clear(block);
> +}
> +
>   static struct drm_buddy_block *
> -alloc_range_bias(struct drm_buddy *mm,
> -		 u64 start, u64 end,
> -		 unsigned int order)
> +__alloc_range_bias(struct drm_buddy *mm,
> +		   u64 start, u64 end,
> +		   unsigned int order,
> +		   unsigned long flags,
> +		   bool fallback)
>   {
>   	struct drm_buddy_block *block;
>   	struct drm_buddy_block *buddy;
> @@ -369,6 +436,9 @@ alloc_range_bias(struct drm_buddy *mm,
>   
>   		if (contains(start, end, block_start, block_end) &&
>   		    order == drm_buddy_block_order(block)) {
> +			if (!fallback && block_incompatible(block, flags))
> +				continue;
> +
>   			/*
>   			 * Find the free block within the range.
>   			 */
> @@ -405,25 +475,52 @@ alloc_range_bias(struct drm_buddy *mm,
>   }
>   
>   static struct drm_buddy_block *
> -get_maxblock(struct drm_buddy *mm, unsigned int order)
> +__drm_buddy_alloc_range_bias(struct drm_buddy *mm,
> +			     u64 start, u64 end,
> +			     unsigned int order,
> +			     unsigned long flags)
>   {
> -	struct drm_buddy_block *max_block = NULL, *node;
> +	struct drm_buddy_block *block;
> +	bool fallback = 0;
> +
> +	block = __alloc_range_bias(mm, start, end, order,
> +				   flags, fallback);
> +	if (IS_ERR(block))
> +		return __alloc_range_bias(mm, start, end, order,
> +					  flags, !fallback);
> +
> +	return block;
> +}
> +
> +static struct drm_buddy_block *
> +get_maxblock(struct drm_buddy *mm, unsigned int order,
> +	     unsigned long flags)
> +{
> +	struct drm_buddy_block *max_block = NULL, *block = NULL;
>   	unsigned int i;
>   
>   	for (i = order; i <= mm->max_order; ++i) {
> -		if (!list_empty(&mm->free_list[i])) {
> -			node = list_last_entry(&mm->free_list[i],
> -					       struct drm_buddy_block,
> -					       link);
> -			if (!max_block) {
> -				max_block = node;
> +		struct drm_buddy_block *tmp_block;
> +
> +		list_for_each_entry_reverse(tmp_block, &mm->free_list[i], link) {
> +			if (block_incompatible(tmp_block, flags))
>   				continue;
> -			}
>   
> -			if (drm_buddy_block_offset(node) >
> -			    drm_buddy_block_offset(max_block)) {
> -				max_block = node;
> -			}
> +			block = tmp_block;
> +			break;
> +		}
> +
> +		if (!block)
> +			continue;
> +
> +		if (!max_block) {
> +			max_block = block;
> +			continue;
> +		}
> +
> +		if (drm_buddy_block_offset(block) >
> +		    drm_buddy_block_offset(max_block)) {
> +			max_block = block;
>   		}
>   	}
>   
> @@ -440,11 +537,29 @@ alloc_from_freelist(struct drm_buddy *mm,
>   	int err;
>   
>   	if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
> -		block = get_maxblock(mm, order);
> +		block = get_maxblock(mm, order, flags);
>   		if (block)
>   			/* Store the obtained block order */
>   			tmp = drm_buddy_block_order(block);
>   	} else {
> +		for (tmp = order; tmp <= mm->max_order; ++tmp) {
> +			struct drm_buddy_block *tmp_block;
> +
> +			list_for_each_entry_reverse(tmp_block, &mm->free_list[tmp], link) {
> +				if (block_incompatible(tmp_block, flags))
> +					continue;
> +
> +				block = tmp_block;
> +				break;
> +			}
> +
> +			if (block)
> +				break;
> +		}
> +	}
> +
> +	if (!block) {
> +		/* Fallback method */
>   		for (tmp = order; tmp <= mm->max_order; ++tmp) {
>   			if (!list_empty(&mm->free_list[tmp])) {
>   				block = list_last_entry(&mm->free_list[tmp],
> @@ -454,10 +569,10 @@ alloc_from_freelist(struct drm_buddy *mm,
>   					break;
>   			}
>   		}
> -	}
>   
> -	if (!block)
> -		return ERR_PTR(-ENOSPC);
> +		if (!block)
> +			return ERR_PTR(-ENOSPC);
> +	}
>   
>   	BUG_ON(!drm_buddy_block_is_free(block));
>   
> @@ -524,6 +639,8 @@ static int __alloc_range(struct drm_buddy *mm,
>   			mark_allocated(block);
>   			total_allocated += drm_buddy_block_size(mm, block);
>   			mm->avail -= drm_buddy_block_size(mm, block);
> +			if (drm_buddy_block_is_clear(block))
> +				mm->clear_avail -= drm_buddy_block_size(mm, block);
>   			list_add_tail(&block->link, &allocated);
>   			continue;
>   		}
> @@ -558,7 +675,7 @@ static int __alloc_range(struct drm_buddy *mm,
>   		list_splice_tail(&allocated, blocks);
>   		*total_allocated_on_err = total_allocated;
>   	} else {
> -		drm_buddy_free_list(mm, &allocated);
> +		drm_buddy_free_list_internal(mm, &allocated);
>   	}
>   
>   	return err;
> @@ -624,11 +741,11 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
>   			list_splice(&blocks_lhs, blocks);
>   			return 0;
>   		} else if (err != -ENOSPC) {
> -			drm_buddy_free_list(mm, blocks);
> +			drm_buddy_free_list_internal(mm, blocks);
>   			return err;
>   		}
>   		/* Free blocks for the next iteration */
> -		drm_buddy_free_list(mm, blocks);
> +		drm_buddy_free_list_internal(mm, blocks);
>   	}
>   
>   	return -ENOSPC;
> @@ -684,6 +801,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>   	list_del(&block->link);
>   	mark_free(mm, block);
>   	mm->avail += drm_buddy_block_size(mm, block);
> +	if (drm_buddy_block_is_clear(block))
> +		mm->clear_avail += drm_buddy_block_size(mm, block);
>   
>   	/* Prevent recursively freeing this node */
>   	parent = block->parent;
> @@ -695,6 +814,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>   	if (err) {
>   		mark_allocated(block);
>   		mm->avail -= drm_buddy_block_size(mm, block);
> +		if (drm_buddy_block_is_clear(block))
> +			mm->clear_avail -= drm_buddy_block_size(mm, block);
>   		list_add(&block->link, blocks);
>   	}
>   
> @@ -782,7 +903,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   		do {
>   			if (flags & DRM_BUDDY_RANGE_ALLOCATION)
>   				/* Allocate traversing within the range */
> -				block = alloc_range_bias(mm, start, end, order);
> +				block = __drm_buddy_alloc_range_bias(mm, start, end,
> +								     order, flags);
>   			else
>   				/* Allocate from freelist */
>   				block = alloc_from_freelist(mm, order, flags);
> @@ -808,6 +930,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   
>   		mark_allocated(block);
>   		mm->avail -= drm_buddy_block_size(mm, block);
> +		if (drm_buddy_block_is_clear(block))
> +			mm->clear_avail -= drm_buddy_block_size(mm, block);
>   		kmemleak_update_trace(block);
>   		list_add_tail(&block->link, &allocated);
>   
> @@ -846,7 +970,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   	return 0;
>   
>   err_free:
> -	drm_buddy_free_list(mm, &allocated);
> +	drm_buddy_free_list_internal(mm, &allocated);
>   	return err;
>   }
>   EXPORT_SYMBOL(drm_buddy_alloc_blocks);
> @@ -879,8 +1003,8 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
>   {
>   	int order;
>   
> -	drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB\n",
> -		   mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20);
> +	drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
> +		   mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
>   
>   	for (order = mm->max_order; order >= 0; order--) {
>   		struct drm_buddy_block *block;
> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> index 0d735d5c2b35..942345548bc3 100644
> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> @@ -126,7 +126,7 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
>   	return 0;
>   
>   err_free_blocks:
> -	drm_buddy_free_list(mm, &bman_res->blocks);
> +	drm_buddy_free_list(mm, &bman_res->blocks, 0);
>   	mutex_unlock(&bman->lock);
>   err_free_res:
>   	ttm_resource_fini(man, &bman_res->base);
> @@ -141,7 +141,7 @@ static void i915_ttm_buddy_man_free(struct ttm_resource_manager *man,
>   	struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
>   
>   	mutex_lock(&bman->lock);
> -	drm_buddy_free_list(&bman->mm, &bman_res->blocks);
> +	drm_buddy_free_list(&bman->mm, &bman_res->blocks, 0);
>   	bman->visible_avail += bman_res->used_visible_size;
>   	mutex_unlock(&bman->lock);
>   
> @@ -345,7 +345,7 @@ int i915_ttm_buddy_man_fini(struct ttm_device *bdev, unsigned int type)
>   	ttm_set_driver_manager(bdev, type, NULL);
>   
>   	mutex_lock(&bman->lock);
> -	drm_buddy_free_list(mm, &bman->reserved);
> +	drm_buddy_free_list(mm, &bman->reserved, 0);
>   	drm_buddy_fini(mm);
>   	bman->visible_avail += bman->visible_reserved;
>   	WARN_ON_ONCE(bman->visible_avail != bman->visible_size);
> diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c
> index ea2af6bd9abe..e0860fce9ebd 100644
> --- a/drivers/gpu/drm/tests/drm_buddy_test.c
> +++ b/drivers/gpu/drm/tests/drm_buddy_test.c
> @@ -83,7 +83,7 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test)
>   							  top, max_order);
>   	}
>   
> -	drm_buddy_free_list(&mm, &holes);
> +	drm_buddy_free_list(&mm, &holes, 0);
>   
>   	/* Nothing larger than blocks of chunk_size now available */
>   	for (order = 1; order <= max_order; order++) {
> @@ -95,7 +95,7 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test)
>   	}
>   
>   	list_splice_tail(&holes, &blocks);
> -	drm_buddy_free_list(&mm, &blocks);
> +	drm_buddy_free_list(&mm, &blocks, 0);
>   	drm_buddy_fini(&mm);
>   }
>   
> @@ -190,7 +190,7 @@ static void drm_test_buddy_alloc_pessimistic(struct kunit *test)
>   
>   	list_del(&block->link);
>   	drm_buddy_free_block(&mm, block);
> -	drm_buddy_free_list(&mm, &blocks);
> +	drm_buddy_free_list(&mm, &blocks, 0);
>   	drm_buddy_fini(&mm);
>   }
>   
> @@ -236,7 +236,7 @@ static void drm_test_buddy_alloc_optimistic(struct kunit *test)
>   							   size, size, &tmp, flags),
>   						  "buddy_alloc unexpectedly succeeded, it should be full!");
>   
> -	drm_buddy_free_list(&mm, &blocks);
> +	drm_buddy_free_list(&mm, &blocks, 0);
>   	drm_buddy_fini(&mm);
>   }
>   
> @@ -271,7 +271,7 @@ static void drm_test_buddy_alloc_limit(struct kunit *test)
>   						drm_buddy_block_size(&mm, block),
>   						BIT_ULL(mm.max_order) * PAGE_SIZE);
>   
> -	drm_buddy_free_list(&mm, &allocated);
> +	drm_buddy_free_list(&mm, &allocated, 0);
>   	drm_buddy_fini(&mm);
>   }
>   
> diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> index 115ec745e502..1ad678b62c4a 100644
> --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> @@ -196,7 +196,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
>   	return 0;
>   
>   error_free_blocks:
> -	drm_buddy_free_list(mm, &vres->blocks);
> +	drm_buddy_free_list(mm, &vres->blocks, 0);
>   	mutex_unlock(&mgr->lock);
>   error_fini:
>   	ttm_resource_fini(man, &vres->base);
> @@ -214,7 +214,7 @@ static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man,
>   	struct drm_buddy *mm = &mgr->mm;
>   
>   	mutex_lock(&mgr->lock);
> -	drm_buddy_free_list(mm, &vres->blocks);
> +	drm_buddy_free_list(mm, &vres->blocks, 0);
>   	mgr->visible_avail += vres->used_visible_size;
>   	mutex_unlock(&mgr->lock);
>   
> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
> index a5b39fc01003..d81c596dfa38 100644
> --- a/include/drm/drm_buddy.h
> +++ b/include/drm/drm_buddy.h
> @@ -6,6 +6,7 @@
>   #ifndef __DRM_BUDDY_H__
>   #define __DRM_BUDDY_H__
>   
> +#include <linux/bitfield.h>
>   #include <linux/bitops.h>
>   #include <linux/list.h>
>   #include <linux/slab.h>
> @@ -25,15 +26,19 @@
>   #define DRM_BUDDY_RANGE_ALLOCATION		BIT(0)
>   #define DRM_BUDDY_TOPDOWN_ALLOCATION		BIT(1)
>   #define DRM_BUDDY_CONTIGUOUS_ALLOCATION		BIT(2)
> +#define DRM_BUDDY_CLEAR_ALLOCATION		BIT(3)
> +#define DRM_BUDDY_CLEARED			BIT(4)
>   
>   struct drm_buddy_block {
>   #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
>   #define DRM_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
> +#define DRM_BUDDY_HEADER_CLEAR  GENMASK_ULL(9, 9)

I guess better to keep this sorted...

> +
>   #define   DRM_BUDDY_ALLOCATED	   (1 << 10)
>   #define   DRM_BUDDY_FREE	   (2 << 10)
>   #define   DRM_BUDDY_SPLIT	   (3 << 10)

...so maybe move here?

>   /* Free to be used, if needed in the future */
> -#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(9, 6)
> +#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
>   #define DRM_BUDDY_HEADER_ORDER  GENMASK_ULL(5, 0)
>   	u64 header;
>   
> @@ -86,6 +91,7 @@ struct drm_buddy {
>   	u64 chunk_size;
>   	u64 size;
>   	u64 avail;
> +	u64 clear_avail;
>   };
>   
>   static inline u64
> @@ -112,6 +118,12 @@ drm_buddy_block_is_allocated(struct drm_buddy_block *block)
>   	return drm_buddy_block_state(block) == DRM_BUDDY_ALLOCATED;
>   }
>   
> +static inline bool
> +drm_buddy_block_is_clear(struct drm_buddy_block *block)
> +{
> +	return block->header & DRM_BUDDY_HEADER_CLEAR;
> +}
> +
>   static inline bool
>   drm_buddy_block_is_free(struct drm_buddy_block *block)
>   {
> @@ -150,7 +162,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>   
>   void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block);
>   
> -void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects);
> +void drm_buddy_free_list(struct drm_buddy *mm,
> +			 struct list_head *objects,
> +			 unsigned int flags);
>   
>   void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
>   void drm_buddy_block_print(struct drm_buddy *mm,

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v6 3/3] drm/buddy: Add defragmentation support
  2024-02-08 15:50 ` [PATCH v6 3/3] drm/buddy: Add defragmentation support Arunpravin Paneer Selvam
@ 2024-02-16 12:23   ` Matthew Auld
  2024-02-16 12:33     ` Christian König
  0 siblings, 1 reply; 14+ messages in thread
From: Matthew Auld @ 2024-02-16 12:23 UTC (permalink / raw)
  To: Arunpravin Paneer Selvam, dri-devel, amd-gfx, intel-gfx
  Cc: christian.koenig, alexander.deucher, felix.kuehling, mario.limonciello

On 08/02/2024 15:50, Arunpravin Paneer Selvam wrote:
> Add a function to support defragmentation.
> 
> v1: Defragment the memory beginning from min_order
>      till the required memory space is available.
> 
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> Suggested-by: Matthew Auld <matthew.auld@intel.com>
> ---
>   drivers/gpu/drm/drm_buddy.c | 67 +++++++++++++++++++++++++++++++------
>   include/drm/drm_buddy.h     |  3 ++

No users?

>   2 files changed, 59 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index 33ad0cfbd54c..fac423d2cb73 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -276,10 +276,12 @@ drm_get_buddy(struct drm_buddy_block *block)
>   }
>   EXPORT_SYMBOL(drm_get_buddy);
>   
> -static void __drm_buddy_free(struct drm_buddy *mm,
> -			     struct drm_buddy_block *block)
> +static unsigned int __drm_buddy_free(struct drm_buddy *mm,
> +				     struct drm_buddy_block *block,
> +				     bool defrag)
>   {
>   	struct drm_buddy_block *parent;
> +	unsigned int order;
>   
>   	while ((parent = block->parent)) {
>   		struct drm_buddy_block *buddy;
> @@ -289,12 +291,14 @@ static void __drm_buddy_free(struct drm_buddy *mm,
>   		if (!drm_buddy_block_is_free(buddy))
>   			break;
>   
> -		if (drm_buddy_block_is_clear(block) !=
> -		    drm_buddy_block_is_clear(buddy))
> -			break;
> +		if (!defrag) {
> +			if (drm_buddy_block_is_clear(block) !=
> +			    drm_buddy_block_is_clear(buddy))
> +				break;
>   
> -		if (drm_buddy_block_is_clear(block))
> -			mark_cleared(parent);
> +			if (drm_buddy_block_is_clear(block))
> +				mark_cleared(parent);
> +		}

Maybe check if the two blocks are incompatible and chuck a warn if they 
are not? Main thing is not to hide issues with split blocks that should 
have been merged before.

>   
>   		list_del(&buddy->link);
>   
> @@ -304,8 +308,49 @@ static void __drm_buddy_free(struct drm_buddy *mm,
>   		block = parent;
>   	}
>   
> +	order = drm_buddy_block_order(block);
>   	mark_free(mm, block);
> +
> +	return order;
> +}
> +
> +/**
> + * drm_buddy_defrag - Defragmentation routine
> + *
> + * @mm: DRM buddy manager
> + * @min_order: minimum order in the freelist to begin
> + * the defragmentation process
> + *
> + * Driver calls the defragmentation function when the
> + * requested memory allocation returns -ENOSPC.
> + */
> +void drm_buddy_defrag(struct drm_buddy *mm,
> +		      unsigned int min_order)

Just wondering if we need "full defag" also? We would probably need to 
call this at fini() anyway.

> +{
> +	struct drm_buddy_block *block;
> +	struct list_head *list;
> +	unsigned int order;
> +	int i;
> +
> +	if (min_order > mm->max_order)
> +		return;
> +
> +	for (i = min_order - 1; i >= 0; i--) {

Need to be careful with min_order = 0 ?

> +		list = &mm->free_list[i];
> +		if (list_empty(list))
> +			continue;
> +
> +		list_for_each_entry_reverse(block, list, link) {

Don't we need the safe_reverse() variant here, since this is removing 
from the list?

> +			if (!block->parent)
> +				continue;
> +
> +			order = __drm_buddy_free(mm, block, 1);
> +			if (order >= min_order)
> +				return;
> +		}
> +	}
>   }
> +EXPORT_SYMBOL(drm_buddy_defrag);
>   
>   /**
>    * drm_buddy_free_block - free a block
> @@ -321,7 +366,7 @@ void drm_buddy_free_block(struct drm_buddy *mm,
>   	if (drm_buddy_block_is_clear(block))
>   		mm->clear_avail += drm_buddy_block_size(mm, block);
>   
> -	__drm_buddy_free(mm, block);
> +	__drm_buddy_free(mm, block, 0);
>   }
>   EXPORT_SYMBOL(drm_buddy_free_block);
>   
> @@ -470,7 +515,7 @@ __alloc_range_bias(struct drm_buddy *mm,
>   	if (buddy &&
>   	    (drm_buddy_block_is_free(block) &&
>   	     drm_buddy_block_is_free(buddy)))
> -		__drm_buddy_free(mm, block);
> +		__drm_buddy_free(mm, block, 0);
>   	return ERR_PTR(err);
>   }
>   
> @@ -588,7 +633,7 @@ alloc_from_freelist(struct drm_buddy *mm,
>   
>   err_undo:
>   	if (tmp != order)
> -		__drm_buddy_free(mm, block);
> +		__drm_buddy_free(mm, block, 0);
>   	return ERR_PTR(err);
>   }
>   
> @@ -668,7 +713,7 @@ static int __alloc_range(struct drm_buddy *mm,
>   	if (buddy &&
>   	    (drm_buddy_block_is_free(block) &&
>   	     drm_buddy_block_is_free(buddy)))
> -		__drm_buddy_free(mm, block);
> +		__drm_buddy_free(mm, block, 0);
>   
>   err_free:
>   	if (err == -ENOSPC && total_allocated_on_err) {
> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
> index d81c596dfa38..d0f63e7b5915 100644
> --- a/include/drm/drm_buddy.h
> +++ b/include/drm/drm_buddy.h
> @@ -166,6 +166,9 @@ void drm_buddy_free_list(struct drm_buddy *mm,
>   			 struct list_head *objects,
>   			 unsigned int flags);
>   
> +void drm_buddy_defrag(struct drm_buddy *mm,
> +		      unsigned int min_order);
> +
>   void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
>   void drm_buddy_block_print(struct drm_buddy *mm,
>   			   struct drm_buddy_block *block,

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v6 3/3] drm/buddy: Add defragmentation support
  2024-02-16 12:23   ` Matthew Auld
@ 2024-02-16 12:33     ` Christian König
  2024-02-16 13:21       ` Matthew Auld
  0 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2024-02-16 12:33 UTC (permalink / raw)
  To: Matthew Auld, Arunpravin Paneer Selvam, dri-devel, amd-gfx, intel-gfx
  Cc: alexander.deucher, felix.kuehling, mario.limonciello

Am 16.02.24 um 13:23 schrieb Matthew Auld:
> On 08/02/2024 15:50, Arunpravin Paneer Selvam wrote:
>> Add a function to support defragmentation.
>>
>> v1: Defragment the memory beginning from min_order
>>      till the required memory space is available.
>>
>> Signed-off-by: Arunpravin Paneer Selvam 
>> <Arunpravin.PaneerSelvam@amd.com>
>> Suggested-by: Matthew Auld <matthew.auld@intel.com>
>> ---
>>   drivers/gpu/drm/drm_buddy.c | 67 +++++++++++++++++++++++++++++++------
>>   include/drm/drm_buddy.h     |  3 ++
>
> No users?

Other question is how can a buddy allocator fragment in the first place?

Christian.

>
>>   2 files changed, 59 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>> index 33ad0cfbd54c..fac423d2cb73 100644
>> --- a/drivers/gpu/drm/drm_buddy.c
>> +++ b/drivers/gpu/drm/drm_buddy.c
>> @@ -276,10 +276,12 @@ drm_get_buddy(struct drm_buddy_block *block)
>>   }
>>   EXPORT_SYMBOL(drm_get_buddy);
>>   -static void __drm_buddy_free(struct drm_buddy *mm,
>> -                 struct drm_buddy_block *block)
>> +static unsigned int __drm_buddy_free(struct drm_buddy *mm,
>> +                     struct drm_buddy_block *block,
>> +                     bool defrag)
>>   {
>>       struct drm_buddy_block *parent;
>> +    unsigned int order;
>>         while ((parent = block->parent)) {
>>           struct drm_buddy_block *buddy;
>> @@ -289,12 +291,14 @@ static void __drm_buddy_free(struct drm_buddy *mm,
>>           if (!drm_buddy_block_is_free(buddy))
>>               break;
>>   -        if (drm_buddy_block_is_clear(block) !=
>> -            drm_buddy_block_is_clear(buddy))
>> -            break;
>> +        if (!defrag) {
>> +            if (drm_buddy_block_is_clear(block) !=
>> +                drm_buddy_block_is_clear(buddy))
>> +                break;
>>   -        if (drm_buddy_block_is_clear(block))
>> -            mark_cleared(parent);
>> +            if (drm_buddy_block_is_clear(block))
>> +                mark_cleared(parent);
>> +        }
>
> Maybe check if the two blocks are incompatible and chuck a warn if 
> they are not? Main thing is not to hide issues with split blocks that 
> should have been merged before.
>
>>             list_del(&buddy->link);
>>   @@ -304,8 +308,49 @@ static void __drm_buddy_free(struct drm_buddy 
>> *mm,
>>           block = parent;
>>       }
>>   +    order = drm_buddy_block_order(block);
>>       mark_free(mm, block);
>> +
>> +    return order;
>> +}
>> +
>> +/**
>> + * drm_buddy_defrag - Defragmentation routine
>> + *
>> + * @mm: DRM buddy manager
>> + * @min_order: minimum order in the freelist to begin
>> + * the defragmentation process
>> + *
>> + * Driver calls the defragmentation function when the
>> + * requested memory allocation returns -ENOSPC.
>> + */
>> +void drm_buddy_defrag(struct drm_buddy *mm,
>> +              unsigned int min_order)
>
> Just wondering if we need "full defag" also? We would probably need to 
> call this at fini() anyway.
>
>> +{
>> +    struct drm_buddy_block *block;
>> +    struct list_head *list;
>> +    unsigned int order;
>> +    int i;
>> +
>> +    if (min_order > mm->max_order)
>> +        return;
>> +
>> +    for (i = min_order - 1; i >= 0; i--) {
>
> Need to be careful with min_order = 0 ?
>
>> +        list = &mm->free_list[i];
>> +        if (list_empty(list))
>> +            continue;
>> +
>> +        list_for_each_entry_reverse(block, list, link) {
>
> Don't we need the safe_reverse() variant here, since this is removing 
> from the list?
>
>> +            if (!block->parent)
>> +                continue;
>> +
>> +            order = __drm_buddy_free(mm, block, 1);
>> +            if (order >= min_order)
>> +                return;
>> +        }
>> +    }
>>   }
>> +EXPORT_SYMBOL(drm_buddy_defrag);
>>     /**
>>    * drm_buddy_free_block - free a block
>> @@ -321,7 +366,7 @@ void drm_buddy_free_block(struct drm_buddy *mm,
>>       if (drm_buddy_block_is_clear(block))
>>           mm->clear_avail += drm_buddy_block_size(mm, block);
>>   -    __drm_buddy_free(mm, block);
>> +    __drm_buddy_free(mm, block, 0);
>>   }
>>   EXPORT_SYMBOL(drm_buddy_free_block);
>>   @@ -470,7 +515,7 @@ __alloc_range_bias(struct drm_buddy *mm,
>>       if (buddy &&
>>           (drm_buddy_block_is_free(block) &&
>>            drm_buddy_block_is_free(buddy)))
>> -        __drm_buddy_free(mm, block);
>> +        __drm_buddy_free(mm, block, 0);
>>       return ERR_PTR(err);
>>   }
>>   @@ -588,7 +633,7 @@ alloc_from_freelist(struct drm_buddy *mm,
>>     err_undo:
>>       if (tmp != order)
>> -        __drm_buddy_free(mm, block);
>> +        __drm_buddy_free(mm, block, 0);
>>       return ERR_PTR(err);
>>   }
>>   @@ -668,7 +713,7 @@ static int __alloc_range(struct drm_buddy *mm,
>>       if (buddy &&
>>           (drm_buddy_block_is_free(block) &&
>>            drm_buddy_block_is_free(buddy)))
>> -        __drm_buddy_free(mm, block);
>> +        __drm_buddy_free(mm, block, 0);
>>     err_free:
>>       if (err == -ENOSPC && total_allocated_on_err) {
>> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>> index d81c596dfa38..d0f63e7b5915 100644
>> --- a/include/drm/drm_buddy.h
>> +++ b/include/drm/drm_buddy.h
>> @@ -166,6 +166,9 @@ void drm_buddy_free_list(struct drm_buddy *mm,
>>                struct list_head *objects,
>>                unsigned int flags);
>>   +void drm_buddy_defrag(struct drm_buddy *mm,
>> +              unsigned int min_order);
>> +
>>   void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
>>   void drm_buddy_block_print(struct drm_buddy *mm,
>>                  struct drm_buddy_block *block,


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v6 3/3] drm/buddy: Add defragmentation support
  2024-02-16 12:33     ` Christian König
@ 2024-02-16 13:21       ` Matthew Auld
  2024-02-16 14:02         ` Christian König
  0 siblings, 1 reply; 14+ messages in thread
From: Matthew Auld @ 2024-02-16 13:21 UTC (permalink / raw)
  To: Christian König, Arunpravin Paneer Selvam, dri-devel,
	amd-gfx, intel-gfx
  Cc: alexander.deucher, felix.kuehling, mario.limonciello

On 16/02/2024 12:33, Christian König wrote:
> Am 16.02.24 um 13:23 schrieb Matthew Auld:
>> On 08/02/2024 15:50, Arunpravin Paneer Selvam wrote:
>>> Add a function to support defragmentation.
>>>
>>> v1: Defragment the memory beginning from min_order
>>>      till the required memory space is available.
>>>
>>> Signed-off-by: Arunpravin Paneer Selvam 
>>> <Arunpravin.PaneerSelvam@amd.com>
>>> Suggested-by: Matthew Auld <matthew.auld@intel.com>
>>> ---
>>>   drivers/gpu/drm/drm_buddy.c | 67 +++++++++++++++++++++++++++++++------
>>>   include/drm/drm_buddy.h     |  3 ++
>>
>> No users?
> 
> Other question is how can a buddy allocator fragment in the first place?

The fragmentation is due to pages now being tracked as dirty/clear. 
Should the allocator merge together a page that is dirty with a page 
that is cleared? When should it do that? User wants to be able to keep 
the two separate if possible. For example, freeing one single dirty page 
can dirty a huge swathe of your already cleared pages if they are merged 
together. Or do you have some some other ideas here?

> 
> Christian.
> 
>>
>>>   2 files changed, 59 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>>> index 33ad0cfbd54c..fac423d2cb73 100644
>>> --- a/drivers/gpu/drm/drm_buddy.c
>>> +++ b/drivers/gpu/drm/drm_buddy.c
>>> @@ -276,10 +276,12 @@ drm_get_buddy(struct drm_buddy_block *block)
>>>   }
>>>   EXPORT_SYMBOL(drm_get_buddy);
>>>   -static void __drm_buddy_free(struct drm_buddy *mm,
>>> -                 struct drm_buddy_block *block)
>>> +static unsigned int __drm_buddy_free(struct drm_buddy *mm,
>>> +                     struct drm_buddy_block *block,
>>> +                     bool defrag)
>>>   {
>>>       struct drm_buddy_block *parent;
>>> +    unsigned int order;
>>>         while ((parent = block->parent)) {
>>>           struct drm_buddy_block *buddy;
>>> @@ -289,12 +291,14 @@ static void __drm_buddy_free(struct drm_buddy *mm,
>>>           if (!drm_buddy_block_is_free(buddy))
>>>               break;
>>>   -        if (drm_buddy_block_is_clear(block) !=
>>> -            drm_buddy_block_is_clear(buddy))
>>> -            break;
>>> +        if (!defrag) {
>>> +            if (drm_buddy_block_is_clear(block) !=
>>> +                drm_buddy_block_is_clear(buddy))
>>> +                break;
>>>   -        if (drm_buddy_block_is_clear(block))
>>> -            mark_cleared(parent);
>>> +            if (drm_buddy_block_is_clear(block))
>>> +                mark_cleared(parent);
>>> +        }
>>
>> Maybe check if the two blocks are incompatible and chuck a warn if 
>> they are not? Main thing is not to hide issues with split blocks that 
>> should have been merged before.
>>
>>>             list_del(&buddy->link);
>>>   @@ -304,8 +308,49 @@ static void __drm_buddy_free(struct drm_buddy 
>>> *mm,
>>>           block = parent;
>>>       }
>>>   +    order = drm_buddy_block_order(block);
>>>       mark_free(mm, block);
>>> +
>>> +    return order;
>>> +}
>>> +
>>> +/**
>>> + * drm_buddy_defrag - Defragmentation routine
>>> + *
>>> + * @mm: DRM buddy manager
>>> + * @min_order: minimum order in the freelist to begin
>>> + * the defragmentation process
>>> + *
>>> + * Driver calls the defragmentation function when the
>>> + * requested memory allocation returns -ENOSPC.
>>> + */
>>> +void drm_buddy_defrag(struct drm_buddy *mm,
>>> +              unsigned int min_order)
>>
>> Just wondering if we need "full defag" also? We would probably need to 
>> call this at fini() anyway.
>>
>>> +{
>>> +    struct drm_buddy_block *block;
>>> +    struct list_head *list;
>>> +    unsigned int order;
>>> +    int i;
>>> +
>>> +    if (min_order > mm->max_order)
>>> +        return;
>>> +
>>> +    for (i = min_order - 1; i >= 0; i--) {
>>
>> Need to be careful with min_order = 0 ?
>>
>>> +        list = &mm->free_list[i];
>>> +        if (list_empty(list))
>>> +            continue;
>>> +
>>> +        list_for_each_entry_reverse(block, list, link) {
>>
>> Don't we need the safe_reverse() variant here, since this is removing 
>> from the list?
>>
>>> +            if (!block->parent)
>>> +                continue;
>>> +
>>> +            order = __drm_buddy_free(mm, block, 1);
>>> +            if (order >= min_order)
>>> +                return;
>>> +        }
>>> +    }
>>>   }
>>> +EXPORT_SYMBOL(drm_buddy_defrag);
>>>     /**
>>>    * drm_buddy_free_block - free a block
>>> @@ -321,7 +366,7 @@ void drm_buddy_free_block(struct drm_buddy *mm,
>>>       if (drm_buddy_block_is_clear(block))
>>>           mm->clear_avail += drm_buddy_block_size(mm, block);
>>>   -    __drm_buddy_free(mm, block);
>>> +    __drm_buddy_free(mm, block, 0);
>>>   }
>>>   EXPORT_SYMBOL(drm_buddy_free_block);
>>>   @@ -470,7 +515,7 @@ __alloc_range_bias(struct drm_buddy *mm,
>>>       if (buddy &&
>>>           (drm_buddy_block_is_free(block) &&
>>>            drm_buddy_block_is_free(buddy)))
>>> -        __drm_buddy_free(mm, block);
>>> +        __drm_buddy_free(mm, block, 0);
>>>       return ERR_PTR(err);
>>>   }
>>>   @@ -588,7 +633,7 @@ alloc_from_freelist(struct drm_buddy *mm,
>>>     err_undo:
>>>       if (tmp != order)
>>> -        __drm_buddy_free(mm, block);
>>> +        __drm_buddy_free(mm, block, 0);
>>>       return ERR_PTR(err);
>>>   }
>>>   @@ -668,7 +713,7 @@ static int __alloc_range(struct drm_buddy *mm,
>>>       if (buddy &&
>>>           (drm_buddy_block_is_free(block) &&
>>>            drm_buddy_block_is_free(buddy)))
>>> -        __drm_buddy_free(mm, block);
>>> +        __drm_buddy_free(mm, block, 0);
>>>     err_free:
>>>       if (err == -ENOSPC && total_allocated_on_err) {
>>> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>>> index d81c596dfa38..d0f63e7b5915 100644
>>> --- a/include/drm/drm_buddy.h
>>> +++ b/include/drm/drm_buddy.h
>>> @@ -166,6 +166,9 @@ void drm_buddy_free_list(struct drm_buddy *mm,
>>>                struct list_head *objects,
>>>                unsigned int flags);
>>>   +void drm_buddy_defrag(struct drm_buddy *mm,
>>> +              unsigned int min_order);
>>> +
>>>   void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
>>>   void drm_buddy_block_print(struct drm_buddy *mm,
>>>                  struct drm_buddy_block *block,
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v6 3/3] drm/buddy: Add defragmentation support
  2024-02-16 13:21       ` Matthew Auld
@ 2024-02-16 14:02         ` Christian König
  2024-02-16 14:47           ` Matthew Auld
  0 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2024-02-16 14:02 UTC (permalink / raw)
  To: Matthew Auld, Christian König, Arunpravin Paneer Selvam,
	dri-devel, amd-gfx, intel-gfx
  Cc: alexander.deucher, felix.kuehling, mario.limonciello

Am 16.02.24 um 14:21 schrieb Matthew Auld:
> On 16/02/2024 12:33, Christian König wrote:
>> Am 16.02.24 um 13:23 schrieb Matthew Auld:
>>> On 08/02/2024 15:50, Arunpravin Paneer Selvam wrote:
>>>> Add a function to support defragmentation.
>>>>
>>>> v1: Defragment the memory beginning from min_order
>>>>      till the required memory space is available.
>>>>
>>>> Signed-off-by: Arunpravin Paneer Selvam 
>>>> <Arunpravin.PaneerSelvam@amd.com>
>>>> Suggested-by: Matthew Auld <matthew.auld@intel.com>
>>>> ---
>>>>   drivers/gpu/drm/drm_buddy.c | 67 
>>>> +++++++++++++++++++++++++++++++------
>>>>   include/drm/drm_buddy.h     |  3 ++
>>>
>>> No users?
>>
>> Other question is how can a buddy allocator fragment in the first place?
>
> The fragmentation is due to pages now being tracked as dirty/clear. 
> Should the allocator merge together a page that is dirty with a page 
> that is cleared? When should it do that? User wants to be able to keep 
> the two separate if possible. For example, freeing one single dirty 
> page can dirty a huge swathe of your already cleared pages if they are 
> merged together. Or do you have some some other ideas here?

Sorry, that was not what I meant. I should probably have been clearer.

That dirty and clean pages are now kept separated is obvious, but why do 
you need to de-fragment them at some point?

Christian.

>
>>
>> Christian.
>>
>>>
>>>>   2 files changed, 59 insertions(+), 11 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>>>> index 33ad0cfbd54c..fac423d2cb73 100644
>>>> --- a/drivers/gpu/drm/drm_buddy.c
>>>> +++ b/drivers/gpu/drm/drm_buddy.c
>>>> @@ -276,10 +276,12 @@ drm_get_buddy(struct drm_buddy_block *block)
>>>>   }
>>>>   EXPORT_SYMBOL(drm_get_buddy);
>>>>   -static void __drm_buddy_free(struct drm_buddy *mm,
>>>> -                 struct drm_buddy_block *block)
>>>> +static unsigned int __drm_buddy_free(struct drm_buddy *mm,
>>>> +                     struct drm_buddy_block *block,
>>>> +                     bool defrag)
>>>>   {
>>>>       struct drm_buddy_block *parent;
>>>> +    unsigned int order;
>>>>         while ((parent = block->parent)) {
>>>>           struct drm_buddy_block *buddy;
>>>> @@ -289,12 +291,14 @@ static void __drm_buddy_free(struct drm_buddy 
>>>> *mm,
>>>>           if (!drm_buddy_block_is_free(buddy))
>>>>               break;
>>>>   -        if (drm_buddy_block_is_clear(block) !=
>>>> -            drm_buddy_block_is_clear(buddy))
>>>> -            break;
>>>> +        if (!defrag) {
>>>> +            if (drm_buddy_block_is_clear(block) !=
>>>> +                drm_buddy_block_is_clear(buddy))
>>>> +                break;
>>>>   -        if (drm_buddy_block_is_clear(block))
>>>> -            mark_cleared(parent);
>>>> +            if (drm_buddy_block_is_clear(block))
>>>> +                mark_cleared(parent);
>>>> +        }
>>>
>>> Maybe check if the two blocks are incompatible and chuck a warn if 
>>> they are not? Main thing is not to hide issues with split blocks 
>>> that should have been merged before.
>>>
>>>> list_del(&buddy->link);
>>>>   @@ -304,8 +308,49 @@ static void __drm_buddy_free(struct 
>>>> drm_buddy *mm,
>>>>           block = parent;
>>>>       }
>>>>   +    order = drm_buddy_block_order(block);
>>>>       mark_free(mm, block);
>>>> +
>>>> +    return order;
>>>> +}
>>>> +
>>>> +/**
>>>> + * drm_buddy_defrag - Defragmentation routine
>>>> + *
>>>> + * @mm: DRM buddy manager
>>>> + * @min_order: minimum order in the freelist to begin
>>>> + * the defragmentation process
>>>> + *
>>>> + * Driver calls the defragmentation function when the
>>>> + * requested memory allocation returns -ENOSPC.
>>>> + */
>>>> +void drm_buddy_defrag(struct drm_buddy *mm,
>>>> +              unsigned int min_order)
>>>
>>> Just wondering if we need "full defag" also? We would probably need 
>>> to call this at fini() anyway.
>>>
>>>> +{
>>>> +    struct drm_buddy_block *block;
>>>> +    struct list_head *list;
>>>> +    unsigned int order;
>>>> +    int i;
>>>> +
>>>> +    if (min_order > mm->max_order)
>>>> +        return;
>>>> +
>>>> +    for (i = min_order - 1; i >= 0; i--) {
>>>
>>> Need to be careful with min_order = 0 ?
>>>
>>>> +        list = &mm->free_list[i];
>>>> +        if (list_empty(list))
>>>> +            continue;
>>>> +
>>>> +        list_for_each_entry_reverse(block, list, link) {
>>>
>>> Don't we need the safe_reverse() variant here, since this is 
>>> removing from the list?
>>>
>>>> +            if (!block->parent)
>>>> +                continue;
>>>> +
>>>> +            order = __drm_buddy_free(mm, block, 1);
>>>> +            if (order >= min_order)
>>>> +                return;
>>>> +        }
>>>> +    }
>>>>   }
>>>> +EXPORT_SYMBOL(drm_buddy_defrag);
>>>>     /**
>>>>    * drm_buddy_free_block - free a block
>>>> @@ -321,7 +366,7 @@ void drm_buddy_free_block(struct drm_buddy *mm,
>>>>       if (drm_buddy_block_is_clear(block))
>>>>           mm->clear_avail += drm_buddy_block_size(mm, block);
>>>>   -    __drm_buddy_free(mm, block);
>>>> +    __drm_buddy_free(mm, block, 0);
>>>>   }
>>>>   EXPORT_SYMBOL(drm_buddy_free_block);
>>>>   @@ -470,7 +515,7 @@ __alloc_range_bias(struct drm_buddy *mm,
>>>>       if (buddy &&
>>>>           (drm_buddy_block_is_free(block) &&
>>>>            drm_buddy_block_is_free(buddy)))
>>>> -        __drm_buddy_free(mm, block);
>>>> +        __drm_buddy_free(mm, block, 0);
>>>>       return ERR_PTR(err);
>>>>   }
>>>>   @@ -588,7 +633,7 @@ alloc_from_freelist(struct drm_buddy *mm,
>>>>     err_undo:
>>>>       if (tmp != order)
>>>> -        __drm_buddy_free(mm, block);
>>>> +        __drm_buddy_free(mm, block, 0);
>>>>       return ERR_PTR(err);
>>>>   }
>>>>   @@ -668,7 +713,7 @@ static int __alloc_range(struct drm_buddy *mm,
>>>>       if (buddy &&
>>>>           (drm_buddy_block_is_free(block) &&
>>>>            drm_buddy_block_is_free(buddy)))
>>>> -        __drm_buddy_free(mm, block);
>>>> +        __drm_buddy_free(mm, block, 0);
>>>>     err_free:
>>>>       if (err == -ENOSPC && total_allocated_on_err) {
>>>> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>>>> index d81c596dfa38..d0f63e7b5915 100644
>>>> --- a/include/drm/drm_buddy.h
>>>> +++ b/include/drm/drm_buddy.h
>>>> @@ -166,6 +166,9 @@ void drm_buddy_free_list(struct drm_buddy *mm,
>>>>                struct list_head *objects,
>>>>                unsigned int flags);
>>>>   +void drm_buddy_defrag(struct drm_buddy *mm,
>>>> +              unsigned int min_order);
>>>> +
>>>>   void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
>>>>   void drm_buddy_block_print(struct drm_buddy *mm,
>>>>                  struct drm_buddy_block *block,
>>


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v6 3/3] drm/buddy: Add defragmentation support
  2024-02-16 14:02         ` Christian König
@ 2024-02-16 14:47           ` Matthew Auld
  2024-02-16 15:08             ` Christian König
  0 siblings, 1 reply; 14+ messages in thread
From: Matthew Auld @ 2024-02-16 14:47 UTC (permalink / raw)
  To: Christian König, Christian König,
	Arunpravin Paneer Selvam, dri-devel, amd-gfx, intel-gfx
  Cc: alexander.deucher, felix.kuehling, mario.limonciello

On 16/02/2024 14:02, Christian König wrote:
> Am 16.02.24 um 14:21 schrieb Matthew Auld:
>> On 16/02/2024 12:33, Christian König wrote:
>>> Am 16.02.24 um 13:23 schrieb Matthew Auld:
>>>> On 08/02/2024 15:50, Arunpravin Paneer Selvam wrote:
>>>>> Add a function to support defragmentation.
>>>>>
>>>>> v1: Defragment the memory beginning from min_order
>>>>>      till the required memory space is available.
>>>>>
>>>>> Signed-off-by: Arunpravin Paneer Selvam 
>>>>> <Arunpravin.PaneerSelvam@amd.com>
>>>>> Suggested-by: Matthew Auld <matthew.auld@intel.com>
>>>>> ---
>>>>>   drivers/gpu/drm/drm_buddy.c | 67 
>>>>> +++++++++++++++++++++++++++++++------
>>>>>   include/drm/drm_buddy.h     |  3 ++
>>>>
>>>> No users?
>>>
>>> Other question is how can a buddy allocator fragment in the first place?
>>
>> The fragmentation is due to pages now being tracked as dirty/clear. 
>> Should the allocator merge together a page that is dirty with a page 
>> that is cleared? When should it do that? User wants to be able to keep 
>> the two separate if possible. For example, freeing one single dirty 
>> page can dirty a huge swathe of your already cleared pages if they are 
>> merged together. Or do you have some some other ideas here?
> 
> Sorry, that was not what I meant. I should probably have been clearer.
> 
> That dirty and clean pages are now kept separated is obvious, but why do 
> you need to de-fragment them at some point?

Ah, right. At the very least we need to do something similar to this at 
fini(), just to ensure we properly merge everything back together so we 
can correctly tear down the mm. Outside of that the thinking was that it 
might be useful to call when allocating larger min page-sizes. You might 
now be failing the allocation due to fragmentation, and so in some cases 
might be better off running some kind of defrag step first, instead of 
failing the allocation and trying to evict stuff. Anyway, if that is not 
a concern for amdgpu, then we just need to handle the fini() case and 
can keep this internal.

> 
> Christian.
> 
>>
>>>
>>> Christian.
>>>
>>>>
>>>>>   2 files changed, 59 insertions(+), 11 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>>>>> index 33ad0cfbd54c..fac423d2cb73 100644
>>>>> --- a/drivers/gpu/drm/drm_buddy.c
>>>>> +++ b/drivers/gpu/drm/drm_buddy.c
>>>>> @@ -276,10 +276,12 @@ drm_get_buddy(struct drm_buddy_block *block)
>>>>>   }
>>>>>   EXPORT_SYMBOL(drm_get_buddy);
>>>>>   -static void __drm_buddy_free(struct drm_buddy *mm,
>>>>> -                 struct drm_buddy_block *block)
>>>>> +static unsigned int __drm_buddy_free(struct drm_buddy *mm,
>>>>> +                     struct drm_buddy_block *block,
>>>>> +                     bool defrag)
>>>>>   {
>>>>>       struct drm_buddy_block *parent;
>>>>> +    unsigned int order;
>>>>>         while ((parent = block->parent)) {
>>>>>           struct drm_buddy_block *buddy;
>>>>> @@ -289,12 +291,14 @@ static void __drm_buddy_free(struct drm_buddy 
>>>>> *mm,
>>>>>           if (!drm_buddy_block_is_free(buddy))
>>>>>               break;
>>>>>   -        if (drm_buddy_block_is_clear(block) !=
>>>>> -            drm_buddy_block_is_clear(buddy))
>>>>> -            break;
>>>>> +        if (!defrag) {
>>>>> +            if (drm_buddy_block_is_clear(block) !=
>>>>> +                drm_buddy_block_is_clear(buddy))
>>>>> +                break;
>>>>>   -        if (drm_buddy_block_is_clear(block))
>>>>> -            mark_cleared(parent);
>>>>> +            if (drm_buddy_block_is_clear(block))
>>>>> +                mark_cleared(parent);
>>>>> +        }
>>>>
>>>> Maybe check if the two blocks are incompatible and chuck a warn if 
>>>> they are not? Main thing is not to hide issues with split blocks 
>>>> that should have been merged before.
>>>>
>>>>> list_del(&buddy->link);
>>>>>   @@ -304,8 +308,49 @@ static void __drm_buddy_free(struct 
>>>>> drm_buddy *mm,
>>>>>           block = parent;
>>>>>       }
>>>>>   +    order = drm_buddy_block_order(block);
>>>>>       mark_free(mm, block);
>>>>> +
>>>>> +    return order;
>>>>> +}
>>>>> +
>>>>> +/**
>>>>> + * drm_buddy_defrag - Defragmentation routine
>>>>> + *
>>>>> + * @mm: DRM buddy manager
>>>>> + * @min_order: minimum order in the freelist to begin
>>>>> + * the defragmentation process
>>>>> + *
>>>>> + * Driver calls the defragmentation function when the
>>>>> + * requested memory allocation returns -ENOSPC.
>>>>> + */
>>>>> +void drm_buddy_defrag(struct drm_buddy *mm,
>>>>> +              unsigned int min_order)
>>>>
>>>> Just wondering if we need "full defag" also? We would probably need 
>>>> to call this at fini() anyway.
>>>>
>>>>> +{
>>>>> +    struct drm_buddy_block *block;
>>>>> +    struct list_head *list;
>>>>> +    unsigned int order;
>>>>> +    int i;
>>>>> +
>>>>> +    if (min_order > mm->max_order)
>>>>> +        return;
>>>>> +
>>>>> +    for (i = min_order - 1; i >= 0; i--) {
>>>>
>>>> Need to be careful with min_order = 0 ?
>>>>
>>>>> +        list = &mm->free_list[i];
>>>>> +        if (list_empty(list))
>>>>> +            continue;
>>>>> +
>>>>> +        list_for_each_entry_reverse(block, list, link) {
>>>>
>>>> Don't we need the safe_reverse() variant here, since this is 
>>>> removing from the list?
>>>>
>>>>> +            if (!block->parent)
>>>>> +                continue;
>>>>> +
>>>>> +            order = __drm_buddy_free(mm, block, 1);
>>>>> +            if (order >= min_order)
>>>>> +                return;
>>>>> +        }
>>>>> +    }
>>>>>   }
>>>>> +EXPORT_SYMBOL(drm_buddy_defrag);
>>>>>     /**
>>>>>    * drm_buddy_free_block - free a block
>>>>> @@ -321,7 +366,7 @@ void drm_buddy_free_block(struct drm_buddy *mm,
>>>>>       if (drm_buddy_block_is_clear(block))
>>>>>           mm->clear_avail += drm_buddy_block_size(mm, block);
>>>>>   -    __drm_buddy_free(mm, block);
>>>>> +    __drm_buddy_free(mm, block, 0);
>>>>>   }
>>>>>   EXPORT_SYMBOL(drm_buddy_free_block);
>>>>>   @@ -470,7 +515,7 @@ __alloc_range_bias(struct drm_buddy *mm,
>>>>>       if (buddy &&
>>>>>           (drm_buddy_block_is_free(block) &&
>>>>>            drm_buddy_block_is_free(buddy)))
>>>>> -        __drm_buddy_free(mm, block);
>>>>> +        __drm_buddy_free(mm, block, 0);
>>>>>       return ERR_PTR(err);
>>>>>   }
>>>>>   @@ -588,7 +633,7 @@ alloc_from_freelist(struct drm_buddy *mm,
>>>>>     err_undo:
>>>>>       if (tmp != order)
>>>>> -        __drm_buddy_free(mm, block);
>>>>> +        __drm_buddy_free(mm, block, 0);
>>>>>       return ERR_PTR(err);
>>>>>   }
>>>>>   @@ -668,7 +713,7 @@ static int __alloc_range(struct drm_buddy *mm,
>>>>>       if (buddy &&
>>>>>           (drm_buddy_block_is_free(block) &&
>>>>>            drm_buddy_block_is_free(buddy)))
>>>>> -        __drm_buddy_free(mm, block);
>>>>> +        __drm_buddy_free(mm, block, 0);
>>>>>     err_free:
>>>>>       if (err == -ENOSPC && total_allocated_on_err) {
>>>>> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>>>>> index d81c596dfa38..d0f63e7b5915 100644
>>>>> --- a/include/drm/drm_buddy.h
>>>>> +++ b/include/drm/drm_buddy.h
>>>>> @@ -166,6 +166,9 @@ void drm_buddy_free_list(struct drm_buddy *mm,
>>>>>                struct list_head *objects,
>>>>>                unsigned int flags);
>>>>>   +void drm_buddy_defrag(struct drm_buddy *mm,
>>>>> +              unsigned int min_order);
>>>>> +
>>>>>   void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
>>>>>   void drm_buddy_block_print(struct drm_buddy *mm,
>>>>>                  struct drm_buddy_block *block,
>>>
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v6 3/3] drm/buddy: Add defragmentation support
  2024-02-16 14:47           ` Matthew Auld
@ 2024-02-16 15:08             ` Christian König
  0 siblings, 0 replies; 14+ messages in thread
From: Christian König @ 2024-02-16 15:08 UTC (permalink / raw)
  To: Matthew Auld, Christian König, Arunpravin Paneer Selvam,
	dri-devel, amd-gfx, intel-gfx
  Cc: alexander.deucher, felix.kuehling, mario.limonciello

Am 16.02.24 um 15:47 schrieb Matthew Auld:
> On 16/02/2024 14:02, Christian König wrote:
>> Am 16.02.24 um 14:21 schrieb Matthew Auld:
>>> On 16/02/2024 12:33, Christian König wrote:
>>>> Am 16.02.24 um 13:23 schrieb Matthew Auld:
>>>>> On 08/02/2024 15:50, Arunpravin Paneer Selvam wrote:
>>>>>> Add a function to support defragmentation.
>>>>>>
>>>>>> v1: Defragment the memory beginning from min_order
>>>>>>      till the required memory space is available.
>>>>>>
>>>>>> Signed-off-by: Arunpravin Paneer Selvam 
>>>>>> <Arunpravin.PaneerSelvam@amd.com>
>>>>>> Suggested-by: Matthew Auld <matthew.auld@intel.com>
>>>>>> ---
>>>>>>   drivers/gpu/drm/drm_buddy.c | 67 
>>>>>> +++++++++++++++++++++++++++++++------
>>>>>>   include/drm/drm_buddy.h     |  3 ++
>>>>>
>>>>> No users?
>>>>
>>>> Other question is how can a buddy allocator fragment in the first 
>>>> place?
>>>
>>> The fragmentation is due to pages now being tracked as dirty/clear. 
>>> Should the allocator merge together a page that is dirty with a page 
>>> that is cleared? When should it do that? User wants to be able to 
>>> keep the two separate if possible. For example, freeing one single 
>>> dirty page can dirty a huge swathe of your already cleared pages if 
>>> they are merged together. Or do you have some some other ideas here?
>>
>> Sorry, that was not what I meant. I should probably have been clearer.
>>
>> That dirty and clean pages are now kept separated is obvious, but why 
>> do you need to de-fragment them at some point?
>
> Ah, right. At the very least we need to do something similar to this 
> at fini(), just to ensure we properly merge everything back together 
> so we can correctly tear down the mm. Outside of that the thinking was 
> that it might be useful to call when allocating larger min page-sizes. 
> You might now be failing the allocation due to fragmentation, and so 
> in some cases might be better off running some kind of defrag step 
> first, instead of failing the allocation and trying to evict stuff. 
> Anyway, if that is not a concern for amdgpu, then we just need to 
> handle the fini() case and can keep this internal.

Ah, yes that makes more sense.

So you basically force merge the pages before fini to avoid warnings 
that the buddy isn't empty.

Thanks, that answers my curiosity. But I unfortunately still don't have 
time to dig deep enough into this for a review.

Thanks,
Christian.

>
>>
>> Christian.
>>
>>>
>>>>
>>>> Christian.
>>>>
>>>>>
>>>>>>   2 files changed, 59 insertions(+), 11 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/drm_buddy.c 
>>>>>> b/drivers/gpu/drm/drm_buddy.c
>>>>>> index 33ad0cfbd54c..fac423d2cb73 100644
>>>>>> --- a/drivers/gpu/drm/drm_buddy.c
>>>>>> +++ b/drivers/gpu/drm/drm_buddy.c
>>>>>> @@ -276,10 +276,12 @@ drm_get_buddy(struct drm_buddy_block *block)
>>>>>>   }
>>>>>>   EXPORT_SYMBOL(drm_get_buddy);
>>>>>>   -static void __drm_buddy_free(struct drm_buddy *mm,
>>>>>> -                 struct drm_buddy_block *block)
>>>>>> +static unsigned int __drm_buddy_free(struct drm_buddy *mm,
>>>>>> +                     struct drm_buddy_block *block,
>>>>>> +                     bool defrag)
>>>>>>   {
>>>>>>       struct drm_buddy_block *parent;
>>>>>> +    unsigned int order;
>>>>>>         while ((parent = block->parent)) {
>>>>>>           struct drm_buddy_block *buddy;
>>>>>> @@ -289,12 +291,14 @@ static void __drm_buddy_free(struct 
>>>>>> drm_buddy *mm,
>>>>>>           if (!drm_buddy_block_is_free(buddy))
>>>>>>               break;
>>>>>>   -        if (drm_buddy_block_is_clear(block) !=
>>>>>> -            drm_buddy_block_is_clear(buddy))
>>>>>> -            break;
>>>>>> +        if (!defrag) {
>>>>>> +            if (drm_buddy_block_is_clear(block) !=
>>>>>> +                drm_buddy_block_is_clear(buddy))
>>>>>> +                break;
>>>>>>   -        if (drm_buddy_block_is_clear(block))
>>>>>> -            mark_cleared(parent);
>>>>>> +            if (drm_buddy_block_is_clear(block))
>>>>>> +                mark_cleared(parent);
>>>>>> +        }
>>>>>
>>>>> Maybe check if the two blocks are incompatible and chuck a warn if 
>>>>> they are not? Main thing is not to hide issues with split blocks 
>>>>> that should have been merged before.
>>>>>
>>>>>> list_del(&buddy->link);
>>>>>>   @@ -304,8 +308,49 @@ static void __drm_buddy_free(struct 
>>>>>> drm_buddy *mm,
>>>>>>           block = parent;
>>>>>>       }
>>>>>>   +    order = drm_buddy_block_order(block);
>>>>>>       mark_free(mm, block);
>>>>>> +
>>>>>> +    return order;
>>>>>> +}
>>>>>> +
>>>>>> +/**
>>>>>> + * drm_buddy_defrag - Defragmentation routine
>>>>>> + *
>>>>>> + * @mm: DRM buddy manager
>>>>>> + * @min_order: minimum order in the freelist to begin
>>>>>> + * the defragmentation process
>>>>>> + *
>>>>>> + * Driver calls the defragmentation function when the
>>>>>> + * requested memory allocation returns -ENOSPC.
>>>>>> + */
>>>>>> +void drm_buddy_defrag(struct drm_buddy *mm,
>>>>>> +              unsigned int min_order)
>>>>>
>>>>> Just wondering if we need "full defag" also? We would probably 
>>>>> need to call this at fini() anyway.
>>>>>
>>>>>> +{
>>>>>> +    struct drm_buddy_block *block;
>>>>>> +    struct list_head *list;
>>>>>> +    unsigned int order;
>>>>>> +    int i;
>>>>>> +
>>>>>> +    if (min_order > mm->max_order)
>>>>>> +        return;
>>>>>> +
>>>>>> +    for (i = min_order - 1; i >= 0; i--) {
>>>>>
>>>>> Need to be careful with min_order = 0 ?
>>>>>
>>>>>> +        list = &mm->free_list[i];
>>>>>> +        if (list_empty(list))
>>>>>> +            continue;
>>>>>> +
>>>>>> +        list_for_each_entry_reverse(block, list, link) {
>>>>>
>>>>> Don't we need the safe_reverse() variant here, since this is 
>>>>> removing from the list?
>>>>>
>>>>>> +            if (!block->parent)
>>>>>> +                continue;
>>>>>> +
>>>>>> +            order = __drm_buddy_free(mm, block, 1);
>>>>>> +            if (order >= min_order)
>>>>>> +                return;
>>>>>> +        }
>>>>>> +    }
>>>>>>   }
>>>>>> +EXPORT_SYMBOL(drm_buddy_defrag);
>>>>>>     /**
>>>>>>    * drm_buddy_free_block - free a block
>>>>>> @@ -321,7 +366,7 @@ void drm_buddy_free_block(struct drm_buddy *mm,
>>>>>>       if (drm_buddy_block_is_clear(block))
>>>>>>           mm->clear_avail += drm_buddy_block_size(mm, block);
>>>>>>   -    __drm_buddy_free(mm, block);
>>>>>> +    __drm_buddy_free(mm, block, 0);
>>>>>>   }
>>>>>>   EXPORT_SYMBOL(drm_buddy_free_block);
>>>>>>   @@ -470,7 +515,7 @@ __alloc_range_bias(struct drm_buddy *mm,
>>>>>>       if (buddy &&
>>>>>>           (drm_buddy_block_is_free(block) &&
>>>>>>            drm_buddy_block_is_free(buddy)))
>>>>>> -        __drm_buddy_free(mm, block);
>>>>>> +        __drm_buddy_free(mm, block, 0);
>>>>>>       return ERR_PTR(err);
>>>>>>   }
>>>>>>   @@ -588,7 +633,7 @@ alloc_from_freelist(struct drm_buddy *mm,
>>>>>>     err_undo:
>>>>>>       if (tmp != order)
>>>>>> -        __drm_buddy_free(mm, block);
>>>>>> +        __drm_buddy_free(mm, block, 0);
>>>>>>       return ERR_PTR(err);
>>>>>>   }
>>>>>>   @@ -668,7 +713,7 @@ static int __alloc_range(struct drm_buddy *mm,
>>>>>>       if (buddy &&
>>>>>>           (drm_buddy_block_is_free(block) &&
>>>>>>            drm_buddy_block_is_free(buddy)))
>>>>>> -        __drm_buddy_free(mm, block);
>>>>>> +        __drm_buddy_free(mm, block, 0);
>>>>>>     err_free:
>>>>>>       if (err == -ENOSPC && total_allocated_on_err) {
>>>>>> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>>>>>> index d81c596dfa38..d0f63e7b5915 100644
>>>>>> --- a/include/drm/drm_buddy.h
>>>>>> +++ b/include/drm/drm_buddy.h
>>>>>> @@ -166,6 +166,9 @@ void drm_buddy_free_list(struct drm_buddy *mm,
>>>>>>                struct list_head *objects,
>>>>>>                unsigned int flags);
>>>>>>   +void drm_buddy_defrag(struct drm_buddy *mm,
>>>>>> +              unsigned int min_order);
>>>>>> +
>>>>>>   void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
>>>>>>   void drm_buddy_block_print(struct drm_buddy *mm,
>>>>>>                  struct drm_buddy_block *block,
>>>>
>>


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v6 1/3] drm/buddy: Implement tracking clear page feature
  2024-02-16 12:03 ` Matthew Auld
@ 2024-02-21 12:23   ` Paneer Selvam, Arunpravin
  2024-02-21 12:40   ` Paneer Selvam, Arunpravin
  1 sibling, 0 replies; 14+ messages in thread
From: Paneer Selvam, Arunpravin @ 2024-02-21 12:23 UTC (permalink / raw)
  To: Matthew Auld, dri-devel, amd-gfx, intel-gfx
  Cc: christian.koenig, alexander.deucher, felix.kuehling, mario.limonciello


On 2/16/2024 5:33 PM, Matthew Auld wrote:
> On 08/02/2024 15:49, Arunpravin Paneer Selvam wrote:
>> - Add tracking clear page feature.
>>
>> - Driver should enable the DRM_BUDDY_CLEARED flag if it
>>    successfully clears the blocks in the free path. On the otherhand,
>>    DRM buddy marks each block as cleared.
>>
>> - Track the available cleared pages size
>>
>> - If driver requests cleared memory we prefer cleared memory
>>    but fallback to uncleared if we can't find the cleared blocks.
>>    when driver requests uncleared memory we try to use uncleared but
>>    fallback to cleared memory if necessary.
>>
>> - When a block gets freed we clear it and mark the freed block as 
>> cleared,
>>    when there are buddies which are cleared as well we can merge them.
>>    Otherwise, we prefer to keep the blocks as separated.
>>
>> v1: (Christian)
>>    - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
>>      cleared. Else, reset the clear flag for each block in the list.
>>
>>    - For merging the 2 cleared blocks compare as below,
>>      drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)
>>
>> v2: (Matthew)
>>    - Add a wrapper drm_buddy_free_list_internal for the freeing of 
>> blocks
>>      operation within drm buddy.
>>    - Write a macro block_incompatible() to allocate the required blocks.
>>    - Update the xe driver for the drm_buddy_free_list change in 
>> arguments.
>>
>> Signed-off-by: Arunpravin Paneer Selvam 
>> <Arunpravin.PaneerSelvam@amd.com>
>> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
>> Suggested-by: Christian König <christian.koenig@amd.com>
>
> Probably needs a new unit test.
Sure, I am working on it. I will send in a separate patch.
>
> I think we are missing something to forcefully re-merge everything at 
> fini()? In theory we can just call the defrag routine. Otherwise we 
> might trigger various warnings since the root(s) might still be split.

I have added the full defrag in the fini() function. Please review the 
patch number 3.

Thanks,

Arun.

>
> Also one nit below. Otherwise I think looks good.
>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
>>   drivers/gpu/drm/drm_buddy.c                   | 192 ++++++++++++++----
>>   drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
>>   drivers/gpu/drm/tests/drm_buddy_test.c        |  10 +-
>>   drivers/gpu/drm/xe/xe_ttm_vram_mgr.c          |   4 +-
>>   include/drm/drm_buddy.h                       |  18 +-
>>   6 files changed, 187 insertions(+), 49 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> index 8db880244324..c0c851409241 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> @@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct 
>> ttm_resource_manager *man,
>>       return 0;
>>     error_free_blocks:
>> -    drm_buddy_free_list(mm, &vres->blocks);
>> +    drm_buddy_free_list(mm, &vres->blocks, 0);
>>       mutex_unlock(&mgr->lock);
>>   error_fini:
>>       ttm_resource_fini(man, &vres->base);
>> @@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct 
>> ttm_resource_manager *man,
>>         amdgpu_vram_mgr_do_reserve(man);
>>   -    drm_buddy_free_list(mm, &vres->blocks);
>> +    drm_buddy_free_list(mm, &vres->blocks, 0);
>>       mutex_unlock(&mgr->lock);
>>         atomic64_sub(vis_usage, &mgr->vis_usage);
>> @@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device 
>> *adev)
>>           kfree(rsv);
>>         list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, 
>> blocks) {
>> -        drm_buddy_free_list(&mgr->mm, &rsv->allocated);
>> +        drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
>>           kfree(rsv);
>>       }
>>       if (!adev->gmc.is_app_apu)
>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>> index f57e6d74fb0e..33ad0cfbd54c 100644
>> --- a/drivers/gpu/drm/drm_buddy.c
>> +++ b/drivers/gpu/drm/drm_buddy.c
>> @@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
>>       __list_add(&block->link, node->link.prev, &node->link);
>>   }
>>   +static void clear_reset(struct drm_buddy_block *block)
>> +{
>> +    block->header &= ~DRM_BUDDY_HEADER_CLEAR;
>> +}
>> +
>> +static void mark_cleared(struct drm_buddy_block *block)
>> +{
>> +    block->header |= DRM_BUDDY_HEADER_CLEAR;
>> +}
>> +
>>   static void mark_allocated(struct drm_buddy_block *block)
>>   {
>>       block->header &= ~DRM_BUDDY_HEADER_STATE;
>> @@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
>>       mark_free(mm, block->left);
>>       mark_free(mm, block->right);
>>   +    if (drm_buddy_block_is_clear(block)) {
>> +        mark_cleared(block->left);
>> +        mark_cleared(block->right);
>> +        clear_reset(block);
>> +    }
>> +
>>       mark_split(block);
>>         return 0;
>> @@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
>>           if (!drm_buddy_block_is_free(buddy))
>>               break;
>>   +        if (drm_buddy_block_is_clear(block) !=
>> +            drm_buddy_block_is_clear(buddy))
>> +            break;
>> +
>> +        if (drm_buddy_block_is_clear(block))
>> +            mark_cleared(parent);
>> +
>>           list_del(&buddy->link);
>>             drm_block_free(mm, block);
>> @@ -295,26 +318,61 @@ void drm_buddy_free_block(struct drm_buddy *mm,
>>   {
>>       BUG_ON(!drm_buddy_block_is_allocated(block));
>>       mm->avail += drm_buddy_block_size(mm, block);
>> +    if (drm_buddy_block_is_clear(block))
>> +        mm->clear_avail += drm_buddy_block_size(mm, block);
>> +
>>       __drm_buddy_free(mm, block);
>>   }
>>   EXPORT_SYMBOL(drm_buddy_free_block);
>>   -/**
>> - * drm_buddy_free_list - free blocks
>> - *
>> - * @mm: DRM buddy manager
>> - * @objects: input list head to free blocks
>> - */
>> -void drm_buddy_free_list(struct drm_buddy *mm, struct list_head 
>> *objects)
>> +static void __drm_buddy_free_list(struct drm_buddy *mm,
>> +                  struct list_head *objects,
>> +                  bool mark_clear,
>> +                  bool mark_dirty)
>>   {
>>       struct drm_buddy_block *block, *on;
>>   +    WARN_ON(mark_dirty && mark_clear);
>> +
>>       list_for_each_entry_safe(block, on, objects, link) {
>> +        if (mark_clear)
>> +            mark_cleared(block);
>> +        else if (mark_dirty)
>> +            clear_reset(block);
>>           drm_buddy_free_block(mm, block);
>>           cond_resched();
>>       }
>>       INIT_LIST_HEAD(objects);
>>   }
>> +
>> +static void drm_buddy_free_list_internal(struct drm_buddy *mm,
>> +                     struct list_head *objects)
>> +{
>> +    /*
>> +     * Don't touch the clear/dirty bit, since allocation is still 
>> internal
>> +     * at this point. For example we might have just failed part of the
>> +     * allocation.
>> +     */
>> +    __drm_buddy_free_list(mm, objects, false, false);
>> +}
>> +
>> +/**
>> + * drm_buddy_free_list - free blocks
>> + *
>> + * @mm: DRM buddy manager
>> + * @objects: input list head to free blocks
>> + * @flags: optional flags like DRM_BUDDY_CLEARED
>> + */
>> +void drm_buddy_free_list(struct drm_buddy *mm,
>> +             struct list_head *objects,
>> +             unsigned int flags)
>> +{
>> +    bool mark_clear = flags & DRM_BUDDY_CLEARED;
>> +
>> +    WARN_ON(flags & ~(DRM_BUDDY_CLEARED));
>> +
>> +    __drm_buddy_free_list(mm, objects, mark_clear, !mark_clear);
>> +}
>>   EXPORT_SYMBOL(drm_buddy_free_list);
>>     static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
>> @@ -327,10 +385,19 @@ static inline bool contains(u64 s1, u64 e1, u64 
>> s2, u64 e2)
>>       return s1 <= s2 && e1 >= e2;
>>   }
>>   +static bool block_incompatible(struct drm_buddy_block *block, 
>> unsigned int flags)
>> +{
>> +    bool needs_clear = flags & DRM_BUDDY_CLEAR_ALLOCATION;
>> +
>> +    return needs_clear != drm_buddy_block_is_clear(block);
>> +}
>> +
>>   static struct drm_buddy_block *
>> -alloc_range_bias(struct drm_buddy *mm,
>> -         u64 start, u64 end,
>> -         unsigned int order)
>> +__alloc_range_bias(struct drm_buddy *mm,
>> +           u64 start, u64 end,
>> +           unsigned int order,
>> +           unsigned long flags,
>> +           bool fallback)
>>   {
>>       struct drm_buddy_block *block;
>>       struct drm_buddy_block *buddy;
>> @@ -369,6 +436,9 @@ alloc_range_bias(struct drm_buddy *mm,
>>             if (contains(start, end, block_start, block_end) &&
>>               order == drm_buddy_block_order(block)) {
>> +            if (!fallback && block_incompatible(block, flags))
>> +                continue;
>> +
>>               /*
>>                * Find the free block within the range.
>>                */
>> @@ -405,25 +475,52 @@ alloc_range_bias(struct drm_buddy *mm,
>>   }
>>     static struct drm_buddy_block *
>> -get_maxblock(struct drm_buddy *mm, unsigned int order)
>> +__drm_buddy_alloc_range_bias(struct drm_buddy *mm,
>> +                 u64 start, u64 end,
>> +                 unsigned int order,
>> +                 unsigned long flags)
>>   {
>> -    struct drm_buddy_block *max_block = NULL, *node;
>> +    struct drm_buddy_block *block;
>> +    bool fallback = 0;
>> +
>> +    block = __alloc_range_bias(mm, start, end, order,
>> +                   flags, fallback);
>> +    if (IS_ERR(block))
>> +        return __alloc_range_bias(mm, start, end, order,
>> +                      flags, !fallback);
>> +
>> +    return block;
>> +}
>> +
>> +static struct drm_buddy_block *
>> +get_maxblock(struct drm_buddy *mm, unsigned int order,
>> +         unsigned long flags)
>> +{
>> +    struct drm_buddy_block *max_block = NULL, *block = NULL;
>>       unsigned int i;
>>         for (i = order; i <= mm->max_order; ++i) {
>> -        if (!list_empty(&mm->free_list[i])) {
>> -            node = list_last_entry(&mm->free_list[i],
>> -                           struct drm_buddy_block,
>> -                           link);
>> -            if (!max_block) {
>> -                max_block = node;
>> +        struct drm_buddy_block *tmp_block;
>> +
>> +        list_for_each_entry_reverse(tmp_block, &mm->free_list[i], 
>> link) {
>> +            if (block_incompatible(tmp_block, flags))
>>                   continue;
>> -            }
>>   -            if (drm_buddy_block_offset(node) >
>> -                drm_buddy_block_offset(max_block)) {
>> -                max_block = node;
>> -            }
>> +            block = tmp_block;
>> +            break;
>> +        }
>> +
>> +        if (!block)
>> +            continue;
>> +
>> +        if (!max_block) {
>> +            max_block = block;
>> +            continue;
>> +        }
>> +
>> +        if (drm_buddy_block_offset(block) >
>> +            drm_buddy_block_offset(max_block)) {
>> +            max_block = block;
>>           }
>>       }
>>   @@ -440,11 +537,29 @@ alloc_from_freelist(struct drm_buddy *mm,
>>       int err;
>>         if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
>> -        block = get_maxblock(mm, order);
>> +        block = get_maxblock(mm, order, flags);
>>           if (block)
>>               /* Store the obtained block order */
>>               tmp = drm_buddy_block_order(block);
>>       } else {
>> +        for (tmp = order; tmp <= mm->max_order; ++tmp) {
>> +            struct drm_buddy_block *tmp_block;
>> +
>> +            list_for_each_entry_reverse(tmp_block, 
>> &mm->free_list[tmp], link) {
>> +                if (block_incompatible(tmp_block, flags))
>> +                    continue;
>> +
>> +                block = tmp_block;
>> +                break;
>> +            }
>> +
>> +            if (block)
>> +                break;
>> +        }
>> +    }
>> +
>> +    if (!block) {
>> +        /* Fallback method */
>>           for (tmp = order; tmp <= mm->max_order; ++tmp) {
>>               if (!list_empty(&mm->free_list[tmp])) {
>>                   block = list_last_entry(&mm->free_list[tmp],
>> @@ -454,10 +569,10 @@ alloc_from_freelist(struct drm_buddy *mm,
>>                       break;
>>               }
>>           }
>> -    }
>>   -    if (!block)
>> -        return ERR_PTR(-ENOSPC);
>> +        if (!block)
>> +            return ERR_PTR(-ENOSPC);
>> +    }
>>         BUG_ON(!drm_buddy_block_is_free(block));
>>   @@ -524,6 +639,8 @@ static int __alloc_range(struct drm_buddy *mm,
>>               mark_allocated(block);
>>               total_allocated += drm_buddy_block_size(mm, block);
>>               mm->avail -= drm_buddy_block_size(mm, block);
>> +            if (drm_buddy_block_is_clear(block))
>> +                mm->clear_avail -= drm_buddy_block_size(mm, block);
>>               list_add_tail(&block->link, &allocated);
>>               continue;
>>           }
>> @@ -558,7 +675,7 @@ static int __alloc_range(struct drm_buddy *mm,
>>           list_splice_tail(&allocated, blocks);
>>           *total_allocated_on_err = total_allocated;
>>       } else {
>> -        drm_buddy_free_list(mm, &allocated);
>> +        drm_buddy_free_list_internal(mm, &allocated);
>>       }
>>         return err;
>> @@ -624,11 +741,11 @@ static int __alloc_contig_try_harder(struct 
>> drm_buddy *mm,
>>               list_splice(&blocks_lhs, blocks);
>>               return 0;
>>           } else if (err != -ENOSPC) {
>> -            drm_buddy_free_list(mm, blocks);
>> +            drm_buddy_free_list_internal(mm, blocks);
>>               return err;
>>           }
>>           /* Free blocks for the next iteration */
>> -        drm_buddy_free_list(mm, blocks);
>> +        drm_buddy_free_list_internal(mm, blocks);
>>       }
>>         return -ENOSPC;
>> @@ -684,6 +801,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>>       list_del(&block->link);
>>       mark_free(mm, block);
>>       mm->avail += drm_buddy_block_size(mm, block);
>> +    if (drm_buddy_block_is_clear(block))
>> +        mm->clear_avail += drm_buddy_block_size(mm, block);
>>         /* Prevent recursively freeing this node */
>>       parent = block->parent;
>> @@ -695,6 +814,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>>       if (err) {
>>           mark_allocated(block);
>>           mm->avail -= drm_buddy_block_size(mm, block);
>> +        if (drm_buddy_block_is_clear(block))
>> +            mm->clear_avail -= drm_buddy_block_size(mm, block);
>>           list_add(&block->link, blocks);
>>       }
>>   @@ -782,7 +903,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>           do {
>>               if (flags & DRM_BUDDY_RANGE_ALLOCATION)
>>                   /* Allocate traversing within the range */
>> -                block = alloc_range_bias(mm, start, end, order);
>> +                block = __drm_buddy_alloc_range_bias(mm, start, end,
>> +                                     order, flags);
>>               else
>>                   /* Allocate from freelist */
>>                   block = alloc_from_freelist(mm, order, flags);
>> @@ -808,6 +930,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>             mark_allocated(block);
>>           mm->avail -= drm_buddy_block_size(mm, block);
>> +        if (drm_buddy_block_is_clear(block))
>> +            mm->clear_avail -= drm_buddy_block_size(mm, block);
>>           kmemleak_update_trace(block);
>>           list_add_tail(&block->link, &allocated);
>>   @@ -846,7 +970,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>       return 0;
>>     err_free:
>> -    drm_buddy_free_list(mm, &allocated);
>> +    drm_buddy_free_list_internal(mm, &allocated);
>>       return err;
>>   }
>>   EXPORT_SYMBOL(drm_buddy_alloc_blocks);
>> @@ -879,8 +1003,8 @@ void drm_buddy_print(struct drm_buddy *mm, 
>> struct drm_printer *p)
>>   {
>>       int order;
>>   -    drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: 
>> %lluMiB\n",
>> -           mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20);
>> +    drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: 
>> %lluMiB, clear_free: %lluMiB\n",
>> +           mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, 
>> mm->clear_avail >> 20);
>>         for (order = mm->max_order; order >= 0; order--) {
>>           struct drm_buddy_block *block;
>> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c 
>> b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>> index 0d735d5c2b35..942345548bc3 100644
>> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>> @@ -126,7 +126,7 @@ static int i915_ttm_buddy_man_alloc(struct 
>> ttm_resource_manager *man,
>>       return 0;
>>     err_free_blocks:
>> -    drm_buddy_free_list(mm, &bman_res->blocks);
>> +    drm_buddy_free_list(mm, &bman_res->blocks, 0);
>>       mutex_unlock(&bman->lock);
>>   err_free_res:
>>       ttm_resource_fini(man, &bman_res->base);
>> @@ -141,7 +141,7 @@ static void i915_ttm_buddy_man_free(struct 
>> ttm_resource_manager *man,
>>       struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
>>         mutex_lock(&bman->lock);
>> -    drm_buddy_free_list(&bman->mm, &bman_res->blocks);
>> +    drm_buddy_free_list(&bman->mm, &bman_res->blocks, 0);
>>       bman->visible_avail += bman_res->used_visible_size;
>>       mutex_unlock(&bman->lock);
>>   @@ -345,7 +345,7 @@ int i915_ttm_buddy_man_fini(struct ttm_device 
>> *bdev, unsigned int type)
>>       ttm_set_driver_manager(bdev, type, NULL);
>>         mutex_lock(&bman->lock);
>> -    drm_buddy_free_list(mm, &bman->reserved);
>> +    drm_buddy_free_list(mm, &bman->reserved, 0);
>>       drm_buddy_fini(mm);
>>       bman->visible_avail += bman->visible_reserved;
>>       WARN_ON_ONCE(bman->visible_avail != bman->visible_size);
>> diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
>> b/drivers/gpu/drm/tests/drm_buddy_test.c
>> index ea2af6bd9abe..e0860fce9ebd 100644
>> --- a/drivers/gpu/drm/tests/drm_buddy_test.c
>> +++ b/drivers/gpu/drm/tests/drm_buddy_test.c
>> @@ -83,7 +83,7 @@ static void 
>> drm_test_buddy_alloc_pathological(struct kunit *test)
>>                                 top, max_order);
>>       }
>>   -    drm_buddy_free_list(&mm, &holes);
>> +    drm_buddy_free_list(&mm, &holes, 0);
>>         /* Nothing larger than blocks of chunk_size now available */
>>       for (order = 1; order <= max_order; order++) {
>> @@ -95,7 +95,7 @@ static void 
>> drm_test_buddy_alloc_pathological(struct kunit *test)
>>       }
>>         list_splice_tail(&holes, &blocks);
>> -    drm_buddy_free_list(&mm, &blocks);
>> +    drm_buddy_free_list(&mm, &blocks, 0);
>>       drm_buddy_fini(&mm);
>>   }
>>   @@ -190,7 +190,7 @@ static void 
>> drm_test_buddy_alloc_pessimistic(struct kunit *test)
>>         list_del(&block->link);
>>       drm_buddy_free_block(&mm, block);
>> -    drm_buddy_free_list(&mm, &blocks);
>> +    drm_buddy_free_list(&mm, &blocks, 0);
>>       drm_buddy_fini(&mm);
>>   }
>>   @@ -236,7 +236,7 @@ static void 
>> drm_test_buddy_alloc_optimistic(struct kunit *test)
>>                                  size, size, &tmp, flags),
>>                             "buddy_alloc unexpectedly succeeded, it 
>> should be full!");
>>   -    drm_buddy_free_list(&mm, &blocks);
>> +    drm_buddy_free_list(&mm, &blocks, 0);
>>       drm_buddy_fini(&mm);
>>   }
>>   @@ -271,7 +271,7 @@ static void drm_test_buddy_alloc_limit(struct 
>> kunit *test)
>>                           drm_buddy_block_size(&mm, block),
>>                           BIT_ULL(mm.max_order) * PAGE_SIZE);
>>   -    drm_buddy_free_list(&mm, &allocated);
>> +    drm_buddy_free_list(&mm, &allocated, 0);
>>       drm_buddy_fini(&mm);
>>   }
>>   diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c 
>> b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
>> index 115ec745e502..1ad678b62c4a 100644
>> --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
>> +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
>> @@ -196,7 +196,7 @@ static int xe_ttm_vram_mgr_new(struct 
>> ttm_resource_manager *man,
>>       return 0;
>>     error_free_blocks:
>> -    drm_buddy_free_list(mm, &vres->blocks);
>> +    drm_buddy_free_list(mm, &vres->blocks, 0);
>>       mutex_unlock(&mgr->lock);
>>   error_fini:
>>       ttm_resource_fini(man, &vres->base);
>> @@ -214,7 +214,7 @@ static void xe_ttm_vram_mgr_del(struct 
>> ttm_resource_manager *man,
>>       struct drm_buddy *mm = &mgr->mm;
>>         mutex_lock(&mgr->lock);
>> -    drm_buddy_free_list(mm, &vres->blocks);
>> +    drm_buddy_free_list(mm, &vres->blocks, 0);
>>       mgr->visible_avail += vres->used_visible_size;
>>       mutex_unlock(&mgr->lock);
>>   diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>> index a5b39fc01003..d81c596dfa38 100644
>> --- a/include/drm/drm_buddy.h
>> +++ b/include/drm/drm_buddy.h
>> @@ -6,6 +6,7 @@
>>   #ifndef __DRM_BUDDY_H__
>>   #define __DRM_BUDDY_H__
>>   +#include <linux/bitfield.h>
>>   #include <linux/bitops.h>
>>   #include <linux/list.h>
>>   #include <linux/slab.h>
>> @@ -25,15 +26,19 @@
>>   #define DRM_BUDDY_RANGE_ALLOCATION        BIT(0)
>>   #define DRM_BUDDY_TOPDOWN_ALLOCATION        BIT(1)
>>   #define DRM_BUDDY_CONTIGUOUS_ALLOCATION        BIT(2)
>> +#define DRM_BUDDY_CLEAR_ALLOCATION        BIT(3)
>> +#define DRM_BUDDY_CLEARED            BIT(4)
>>     struct drm_buddy_block {
>>   #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
>>   #define DRM_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
>> +#define DRM_BUDDY_HEADER_CLEAR  GENMASK_ULL(9, 9)
>
> I guess better to keep this sorted...
>
>> +
>>   #define   DRM_BUDDY_ALLOCATED       (1 << 10)
>>   #define   DRM_BUDDY_FREE       (2 << 10)
>>   #define   DRM_BUDDY_SPLIT       (3 << 10)
>
> ...so maybe move here?
>
>>   /* Free to be used, if needed in the future */
>> -#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(9, 6)
>> +#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
>>   #define DRM_BUDDY_HEADER_ORDER  GENMASK_ULL(5, 0)
>>       u64 header;
>>   @@ -86,6 +91,7 @@ struct drm_buddy {
>>       u64 chunk_size;
>>       u64 size;
>>       u64 avail;
>> +    u64 clear_avail;
>>   };
>>     static inline u64
>> @@ -112,6 +118,12 @@ drm_buddy_block_is_allocated(struct 
>> drm_buddy_block *block)
>>       return drm_buddy_block_state(block) == DRM_BUDDY_ALLOCATED;
>>   }
>>   +static inline bool
>> +drm_buddy_block_is_clear(struct drm_buddy_block *block)
>> +{
>> +    return block->header & DRM_BUDDY_HEADER_CLEAR;
>> +}
>> +
>>   static inline bool
>>   drm_buddy_block_is_free(struct drm_buddy_block *block)
>>   {
>> @@ -150,7 +162,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>>     void drm_buddy_free_block(struct drm_buddy *mm, struct 
>> drm_buddy_block *block);
>>   -void drm_buddy_free_list(struct drm_buddy *mm, struct list_head 
>> *objects);
>> +void drm_buddy_free_list(struct drm_buddy *mm,
>> +             struct list_head *objects,
>> +             unsigned int flags);
>>     void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
>>   void drm_buddy_block_print(struct drm_buddy *mm,

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v6 1/3] drm/buddy: Implement tracking clear page feature
  2024-02-16 12:03 ` Matthew Auld
  2024-02-21 12:23   ` Paneer Selvam, Arunpravin
@ 2024-02-21 12:40   ` Paneer Selvam, Arunpravin
  2024-02-21 17:54     ` Matthew Auld
  1 sibling, 1 reply; 14+ messages in thread
From: Paneer Selvam, Arunpravin @ 2024-02-21 12:40 UTC (permalink / raw)
  To: Matthew Auld, dri-devel, amd-gfx, intel-gfx
  Cc: christian.koenig, alexander.deucher, felix.kuehling, mario.limonciello


On 2/16/2024 5:33 PM, Matthew Auld wrote:
> On 08/02/2024 15:49, Arunpravin Paneer Selvam wrote:
>> - Add tracking clear page feature.
>>
>> - Driver should enable the DRM_BUDDY_CLEARED flag if it
>>    successfully clears the blocks in the free path. On the otherhand,
>>    DRM buddy marks each block as cleared.
>>
>> - Track the available cleared pages size
>>
>> - If driver requests cleared memory we prefer cleared memory
>>    but fallback to uncleared if we can't find the cleared blocks.
>>    when driver requests uncleared memory we try to use uncleared but
>>    fallback to cleared memory if necessary.
>>
>> - When a block gets freed we clear it and mark the freed block as 
>> cleared,
>>    when there are buddies which are cleared as well we can merge them.
>>    Otherwise, we prefer to keep the blocks as separated.
>>
>> v1: (Christian)
>>    - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
>>      cleared. Else, reset the clear flag for each block in the list.
>>
>>    - For merging the 2 cleared blocks compare as below,
>>      drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)
>>
>> v2: (Matthew)
>>    - Add a wrapper drm_buddy_free_list_internal for the freeing of 
>> blocks
>>      operation within drm buddy.
>>    - Write a macro block_incompatible() to allocate the required blocks.
>>    - Update the xe driver for the drm_buddy_free_list change in 
>> arguments.
>>
>> Signed-off-by: Arunpravin Paneer Selvam 
>> <Arunpravin.PaneerSelvam@amd.com>
>> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
>> Suggested-by: Christian König <christian.koenig@amd.com>
>
> Probably needs a new unit test.
>
> I think we are missing something to forcefully re-merge everything at 
> fini()? In theory we can just call the defrag routine. Otherwise we 
> might trigger various warnings since the root(s) might still be split.
>
> Also one nit below. Otherwise I think looks good.
>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
>>   drivers/gpu/drm/drm_buddy.c                   | 192 ++++++++++++++----
>>   drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
>>   drivers/gpu/drm/tests/drm_buddy_test.c        |  10 +-
>>   drivers/gpu/drm/xe/xe_ttm_vram_mgr.c          |   4 +-
>>   include/drm/drm_buddy.h                       |  18 +-
>>   6 files changed, 187 insertions(+), 49 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> index 8db880244324..c0c851409241 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> @@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct 
>> ttm_resource_manager *man,
>>       return 0;
>>     error_free_blocks:
>> -    drm_buddy_free_list(mm, &vres->blocks);
>> +    drm_buddy_free_list(mm, &vres->blocks, 0);
>>       mutex_unlock(&mgr->lock);
>>   error_fini:
>>       ttm_resource_fini(man, &vres->base);
>> @@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct 
>> ttm_resource_manager *man,
>>         amdgpu_vram_mgr_do_reserve(man);
>>   -    drm_buddy_free_list(mm, &vres->blocks);
>> +    drm_buddy_free_list(mm, &vres->blocks, 0);
>>       mutex_unlock(&mgr->lock);
>>         atomic64_sub(vis_usage, &mgr->vis_usage);
>> @@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device 
>> *adev)
>>           kfree(rsv);
>>         list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, 
>> blocks) {
>> -        drm_buddy_free_list(&mgr->mm, &rsv->allocated);
>> +        drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
>>           kfree(rsv);
>>       }
>>       if (!adev->gmc.is_app_apu)
>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>> index f57e6d74fb0e..33ad0cfbd54c 100644
>> --- a/drivers/gpu/drm/drm_buddy.c
>> +++ b/drivers/gpu/drm/drm_buddy.c
>> @@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
>>       __list_add(&block->link, node->link.prev, &node->link);
>>   }
>>   +static void clear_reset(struct drm_buddy_block *block)
>> +{
>> +    block->header &= ~DRM_BUDDY_HEADER_CLEAR;
>> +}
>> +
>> +static void mark_cleared(struct drm_buddy_block *block)
>> +{
>> +    block->header |= DRM_BUDDY_HEADER_CLEAR;
>> +}
>> +
>>   static void mark_allocated(struct drm_buddy_block *block)
>>   {
>>       block->header &= ~DRM_BUDDY_HEADER_STATE;
>> @@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
>>       mark_free(mm, block->left);
>>       mark_free(mm, block->right);
>>   +    if (drm_buddy_block_is_clear(block)) {
>> +        mark_cleared(block->left);
>> +        mark_cleared(block->right);
>> +        clear_reset(block);
>> +    }
>> +
>>       mark_split(block);
>>         return 0;
>> @@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
>>           if (!drm_buddy_block_is_free(buddy))
>>               break;
>>   +        if (drm_buddy_block_is_clear(block) !=
>> +            drm_buddy_block_is_clear(buddy))
>> +            break;
>> +
>> +        if (drm_buddy_block_is_clear(block))
>> +            mark_cleared(parent);
>> +
>>           list_del(&buddy->link);
>>             drm_block_free(mm, block);
>> @@ -295,26 +318,61 @@ void drm_buddy_free_block(struct drm_buddy *mm,
>>   {
>>       BUG_ON(!drm_buddy_block_is_allocated(block));
>>       mm->avail += drm_buddy_block_size(mm, block);
>> +    if (drm_buddy_block_is_clear(block))
>> +        mm->clear_avail += drm_buddy_block_size(mm, block);
>> +
>>       __drm_buddy_free(mm, block);
>>   }
>>   EXPORT_SYMBOL(drm_buddy_free_block);
>>   -/**
>> - * drm_buddy_free_list - free blocks
>> - *
>> - * @mm: DRM buddy manager
>> - * @objects: input list head to free blocks
>> - */
>> -void drm_buddy_free_list(struct drm_buddy *mm, struct list_head 
>> *objects)
>> +static void __drm_buddy_free_list(struct drm_buddy *mm,
>> +                  struct list_head *objects,
>> +                  bool mark_clear,
>> +                  bool mark_dirty)
>>   {
>>       struct drm_buddy_block *block, *on;
>>   +    WARN_ON(mark_dirty && mark_clear);
>> +
>>       list_for_each_entry_safe(block, on, objects, link) {
>> +        if (mark_clear)
>> +            mark_cleared(block);
>> +        else if (mark_dirty)
>> +            clear_reset(block);
>>           drm_buddy_free_block(mm, block);
>>           cond_resched();
>>       }
>>       INIT_LIST_HEAD(objects);
>>   }
>> +
>> +static void drm_buddy_free_list_internal(struct drm_buddy *mm,
>> +                     struct list_head *objects)
>> +{
>> +    /*
>> +     * Don't touch the clear/dirty bit, since allocation is still 
>> internal
>> +     * at this point. For example we might have just failed part of the
>> +     * allocation.
>> +     */
>> +    __drm_buddy_free_list(mm, objects, false, false);
>> +}
>> +
>> +/**
>> + * drm_buddy_free_list - free blocks
>> + *
>> + * @mm: DRM buddy manager
>> + * @objects: input list head to free blocks
>> + * @flags: optional flags like DRM_BUDDY_CLEARED
>> + */
>> +void drm_buddy_free_list(struct drm_buddy *mm,
>> +             struct list_head *objects,
>> +             unsigned int flags)
>> +{
>> +    bool mark_clear = flags & DRM_BUDDY_CLEARED;
>> +
>> +    WARN_ON(flags & ~(DRM_BUDDY_CLEARED));
Do we need this warning? In amdgpu we are using the flags field from 
amdgpu_vram_mgr_resource structure where
the same flags field used for to indicate the CONTIGUOUS, TOPDOWN etc as 
an input flags to the buddy alloc function.
If we need this warning then we should maintain a separate flag for 
setting the CLEARED.
Thanks,
Arun.
>> +
>> +    __drm_buddy_free_list(mm, objects, mark_clear, !mark_clear);
>> +}
>>   EXPORT_SYMBOL(drm_buddy_free_list);
>>     static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
>> @@ -327,10 +385,19 @@ static inline bool contains(u64 s1, u64 e1, u64 
>> s2, u64 e2)
>>       return s1 <= s2 && e1 >= e2;
>>   }
>>   +static bool block_incompatible(struct drm_buddy_block *block, 
>> unsigned int flags)
>> +{
>> +    bool needs_clear = flags & DRM_BUDDY_CLEAR_ALLOCATION;
>> +
>> +    return needs_clear != drm_buddy_block_is_clear(block);
>> +}
>> +
>>   static struct drm_buddy_block *
>> -alloc_range_bias(struct drm_buddy *mm,
>> -         u64 start, u64 end,
>> -         unsigned int order)
>> +__alloc_range_bias(struct drm_buddy *mm,
>> +           u64 start, u64 end,
>> +           unsigned int order,
>> +           unsigned long flags,
>> +           bool fallback)
>>   {
>>       struct drm_buddy_block *block;
>>       struct drm_buddy_block *buddy;
>> @@ -369,6 +436,9 @@ alloc_range_bias(struct drm_buddy *mm,
>>             if (contains(start, end, block_start, block_end) &&
>>               order == drm_buddy_block_order(block)) {
>> +            if (!fallback && block_incompatible(block, flags))
>> +                continue;
>> +
>>               /*
>>                * Find the free block within the range.
>>                */
>> @@ -405,25 +475,52 @@ alloc_range_bias(struct drm_buddy *mm,
>>   }
>>     static struct drm_buddy_block *
>> -get_maxblock(struct drm_buddy *mm, unsigned int order)
>> +__drm_buddy_alloc_range_bias(struct drm_buddy *mm,
>> +                 u64 start, u64 end,
>> +                 unsigned int order,
>> +                 unsigned long flags)
>>   {
>> -    struct drm_buddy_block *max_block = NULL, *node;
>> +    struct drm_buddy_block *block;
>> +    bool fallback = 0;
>> +
>> +    block = __alloc_range_bias(mm, start, end, order,
>> +                   flags, fallback);
>> +    if (IS_ERR(block))
>> +        return __alloc_range_bias(mm, start, end, order,
>> +                      flags, !fallback);
>> +
>> +    return block;
>> +}
>> +
>> +static struct drm_buddy_block *
>> +get_maxblock(struct drm_buddy *mm, unsigned int order,
>> +         unsigned long flags)
>> +{
>> +    struct drm_buddy_block *max_block = NULL, *block = NULL;
>>       unsigned int i;
>>         for (i = order; i <= mm->max_order; ++i) {
>> -        if (!list_empty(&mm->free_list[i])) {
>> -            node = list_last_entry(&mm->free_list[i],
>> -                           struct drm_buddy_block,
>> -                           link);
>> -            if (!max_block) {
>> -                max_block = node;
>> +        struct drm_buddy_block *tmp_block;
>> +
>> +        list_for_each_entry_reverse(tmp_block, &mm->free_list[i], 
>> link) {
>> +            if (block_incompatible(tmp_block, flags))
>>                   continue;
>> -            }
>>   -            if (drm_buddy_block_offset(node) >
>> -                drm_buddy_block_offset(max_block)) {
>> -                max_block = node;
>> -            }
>> +            block = tmp_block;
>> +            break;
>> +        }
>> +
>> +        if (!block)
>> +            continue;
>> +
>> +        if (!max_block) {
>> +            max_block = block;
>> +            continue;
>> +        }
>> +
>> +        if (drm_buddy_block_offset(block) >
>> +            drm_buddy_block_offset(max_block)) {
>> +            max_block = block;
>>           }
>>       }
>>   @@ -440,11 +537,29 @@ alloc_from_freelist(struct drm_buddy *mm,
>>       int err;
>>         if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
>> -        block = get_maxblock(mm, order);
>> +        block = get_maxblock(mm, order, flags);
>>           if (block)
>>               /* Store the obtained block order */
>>               tmp = drm_buddy_block_order(block);
>>       } else {
>> +        for (tmp = order; tmp <= mm->max_order; ++tmp) {
>> +            struct drm_buddy_block *tmp_block;
>> +
>> +            list_for_each_entry_reverse(tmp_block, 
>> &mm->free_list[tmp], link) {
>> +                if (block_incompatible(tmp_block, flags))
>> +                    continue;
>> +
>> +                block = tmp_block;
>> +                break;
>> +            }
>> +
>> +            if (block)
>> +                break;
>> +        }
>> +    }
>> +
>> +    if (!block) {
>> +        /* Fallback method */
>>           for (tmp = order; tmp <= mm->max_order; ++tmp) {
>>               if (!list_empty(&mm->free_list[tmp])) {
>>                   block = list_last_entry(&mm->free_list[tmp],
>> @@ -454,10 +569,10 @@ alloc_from_freelist(struct drm_buddy *mm,
>>                       break;
>>               }
>>           }
>> -    }
>>   -    if (!block)
>> -        return ERR_PTR(-ENOSPC);
>> +        if (!block)
>> +            return ERR_PTR(-ENOSPC);
>> +    }
>>         BUG_ON(!drm_buddy_block_is_free(block));
>>   @@ -524,6 +639,8 @@ static int __alloc_range(struct drm_buddy *mm,
>>               mark_allocated(block);
>>               total_allocated += drm_buddy_block_size(mm, block);
>>               mm->avail -= drm_buddy_block_size(mm, block);
>> +            if (drm_buddy_block_is_clear(block))
>> +                mm->clear_avail -= drm_buddy_block_size(mm, block);
>>               list_add_tail(&block->link, &allocated);
>>               continue;
>>           }
>> @@ -558,7 +675,7 @@ static int __alloc_range(struct drm_buddy *mm,
>>           list_splice_tail(&allocated, blocks);
>>           *total_allocated_on_err = total_allocated;
>>       } else {
>> -        drm_buddy_free_list(mm, &allocated);
>> +        drm_buddy_free_list_internal(mm, &allocated);
>>       }
>>         return err;
>> @@ -624,11 +741,11 @@ static int __alloc_contig_try_harder(struct 
>> drm_buddy *mm,
>>               list_splice(&blocks_lhs, blocks);
>>               return 0;
>>           } else if (err != -ENOSPC) {
>> -            drm_buddy_free_list(mm, blocks);
>> +            drm_buddy_free_list_internal(mm, blocks);
>>               return err;
>>           }
>>           /* Free blocks for the next iteration */
>> -        drm_buddy_free_list(mm, blocks);
>> +        drm_buddy_free_list_internal(mm, blocks);
>>       }
>>         return -ENOSPC;
>> @@ -684,6 +801,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>>       list_del(&block->link);
>>       mark_free(mm, block);
>>       mm->avail += drm_buddy_block_size(mm, block);
>> +    if (drm_buddy_block_is_clear(block))
>> +        mm->clear_avail += drm_buddy_block_size(mm, block);
>>         /* Prevent recursively freeing this node */
>>       parent = block->parent;
>> @@ -695,6 +814,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>>       if (err) {
>>           mark_allocated(block);
>>           mm->avail -= drm_buddy_block_size(mm, block);
>> +        if (drm_buddy_block_is_clear(block))
>> +            mm->clear_avail -= drm_buddy_block_size(mm, block);
>>           list_add(&block->link, blocks);
>>       }
>>   @@ -782,7 +903,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>           do {
>>               if (flags & DRM_BUDDY_RANGE_ALLOCATION)
>>                   /* Allocate traversing within the range */
>> -                block = alloc_range_bias(mm, start, end, order);
>> +                block = __drm_buddy_alloc_range_bias(mm, start, end,
>> +                                     order, flags);
>>               else
>>                   /* Allocate from freelist */
>>                   block = alloc_from_freelist(mm, order, flags);
>> @@ -808,6 +930,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>             mark_allocated(block);
>>           mm->avail -= drm_buddy_block_size(mm, block);
>> +        if (drm_buddy_block_is_clear(block))
>> +            mm->clear_avail -= drm_buddy_block_size(mm, block);
>>           kmemleak_update_trace(block);
>>           list_add_tail(&block->link, &allocated);
>>   @@ -846,7 +970,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>       return 0;
>>     err_free:
>> -    drm_buddy_free_list(mm, &allocated);
>> +    drm_buddy_free_list_internal(mm, &allocated);
>>       return err;
>>   }
>>   EXPORT_SYMBOL(drm_buddy_alloc_blocks);
>> @@ -879,8 +1003,8 @@ void drm_buddy_print(struct drm_buddy *mm, 
>> struct drm_printer *p)
>>   {
>>       int order;
>>   -    drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: 
>> %lluMiB\n",
>> -           mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20);
>> +    drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: 
>> %lluMiB, clear_free: %lluMiB\n",
>> +           mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, 
>> mm->clear_avail >> 20);
>>         for (order = mm->max_order; order >= 0; order--) {
>>           struct drm_buddy_block *block;
>> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c 
>> b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>> index 0d735d5c2b35..942345548bc3 100644
>> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>> @@ -126,7 +126,7 @@ static int i915_ttm_buddy_man_alloc(struct 
>> ttm_resource_manager *man,
>>       return 0;
>>     err_free_blocks:
>> -    drm_buddy_free_list(mm, &bman_res->blocks);
>> +    drm_buddy_free_list(mm, &bman_res->blocks, 0);
>>       mutex_unlock(&bman->lock);
>>   err_free_res:
>>       ttm_resource_fini(man, &bman_res->base);
>> @@ -141,7 +141,7 @@ static void i915_ttm_buddy_man_free(struct 
>> ttm_resource_manager *man,
>>       struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
>>         mutex_lock(&bman->lock);
>> -    drm_buddy_free_list(&bman->mm, &bman_res->blocks);
>> +    drm_buddy_free_list(&bman->mm, &bman_res->blocks, 0);
>>       bman->visible_avail += bman_res->used_visible_size;
>>       mutex_unlock(&bman->lock);
>>   @@ -345,7 +345,7 @@ int i915_ttm_buddy_man_fini(struct ttm_device 
>> *bdev, unsigned int type)
>>       ttm_set_driver_manager(bdev, type, NULL);
>>         mutex_lock(&bman->lock);
>> -    drm_buddy_free_list(mm, &bman->reserved);
>> +    drm_buddy_free_list(mm, &bman->reserved, 0);
>>       drm_buddy_fini(mm);
>>       bman->visible_avail += bman->visible_reserved;
>>       WARN_ON_ONCE(bman->visible_avail != bman->visible_size);
>> diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
>> b/drivers/gpu/drm/tests/drm_buddy_test.c
>> index ea2af6bd9abe..e0860fce9ebd 100644
>> --- a/drivers/gpu/drm/tests/drm_buddy_test.c
>> +++ b/drivers/gpu/drm/tests/drm_buddy_test.c
>> @@ -83,7 +83,7 @@ static void 
>> drm_test_buddy_alloc_pathological(struct kunit *test)
>>                                 top, max_order);
>>       }
>>   -    drm_buddy_free_list(&mm, &holes);
>> +    drm_buddy_free_list(&mm, &holes, 0);
>>         /* Nothing larger than blocks of chunk_size now available */
>>       for (order = 1; order <= max_order; order++) {
>> @@ -95,7 +95,7 @@ static void 
>> drm_test_buddy_alloc_pathological(struct kunit *test)
>>       }
>>         list_splice_tail(&holes, &blocks);
>> -    drm_buddy_free_list(&mm, &blocks);
>> +    drm_buddy_free_list(&mm, &blocks, 0);
>>       drm_buddy_fini(&mm);
>>   }
>>   @@ -190,7 +190,7 @@ static void 
>> drm_test_buddy_alloc_pessimistic(struct kunit *test)
>>         list_del(&block->link);
>>       drm_buddy_free_block(&mm, block);
>> -    drm_buddy_free_list(&mm, &blocks);
>> +    drm_buddy_free_list(&mm, &blocks, 0);
>>       drm_buddy_fini(&mm);
>>   }
>>   @@ -236,7 +236,7 @@ static void 
>> drm_test_buddy_alloc_optimistic(struct kunit *test)
>>                                  size, size, &tmp, flags),
>>                             "buddy_alloc unexpectedly succeeded, it 
>> should be full!");
>>   -    drm_buddy_free_list(&mm, &blocks);
>> +    drm_buddy_free_list(&mm, &blocks, 0);
>>       drm_buddy_fini(&mm);
>>   }
>>   @@ -271,7 +271,7 @@ static void drm_test_buddy_alloc_limit(struct 
>> kunit *test)
>>                           drm_buddy_block_size(&mm, block),
>>                           BIT_ULL(mm.max_order) * PAGE_SIZE);
>>   -    drm_buddy_free_list(&mm, &allocated);
>> +    drm_buddy_free_list(&mm, &allocated, 0);
>>       drm_buddy_fini(&mm);
>>   }
>>   diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c 
>> b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
>> index 115ec745e502..1ad678b62c4a 100644
>> --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
>> +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
>> @@ -196,7 +196,7 @@ static int xe_ttm_vram_mgr_new(struct 
>> ttm_resource_manager *man,
>>       return 0;
>>     error_free_blocks:
>> -    drm_buddy_free_list(mm, &vres->blocks);
>> +    drm_buddy_free_list(mm, &vres->blocks, 0);
>>       mutex_unlock(&mgr->lock);
>>   error_fini:
>>       ttm_resource_fini(man, &vres->base);
>> @@ -214,7 +214,7 @@ static void xe_ttm_vram_mgr_del(struct 
>> ttm_resource_manager *man,
>>       struct drm_buddy *mm = &mgr->mm;
>>         mutex_lock(&mgr->lock);
>> -    drm_buddy_free_list(mm, &vres->blocks);
>> +    drm_buddy_free_list(mm, &vres->blocks, 0);
>>       mgr->visible_avail += vres->used_visible_size;
>>       mutex_unlock(&mgr->lock);
>>   diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>> index a5b39fc01003..d81c596dfa38 100644
>> --- a/include/drm/drm_buddy.h
>> +++ b/include/drm/drm_buddy.h
>> @@ -6,6 +6,7 @@
>>   #ifndef __DRM_BUDDY_H__
>>   #define __DRM_BUDDY_H__
>>   +#include <linux/bitfield.h>
>>   #include <linux/bitops.h>
>>   #include <linux/list.h>
>>   #include <linux/slab.h>
>> @@ -25,15 +26,19 @@
>>   #define DRM_BUDDY_RANGE_ALLOCATION        BIT(0)
>>   #define DRM_BUDDY_TOPDOWN_ALLOCATION        BIT(1)
>>   #define DRM_BUDDY_CONTIGUOUS_ALLOCATION        BIT(2)
>> +#define DRM_BUDDY_CLEAR_ALLOCATION        BIT(3)
>> +#define DRM_BUDDY_CLEARED            BIT(4)
>>     struct drm_buddy_block {
>>   #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
>>   #define DRM_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
>> +#define DRM_BUDDY_HEADER_CLEAR  GENMASK_ULL(9, 9)
>
> I guess better to keep this sorted...
>
>> +
>>   #define   DRM_BUDDY_ALLOCATED       (1 << 10)
>>   #define   DRM_BUDDY_FREE       (2 << 10)
>>   #define   DRM_BUDDY_SPLIT       (3 << 10)
>
> ...so maybe move here?
>
>>   /* Free to be used, if needed in the future */
>> -#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(9, 6)
>> +#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
>>   #define DRM_BUDDY_HEADER_ORDER  GENMASK_ULL(5, 0)
>>       u64 header;
>>   @@ -86,6 +91,7 @@ struct drm_buddy {
>>       u64 chunk_size;
>>       u64 size;
>>       u64 avail;
>> +    u64 clear_avail;
>>   };
>>     static inline u64
>> @@ -112,6 +118,12 @@ drm_buddy_block_is_allocated(struct 
>> drm_buddy_block *block)
>>       return drm_buddy_block_state(block) == DRM_BUDDY_ALLOCATED;
>>   }
>>   +static inline bool
>> +drm_buddy_block_is_clear(struct drm_buddy_block *block)
>> +{
>> +    return block->header & DRM_BUDDY_HEADER_CLEAR;
>> +}
>> +
>>   static inline bool
>>   drm_buddy_block_is_free(struct drm_buddy_block *block)
>>   {
>> @@ -150,7 +162,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>>     void drm_buddy_free_block(struct drm_buddy *mm, struct 
>> drm_buddy_block *block);
>>   -void drm_buddy_free_list(struct drm_buddy *mm, struct list_head 
>> *objects);
>> +void drm_buddy_free_list(struct drm_buddy *mm,
>> +             struct list_head *objects,
>> +             unsigned int flags);
>>     void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
>>   void drm_buddy_block_print(struct drm_buddy *mm,

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v6 1/3] drm/buddy: Implement tracking clear page feature
  2024-02-21 12:40   ` Paneer Selvam, Arunpravin
@ 2024-02-21 17:54     ` Matthew Auld
  0 siblings, 0 replies; 14+ messages in thread
From: Matthew Auld @ 2024-02-21 17:54 UTC (permalink / raw)
  To: Paneer Selvam, Arunpravin, dri-devel, amd-gfx, intel-gfx
  Cc: christian.koenig, alexander.deucher, felix.kuehling, mario.limonciello

On 21/02/2024 12:40, Paneer Selvam, Arunpravin wrote:
> 
> On 2/16/2024 5:33 PM, Matthew Auld wrote:
>> On 08/02/2024 15:49, Arunpravin Paneer Selvam wrote:
>>> - Add tracking clear page feature.
>>>
>>> - Driver should enable the DRM_BUDDY_CLEARED flag if it
>>>    successfully clears the blocks in the free path. On the otherhand,
>>>    DRM buddy marks each block as cleared.
>>>
>>> - Track the available cleared pages size
>>>
>>> - If driver requests cleared memory we prefer cleared memory
>>>    but fallback to uncleared if we can't find the cleared blocks.
>>>    when driver requests uncleared memory we try to use uncleared but
>>>    fallback to cleared memory if necessary.
>>>
>>> - When a block gets freed we clear it and mark the freed block as 
>>> cleared,
>>>    when there are buddies which are cleared as well we can merge them.
>>>    Otherwise, we prefer to keep the blocks as separated.
>>>
>>> v1: (Christian)
>>>    - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
>>>      cleared. Else, reset the clear flag for each block in the list.
>>>
>>>    - For merging the 2 cleared blocks compare as below,
>>>      drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)
>>>
>>> v2: (Matthew)
>>>    - Add a wrapper drm_buddy_free_list_internal for the freeing of 
>>> blocks
>>>      operation within drm buddy.
>>>    - Write a macro block_incompatible() to allocate the required blocks.
>>>    - Update the xe driver for the drm_buddy_free_list change in 
>>> arguments.
>>>
>>> Signed-off-by: Arunpravin Paneer Selvam 
>>> <Arunpravin.PaneerSelvam@amd.com>
>>> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
>>> Suggested-by: Christian König <christian.koenig@amd.com>
>>
>> Probably needs a new unit test.
>>
>> I think we are missing something to forcefully re-merge everything at 
>> fini()? In theory we can just call the defrag routine. Otherwise we 
>> might trigger various warnings since the root(s) might still be split.
>>
>> Also one nit below. Otherwise I think looks good.
>>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
>>>   drivers/gpu/drm/drm_buddy.c                   | 192 ++++++++++++++----
>>>   drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
>>>   drivers/gpu/drm/tests/drm_buddy_test.c        |  10 +-
>>>   drivers/gpu/drm/xe/xe_ttm_vram_mgr.c          |   4 +-
>>>   include/drm/drm_buddy.h                       |  18 +-
>>>   6 files changed, 187 insertions(+), 49 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>> index 8db880244324..c0c851409241 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>> @@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct 
>>> ttm_resource_manager *man,
>>>       return 0;
>>>     error_free_blocks:
>>> -    drm_buddy_free_list(mm, &vres->blocks);
>>> +    drm_buddy_free_list(mm, &vres->blocks, 0);
>>>       mutex_unlock(&mgr->lock);
>>>   error_fini:
>>>       ttm_resource_fini(man, &vres->base);
>>> @@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct 
>>> ttm_resource_manager *man,
>>>         amdgpu_vram_mgr_do_reserve(man);
>>>   -    drm_buddy_free_list(mm, &vres->blocks);
>>> +    drm_buddy_free_list(mm, &vres->blocks, 0);
>>>       mutex_unlock(&mgr->lock);
>>>         atomic64_sub(vis_usage, &mgr->vis_usage);
>>> @@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device 
>>> *adev)
>>>           kfree(rsv);
>>>         list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, 
>>> blocks) {
>>> -        drm_buddy_free_list(&mgr->mm, &rsv->allocated);
>>> +        drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
>>>           kfree(rsv);
>>>       }
>>>       if (!adev->gmc.is_app_apu)
>>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>>> index f57e6d74fb0e..33ad0cfbd54c 100644
>>> --- a/drivers/gpu/drm/drm_buddy.c
>>> +++ b/drivers/gpu/drm/drm_buddy.c
>>> @@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
>>>       __list_add(&block->link, node->link.prev, &node->link);
>>>   }
>>>   +static void clear_reset(struct drm_buddy_block *block)
>>> +{
>>> +    block->header &= ~DRM_BUDDY_HEADER_CLEAR;
>>> +}
>>> +
>>> +static void mark_cleared(struct drm_buddy_block *block)
>>> +{
>>> +    block->header |= DRM_BUDDY_HEADER_CLEAR;
>>> +}
>>> +
>>>   static void mark_allocated(struct drm_buddy_block *block)
>>>   {
>>>       block->header &= ~DRM_BUDDY_HEADER_STATE;
>>> @@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
>>>       mark_free(mm, block->left);
>>>       mark_free(mm, block->right);
>>>   +    if (drm_buddy_block_is_clear(block)) {
>>> +        mark_cleared(block->left);
>>> +        mark_cleared(block->right);
>>> +        clear_reset(block);
>>> +    }
>>> +
>>>       mark_split(block);
>>>         return 0;
>>> @@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
>>>           if (!drm_buddy_block_is_free(buddy))
>>>               break;
>>>   +        if (drm_buddy_block_is_clear(block) !=
>>> +            drm_buddy_block_is_clear(buddy))
>>> +            break;
>>> +
>>> +        if (drm_buddy_block_is_clear(block))
>>> +            mark_cleared(parent);
>>> +
>>>           list_del(&buddy->link);
>>>             drm_block_free(mm, block);
>>> @@ -295,26 +318,61 @@ void drm_buddy_free_block(struct drm_buddy *mm,
>>>   {
>>>       BUG_ON(!drm_buddy_block_is_allocated(block));
>>>       mm->avail += drm_buddy_block_size(mm, block);
>>> +    if (drm_buddy_block_is_clear(block))
>>> +        mm->clear_avail += drm_buddy_block_size(mm, block);
>>> +
>>>       __drm_buddy_free(mm, block);
>>>   }
>>>   EXPORT_SYMBOL(drm_buddy_free_block);
>>>   -/**
>>> - * drm_buddy_free_list - free blocks
>>> - *
>>> - * @mm: DRM buddy manager
>>> - * @objects: input list head to free blocks
>>> - */
>>> -void drm_buddy_free_list(struct drm_buddy *mm, struct list_head 
>>> *objects)
>>> +static void __drm_buddy_free_list(struct drm_buddy *mm,
>>> +                  struct list_head *objects,
>>> +                  bool mark_clear,
>>> +                  bool mark_dirty)
>>>   {
>>>       struct drm_buddy_block *block, *on;
>>>   +    WARN_ON(mark_dirty && mark_clear);
>>> +
>>>       list_for_each_entry_safe(block, on, objects, link) {
>>> +        if (mark_clear)
>>> +            mark_cleared(block);
>>> +        else if (mark_dirty)
>>> +            clear_reset(block);
>>>           drm_buddy_free_block(mm, block);
>>>           cond_resched();
>>>       }
>>>       INIT_LIST_HEAD(objects);
>>>   }
>>> +
>>> +static void drm_buddy_free_list_internal(struct drm_buddy *mm,
>>> +                     struct list_head *objects)
>>> +{
>>> +    /*
>>> +     * Don't touch the clear/dirty bit, since allocation is still 
>>> internal
>>> +     * at this point. For example we might have just failed part of the
>>> +     * allocation.
>>> +     */
>>> +    __drm_buddy_free_list(mm, objects, false, false);
>>> +}
>>> +
>>> +/**
>>> + * drm_buddy_free_list - free blocks
>>> + *
>>> + * @mm: DRM buddy manager
>>> + * @objects: input list head to free blocks
>>> + * @flags: optional flags like DRM_BUDDY_CLEARED
>>> + */
>>> +void drm_buddy_free_list(struct drm_buddy *mm,
>>> +             struct list_head *objects,
>>> +             unsigned int flags)
>>> +{
>>> +    bool mark_clear = flags & DRM_BUDDY_CLEARED;
>>> +
>>> +    WARN_ON(flags & ~(DRM_BUDDY_CLEARED));
> Do we need this warning? In amdgpu we are using the flags field from 
> amdgpu_vram_mgr_resource structure where
> the same flags field used for to indicate the CONTIGUOUS, TOPDOWN etc as 
> an input flags to the buddy alloc function.
> If we need this warning then we should maintain a separate flag for 
> setting the CLEARED.

I think it makes sense to have different set of flags for alloc() and 
free(), and check that the caller is passing valid flags for each in 
order to catch obvious programmer errors.

Also ideally make that clear from the namespace:

DRM_BUDDY_ALLOC_PREFER_CLEARED
DRM_BUDDY_FREE_CLEARED

?

> Thanks,
> Arun.
>>> +
>>> +    __drm_buddy_free_list(mm, objects, mark_clear, !mark_clear);
>>> +}
>>>   EXPORT_SYMBOL(drm_buddy_free_list);
>>>     static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
>>> @@ -327,10 +385,19 @@ static inline bool contains(u64 s1, u64 e1, u64 
>>> s2, u64 e2)
>>>       return s1 <= s2 && e1 >= e2;
>>>   }
>>>   +static bool block_incompatible(struct drm_buddy_block *block, 
>>> unsigned int flags)
>>> +{
>>> +    bool needs_clear = flags & DRM_BUDDY_CLEAR_ALLOCATION;
>>> +
>>> +    return needs_clear != drm_buddy_block_is_clear(block);
>>> +}
>>> +
>>>   static struct drm_buddy_block *
>>> -alloc_range_bias(struct drm_buddy *mm,
>>> -         u64 start, u64 end,
>>> -         unsigned int order)
>>> +__alloc_range_bias(struct drm_buddy *mm,
>>> +           u64 start, u64 end,
>>> +           unsigned int order,
>>> +           unsigned long flags,
>>> +           bool fallback)
>>>   {
>>>       struct drm_buddy_block *block;
>>>       struct drm_buddy_block *buddy;
>>> @@ -369,6 +436,9 @@ alloc_range_bias(struct drm_buddy *mm,
>>>             if (contains(start, end, block_start, block_end) &&
>>>               order == drm_buddy_block_order(block)) {
>>> +            if (!fallback && block_incompatible(block, flags))
>>> +                continue;
>>> +
>>>               /*
>>>                * Find the free block within the range.
>>>                */
>>> @@ -405,25 +475,52 @@ alloc_range_bias(struct drm_buddy *mm,
>>>   }
>>>     static struct drm_buddy_block *
>>> -get_maxblock(struct drm_buddy *mm, unsigned int order)
>>> +__drm_buddy_alloc_range_bias(struct drm_buddy *mm,
>>> +                 u64 start, u64 end,
>>> +                 unsigned int order,
>>> +                 unsigned long flags)
>>>   {
>>> -    struct drm_buddy_block *max_block = NULL, *node;
>>> +    struct drm_buddy_block *block;
>>> +    bool fallback = 0;
>>> +
>>> +    block = __alloc_range_bias(mm, start, end, order,
>>> +                   flags, fallback);
>>> +    if (IS_ERR(block))
>>> +        return __alloc_range_bias(mm, start, end, order,
>>> +                      flags, !fallback);
>>> +
>>> +    return block;
>>> +}
>>> +
>>> +static struct drm_buddy_block *
>>> +get_maxblock(struct drm_buddy *mm, unsigned int order,
>>> +         unsigned long flags)
>>> +{
>>> +    struct drm_buddy_block *max_block = NULL, *block = NULL;
>>>       unsigned int i;
>>>         for (i = order; i <= mm->max_order; ++i) {
>>> -        if (!list_empty(&mm->free_list[i])) {
>>> -            node = list_last_entry(&mm->free_list[i],
>>> -                           struct drm_buddy_block,
>>> -                           link);
>>> -            if (!max_block) {
>>> -                max_block = node;
>>> +        struct drm_buddy_block *tmp_block;
>>> +
>>> +        list_for_each_entry_reverse(tmp_block, &mm->free_list[i], 
>>> link) {
>>> +            if (block_incompatible(tmp_block, flags))
>>>                   continue;
>>> -            }
>>>   -            if (drm_buddy_block_offset(node) >
>>> -                drm_buddy_block_offset(max_block)) {
>>> -                max_block = node;
>>> -            }
>>> +            block = tmp_block;
>>> +            break;
>>> +        }
>>> +
>>> +        if (!block)
>>> +            continue;
>>> +
>>> +        if (!max_block) {
>>> +            max_block = block;
>>> +            continue;
>>> +        }
>>> +
>>> +        if (drm_buddy_block_offset(block) >
>>> +            drm_buddy_block_offset(max_block)) {
>>> +            max_block = block;
>>>           }
>>>       }
>>>   @@ -440,11 +537,29 @@ alloc_from_freelist(struct drm_buddy *mm,
>>>       int err;
>>>         if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
>>> -        block = get_maxblock(mm, order);
>>> +        block = get_maxblock(mm, order, flags);
>>>           if (block)
>>>               /* Store the obtained block order */
>>>               tmp = drm_buddy_block_order(block);
>>>       } else {
>>> +        for (tmp = order; tmp <= mm->max_order; ++tmp) {
>>> +            struct drm_buddy_block *tmp_block;
>>> +
>>> +            list_for_each_entry_reverse(tmp_block, 
>>> &mm->free_list[tmp], link) {
>>> +                if (block_incompatible(tmp_block, flags))
>>> +                    continue;
>>> +
>>> +                block = tmp_block;
>>> +                break;
>>> +            }
>>> +
>>> +            if (block)
>>> +                break;
>>> +        }
>>> +    }
>>> +
>>> +    if (!block) {
>>> +        /* Fallback method */
>>>           for (tmp = order; tmp <= mm->max_order; ++tmp) {
>>>               if (!list_empty(&mm->free_list[tmp])) {
>>>                   block = list_last_entry(&mm->free_list[tmp],
>>> @@ -454,10 +569,10 @@ alloc_from_freelist(struct drm_buddy *mm,
>>>                       break;
>>>               }
>>>           }
>>> -    }
>>>   -    if (!block)
>>> -        return ERR_PTR(-ENOSPC);
>>> +        if (!block)
>>> +            return ERR_PTR(-ENOSPC);
>>> +    }
>>>         BUG_ON(!drm_buddy_block_is_free(block));
>>>   @@ -524,6 +639,8 @@ static int __alloc_range(struct drm_buddy *mm,
>>>               mark_allocated(block);
>>>               total_allocated += drm_buddy_block_size(mm, block);
>>>               mm->avail -= drm_buddy_block_size(mm, block);
>>> +            if (drm_buddy_block_is_clear(block))
>>> +                mm->clear_avail -= drm_buddy_block_size(mm, block);
>>>               list_add_tail(&block->link, &allocated);
>>>               continue;
>>>           }
>>> @@ -558,7 +675,7 @@ static int __alloc_range(struct drm_buddy *mm,
>>>           list_splice_tail(&allocated, blocks);
>>>           *total_allocated_on_err = total_allocated;
>>>       } else {
>>> -        drm_buddy_free_list(mm, &allocated);
>>> +        drm_buddy_free_list_internal(mm, &allocated);
>>>       }
>>>         return err;
>>> @@ -624,11 +741,11 @@ static int __alloc_contig_try_harder(struct 
>>> drm_buddy *mm,
>>>               list_splice(&blocks_lhs, blocks);
>>>               return 0;
>>>           } else if (err != -ENOSPC) {
>>> -            drm_buddy_free_list(mm, blocks);
>>> +            drm_buddy_free_list_internal(mm, blocks);
>>>               return err;
>>>           }
>>>           /* Free blocks for the next iteration */
>>> -        drm_buddy_free_list(mm, blocks);
>>> +        drm_buddy_free_list_internal(mm, blocks);
>>>       }
>>>         return -ENOSPC;
>>> @@ -684,6 +801,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>>>       list_del(&block->link);
>>>       mark_free(mm, block);
>>>       mm->avail += drm_buddy_block_size(mm, block);
>>> +    if (drm_buddy_block_is_clear(block))
>>> +        mm->clear_avail += drm_buddy_block_size(mm, block);
>>>         /* Prevent recursively freeing this node */
>>>       parent = block->parent;
>>> @@ -695,6 +814,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>>>       if (err) {
>>>           mark_allocated(block);
>>>           mm->avail -= drm_buddy_block_size(mm, block);
>>> +        if (drm_buddy_block_is_clear(block))
>>> +            mm->clear_avail -= drm_buddy_block_size(mm, block);
>>>           list_add(&block->link, blocks);
>>>       }
>>>   @@ -782,7 +903,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>>           do {
>>>               if (flags & DRM_BUDDY_RANGE_ALLOCATION)
>>>                   /* Allocate traversing within the range */
>>> -                block = alloc_range_bias(mm, start, end, order);
>>> +                block = __drm_buddy_alloc_range_bias(mm, start, end,
>>> +                                     order, flags);
>>>               else
>>>                   /* Allocate from freelist */
>>>                   block = alloc_from_freelist(mm, order, flags);
>>> @@ -808,6 +930,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>>             mark_allocated(block);
>>>           mm->avail -= drm_buddy_block_size(mm, block);
>>> +        if (drm_buddy_block_is_clear(block))
>>> +            mm->clear_avail -= drm_buddy_block_size(mm, block);
>>>           kmemleak_update_trace(block);
>>>           list_add_tail(&block->link, &allocated);
>>>   @@ -846,7 +970,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>>       return 0;
>>>     err_free:
>>> -    drm_buddy_free_list(mm, &allocated);
>>> +    drm_buddy_free_list_internal(mm, &allocated);
>>>       return err;
>>>   }
>>>   EXPORT_SYMBOL(drm_buddy_alloc_blocks);
>>> @@ -879,8 +1003,8 @@ void drm_buddy_print(struct drm_buddy *mm, 
>>> struct drm_printer *p)
>>>   {
>>>       int order;
>>>   -    drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: 
>>> %lluMiB\n",
>>> -           mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20);
>>> +    drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: 
>>> %lluMiB, clear_free: %lluMiB\n",
>>> +           mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, 
>>> mm->clear_avail >> 20);
>>>         for (order = mm->max_order; order >= 0; order--) {
>>>           struct drm_buddy_block *block;
>>> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c 
>>> b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>>> index 0d735d5c2b35..942345548bc3 100644
>>> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>>> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>>> @@ -126,7 +126,7 @@ static int i915_ttm_buddy_man_alloc(struct 
>>> ttm_resource_manager *man,
>>>       return 0;
>>>     err_free_blocks:
>>> -    drm_buddy_free_list(mm, &bman_res->blocks);
>>> +    drm_buddy_free_list(mm, &bman_res->blocks, 0);
>>>       mutex_unlock(&bman->lock);
>>>   err_free_res:
>>>       ttm_resource_fini(man, &bman_res->base);
>>> @@ -141,7 +141,7 @@ static void i915_ttm_buddy_man_free(struct 
>>> ttm_resource_manager *man,
>>>       struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
>>>         mutex_lock(&bman->lock);
>>> -    drm_buddy_free_list(&bman->mm, &bman_res->blocks);
>>> +    drm_buddy_free_list(&bman->mm, &bman_res->blocks, 0);
>>>       bman->visible_avail += bman_res->used_visible_size;
>>>       mutex_unlock(&bman->lock);
>>>   @@ -345,7 +345,7 @@ int i915_ttm_buddy_man_fini(struct ttm_device 
>>> *bdev, unsigned int type)
>>>       ttm_set_driver_manager(bdev, type, NULL);
>>>         mutex_lock(&bman->lock);
>>> -    drm_buddy_free_list(mm, &bman->reserved);
>>> +    drm_buddy_free_list(mm, &bman->reserved, 0);
>>>       drm_buddy_fini(mm);
>>>       bman->visible_avail += bman->visible_reserved;
>>>       WARN_ON_ONCE(bman->visible_avail != bman->visible_size);
>>> diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
>>> b/drivers/gpu/drm/tests/drm_buddy_test.c
>>> index ea2af6bd9abe..e0860fce9ebd 100644
>>> --- a/drivers/gpu/drm/tests/drm_buddy_test.c
>>> +++ b/drivers/gpu/drm/tests/drm_buddy_test.c
>>> @@ -83,7 +83,7 @@ static void 
>>> drm_test_buddy_alloc_pathological(struct kunit *test)
>>>                                 top, max_order);
>>>       }
>>>   -    drm_buddy_free_list(&mm, &holes);
>>> +    drm_buddy_free_list(&mm, &holes, 0);
>>>         /* Nothing larger than blocks of chunk_size now available */
>>>       for (order = 1; order <= max_order; order++) {
>>> @@ -95,7 +95,7 @@ static void 
>>> drm_test_buddy_alloc_pathological(struct kunit *test)
>>>       }
>>>         list_splice_tail(&holes, &blocks);
>>> -    drm_buddy_free_list(&mm, &blocks);
>>> +    drm_buddy_free_list(&mm, &blocks, 0);
>>>       drm_buddy_fini(&mm);
>>>   }
>>>   @@ -190,7 +190,7 @@ static void 
>>> drm_test_buddy_alloc_pessimistic(struct kunit *test)
>>>         list_del(&block->link);
>>>       drm_buddy_free_block(&mm, block);
>>> -    drm_buddy_free_list(&mm, &blocks);
>>> +    drm_buddy_free_list(&mm, &blocks, 0);
>>>       drm_buddy_fini(&mm);
>>>   }
>>>   @@ -236,7 +236,7 @@ static void 
>>> drm_test_buddy_alloc_optimistic(struct kunit *test)
>>>                                  size, size, &tmp, flags),
>>>                             "buddy_alloc unexpectedly succeeded, it 
>>> should be full!");
>>>   -    drm_buddy_free_list(&mm, &blocks);
>>> +    drm_buddy_free_list(&mm, &blocks, 0);
>>>       drm_buddy_fini(&mm);
>>>   }
>>>   @@ -271,7 +271,7 @@ static void drm_test_buddy_alloc_limit(struct 
>>> kunit *test)
>>>                           drm_buddy_block_size(&mm, block),
>>>                           BIT_ULL(mm.max_order) * PAGE_SIZE);
>>>   -    drm_buddy_free_list(&mm, &allocated);
>>> +    drm_buddy_free_list(&mm, &allocated, 0);
>>>       drm_buddy_fini(&mm);
>>>   }
>>>   diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c 
>>> b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
>>> index 115ec745e502..1ad678b62c4a 100644
>>> --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
>>> +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
>>> @@ -196,7 +196,7 @@ static int xe_ttm_vram_mgr_new(struct 
>>> ttm_resource_manager *man,
>>>       return 0;
>>>     error_free_blocks:
>>> -    drm_buddy_free_list(mm, &vres->blocks);
>>> +    drm_buddy_free_list(mm, &vres->blocks, 0);
>>>       mutex_unlock(&mgr->lock);
>>>   error_fini:
>>>       ttm_resource_fini(man, &vres->base);
>>> @@ -214,7 +214,7 @@ static void xe_ttm_vram_mgr_del(struct 
>>> ttm_resource_manager *man,
>>>       struct drm_buddy *mm = &mgr->mm;
>>>         mutex_lock(&mgr->lock);
>>> -    drm_buddy_free_list(mm, &vres->blocks);
>>> +    drm_buddy_free_list(mm, &vres->blocks, 0);
>>>       mgr->visible_avail += vres->used_visible_size;
>>>       mutex_unlock(&mgr->lock);
>>>   diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>>> index a5b39fc01003..d81c596dfa38 100644
>>> --- a/include/drm/drm_buddy.h
>>> +++ b/include/drm/drm_buddy.h
>>> @@ -6,6 +6,7 @@
>>>   #ifndef __DRM_BUDDY_H__
>>>   #define __DRM_BUDDY_H__
>>>   +#include <linux/bitfield.h>
>>>   #include <linux/bitops.h>
>>>   #include <linux/list.h>
>>>   #include <linux/slab.h>
>>> @@ -25,15 +26,19 @@
>>>   #define DRM_BUDDY_RANGE_ALLOCATION        BIT(0)
>>>   #define DRM_BUDDY_TOPDOWN_ALLOCATION        BIT(1)
>>>   #define DRM_BUDDY_CONTIGUOUS_ALLOCATION        BIT(2)
>>> +#define DRM_BUDDY_CLEAR_ALLOCATION        BIT(3)
>>> +#define DRM_BUDDY_CLEARED            BIT(4)
>>>     struct drm_buddy_block {
>>>   #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
>>>   #define DRM_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
>>> +#define DRM_BUDDY_HEADER_CLEAR  GENMASK_ULL(9, 9)
>>
>> I guess better to keep this sorted...
>>
>>> +
>>>   #define   DRM_BUDDY_ALLOCATED       (1 << 10)
>>>   #define   DRM_BUDDY_FREE       (2 << 10)
>>>   #define   DRM_BUDDY_SPLIT       (3 << 10)
>>
>> ...so maybe move here?
>>
>>>   /* Free to be used, if needed in the future */
>>> -#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(9, 6)
>>> +#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
>>>   #define DRM_BUDDY_HEADER_ORDER  GENMASK_ULL(5, 0)
>>>       u64 header;
>>>   @@ -86,6 +91,7 @@ struct drm_buddy {
>>>       u64 chunk_size;
>>>       u64 size;
>>>       u64 avail;
>>> +    u64 clear_avail;
>>>   };
>>>     static inline u64
>>> @@ -112,6 +118,12 @@ drm_buddy_block_is_allocated(struct 
>>> drm_buddy_block *block)
>>>       return drm_buddy_block_state(block) == DRM_BUDDY_ALLOCATED;
>>>   }
>>>   +static inline bool
>>> +drm_buddy_block_is_clear(struct drm_buddy_block *block)
>>> +{
>>> +    return block->header & DRM_BUDDY_HEADER_CLEAR;
>>> +}
>>> +
>>>   static inline bool
>>>   drm_buddy_block_is_free(struct drm_buddy_block *block)
>>>   {
>>> @@ -150,7 +162,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>>>     void drm_buddy_free_block(struct drm_buddy *mm, struct 
>>> drm_buddy_block *block);
>>>   -void drm_buddy_free_list(struct drm_buddy *mm, struct list_head 
>>> *objects);
>>> +void drm_buddy_free_list(struct drm_buddy *mm,
>>> +             struct list_head *objects,
>>> +             unsigned int flags);
>>>     void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
>>>   void drm_buddy_block_print(struct drm_buddy *mm,

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2024-02-21 17:54 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-08 15:49 [PATCH v6 1/3] drm/buddy: Implement tracking clear page feature Arunpravin Paneer Selvam
2024-02-08 15:49 ` [PATCH v6 2/3] drm/amdgpu: Enable clear page functionality Arunpravin Paneer Selvam
2024-02-08 15:50 ` [PATCH v6 3/3] drm/buddy: Add defragmentation support Arunpravin Paneer Selvam
2024-02-16 12:23   ` Matthew Auld
2024-02-16 12:33     ` Christian König
2024-02-16 13:21       ` Matthew Auld
2024-02-16 14:02         ` Christian König
2024-02-16 14:47           ` Matthew Auld
2024-02-16 15:08             ` Christian König
2024-02-16 11:49 ` [PATCH v6 1/3] drm/buddy: Implement tracking clear page feature Arunpravin Paneer Selvam
2024-02-16 12:03 ` Matthew Auld
2024-02-21 12:23   ` Paneer Selvam, Arunpravin
2024-02-21 12:40   ` Paneer Selvam, Arunpravin
2024-02-21 17:54     ` Matthew Auld

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).