* [PATCH v3 1/2] drm/buddy: Implement tracking clear page feature
@ 2023-12-14 13:42 Arunpravin Paneer Selvam
  2023-12-14 13:42 ` [PATCH v3 2/2] drm/amdgpu: Enable clear page functionality Arunpravin Paneer Selvam
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Arunpravin Paneer Selvam @ 2023-12-14 13:42 UTC (permalink / raw)
  To: dri-devel, amd-gfx
  Cc: alexander.deucher, felix.kuehling, Arunpravin Paneer Selvam,
	christian.koenig, matthew.auld

- Add clear page tracking feature.

- The driver should set the DRM_BUDDY_CLEARED flag if it
  successfully clears the blocks in the free path. DRM buddy then
  marks each of those blocks as cleared.

- Track the available cleared pages size

- If the driver requests cleared memory, we prefer cleared blocks but
  fall back to uncleared blocks if none can be found. When the driver
  requests uncleared memory, we try to use uncleared blocks first but
  fall back to cleared memory if necessary.

- When a block gets freed, we clear it and mark the freed block as
  cleared. When its buddy is cleared as well, the two can be merged;
  otherwise, we prefer to keep the blocks separate (see the usage
  sketch below).
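
A minimal usage sketch from the driver's side (illustrative only; the
variable names and the driver_cleared_the_memory condition are
placeholders, not part of this patch):

    /* allocation: prefer blocks that are already zeroed */
    err = drm_buddy_alloc_blocks(mm, start, end, size, min_block_size,
                                 &blocks, DRM_BUDDY_CLEAR_ALLOCATION);

    /* free: tell the allocator whether the blocks were scrubbed */
    if (driver_cleared_the_memory)
        drm_buddy_free_list(mm, &blocks, DRM_BUDDY_CLEARED);
    else
        drm_buddy_free_list(mm, &blocks, 0);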

v1: (Christian)
  - Depending on the DRM_BUDDY_CLEARED flag, mark each block in the
    list as cleared; otherwise, reset the clear flag for each block.

  - For merging two cleared blocks, compare as below:
    drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Suggested-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
 drivers/gpu/drm/drm_buddy.c                   | 169 +++++++++++++++---
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
 drivers/gpu/drm/tests/drm_buddy_test.c        |  10 +-
 include/drm/drm_buddy.h                       |  18 +-
 5 files changed, 168 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 08916538a615..d0e199cc8f17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -556,7 +556,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	return 0;
 
 error_free_blocks:
-	drm_buddy_free_list(mm, &vres->blocks);
+	drm_buddy_free_list(mm, &vres->blocks, 0);
 	mutex_unlock(&mgr->lock);
 error_fini:
 	ttm_resource_fini(man, &vres->base);
@@ -589,7 +589,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
 
 	amdgpu_vram_mgr_do_reserve(man);
 
-	drm_buddy_free_list(mm, &vres->blocks);
+	drm_buddy_free_list(mm, &vres->blocks, 0);
 	mutex_unlock(&mgr->lock);
 
 	atomic64_sub(vis_usage, &mgr->vis_usage);
@@ -897,7 +897,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
 		kfree(rsv);
 
 	list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {
-		drm_buddy_free_list(&mgr->mm, &rsv->allocated);
+		drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
 		kfree(rsv);
 	}
 	if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..d44172f23f05 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
 	__list_add(&block->link, node->link.prev, &node->link);
 }
 
+static void clear_reset(struct drm_buddy_block *block)
+{
+	block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+	block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
 static void mark_allocated(struct drm_buddy_block *block)
 {
 	block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
 	mark_free(mm, block->left);
 	mark_free(mm, block->right);
 
+	if (drm_buddy_block_is_clear(block)) {
+		mark_cleared(block->left);
+		mark_cleared(block->right);
+		clear_reset(block);
+	}
+
 	mark_split(block);
 
 	return 0;
@@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
 		if (!drm_buddy_block_is_free(buddy))
 			break;
 
+		if (drm_buddy_block_is_clear(block) !=
+		    drm_buddy_block_is_clear(buddy))
+			break;
+
+		if (drm_buddy_block_is_clear(block))
+			mark_cleared(parent);
+
 		list_del(&buddy->link);
 
 		drm_block_free(mm, block);
@@ -295,6 +318,9 @@ void drm_buddy_free_block(struct drm_buddy *mm,
 {
 	BUG_ON(!drm_buddy_block_is_allocated(block));
 	mm->avail += drm_buddy_block_size(mm, block);
+	if (drm_buddy_block_is_clear(block))
+		mm->clear_avail += drm_buddy_block_size(mm, block);
+
 	__drm_buddy_free(mm, block);
 }
 EXPORT_SYMBOL(drm_buddy_free_block);
@@ -305,10 +331,20 @@ EXPORT_SYMBOL(drm_buddy_free_block);
  * @mm: DRM buddy manager
  * @objects: input list head to free blocks
  */
-void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects)
+void drm_buddy_free_list(struct drm_buddy *mm,
+			 struct list_head *objects,
+			 unsigned long flags)
 {
 	struct drm_buddy_block *block, *on;
 
+	if (flags & DRM_BUDDY_CLEARED) {
+		list_for_each_entry(block, objects, link)
+			mark_cleared(block);
+	} else {
+		list_for_each_entry(block, objects, link)
+			clear_reset(block);
+	}
+
 	list_for_each_entry_safe(block, on, objects, link) {
 		drm_buddy_free_block(mm, block);
 		cond_resched();
@@ -328,9 +364,11 @@ static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
 }
 
 static struct drm_buddy_block *
-alloc_range_bias(struct drm_buddy *mm,
-		 u64 start, u64 end,
-		 unsigned int order)
+__alloc_range_bias(struct drm_buddy *mm,
+		   u64 start, u64 end,
+		   unsigned int order,
+		   unsigned long flags,
+		   bool fallback)
 {
 	struct drm_buddy_block *block;
 	struct drm_buddy_block *buddy;
@@ -369,6 +407,15 @@ alloc_range_bias(struct drm_buddy *mm,
 
 		if (contains(start, end, block_start, block_end) &&
 		    order == drm_buddy_block_order(block)) {
+			if (!fallback) {
+				if (flags & DRM_BUDDY_CLEAR_ALLOCATION) {
+					if (!drm_buddy_block_is_clear(block))
+						continue;
+				} else {
+					if (drm_buddy_block_is_clear(block))
+						continue;
+				}
+			}
 			/*
 			 * Find the free block within the range.
 			 */
@@ -405,25 +452,58 @@ alloc_range_bias(struct drm_buddy *mm,
 }
 
 static struct drm_buddy_block *
-get_maxblock(struct drm_buddy *mm, unsigned int order)
+__drm_buddy_alloc_range_bias(struct drm_buddy *mm,
+			     u64 start, u64 end,
+			     unsigned int order,
+			     unsigned long flags)
+{
+	struct drm_buddy_block *block;
+	bool fallback = 0;
+
+	block = __alloc_range_bias(mm, start, end, order,
+				   flags, fallback);
+	if (IS_ERR(block))
+		return __alloc_range_bias(mm, start, end, order,
+					  flags, !fallback);
+
+	return block;
+}
+
+static struct drm_buddy_block *
+get_maxblock(struct drm_buddy *mm, unsigned int order,
+	     unsigned long flags)
 {
-	struct drm_buddy_block *max_block = NULL, *node;
+	struct drm_buddy_block *max_block = NULL, *block = NULL;
 	unsigned int i;
 
 	for (i = order; i <= mm->max_order; ++i) {
-		if (!list_empty(&mm->free_list[i])) {
-			node = list_last_entry(&mm->free_list[i],
-					       struct drm_buddy_block,
-					       link);
-			if (!max_block) {
-				max_block = node;
-				continue;
+		struct drm_buddy_block *tmp_block;
+
+		list_for_each_entry_reverse(tmp_block, &mm->free_list[i], link) {
+			if (flags & DRM_BUDDY_CLEAR_ALLOCATION) {
+				/* Find a cleared block */
+				if (!drm_buddy_block_is_clear(tmp_block))
+					continue;
+			} else {
+				if (drm_buddy_block_is_clear(tmp_block))
+					continue;
 			}
 
-			if (drm_buddy_block_offset(node) >
-			    drm_buddy_block_offset(max_block)) {
-				max_block = node;
-			}
+			block = tmp_block;
+			break;
+		}
+
+		if (!block)
+			continue;
+
+		if (!max_block) {
+			max_block = block;
+			continue;
+		}
+
+		if (drm_buddy_block_offset(block) >
+		    drm_buddy_block_offset(max_block)) {
+			max_block = block;
 		}
 	}
 
@@ -440,11 +520,35 @@ alloc_from_freelist(struct drm_buddy *mm,
 	int err;
 
 	if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
-		block = get_maxblock(mm, order);
+		block = get_maxblock(mm, order, flags);
 		if (block)
 			/* Store the obtained block order */
 			tmp = drm_buddy_block_order(block);
 	} else {
+		for (tmp = order; tmp <= mm->max_order; ++tmp) {
+			struct drm_buddy_block *tmp_block;
+
+			list_for_each_entry_reverse(tmp_block, &mm->free_list[tmp], link) {
+				if (flags & DRM_BUDDY_CLEAR_ALLOCATION) {
+					/* Find a cleared block */
+					if (!drm_buddy_block_is_clear(tmp_block))
+						continue;
+				} else {
+					if (drm_buddy_block_is_clear(tmp_block))
+						continue;
+				}
+
+				block = tmp_block;
+				break;
+			}
+
+			if (block)
+				break;
+		}
+	}
+
+	if (!block) {
+		/* Fallback method */
 		for (tmp = order; tmp <= mm->max_order; ++tmp) {
 			if (!list_empty(&mm->free_list[tmp])) {
 				block = list_last_entry(&mm->free_list[tmp],
@@ -454,10 +558,10 @@ alloc_from_freelist(struct drm_buddy *mm,
 					break;
 			}
 		}
-	}
 
-	if (!block)
-		return ERR_PTR(-ENOSPC);
+		if (!block)
+			return ERR_PTR(-ENOSPC);
+	}
 
 	BUG_ON(!drm_buddy_block_is_free(block));
 
@@ -524,6 +628,8 @@ static int __alloc_range(struct drm_buddy *mm,
 			mark_allocated(block);
 			total_allocated += drm_buddy_block_size(mm, block);
 			mm->avail -= drm_buddy_block_size(mm, block);
+			if (drm_buddy_block_is_clear(block))
+				mm->clear_avail -= drm_buddy_block_size(mm, block);
 			list_add_tail(&block->link, &allocated);
 			continue;
 		}
@@ -558,7 +664,7 @@ static int __alloc_range(struct drm_buddy *mm,
 		list_splice_tail(&allocated, blocks);
 		*total_allocated_on_err = total_allocated;
 	} else {
-		drm_buddy_free_list(mm, &allocated);
+		drm_buddy_free_list(mm, &allocated, 0);
 	}
 
 	return err;
@@ -624,11 +730,11 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
 			list_splice(&blocks_lhs, blocks);
 			return 0;
 		} else if (err != -ENOSPC) {
-			drm_buddy_free_list(mm, blocks);
+			drm_buddy_free_list(mm, blocks, 0);
 			return err;
 		}
 		/* Free blocks for the next iteration */
-		drm_buddy_free_list(mm, blocks);
+		drm_buddy_free_list(mm, blocks, 0);
 	}
 
 	return -ENOSPC;
@@ -684,6 +790,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
 	list_del(&block->link);
 	mark_free(mm, block);
 	mm->avail += drm_buddy_block_size(mm, block);
+	if (drm_buddy_block_is_clear(block))
+		mm->clear_avail += drm_buddy_block_size(mm, block);
 
 	/* Prevent recursively freeing this node */
 	parent = block->parent;
@@ -695,6 +803,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
 	if (err) {
 		mark_allocated(block);
 		mm->avail -= drm_buddy_block_size(mm, block);
+		if (drm_buddy_block_is_clear(block))
+			mm->clear_avail -= drm_buddy_block_size(mm, block);
 		list_add(&block->link, blocks);
 	}
 
@@ -782,7 +892,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 		do {
 			if (flags & DRM_BUDDY_RANGE_ALLOCATION)
 				/* Allocate traversing within the range */
-				block = alloc_range_bias(mm, start, end, order);
+				block = __drm_buddy_alloc_range_bias(mm, start, end,
+								     order, flags);
 			else
 				/* Allocate from freelist */
 				block = alloc_from_freelist(mm, order, flags);
@@ -808,6 +919,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 
 		mark_allocated(block);
 		mm->avail -= drm_buddy_block_size(mm, block);
+		if (drm_buddy_block_is_clear(block))
+			mm->clear_avail -= drm_buddy_block_size(mm, block);
 		kmemleak_update_trace(block);
 		list_add_tail(&block->link, &allocated);
 
@@ -846,7 +959,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 	return 0;
 
 err_free:
-	drm_buddy_free_list(mm, &allocated);
+	drm_buddy_free_list(mm, &allocated, 0);
 	return err;
 }
 EXPORT_SYMBOL(drm_buddy_alloc_blocks);
@@ -879,8 +992,8 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
 {
 	int order;
 
-	drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB\n",
-		   mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20);
+	drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
+		   mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
 
 	for (order = mm->max_order; order >= 0; order--) {
 		struct drm_buddy_block *block;
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index 0d735d5c2b35..942345548bc3 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -126,7 +126,7 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
 	return 0;
 
 err_free_blocks:
-	drm_buddy_free_list(mm, &bman_res->blocks);
+	drm_buddy_free_list(mm, &bman_res->blocks, 0);
 	mutex_unlock(&bman->lock);
 err_free_res:
 	ttm_resource_fini(man, &bman_res->base);
@@ -141,7 +141,7 @@ static void i915_ttm_buddy_man_free(struct ttm_resource_manager *man,
 	struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
 
 	mutex_lock(&bman->lock);
-	drm_buddy_free_list(&bman->mm, &bman_res->blocks);
+	drm_buddy_free_list(&bman->mm, &bman_res->blocks, 0);
 	bman->visible_avail += bman_res->used_visible_size;
 	mutex_unlock(&bman->lock);
 
@@ -345,7 +345,7 @@ int i915_ttm_buddy_man_fini(struct ttm_device *bdev, unsigned int type)
 	ttm_set_driver_manager(bdev, type, NULL);
 
 	mutex_lock(&bman->lock);
-	drm_buddy_free_list(mm, &bman->reserved);
+	drm_buddy_free_list(mm, &bman->reserved, 0);
 	drm_buddy_fini(mm);
 	bman->visible_avail += bman->visible_reserved;
 	WARN_ON_ONCE(bman->visible_avail != bman->visible_size);
diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c
index ea2af6bd9abe..e0860fce9ebd 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -83,7 +83,7 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test)
 							  top, max_order);
 	}
 
-	drm_buddy_free_list(&mm, &holes);
+	drm_buddy_free_list(&mm, &holes, 0);
 
 	/* Nothing larger than blocks of chunk_size now available */
 	for (order = 1; order <= max_order; order++) {
@@ -95,7 +95,7 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test)
 	}
 
 	list_splice_tail(&holes, &blocks);
-	drm_buddy_free_list(&mm, &blocks);
+	drm_buddy_free_list(&mm, &blocks, 0);
 	drm_buddy_fini(&mm);
 }
 
@@ -190,7 +190,7 @@ static void drm_test_buddy_alloc_pessimistic(struct kunit *test)
 
 	list_del(&block->link);
 	drm_buddy_free_block(&mm, block);
-	drm_buddy_free_list(&mm, &blocks);
+	drm_buddy_free_list(&mm, &blocks, 0);
 	drm_buddy_fini(&mm);
 }
 
@@ -236,7 +236,7 @@ static void drm_test_buddy_alloc_optimistic(struct kunit *test)
 							   size, size, &tmp, flags),
 						  "buddy_alloc unexpectedly succeeded, it should be full!");
 
-	drm_buddy_free_list(&mm, &blocks);
+	drm_buddy_free_list(&mm, &blocks, 0);
 	drm_buddy_fini(&mm);
 }
 
@@ -271,7 +271,7 @@ static void drm_test_buddy_alloc_limit(struct kunit *test)
 						drm_buddy_block_size(&mm, block),
 						BIT_ULL(mm.max_order) * PAGE_SIZE);
 
-	drm_buddy_free_list(&mm, &allocated);
+	drm_buddy_free_list(&mm, &allocated, 0);
 	drm_buddy_fini(&mm);
 }
 
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index a5b39fc01003..f7311b59f2b0 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -6,6 +6,7 @@
 #ifndef __DRM_BUDDY_H__
 #define __DRM_BUDDY_H__
 
+#include <linux/bitfield.h>
 #include <linux/bitops.h>
 #include <linux/list.h>
 #include <linux/slab.h>
@@ -25,15 +26,19 @@
 #define DRM_BUDDY_RANGE_ALLOCATION		BIT(0)
 #define DRM_BUDDY_TOPDOWN_ALLOCATION		BIT(1)
 #define DRM_BUDDY_CONTIGUOUS_ALLOCATION		BIT(2)
+#define DRM_BUDDY_CLEAR_ALLOCATION		BIT(3)
+#define DRM_BUDDY_CLEARED			BIT(4)
 
 struct drm_buddy_block {
 #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
 #define DRM_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
+#define DRM_BUDDY_HEADER_CLEAR  GENMASK_ULL(9, 9)
+
 #define   DRM_BUDDY_ALLOCATED	   (1 << 10)
 #define   DRM_BUDDY_FREE	   (2 << 10)
 #define   DRM_BUDDY_SPLIT	   (3 << 10)
 /* Free to be used, if needed in the future */
-#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(9, 6)
+#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
 #define DRM_BUDDY_HEADER_ORDER  GENMASK_ULL(5, 0)
 	u64 header;
 
@@ -86,6 +91,7 @@ struct drm_buddy {
 	u64 chunk_size;
 	u64 size;
 	u64 avail;
+	u64 clear_avail;
 };
 
 static inline u64
@@ -112,6 +118,12 @@ drm_buddy_block_is_allocated(struct drm_buddy_block *block)
 	return drm_buddy_block_state(block) == DRM_BUDDY_ALLOCATED;
 }
 
+static inline bool
+drm_buddy_block_is_clear(struct drm_buddy_block *block)
+{
+	return block->header & DRM_BUDDY_HEADER_CLEAR;
+}
+
 static inline bool
 drm_buddy_block_is_free(struct drm_buddy_block *block)
 {
@@ -150,7 +162,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
 
 void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block);
 
-void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects);
+void drm_buddy_free_list(struct drm_buddy *mm,
+			 struct list_head *objects,
+			 unsigned long flags);
 
 void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
 void drm_buddy_block_print(struct drm_buddy *mm,
-- 
2.25.1


* [PATCH v3 2/2] drm/amdgpu: Enable clear page functionality
  2023-12-14 13:42 [PATCH v3 1/2] drm/buddy: Implement tracking clear page feature Arunpravin Paneer Selvam
@ 2023-12-14 13:42 ` Arunpravin Paneer Selvam
  2023-12-14 20:11   ` Felix Kuehling
  2023-12-17 19:13   ` kernel test robot
  2023-12-20 19:21 ` Matthew Auld
  2 siblings, 1 reply; 10+ messages in thread
From: Arunpravin Paneer Selvam @ 2023-12-14 13:42 UTC (permalink / raw)
  To: dri-devel, amd-gfx
  Cc: alexander.deucher, felix.kuehling, Arunpravin Paneer Selvam,
	christian.koenig, matthew.auld

Add clear page support to the VRAM memory region.

v1: (Christian)
  - Don't handle clear page as a TTM flag, since we don't need it when
    moving the BO back in from GTT.
  - Make a specialized version of amdgpu_fill_buffer() which only
    clears the VRAM areas which are not already cleared
  - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
    amdgpu_object.c

v2:
  - Modify the function name amdgpu_ttm_* (Alex)
  - Drop the delayed parameter (Christian)
  - Handle amdgpu_res_cleared(&cursor) just above the size
    calculation (Christian)
  - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
    in the free path to properly wait for fences etc.. (Christian)

v3: (Christian)
  - Remove the buffer clear code in the VRAM manager; instead, change
    the AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
    the DRM_BUDDY_CLEARED flag (see the condensed sketch below).
  - Remove ! from the amdgpu_res_cleared(&cursor) check.
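
In short, the release path ends up looking like this (condensed from
the amdgpu_object.c and amdgpu_vram_mgr.c hunks below; error handling
omitted):

    /* amdgpu_bo_release_notify(): zero the BO and remember that fact */
    r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
    if (!WARN_ON(r)) {
        vres = to_amdgpu_vram_mgr_resource(bo->resource);
        vres->flags |= DRM_BUDDY_CLEARED;
        amdgpu_bo_fence(abo, fence, false);
        dma_fence_put(fence);
    }

    /* amdgpu_vram_mgr_del(): hand the flag back to the buddy allocator */
    drm_buddy_free_list(mm, &vres->blocks, vres->flags);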

Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Suggested-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 22 ++++---
 .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    | 25 ++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 61 ++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
 6 files changed, 111 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index cef920a93924..be8bf375d823 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
 
 /**
  * DOC: amdgpu_object
@@ -598,8 +599,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 	if (!amdgpu_bo_support_uswc(bo->flags))
 		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 
-	if (adev->ras_enabled)
-		bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+	bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
 
 	bo->tbo.bdev = &adev->mman.bdev;
 	if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -629,15 +629,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 
 	if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
 	    bo->tbo.resource->mem_type == TTM_PL_VRAM) {
-		struct dma_fence *fence;
+		struct dma_fence *fence = NULL;
 
-		r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
+		r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
 		if (unlikely(r))
 			goto fail_unreserve;
 
-		dma_resv_add_fence(bo->tbo.base.resv, fence,
-				   DMA_RESV_USAGE_KERNEL);
-		dma_fence_put(fence);
+		if (fence) {
+			dma_resv_add_fence(bo->tbo.base.resv, fence,
+					   DMA_RESV_USAGE_KERNEL);
+			dma_fence_put(fence);
+		}
 	}
 	if (!bp->resv)
 		amdgpu_bo_unreserve(bo);
@@ -1360,8 +1362,12 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
 	if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
 		return;
 
-	r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
+	r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
 	if (!WARN_ON(r)) {
+		struct amdgpu_vram_mgr_resource *vres;
+
+		vres = to_amdgpu_vram_mgr_resource(bo->resource);
+		vres->flags |= DRM_BUDDY_CLEARED;
 		amdgpu_bo_fence(abo, fence, false);
 		dma_fence_put(fence);
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
 	}
 }
 
+/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+	struct drm_buddy_block *block;
+
+	switch (cur->mem_type) {
+	case TTM_PL_VRAM:
+		block = cur->node;
+
+		if (!amdgpu_vram_mgr_is_cleared(block))
+			return false;
+		break;
+	default:
+		return false;
+	}
+
+	return true;
+}
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 05991c5c8ddb..c63510f5cb0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -383,11 +383,15 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 	    (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
 		struct dma_fence *wipe_fence = NULL;
 
-		r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
-					false);
+		r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
+				       false);
 		if (r) {
 			goto error;
 		} else if (wipe_fence) {
+			struct amdgpu_vram_mgr_resource *vres;
+
+			vres = to_amdgpu_vram_mgr_resource(bo->resource);
+			vres->flags |= DRM_BUDDY_CLEARED;
 			dma_fence_put(fence);
 			fence = wipe_fence;
 		}
@@ -2222,6 +2226,59 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
 	return 0;
 }
 
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+			    struct dma_resv *resv,
+			    struct dma_fence **fence)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+	struct amdgpu_res_cursor cursor;
+	struct dma_fence *f = NULL;
+	u64 addr;
+	int r;
+
+	if (!adev->mman.buffer_funcs_enabled)
+		return -EINVAL;
+
+	amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
+
+	mutex_lock(&adev->mman.gtt_window_lock);
+	while (cursor.remaining) {
+		struct dma_fence *next = NULL;
+		u64 size;
+
+		if (amdgpu_res_cleared(&cursor)) {
+			amdgpu_res_next(&cursor, cursor.size);
+			continue;
+		}
+
+		/* Never clear more than 256MiB at once to avoid timeouts */
+		size = min(cursor.size, 256ULL << 20);
+
+		r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
+					  1, ring, false, &size, &addr);
+		if (r)
+			goto err;
+
+		r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
+					&next, true, true);
+		if (r)
+			goto err;
+
+		dma_fence_put(f);
+		f = next;
+
+		amdgpu_res_next(&cursor, size);
+	}
+err:
+	mutex_unlock(&adev->mman.gtt_window_lock);
+	if (fence)
+		*fence = dma_fence_get(f);
+	dma_fence_put(f);
+
+	return r;
+}
+
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 			uint32_t src_data,
 			struct dma_resv *resv,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 65ec82141a8e..b404d89d52e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -38,8 +38,6 @@
 #define AMDGPU_GTT_MAX_TRANSFER_SIZE	512
 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS	2
 
-#define AMDGPU_POISON	0xd0bed0be
-
 extern const struct attribute_group amdgpu_vram_mgr_attr_group;
 extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
 
@@ -155,6 +153,9 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 			       uint64_t size, bool tmz,
 			       struct dma_resv *resv,
 			       struct dma_fence **f);
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+			    struct dma_resv *resv,
+			    struct dma_fence **fence);
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 			uint32_t src_data,
 			struct dma_resv *resv,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index d0e199cc8f17..a25d2d511877 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -435,6 +435,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 {
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
 	struct amdgpu_device *adev = to_amdgpu_device(mgr);
+	struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
 	u64 vis_usage = 0, max_bytes, min_block_size;
 	struct amdgpu_vram_mgr_resource *vres;
 	u64 size, remaining_size, lpfn, fpfn;
@@ -486,6 +487,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
 		vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
 
+	if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
+		vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
+
 	if (fpfn || lpfn != mgr->mm.size)
 		/* Allocate blocks in desired range */
 		vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
@@ -589,7 +593,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
 
 	amdgpu_vram_mgr_do_reserve(man);
 
-	drm_buddy_free_list(mm, &vres->blocks, 0);
+	drm_buddy_free_list(mm, &vres->blocks, vres->flags);
 	mutex_unlock(&mgr->lock);
 
 	atomic64_sub(vis_usage, &mgr->vis_usage);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
index 0e04e42cf809..8478522d7366 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
@@ -53,6 +53,11 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
 	return (u64)PAGE_SIZE << drm_buddy_block_order(block);
 }
 
+static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block)
+{
+	return drm_buddy_block_is_clear(block);
+}
+
 static inline struct amdgpu_vram_mgr_resource *
 to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
 {
-- 
2.25.1


* Re: [PATCH v3 2/2] drm/amdgpu: Enable clear page functionality
  2023-12-14 13:42 ` [PATCH v3 2/2] drm/amdgpu: Enable clear page functionality Arunpravin Paneer Selvam
@ 2023-12-14 20:11   ` Felix Kuehling
  0 siblings, 0 replies; 10+ messages in thread
From: Felix Kuehling @ 2023-12-14 20:11 UTC (permalink / raw)
  To: Arunpravin Paneer Selvam, dri-devel, amd-gfx
  Cc: alexander.deucher, christian.koenig, matthew.auld


On 2023-12-14 08:42, Arunpravin Paneer Selvam wrote:
> Add clear page support in vram memory region.
>
> v1:(Christian)
>    - Dont handle clear page as TTM flag since when moving the BO back
>      in from GTT again we don't need that.
>    - Make a specialized version of amdgpu_fill_buffer() which only
>      clears the VRAM areas which are not already cleared
>    - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
>      amdgpu_object.c
>
> v2:
>    - Modify the function name amdgpu_ttm_* (Alex)
>    - Drop the delayed parameter (Christian)
>    - handle amdgpu_res_cleared(&cursor) just above the size
>      calculation (Christian)
>    - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
>      in the free path to properly wait for fences etc.. (Christian)
>
> v3:(Christian)
>    - Remove buffer clear code in VRAM manager instead change the
>      AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
>      the DRM_BUDDY_CLEARED flag.
>    - Remove ! from amdgpu_res_cleared(&cursor) check.
>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> Suggested-by: Christian König <christian.koenig@amd.com>

Acked-by: Felix Kuehling <felix.kuehling@amd.com>


> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 22 ++++---
>   .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    | 25 ++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 61 ++++++++++++++++++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |  5 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
>   6 files changed, 111 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index cef920a93924..be8bf375d823 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -39,6 +39,7 @@
>   #include "amdgpu.h"
>   #include "amdgpu_trace.h"
>   #include "amdgpu_amdkfd.h"
> +#include "amdgpu_vram_mgr.h"
>   
>   /**
>    * DOC: amdgpu_object
> @@ -598,8 +599,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>   	if (!amdgpu_bo_support_uswc(bo->flags))
>   		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
>   
> -	if (adev->ras_enabled)
> -		bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
> +	bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
>   
>   	bo->tbo.bdev = &adev->mman.bdev;
>   	if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
> @@ -629,15 +629,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>   
>   	if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
>   	    bo->tbo.resource->mem_type == TTM_PL_VRAM) {
> -		struct dma_fence *fence;
> +		struct dma_fence *fence = NULL;
>   
> -		r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
> +		r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
>   		if (unlikely(r))
>   			goto fail_unreserve;
>   
> -		dma_resv_add_fence(bo->tbo.base.resv, fence,
> -				   DMA_RESV_USAGE_KERNEL);
> -		dma_fence_put(fence);
> +		if (fence) {
> +			dma_resv_add_fence(bo->tbo.base.resv, fence,
> +					   DMA_RESV_USAGE_KERNEL);
> +			dma_fence_put(fence);
> +		}
>   	}
>   	if (!bp->resv)
>   		amdgpu_bo_unreserve(bo);
> @@ -1360,8 +1362,12 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
>   	if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
>   		return;
>   
> -	r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
> +	r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
>   	if (!WARN_ON(r)) {
> +		struct amdgpu_vram_mgr_resource *vres;
> +
> +		vres = to_amdgpu_vram_mgr_resource(bo->resource);
> +		vres->flags |= DRM_BUDDY_CLEARED;
>   		amdgpu_bo_fence(abo, fence, false);
>   		dma_fence_put(fence);
>   	}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> index 381101d2bf05..50fcd86e1033 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> @@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
>   	}
>   }
>   
> +/**
> + * amdgpu_res_cleared - check if blocks are cleared
> + *
> + * @cur: the cursor to extract the block
> + *
> + * Check if the @cur block is cleared
> + */
> +static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
> +{
> +	struct drm_buddy_block *block;
> +
> +	switch (cur->mem_type) {
> +	case TTM_PL_VRAM:
> +		block = cur->node;
> +
> +		if (!amdgpu_vram_mgr_is_cleared(block))
> +			return false;
> +		break;
> +	default:
> +		return false;
> +	}
> +
> +	return true;
> +}
> +
>   #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 05991c5c8ddb..c63510f5cb0f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -383,11 +383,15 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
>   	    (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
>   		struct dma_fence *wipe_fence = NULL;
>   
> -		r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
> -					false);
> +		r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
> +				       false);
>   		if (r) {
>   			goto error;
>   		} else if (wipe_fence) {
> +			struct amdgpu_vram_mgr_resource *vres;
> +
> +			vres = to_amdgpu_vram_mgr_resource(bo->resource);
> +			vres->flags |= DRM_BUDDY_CLEARED;
>   			dma_fence_put(fence);
>   			fence = wipe_fence;
>   		}
> @@ -2222,6 +2226,59 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
>   	return 0;
>   }
>   
> +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
> +			    struct dma_resv *resv,
> +			    struct dma_fence **fence)
> +{
> +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> +	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> +	struct amdgpu_res_cursor cursor;
> +	struct dma_fence *f = NULL;
> +	u64 addr;
> +	int r;
> +
> +	if (!adev->mman.buffer_funcs_enabled)
> +		return -EINVAL;
> +
> +	amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
> +
> +	mutex_lock(&adev->mman.gtt_window_lock);
> +	while (cursor.remaining) {
> +		struct dma_fence *next = NULL;
> +		u64 size;
> +
> +		if (amdgpu_res_cleared(&cursor)) {
> +			amdgpu_res_next(&cursor, cursor.size);
> +			continue;
> +		}
> +
> +		/* Never clear more than 256MiB at once to avoid timeouts */
> +		size = min(cursor.size, 256ULL << 20);
> +
> +		r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
> +					  1, ring, false, &size, &addr);
> +		if (r)
> +			goto err;
> +
> +		r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
> +					&next, true, true);
> +		if (r)
> +			goto err;
> +
> +		dma_fence_put(f);
> +		f = next;
> +
> +		amdgpu_res_next(&cursor, size);
> +	}
> +err:
> +	mutex_unlock(&adev->mman.gtt_window_lock);
> +	if (fence)
> +		*fence = dma_fence_get(f);
> +	dma_fence_put(f);
> +
> +	return r;
> +}
> +
>   int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>   			uint32_t src_data,
>   			struct dma_resv *resv,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 65ec82141a8e..b404d89d52e5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -38,8 +38,6 @@
>   #define AMDGPU_GTT_MAX_TRANSFER_SIZE	512
>   #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS	2
>   
> -#define AMDGPU_POISON	0xd0bed0be
> -
>   extern const struct attribute_group amdgpu_vram_mgr_attr_group;
>   extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
>   
> @@ -155,6 +153,9 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>   			       uint64_t size, bool tmz,
>   			       struct dma_resv *resv,
>   			       struct dma_fence **f);
> +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
> +			    struct dma_resv *resv,
> +			    struct dma_fence **fence);
>   int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>   			uint32_t src_data,
>   			struct dma_resv *resv,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> index d0e199cc8f17..a25d2d511877 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> @@ -435,6 +435,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>   {
>   	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>   	struct amdgpu_device *adev = to_amdgpu_device(mgr);
> +	struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
>   	u64 vis_usage = 0, max_bytes, min_block_size;
>   	struct amdgpu_vram_mgr_resource *vres;
>   	u64 size, remaining_size, lpfn, fpfn;
> @@ -486,6 +487,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>   	if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>   		vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
>   
> +	if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
> +		vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
> +
>   	if (fpfn || lpfn != mgr->mm.size)
>   		/* Allocate blocks in desired range */
>   		vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
> @@ -589,7 +593,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
>   
>   	amdgpu_vram_mgr_do_reserve(man);
>   
> -	drm_buddy_free_list(mm, &vres->blocks, 0);
> +	drm_buddy_free_list(mm, &vres->blocks, vres->flags);
>   	mutex_unlock(&mgr->lock);
>   
>   	atomic64_sub(vis_usage, &mgr->vis_usage);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> index 0e04e42cf809..8478522d7366 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> @@ -53,6 +53,11 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
>   	return (u64)PAGE_SIZE << drm_buddy_block_order(block);
>   }
>   
> +static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block)
> +{
> +	return drm_buddy_block_is_clear(block);
> +}
> +
>   static inline struct amdgpu_vram_mgr_resource *
>   to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
>   {


* Re: [PATCH v3 1/2] drm/buddy: Implement tracking clear page feature
  2023-12-14 13:42 [PATCH v3 1/2] drm/buddy: Implement tracking clear page feature Arunpravin Paneer Selvam
@ 2023-12-17 19:13   ` kernel test robot
  2023-12-17 19:13   ` kernel test robot
  2023-12-20 19:21 ` Matthew Auld
  2 siblings, 0 replies; 10+ messages in thread
From: kernel test robot @ 2023-12-17 19:13 UTC (permalink / raw)
  To: Arunpravin Paneer Selvam, dri-devel, amd-gfx
  Cc: oe-kbuild-all, alexander.deucher, felix.kuehling,
	Arunpravin Paneer Selvam, christian.koenig, matthew.auld

Hi Arunpravin,

kernel test robot noticed the following build warnings:

[auto build test WARNING on drm-misc/drm-misc-next]
[also build test WARNING on drm-intel/for-linux-next drm-intel/for-linux-next-fixes drm/drm-next drm-exynos/exynos-drm-next drm-tip/drm-tip linus/master v6.7-rc5 next-20231215]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Arunpravin-Paneer-Selvam/drm-amdgpu-Enable-clear-page-functionality/20231214-214811
base:   git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
patch link:    https://lore.kernel.org/r/20231214134240.3183-1-Arunpravin.PaneerSelvam%40amd.com
patch subject: [PATCH v3 1/2] drm/buddy: Implement tracking clear page feature
config: arc-randconfig-001-20231215 (https://download.01.org/0day-ci/archive/20231218/202312180258.cty6XurG-lkp@intel.com/config)
compiler: arc-elf-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231218/202312180258.cty6XurG-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202312180258.cty6XurG-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> scripts/kernel-doc: drivers/gpu/drm/drm_buddy.c:337: warning: Function parameter or struct member 'flags' not described in 'drm_buddy_free_list'

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

* Re: [PATCH v3 1/2] drm/buddy: Implement tracking clear page feature
  2023-12-14 13:42 [PATCH v3 1/2] drm/buddy: Implement tracking clear page feature Arunpravin Paneer Selvam
  2023-12-14 13:42 ` [PATCH v3 2/2] drm/amdgpu: Enable clear page functionality Arunpravin Paneer Selvam
  2023-12-17 19:13   ` kernel test robot
@ 2023-12-20 19:21 ` Matthew Auld
  2023-12-21 10:31   ` Arunpravin Paneer Selvam
  2024-01-30 20:30   ` Arunpravin Paneer Selvam
  2 siblings, 2 replies; 10+ messages in thread
From: Matthew Auld @ 2023-12-20 19:21 UTC (permalink / raw)
  To: Arunpravin Paneer Selvam, dri-devel, amd-gfx
  Cc: alexander.deucher, felix.kuehling, christian.koenig

Hi,

On 14/12/2023 13:42, Arunpravin Paneer Selvam wrote:
> - Add tracking clear page feature.
> 
> - Driver should enable the DRM_BUDDY_CLEARED flag if it
>    successfully clears the blocks in the free path. On the otherhand,
>    DRM buddy marks each block as cleared.
> 
> - Track the available cleared pages size
> 
> - If driver requests cleared memory we prefer cleared memory
>    but fallback to uncleared if we can't find the cleared blocks.
>    when driver requests uncleared memory we try to use uncleared but
>    fallback to cleared memory if necessary.
> 
> - When a block gets freed we clear it and mark the freed block as cleared,
>    when there are buddies which are cleared as well we can merge them.
>    Otherwise, we prefer to keep the blocks as separated.

I was not involved, but it looks like we have also tried enabling the
clear-on-free idea for VRAM in i915 and then also tracking that in the
allocator; however, that work is unfortunately not upstream. The code
is open source though:
https://github.com/intel-gpu/intel-gpu-i915-backports/blob/backport/main/drivers/gpu/drm/i915/i915_buddy.c#L300

It looks like some of the design differences there are having two
separate free lists, i.e. mm->clean and mm->dirty (sounds reasonable
to me), plus the inclusion of a de-fragmentation routine: since buddy
blocks are now not always merged back, we might choose to run the
defrag in some cases, which also sounds reasonable. IIRC in amdgpu,
userspace can control the page size for an allocation, so perhaps you
would want to run the defrag first if an allocation fails, before
trying to evict stuff?
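
To make that last suggestion concrete, a rough sketch (assuming a
hypothetical drm_buddy_defrag() helper, which is not part of this
series) could look like:

static int alloc_with_defrag(struct drm_buddy *mm, u64 start, u64 end,
			     u64 size, u64 min_block_size,
			     struct list_head *blocks, unsigned long flags)
{
	int err;

	err = drm_buddy_alloc_blocks(mm, start, end, size, min_block_size,
				     blocks, flags);
	if (err != -ENOSPC)
		return err;

	/* hypothetical helper: merge free buddies kept apart by clear state */
	drm_buddy_defrag(mm, min_block_size);

	/* retry once before falling back to eviction */
	return drm_buddy_alloc_blocks(mm, start, end, size, min_block_size,
				      blocks, flags);
}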

> 
> v1: (Christian)
>    - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
>      cleared. Else, reset the clear flag for each block in the list.
> 
>    - For merging the 2 cleared blocks compare as below,
>      drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)
> 
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> Suggested-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
>   drivers/gpu/drm/drm_buddy.c                   | 169 +++++++++++++++---
>   drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
>   drivers/gpu/drm/tests/drm_buddy_test.c        |  10 +-
>   include/drm/drm_buddy.h                       |  18 +-
>   5 files changed, 168 insertions(+), 41 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> index 08916538a615..d0e199cc8f17 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> @@ -556,7 +556,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>   	return 0;
>   
>   error_free_blocks:
> -	drm_buddy_free_list(mm, &vres->blocks);
> +	drm_buddy_free_list(mm, &vres->blocks, 0);
>   	mutex_unlock(&mgr->lock);
>   error_fini:
>   	ttm_resource_fini(man, &vres->base);
> @@ -589,7 +589,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
>   
>   	amdgpu_vram_mgr_do_reserve(man);
>   
> -	drm_buddy_free_list(mm, &vres->blocks);
> +	drm_buddy_free_list(mm, &vres->blocks, 0);
>   	mutex_unlock(&mgr->lock);
>   
>   	atomic64_sub(vis_usage, &mgr->vis_usage);
> @@ -897,7 +897,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
>   		kfree(rsv);
>   
>   	list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {
> -		drm_buddy_free_list(&mgr->mm, &rsv->allocated);
> +		drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
>   		kfree(rsv);
>   	}
>   	if (!adev->gmc.is_app_apu)
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index f57e6d74fb0e..d44172f23f05 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
>   	__list_add(&block->link, node->link.prev, &node->link);
>   }
>   
> +static void clear_reset(struct drm_buddy_block *block)
> +{
> +	block->header &= ~DRM_BUDDY_HEADER_CLEAR;
> +}
> +
> +static void mark_cleared(struct drm_buddy_block *block)
> +{
> +	block->header |= DRM_BUDDY_HEADER_CLEAR;
> +}
> +
>   static void mark_allocated(struct drm_buddy_block *block)
>   {
>   	block->header &= ~DRM_BUDDY_HEADER_STATE;
> @@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
>   	mark_free(mm, block->left);
>   	mark_free(mm, block->right);
>   
> +	if (drm_buddy_block_is_clear(block)) {
> +		mark_cleared(block->left);
> +		mark_cleared(block->right);
> +		clear_reset(block);
> +	}
> +
>   	mark_split(block);
>   
>   	return 0;
> @@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
>   		if (!drm_buddy_block_is_free(buddy))
>   			break;
>   
> +		if (drm_buddy_block_is_clear(block) !=
> +		    drm_buddy_block_is_clear(buddy))
> +			break;
> +
> +		if (drm_buddy_block_is_clear(block))
> +			mark_cleared(parent);
> +
>   		list_del(&buddy->link);
>   
>   		drm_block_free(mm, block);
> @@ -295,6 +318,9 @@ void drm_buddy_free_block(struct drm_buddy *mm,
>   {
>   	BUG_ON(!drm_buddy_block_is_allocated(block));
>   	mm->avail += drm_buddy_block_size(mm, block);
> +	if (drm_buddy_block_is_clear(block))
> +		mm->clear_avail += drm_buddy_block_size(mm, block);
> +
>   	__drm_buddy_free(mm, block);
>   }
>   EXPORT_SYMBOL(drm_buddy_free_block);
> @@ -305,10 +331,20 @@ EXPORT_SYMBOL(drm_buddy_free_block);
>    * @mm: DRM buddy manager
>    * @objects: input list head to free blocks
>    */
> -void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects)
> +void drm_buddy_free_list(struct drm_buddy *mm,
> +			 struct list_head *objects,
> +			 unsigned long flags)
>   {
>   	struct drm_buddy_block *block, *on;
>   
> +	if (flags & DRM_BUDDY_CLEARED) {
> +		list_for_each_entry(block, objects, link)
> +			mark_cleared(block);
> +	} else {
> +		list_for_each_entry(block, objects, link)
> +			clear_reset(block);
> +	}
> +
>   	list_for_each_entry_safe(block, on, objects, link) {
>   		drm_buddy_free_block(mm, block);
>   		cond_resched();
> @@ -328,9 +364,11 @@ static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
>   }
>   
>   static struct drm_buddy_block *
> -alloc_range_bias(struct drm_buddy *mm,
> -		 u64 start, u64 end,
> -		 unsigned int order)
> +__alloc_range_bias(struct drm_buddy *mm,
> +		   u64 start, u64 end,
> +		   unsigned int order,
> +		   unsigned long flags,
> +		   bool fallback)
>   {
>   	struct drm_buddy_block *block;
>   	struct drm_buddy_block *buddy;
> @@ -369,6 +407,15 @@ alloc_range_bias(struct drm_buddy *mm,
>   
>   		if (contains(start, end, block_start, block_end) &&
>   		    order == drm_buddy_block_order(block)) {
> +			if (!fallback) {
> +				if (flags & DRM_BUDDY_CLEAR_ALLOCATION) {
> +					if (!drm_buddy_block_is_clear(block))
> +						continue;
> +				} else {
> +					if (drm_buddy_block_is_clear(block))
> +						continue;
> +				}
> +			}
>   			/*
>   			 * Find the free block within the range.
>   			 */
> @@ -405,25 +452,58 @@ alloc_range_bias(struct drm_buddy *mm,
>   }
>   
>   static struct drm_buddy_block *
> -get_maxblock(struct drm_buddy *mm, unsigned int order)
> +__drm_buddy_alloc_range_bias(struct drm_buddy *mm,
> +			     u64 start, u64 end,
> +			     unsigned int order,
> +			     unsigned long flags)
> +{
> +	struct drm_buddy_block *block;
> +	bool fallback = 0;
> +
> +	block = __alloc_range_bias(mm, start, end, order,
> +				   flags, fallback);
> +	if (IS_ERR(block))
> +		return __alloc_range_bias(mm, start, end, order,
> +					  flags, !fallback);
> +
> +	return block;
> +}
> +
> +static struct drm_buddy_block *
> +get_maxblock(struct drm_buddy *mm, unsigned int order,
> +	     unsigned long flags)
>   {
> -	struct drm_buddy_block *max_block = NULL, *node;
> +	struct drm_buddy_block *max_block = NULL, *block = NULL;
>   	unsigned int i;
>   
>   	for (i = order; i <= mm->max_order; ++i) {
> -		if (!list_empty(&mm->free_list[i])) {
> -			node = list_last_entry(&mm->free_list[i],
> -					       struct drm_buddy_block,
> -					       link);
> -			if (!max_block) {
> -				max_block = node;
> -				continue;
> +		struct drm_buddy_block *tmp_block;
> +
> +		list_for_each_entry_reverse(tmp_block, &mm->free_list[i], link) {
> +			if (flags & DRM_BUDDY_CLEAR_ALLOCATION) {
> +				/* Find a cleared block */
> +				if (!drm_buddy_block_is_clear(tmp_block))
> +					continue;
> +			} else {
> +				if (drm_buddy_block_is_clear(tmp_block))
> +					continue;
>   			}
>   
> -			if (drm_buddy_block_offset(node) >
> -			    drm_buddy_block_offset(max_block)) {
> -				max_block = node;
> -			}
> +			block = tmp_block;
> +			break;
> +		}
> +
> +		if (!block)
> +			continue;
> +
> +		if (!max_block) {
> +			max_block = block;
> +			continue;
> +		}
> +
> +		if (drm_buddy_block_offset(block) >
> +		    drm_buddy_block_offset(max_block)) {
> +			max_block = block;
>   		}
>   	}
>   
> @@ -440,11 +520,35 @@ alloc_from_freelist(struct drm_buddy *mm,
>   	int err;
>   
>   	if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
> -		block = get_maxblock(mm, order);
> +		block = get_maxblock(mm, order, flags);
>   		if (block)
>   			/* Store the obtained block order */
>   			tmp = drm_buddy_block_order(block);
>   	} else {
> +		for (tmp = order; tmp <= mm->max_order; ++tmp) {
> +			struct drm_buddy_block *tmp_block;
> +
> +			list_for_each_entry_reverse(tmp_block, &mm->free_list[tmp], link) {
> +				if (flags & DRM_BUDDY_CLEAR_ALLOCATION) {
> +					/* Find a cleared block */
> +					if (!drm_buddy_block_is_clear(tmp_block))
> +						continue;
> +				} else {
> +					if (drm_buddy_block_is_clear(tmp_block))
> +						continue;
> +				}
> +
> +				block = tmp_block;
> +				break;
> +			}
> +
> +			if (block)
> +				break;
> +		}
> +	}
> +
> +	if (!block) {
> +		/* Fallback method */
>   		for (tmp = order; tmp <= mm->max_order; ++tmp) {
>   			if (!list_empty(&mm->free_list[tmp])) {
>   				block = list_last_entry(&mm->free_list[tmp],
> @@ -454,10 +558,10 @@ alloc_from_freelist(struct drm_buddy *mm,
>   					break;
>   			}
>   		}
> -	}
>   
> -	if (!block)
> -		return ERR_PTR(-ENOSPC);
> +		if (!block)
> +			return ERR_PTR(-ENOSPC);
> +	}
>   
>   	BUG_ON(!drm_buddy_block_is_free(block));
>   
> @@ -524,6 +628,8 @@ static int __alloc_range(struct drm_buddy *mm,
>   			mark_allocated(block);
>   			total_allocated += drm_buddy_block_size(mm, block);
>   			mm->avail -= drm_buddy_block_size(mm, block);
> +			if (drm_buddy_block_is_clear(block))
> +				mm->clear_avail -= drm_buddy_block_size(mm, block);
>   			list_add_tail(&block->link, &allocated);
>   			continue;
>   		}
> @@ -558,7 +664,7 @@ static int __alloc_range(struct drm_buddy *mm,
>   		list_splice_tail(&allocated, blocks);
>   		*total_allocated_on_err = total_allocated;
>   	} else {
> -		drm_buddy_free_list(mm, &allocated);
> +		drm_buddy_free_list(mm, &allocated, 0);
>   	}
>   
>   	return err;
> @@ -624,11 +730,11 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
>   			list_splice(&blocks_lhs, blocks);
>   			return 0;
>   		} else if (err != -ENOSPC) {
> -			drm_buddy_free_list(mm, blocks);
> +			drm_buddy_free_list(mm, blocks, 0);
>   			return err;
>   		}
>   		/* Free blocks for the next iteration */
> -		drm_buddy_free_list(mm, blocks);
> +		drm_buddy_free_list(mm, blocks, 0);
>   	}
>   
>   	return -ENOSPC;
> @@ -684,6 +790,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>   	list_del(&block->link);
>   	mark_free(mm, block);
>   	mm->avail += drm_buddy_block_size(mm, block);
> +	if (drm_buddy_block_is_clear(block))
> +		mm->clear_avail += drm_buddy_block_size(mm, block);
>   
>   	/* Prevent recursively freeing this node */
>   	parent = block->parent;
> @@ -695,6 +803,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>   	if (err) {
>   		mark_allocated(block);
>   		mm->avail -= drm_buddy_block_size(mm, block);
> +		if (drm_buddy_block_is_clear(block))
> +			mm->clear_avail -= drm_buddy_block_size(mm, block);
>   		list_add(&block->link, blocks);
>   	}
>   
> @@ -782,7 +892,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   		do {
>   			if (flags & DRM_BUDDY_RANGE_ALLOCATION)
>   				/* Allocate traversing within the range */
> -				block = alloc_range_bias(mm, start, end, order);
> +				block = __drm_buddy_alloc_range_bias(mm, start, end,
> +								     order, flags);
>   			else
>   				/* Allocate from freelist */
>   				block = alloc_from_freelist(mm, order, flags);
> @@ -808,6 +919,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   
>   		mark_allocated(block);
>   		mm->avail -= drm_buddy_block_size(mm, block);
> +		if (drm_buddy_block_is_clear(block))
> +			mm->clear_avail -= drm_buddy_block_size(mm, block);
>   		kmemleak_update_trace(block);
>   		list_add_tail(&block->link, &allocated);
>   
> @@ -846,7 +959,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   	return 0;
>   
>   err_free:
> -	drm_buddy_free_list(mm, &allocated);
> +	drm_buddy_free_list(mm, &allocated, 0);
>   	return err;
>   }
>   EXPORT_SYMBOL(drm_buddy_alloc_blocks);
> @@ -879,8 +992,8 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
>   {
>   	int order;
>   
> -	drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB\n",
> -		   mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20);
> +	drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
> +		   mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
>   
>   	for (order = mm->max_order; order >= 0; order--) {
>   		struct drm_buddy_block *block;
> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> index 0d735d5c2b35..942345548bc3 100644
> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> @@ -126,7 +126,7 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
>   	return 0;
>   
>   err_free_blocks:
> -	drm_buddy_free_list(mm, &bman_res->blocks);
> +	drm_buddy_free_list(mm, &bman_res->blocks, 0);
>   	mutex_unlock(&bman->lock);
>   err_free_res:
>   	ttm_resource_fini(man, &bman_res->base);
> @@ -141,7 +141,7 @@ static void i915_ttm_buddy_man_free(struct ttm_resource_manager *man,
>   	struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
>   
>   	mutex_lock(&bman->lock);
> -	drm_buddy_free_list(&bman->mm, &bman_res->blocks);
> +	drm_buddy_free_list(&bman->mm, &bman_res->blocks, 0);
>   	bman->visible_avail += bman_res->used_visible_size;
>   	mutex_unlock(&bman->lock);
>   
> @@ -345,7 +345,7 @@ int i915_ttm_buddy_man_fini(struct ttm_device *bdev, unsigned int type)
>   	ttm_set_driver_manager(bdev, type, NULL);
>   
>   	mutex_lock(&bman->lock);
> -	drm_buddy_free_list(mm, &bman->reserved);
> +	drm_buddy_free_list(mm, &bman->reserved, 0);
>   	drm_buddy_fini(mm);
>   	bman->visible_avail += bman->visible_reserved;
>   	WARN_ON_ONCE(bman->visible_avail != bman->visible_size);
> diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c
> index ea2af6bd9abe..e0860fce9ebd 100644
> --- a/drivers/gpu/drm/tests/drm_buddy_test.c
> +++ b/drivers/gpu/drm/tests/drm_buddy_test.c
> @@ -83,7 +83,7 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test)
>   							  top, max_order);
>   	}
>   
> -	drm_buddy_free_list(&mm, &holes);
> +	drm_buddy_free_list(&mm, &holes, 0);
>   
>   	/* Nothing larger than blocks of chunk_size now available */
>   	for (order = 1; order <= max_order; order++) {
> @@ -95,7 +95,7 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test)
>   	}
>   
>   	list_splice_tail(&holes, &blocks);
> -	drm_buddy_free_list(&mm, &blocks);
> +	drm_buddy_free_list(&mm, &blocks, 0);
>   	drm_buddy_fini(&mm);
>   }
>   
> @@ -190,7 +190,7 @@ static void drm_test_buddy_alloc_pessimistic(struct kunit *test)
>   
>   	list_del(&block->link);
>   	drm_buddy_free_block(&mm, block);
> -	drm_buddy_free_list(&mm, &blocks);
> +	drm_buddy_free_list(&mm, &blocks, 0);
>   	drm_buddy_fini(&mm);
>   }
>   
> @@ -236,7 +236,7 @@ static void drm_test_buddy_alloc_optimistic(struct kunit *test)
>   							   size, size, &tmp, flags),
>   						  "buddy_alloc unexpectedly succeeded, it should be full!");
>   
> -	drm_buddy_free_list(&mm, &blocks);
> +	drm_buddy_free_list(&mm, &blocks, 0);
>   	drm_buddy_fini(&mm);
>   }
>   
> @@ -271,7 +271,7 @@ static void drm_test_buddy_alloc_limit(struct kunit *test)
>   						drm_buddy_block_size(&mm, block),
>   						BIT_ULL(mm.max_order) * PAGE_SIZE);
>   
> -	drm_buddy_free_list(&mm, &allocated);
> +	drm_buddy_free_list(&mm, &allocated, 0);
>   	drm_buddy_fini(&mm);
>   }
>   
> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
> index a5b39fc01003..f7311b59f2b0 100644
> --- a/include/drm/drm_buddy.h
> +++ b/include/drm/drm_buddy.h
> @@ -6,6 +6,7 @@
>   #ifndef __DRM_BUDDY_H__
>   #define __DRM_BUDDY_H__
>   
> +#include <linux/bitfield.h>
>   #include <linux/bitops.h>
>   #include <linux/list.h>
>   #include <linux/slab.h>
> @@ -25,15 +26,19 @@
>   #define DRM_BUDDY_RANGE_ALLOCATION		BIT(0)
>   #define DRM_BUDDY_TOPDOWN_ALLOCATION		BIT(1)
>   #define DRM_BUDDY_CONTIGUOUS_ALLOCATION		BIT(2)
> +#define DRM_BUDDY_CLEAR_ALLOCATION		BIT(3)
> +#define DRM_BUDDY_CLEARED			BIT(4)
>   
>   struct drm_buddy_block {
>   #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
>   #define DRM_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
> +#define DRM_BUDDY_HEADER_CLEAR  GENMASK_ULL(9, 9)
> +
>   #define   DRM_BUDDY_ALLOCATED	   (1 << 10)
>   #define   DRM_BUDDY_FREE	   (2 << 10)
>   #define   DRM_BUDDY_SPLIT	   (3 << 10)
>   /* Free to be used, if needed in the future */
> -#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(9, 6)
> +#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
>   #define DRM_BUDDY_HEADER_ORDER  GENMASK_ULL(5, 0)
>   	u64 header;
>   
> @@ -86,6 +91,7 @@ struct drm_buddy {
>   	u64 chunk_size;
>   	u64 size;
>   	u64 avail;
> +	u64 clear_avail;
>   };
>   
>   static inline u64
> @@ -112,6 +118,12 @@ drm_buddy_block_is_allocated(struct drm_buddy_block *block)
>   	return drm_buddy_block_state(block) == DRM_BUDDY_ALLOCATED;
>   }
>   
> +static inline bool
> +drm_buddy_block_is_clear(struct drm_buddy_block *block)
> +{
> +	return block->header & DRM_BUDDY_HEADER_CLEAR;
> +}
> +
>   static inline bool
>   drm_buddy_block_is_free(struct drm_buddy_block *block)
>   {
> @@ -150,7 +162,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>   
>   void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block);
>   
> -void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects);
> +void drm_buddy_free_list(struct drm_buddy *mm,
> +			 struct list_head *objects,
> +			 unsigned long flags);
>   
>   void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
>   void drm_buddy_block_print(struct drm_buddy *mm,

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3 1/2] drm/buddy: Implement tracking clear page feature
  2023-12-20 19:21 ` Matthew Auld
@ 2023-12-21 10:31   ` Arunpravin Paneer Selvam
  2024-01-30 20:30   ` Arunpravin Paneer Selvam
  1 sibling, 0 replies; 10+ messages in thread
From: Arunpravin Paneer Selvam @ 2023-12-21 10:31 UTC (permalink / raw)
  To: Matthew Auld, dri-devel, amd-gfx
  Cc: alexander.deucher, felix.kuehling, christian.koenig

Hi Matthew,

On 12/21/2023 12:51 AM, Matthew Auld wrote:
> Hi,
>
> On 14/12/2023 13:42, Arunpravin Paneer Selvam wrote:
>> - Add tracking clear page feature.
>>
>> - Driver should enable the DRM_BUDDY_CLEARED flag if it
>>    successfully clears the blocks in the free path. On the otherhand,
>>    DRM buddy marks each block as cleared.
>>
>> - Track the available cleared pages size
>>
>> - If driver requests cleared memory we prefer cleared memory
>>    but fallback to uncleared if we can't find the cleared blocks.
>>    when driver requests uncleared memory we try to use uncleared but
>>    fallback to cleared memory if necessary.
>>
>> - When a block gets freed we clear it and mark the freed block as 
>> cleared,
>>    when there are buddies which are cleared as well we can merge them.
>>    Otherwise, we prefer to keep the blocks as separated.
>
> I was not involved, but it looks like we have also tried enabling the 
> clear-on-free idea for VRAM in i915 and then also tracking that in the 
> allocator, however that work unfortunately is not upstream. The code 
> is open source though: 
> https://github.com/intel-gpu/intel-gpu-i915-backports/blob/backport/main/drivers/gpu/drm/i915/i915_buddy.c#L300
>
> It looks like some of the design differences there are having two 
> separate free lists, so mm->clean and mm->dirty (sounds reasonable to 
> me). And also the inclusion of a de-fragmentation routine, since buddy 
> blocks are now not always merged back, we might choose to run the 
> defrag in some cases, which also sounds reasonable. IIRC in amdgpu 
> userspace can control the page-size for an allocation, so perhaps you 
> would want to run it first if the allocation fails, before trying to 
> evict stuff?
Thanks, I will check the code.

Regards,
Arun.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3 1/2] drm/buddy: Implement tracking clear page feature
  2023-12-20 19:21 ` Matthew Auld
  2023-12-21 10:31   ` Arunpravin Paneer Selvam
@ 2024-01-30 20:30   ` Arunpravin Paneer Selvam
  2024-01-31 18:29     ` Matthew Auld
  1 sibling, 1 reply; 10+ messages in thread
From: Arunpravin Paneer Selvam @ 2024-01-30 20:30 UTC (permalink / raw)
  To: Matthew Auld, dri-devel, amd-gfx
  Cc: alexander.deucher, felix.kuehling, christian.koenig

Hi Matthew,

On 12/21/2023 12:51 AM, Matthew Auld wrote:
> Hi,
>
> On 14/12/2023 13:42, Arunpravin Paneer Selvam wrote:
>> - Add tracking clear page feature.
>>
>> - Driver should enable the DRM_BUDDY_CLEARED flag if it
>>    successfully clears the blocks in the free path. On the otherhand,
>>    DRM buddy marks each block as cleared.
>>
>> - Track the available cleared pages size
>>
>> - If driver requests cleared memory we prefer cleared memory
>>    but fallback to uncleared if we can't find the cleared blocks.
>>    when driver requests uncleared memory we try to use uncleared but
>>    fallback to cleared memory if necessary.
>>
>> - When a block gets freed we clear it and mark the freed block as 
>> cleared,
>>    when there are buddies which are cleared as well we can merge them.
>>    Otherwise, we prefer to keep the blocks as separated.
>
> I was not involved, but it looks like we have also tried enabling the 
> clear-on-free idea for VRAM in i915 and then also tracking that in the 
> allocator, however that work unfortunately is not upstream. The code 
> is open source though: 
> https://github.com/intel-gpu/intel-gpu-i915-backports/blob/backport/main/drivers/gpu/drm/i915/i915_buddy.c#L300
>
> It looks like some of the design differences there are having two 
> separate free lists, so mm->clean and mm->dirty (sounds reasonable to 
> me). And also the inclusion of a de-fragmentation routine, since buddy 
> blocks are now not always merged back, we might choose to run the 
> defrag in some cases, which also sounds reasonable. IIRC in amdgpu 
> userspace can control the page-size for an allocation, so perhaps you 
> would want to run it first if the allocation fails, before trying to 
> evict stuff?
I checked the clear-on-free idea implemented in i915. In the amdgpu
version, we clear all the blocks in the amdgpu free routine and DRM
buddy only expects the DRM_BUDDY_CLEARED flag. Basically, we keep the
cleared blocks ready to be allocated when the user requests cleared
memory. We observed that this improves performance in games and
resolves the stutter issues as well. I see the i915 active-fences part
does the same job for i915. Could we move that part into the i915 free
routine and set the DRM_BUDDY_CLEARED flag there?
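
To make the flow concrete, the driver side looks roughly like the
sketch below. driver_clear_blocks() is only a placeholder for whatever
mechanism the driver uses to zero the VRAM; the only pieces taken from
this patch are the flag and the new drm_buddy_free_list() parameter:

#include <drm/drm_buddy.h>

/* hypothetical helper: zeroes the VRAM backing every block on the
 * list, returns 0 on success */
int driver_clear_blocks(struct list_head *blocks);

static void driver_free_blocks(struct drm_buddy *mm,
			       struct list_head *blocks)
{
	unsigned long flags = 0;

	if (!driver_clear_blocks(blocks))
		flags |= DRM_BUDDY_CLEARED;

	/* blocks freed with DRM_BUDDY_CLEARED stay marked as cleared
	 * and can later serve DRM_BUDDY_CLEAR_ALLOCATION requests
	 * without another clear */
	drm_buddy_free_list(mm, blocks, flags);
}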

On de-fragmentation, I have included a function which can be called at
the places where we get -ENOSPC. This routine merges the clear and
dirty blocks back together to form a larger block of the requested
size. I am wondering where else we could use this routine, since for
non-contiguous memory we already have the fallback method and for
contiguous memory we have the try-harder method which searches through
the tree.
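
Just to make the idea concrete, the core of that merge step would look
roughly like the sketch below. This is not part of the posted patch; it
would live in drm_buddy.c next to __drm_buddy_free() since it reuses
the static helpers there:

/* Force-merge one split parent whose two halves are both free. A
 * cleared half merged with a dirty half gives a dirty parent, so the
 * clear flag is only kept when both halves are cleared.
 */
static void drm_buddy_merge_pair(struct drm_buddy *mm,
				 struct drm_buddy_block *parent)
{
	struct drm_buddy_block *left = parent->left;
	struct drm_buddy_block *right = parent->right;

	if (!drm_buddy_block_is_free(left) ||
	    !drm_buddy_block_is_free(right))
		return;

	if (drm_buddy_block_is_clear(left) &&
	    drm_buddy_block_is_clear(right)) {
		mark_cleared(parent);
	} else {
		/* a cleared half swallowed by a dirty merge no longer
		 * counts towards clear_avail */
		if (drm_buddy_block_is_clear(left))
			mm->clear_avail -= drm_buddy_block_size(mm, left);
		if (drm_buddy_block_is_clear(right))
			mm->clear_avail -= drm_buddy_block_size(mm, right);
		clear_reset(parent);
	}

	/* take both halves off their free lists, release them and put
	 * the merged parent back on the free list instead */
	list_del(&left->link);
	list_del(&right->link);
	drm_block_free(mm, left);
	drm_block_free(mm, right);
	mark_free(mm, parent);
}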

I agree we can have 2 lists (a clear list and a dirty list) and this
would reduce the search iterations. But we would need to handle the
2-list design in all the functions, which might require more time for
testing on all platforms. Could we go ahead with 1 list (the free list)
for now? I am going to take up the 2-list work as my next task.
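
For reference, what I have in mind for that follow-up is roughly the
layout below; none of these names exist in the patch, it is purely to
illustrate the per-order split:

#include <drm/drm_buddy.h>

struct drm_buddy_free_area {
	struct list_head clear;		/* zeroed free blocks */
	struct list_head dirty;		/* free blocks with stale data */
};

static struct drm_buddy_block *
first_free_block(struct drm_buddy_free_area *area, bool want_clear)
{
	struct list_head *list = want_clear ? &area->clear : &area->dirty;

	/* no more walking past blocks of the wrong kind, just pick
	 * from the matching list */
	return list_first_entry_or_null(list, struct drm_buddy_block, link);
}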

Thanks,
Arun.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3 1/2] drm/buddy: Implement tracking clear page feature
  2024-01-30 20:30   ` Arunpravin Paneer Selvam
@ 2024-01-31 18:29     ` Matthew Auld
  2024-02-08 15:47       ` Arunpravin Paneer Selvam
  0 siblings, 1 reply; 10+ messages in thread
From: Matthew Auld @ 2024-01-31 18:29 UTC (permalink / raw)
  To: Arunpravin Paneer Selvam, dri-devel, amd-gfx
  Cc: alexander.deucher, felix.kuehling, christian.koenig

On 30/01/2024 20:30, Arunpravin Paneer Selvam wrote:
> Hi Matthew,
> 
> On 12/21/2023 12:51 AM, Matthew Auld wrote:
>> Hi,
>>
>> On 14/12/2023 13:42, Arunpravin Paneer Selvam wrote:
>>> - Add tracking clear page feature.
>>>
>>> - Driver should enable the DRM_BUDDY_CLEARED flag if it
>>>    successfully clears the blocks in the free path. On the otherhand,
>>>    DRM buddy marks each block as cleared.
>>>
>>> - Track the available cleared pages size
>>>
>>> - If driver requests cleared memory we prefer cleared memory
>>>    but fallback to uncleared if we can't find the cleared blocks.
>>>    when driver requests uncleared memory we try to use uncleared but
>>>    fallback to cleared memory if necessary.
>>>
>>> - When a block gets freed we clear it and mark the freed block as 
>>> cleared,
>>>    when there are buddies which are cleared as well we can merge them.
>>>    Otherwise, we prefer to keep the blocks as separated.
>>
>> I was not involved, but it looks like we have also tried enabling the 
>> clear-on-free idea for VRAM in i915 and then also tracking that in the 
>> allocator, however that work unfortunately is not upstream. The code 
>> is open source though: 
>> https://github.com/intel-gpu/intel-gpu-i915-backports/blob/backport/main/drivers/gpu/drm/i915/i915_buddy.c#L300
>>
>> It looks like some of the design differences there are having two 
>> separate free lists, so mm->clean and mm->dirty (sounds reasonable to 
>> me). And also the inclusion of a de-fragmentation routine, since buddy 
>> blocks are now not always merged back, we might choose to run the 
>> defrag in some cases, which also sounds reasonable. IIRC in amdgpu 
>> userspace can control the page-size for an allocation, so perhaps you 
>> would want to run it first if the allocation fails, before trying to 
>> evict stuff?
> I checked the clear-on-free idea implemented in i915. In the amdgpu
> version, we clear all the blocks in the amdgpu free routine and DRM
> buddy only expects the DRM_BUDDY_CLEARED flag. Basically, we keep the
> cleared blocks ready to be allocated when the user requests cleared
> memory. We observed that this improves performance in games and
> resolves the stutter issues as well. I see the i915 active-fences part
> does the same job for i915. Could we move that part into the i915 free
> routine and set the DRM_BUDDY_CLEARED flag there?
> 
> On de-fragmentation, I have included a function which can be called at
> the places where we get -ENOSPC. This routine merges the clear and
> dirty blocks back together to form a larger block of the requested
> size. I am wondering where else we could use this routine, since for
> non-contiguous memory we already have the fallback method and for
> contiguous memory we have the try-harder method which searches through
> the tree.

Don't you also want to call it from your vram manager when the
requested page size is something large, before trying to evict stuff?
That could now fail due to fragmentation IIUC. Or am I misreading
amdgpu_vram_mgr_new()?
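
Roughly what I had in mind, as a fragment slotted into the allocation
loop in amdgpu_vram_mgr_new(); drm_buddy_defrag() is the hypothetical
merge pass and the variable names are only approximate:

	r = drm_buddy_alloc_blocks(mm, fpfn, lpfn, size,
				   min_block_size, &vres->blocks,
				   vres->flags);
	if (r == -ENOSPC && min_block_size > mm->chunk_size) {
		/* large min_block_size requests can now fail purely due
		 * to clear/dirty fragmentation, so run one merge pass
		 * before reporting -ENOSPC and letting TTM start
		 * evicting */
		drm_buddy_defrag(mm, min_block_size);
		r = drm_buddy_alloc_blocks(mm, fpfn, lpfn, size,
					   min_block_size, &vres->blocks,
					   vres->flags);
	}
	if (r)
		goto error_free_blocks;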

> 
> I agree we can have 2 lists (a clear list and a dirty list) and this
> would reduce the search iterations. But we would need to handle the
> 2-list design in all the functions, which might require more time for
> testing on all platforms. Could we go ahead with 1 list (the free
> list) for now? I am going to take up the 2-list work as my next task.

Sounds good.

> 
> Thanks,
> Arun.
>>> +                }
>>> +            }
>>>               /*
>>>                * Find the free block within the range.
>>>                */
>>> @@ -405,25 +452,58 @@ alloc_range_bias(struct drm_buddy *mm,
>>>   }
>>>     static struct drm_buddy_block *
>>> -get_maxblock(struct drm_buddy *mm, unsigned int order)
>>> +__drm_buddy_alloc_range_bias(struct drm_buddy *mm,
>>> +                 u64 start, u64 end,
>>> +                 unsigned int order,
>>> +                 unsigned long flags)
>>> +{
>>> +    struct drm_buddy_block *block;
>>> +    bool fallback = 0;
>>> +
>>> +    block = __alloc_range_bias(mm, start, end, order,
>>> +                   flags, fallback);
>>> +    if (IS_ERR(block))
>>> +        return __alloc_range_bias(mm, start, end, order,
>>> +                      flags, !fallback);
>>> +
>>> +    return block;
>>> +}
>>> +
>>> +static struct drm_buddy_block *
>>> +get_maxblock(struct drm_buddy *mm, unsigned int order,
>>> +         unsigned long flags)
>>>   {
>>> -    struct drm_buddy_block *max_block = NULL, *node;
>>> +    struct drm_buddy_block *max_block = NULL, *block = NULL;
>>>       unsigned int i;
>>>         for (i = order; i <= mm->max_order; ++i) {
>>> -        if (!list_empty(&mm->free_list[i])) {
>>> -            node = list_last_entry(&mm->free_list[i],
>>> -                           struct drm_buddy_block,
>>> -                           link);
>>> -            if (!max_block) {
>>> -                max_block = node;
>>> -                continue;
>>> +        struct drm_buddy_block *tmp_block;
>>> +
>>> +        list_for_each_entry_reverse(tmp_block, &mm->free_list[i], 
>>> link) {
>>> +            if (flags & DRM_BUDDY_CLEAR_ALLOCATION) {
>>> +                /* Find a cleared block */
>>> +                if (!drm_buddy_block_is_clear(tmp_block))
>>> +                    continue;
>>> +            } else {
>>> +                if (drm_buddy_block_is_clear(tmp_block))
>>> +                    continue;
>>>               }
>>>   -            if (drm_buddy_block_offset(node) >
>>> -                drm_buddy_block_offset(max_block)) {
>>> -                max_block = node;
>>> -            }
>>> +            block = tmp_block;
>>> +            break;
>>> +        }
>>> +
>>> +        if (!block)
>>> +            continue;
>>> +
>>> +        if (!max_block) {
>>> +            max_block = block;
>>> +            continue;
>>> +        }
>>> +
>>> +        if (drm_buddy_block_offset(block) >
>>> +            drm_buddy_block_offset(max_block)) {
>>> +            max_block = block;
>>>           }
>>>       }
>>>   @@ -440,11 +520,35 @@ alloc_from_freelist(struct drm_buddy *mm,
>>>       int err;
>>>         if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
>>> -        block = get_maxblock(mm, order);
>>> +        block = get_maxblock(mm, order, flags);
>>>           if (block)
>>>               /* Store the obtained block order */
>>>               tmp = drm_buddy_block_order(block);
>>>       } else {
>>> +        for (tmp = order; tmp <= mm->max_order; ++tmp) {
>>> +            struct drm_buddy_block *tmp_block;
>>> +
>>> +            list_for_each_entry_reverse(tmp_block, 
>>> &mm->free_list[tmp], link) {
>>> +                if (flags & DRM_BUDDY_CLEAR_ALLOCATION) {
>>> +                    /* Find a cleared block */
>>> +                    if (!drm_buddy_block_is_clear(tmp_block))
>>> +                        continue;
>>> +                } else {
>>> +                    if (drm_buddy_block_is_clear(tmp_block))
>>> +                        continue;
>>> +                }
>>> +
>>> +                block = tmp_block;
>>> +                break;
>>> +            }
>>> +
>>> +            if (block)
>>> +                break;
>>> +        }
>>> +    }
>>> +
>>> +    if (!block) {
>>> +        /* Fallback method */
>>>           for (tmp = order; tmp <= mm->max_order; ++tmp) {
>>>               if (!list_empty(&mm->free_list[tmp])) {
>>>                   block = list_last_entry(&mm->free_list[tmp],
>>> @@ -454,10 +558,10 @@ alloc_from_freelist(struct drm_buddy *mm,
>>>                       break;
>>>               }
>>>           }
>>> -    }
>>>   -    if (!block)
>>> -        return ERR_PTR(-ENOSPC);
>>> +        if (!block)
>>> +            return ERR_PTR(-ENOSPC);
>>> +    }
>>>         BUG_ON(!drm_buddy_block_is_free(block));
>>>   @@ -524,6 +628,8 @@ static int __alloc_range(struct drm_buddy *mm,
>>>               mark_allocated(block);
>>>               total_allocated += drm_buddy_block_size(mm, block);
>>>               mm->avail -= drm_buddy_block_size(mm, block);
>>> +            if (drm_buddy_block_is_clear(block))
>>> +                mm->clear_avail -= drm_buddy_block_size(mm, block);
>>>               list_add_tail(&block->link, &allocated);
>>>               continue;
>>>           }
>>> @@ -558,7 +664,7 @@ static int __alloc_range(struct drm_buddy *mm,
>>>           list_splice_tail(&allocated, blocks);
>>>           *total_allocated_on_err = total_allocated;
>>>       } else {
>>> -        drm_buddy_free_list(mm, &allocated);
>>> +        drm_buddy_free_list(mm, &allocated, 0);
>>>       }
>>>         return err;
>>> @@ -624,11 +730,11 @@ static int __alloc_contig_try_harder(struct 
>>> drm_buddy *mm,
>>>               list_splice(&blocks_lhs, blocks);
>>>               return 0;
>>>           } else if (err != -ENOSPC) {
>>> -            drm_buddy_free_list(mm, blocks);
>>> +            drm_buddy_free_list(mm, blocks, 0);
>>>               return err;
>>>           }
>>>           /* Free blocks for the next iteration */
>>> -        drm_buddy_free_list(mm, blocks);
>>> +        drm_buddy_free_list(mm, blocks, 0);
>>>       }
>>>         return -ENOSPC;
>>> @@ -684,6 +790,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>>>       list_del(&block->link);
>>>       mark_free(mm, block);
>>>       mm->avail += drm_buddy_block_size(mm, block);
>>> +    if (drm_buddy_block_is_clear(block))
>>> +        mm->clear_avail += drm_buddy_block_size(mm, block);
>>>         /* Prevent recursively freeing this node */
>>>       parent = block->parent;
>>> @@ -695,6 +803,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>>>       if (err) {
>>>           mark_allocated(block);
>>>           mm->avail -= drm_buddy_block_size(mm, block);
>>> +        if (drm_buddy_block_is_clear(block))
>>> +            mm->clear_avail -= drm_buddy_block_size(mm, block);
>>>           list_add(&block->link, blocks);
>>>       }
>>>   @@ -782,7 +892,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>>           do {
>>>               if (flags & DRM_BUDDY_RANGE_ALLOCATION)
>>>                   /* Allocate traversing within the range */
>>> -                block = alloc_range_bias(mm, start, end, order);
>>> +                block = __drm_buddy_alloc_range_bias(mm, start, end,
>>> +                                     order, flags);
>>>               else
>>>                   /* Allocate from freelist */
>>>                   block = alloc_from_freelist(mm, order, flags);
>>> @@ -808,6 +919,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>>             mark_allocated(block);
>>>           mm->avail -= drm_buddy_block_size(mm, block);
>>> +        if (drm_buddy_block_is_clear(block))
>>> +            mm->clear_avail -= drm_buddy_block_size(mm, block);
>>>           kmemleak_update_trace(block);
>>>           list_add_tail(&block->link, &allocated);
>>>   @@ -846,7 +959,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>>       return 0;
>>>     err_free:
>>> -    drm_buddy_free_list(mm, &allocated);
>>> +    drm_buddy_free_list(mm, &allocated, 0);
>>>       return err;
>>>   }
>>>   EXPORT_SYMBOL(drm_buddy_alloc_blocks);
>>> @@ -879,8 +992,8 @@ void drm_buddy_print(struct drm_buddy *mm, struct 
>>> drm_printer *p)
>>>   {
>>>       int order;
>>>   -    drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: 
>>> %lluMiB\n",
>>> -           mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20);
>>> +    drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: 
>>> %lluMiB, clear_free: %lluMiB\n",
>>> +           mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, 
>>> mm->clear_avail >> 20);
>>>         for (order = mm->max_order; order >= 0; order--) {
>>>           struct drm_buddy_block *block;
>>> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c 
>>> b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>>> index 0d735d5c2b35..942345548bc3 100644
>>> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>>> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>>> @@ -126,7 +126,7 @@ static int i915_ttm_buddy_man_alloc(struct 
>>> ttm_resource_manager *man,
>>>       return 0;
>>>     err_free_blocks:
>>> -    drm_buddy_free_list(mm, &bman_res->blocks);
>>> +    drm_buddy_free_list(mm, &bman_res->blocks, 0);
>>>       mutex_unlock(&bman->lock);
>>>   err_free_res:
>>>       ttm_resource_fini(man, &bman_res->base);
>>> @@ -141,7 +141,7 @@ static void i915_ttm_buddy_man_free(struct 
>>> ttm_resource_manager *man,
>>>       struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
>>>         mutex_lock(&bman->lock);
>>> -    drm_buddy_free_list(&bman->mm, &bman_res->blocks);
>>> +    drm_buddy_free_list(&bman->mm, &bman_res->blocks, 0);
>>>       bman->visible_avail += bman_res->used_visible_size;
>>>       mutex_unlock(&bman->lock);
>>>   @@ -345,7 +345,7 @@ int i915_ttm_buddy_man_fini(struct ttm_device 
>>> *bdev, unsigned int type)
>>>       ttm_set_driver_manager(bdev, type, NULL);
>>>         mutex_lock(&bman->lock);
>>> -    drm_buddy_free_list(mm, &bman->reserved);
>>> +    drm_buddy_free_list(mm, &bman->reserved, 0);
>>>       drm_buddy_fini(mm);
>>>       bman->visible_avail += bman->visible_reserved;
>>>       WARN_ON_ONCE(bman->visible_avail != bman->visible_size);
>>> diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
>>> b/drivers/gpu/drm/tests/drm_buddy_test.c
>>> index ea2af6bd9abe..e0860fce9ebd 100644
>>> --- a/drivers/gpu/drm/tests/drm_buddy_test.c
>>> +++ b/drivers/gpu/drm/tests/drm_buddy_test.c
>>> @@ -83,7 +83,7 @@ static void 
>>> drm_test_buddy_alloc_pathological(struct kunit *test)
>>>                                 top, max_order);
>>>       }
>>>   -    drm_buddy_free_list(&mm, &holes);
>>> +    drm_buddy_free_list(&mm, &holes, 0);
>>>         /* Nothing larger than blocks of chunk_size now available */
>>>       for (order = 1; order <= max_order; order++) {
>>> @@ -95,7 +95,7 @@ static void 
>>> drm_test_buddy_alloc_pathological(struct kunit *test)
>>>       }
>>>         list_splice_tail(&holes, &blocks);
>>> -    drm_buddy_free_list(&mm, &blocks);
>>> +    drm_buddy_free_list(&mm, &blocks, 0);
>>>       drm_buddy_fini(&mm);
>>>   }
>>>   @@ -190,7 +190,7 @@ static void 
>>> drm_test_buddy_alloc_pessimistic(struct kunit *test)
>>>         list_del(&block->link);
>>>       drm_buddy_free_block(&mm, block);
>>> -    drm_buddy_free_list(&mm, &blocks);
>>> +    drm_buddy_free_list(&mm, &blocks, 0);
>>>       drm_buddy_fini(&mm);
>>>   }
>>>   @@ -236,7 +236,7 @@ static void 
>>> drm_test_buddy_alloc_optimistic(struct kunit *test)
>>>                                  size, size, &tmp, flags),
>>>                             "buddy_alloc unexpectedly succeeded, it 
>>> should be full!");
>>>   -    drm_buddy_free_list(&mm, &blocks);
>>> +    drm_buddy_free_list(&mm, &blocks, 0);
>>>       drm_buddy_fini(&mm);
>>>   }
>>>   @@ -271,7 +271,7 @@ static void drm_test_buddy_alloc_limit(struct 
>>> kunit *test)
>>>                           drm_buddy_block_size(&mm, block),
>>>                           BIT_ULL(mm.max_order) * PAGE_SIZE);
>>>   -    drm_buddy_free_list(&mm, &allocated);
>>> +    drm_buddy_free_list(&mm, &allocated, 0);
>>>       drm_buddy_fini(&mm);
>>>   }
>>>   diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>>> index a5b39fc01003..f7311b59f2b0 100644
>>> --- a/include/drm/drm_buddy.h
>>> +++ b/include/drm/drm_buddy.h
>>> @@ -6,6 +6,7 @@
>>>   #ifndef __DRM_BUDDY_H__
>>>   #define __DRM_BUDDY_H__
>>>   +#include <linux/bitfield.h>
>>>   #include <linux/bitops.h>
>>>   #include <linux/list.h>
>>>   #include <linux/slab.h>
>>> @@ -25,15 +26,19 @@
>>>   #define DRM_BUDDY_RANGE_ALLOCATION        BIT(0)
>>>   #define DRM_BUDDY_TOPDOWN_ALLOCATION        BIT(1)
>>>   #define DRM_BUDDY_CONTIGUOUS_ALLOCATION        BIT(2)
>>> +#define DRM_BUDDY_CLEAR_ALLOCATION        BIT(3)
>>> +#define DRM_BUDDY_CLEARED            BIT(4)
>>>     struct drm_buddy_block {
>>>   #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
>>>   #define DRM_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
>>> +#define DRM_BUDDY_HEADER_CLEAR  GENMASK_ULL(9, 9)
>>> +
>>>   #define   DRM_BUDDY_ALLOCATED       (1 << 10)
>>>   #define   DRM_BUDDY_FREE       (2 << 10)
>>>   #define   DRM_BUDDY_SPLIT       (3 << 10)
>>>   /* Free to be used, if needed in the future */
>>> -#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(9, 6)
>>> +#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
>>>   #define DRM_BUDDY_HEADER_ORDER  GENMASK_ULL(5, 0)
>>>       u64 header;
>>>   @@ -86,6 +91,7 @@ struct drm_buddy {
>>>       u64 chunk_size;
>>>       u64 size;
>>>       u64 avail;
>>> +    u64 clear_avail;
>>>   };
>>>     static inline u64
>>> @@ -112,6 +118,12 @@ drm_buddy_block_is_allocated(struct 
>>> drm_buddy_block *block)
>>>       return drm_buddy_block_state(block) == DRM_BUDDY_ALLOCATED;
>>>   }
>>>   +static inline bool
>>> +drm_buddy_block_is_clear(struct drm_buddy_block *block)
>>> +{
>>> +    return block->header & DRM_BUDDY_HEADER_CLEAR;
>>> +}
>>> +
>>>   static inline bool
>>>   drm_buddy_block_is_free(struct drm_buddy_block *block)
>>>   {
>>> @@ -150,7 +162,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>>>     void drm_buddy_free_block(struct drm_buddy *mm, struct 
>>> drm_buddy_block *block);
>>>   -void drm_buddy_free_list(struct drm_buddy *mm, struct list_head 
>>> *objects);
>>> +void drm_buddy_free_list(struct drm_buddy *mm,
>>> +             struct list_head *objects,
>>> +             unsigned long flags);
>>>     void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
>>>   void drm_buddy_block_print(struct drm_buddy *mm,
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3 1/2] drm/buddy: Implement tracking clear page feature
  2024-01-31 18:29     ` Matthew Auld
@ 2024-02-08 15:47       ` Arunpravin Paneer Selvam
  0 siblings, 0 replies; 10+ messages in thread
From: Arunpravin Paneer Selvam @ 2024-02-08 15:47 UTC (permalink / raw)
  To: Matthew Auld, dri-devel, amd-gfx
  Cc: christian.koenig, alexander.deucher, felix.kuehling



On 1/31/2024 11:59 PM, Matthew Auld wrote:
> On 30/01/2024 20:30, Arunpravin Paneer Selvam wrote:
>> Hi Matthew,
>>
>> On 12/21/2023 12:51 AM, Matthew Auld wrote:
>>> Hi,
>>>
>>> On 14/12/2023 13:42, Arunpravin Paneer Selvam wrote:
>>>> - Add tracking clear page feature.
>>>>
>>>> - Driver should enable the DRM_BUDDY_CLEARED flag if it
>>>>    successfully clears the blocks in the free path. On the otherhand,
>>>>    DRM buddy marks each block as cleared.
>>>>
>>>> - Track the available cleared pages size
>>>>
>>>> - If driver requests cleared memory we prefer cleared memory
>>>>    but fallback to uncleared if we can't find the cleared blocks.
>>>>    when driver requests uncleared memory we try to use uncleared but
>>>>    fallback to cleared memory if necessary.
>>>>
>>>> - When a block gets freed we clear it and mark the freed block as 
>>>> cleared,
>>>>    when there are buddies which are cleared as well we can merge them.
>>>>    Otherwise, we prefer to keep the blocks as separated.
>>>
>>> I was not involved, but it looks like we have also tried enabling
>>> the clear-on-free idea for VRAM in i915 and then also tracking that
>>> in the allocator; however, that work unfortunately is not upstream.
>>> The code is open source though:
>>> https://github.com/intel-gpu/intel-gpu-i915-backports/blob/backport/main/drivers/gpu/drm/i915/i915_buddy.c#L300 
>>>
>>> It looks like some of the design differences there are having two
>>> separate free lists, i.e. mm->clean and mm->dirty (sounds reasonable
>>> to me). There is also the inclusion of a de-fragmentation routine:
>>> since buddy blocks are now not always merged back, we might choose to
>>> run the defrag in some cases, which also sounds reasonable. IIRC in
>>> amdgpu, userspace can control the page-size for an allocation, so
>>> perhaps you would want to run it first if the allocation fails,
>>> before trying to evict stuff?
>> I checked the clear-on-free idea implemented in i915. In the amdgpu
>> version, we are clearing all the blocks in the amdgpu free routine and
>> DRM buddy only expects the DRM_BUDDY_CLEARED flag. Basically, we are
>> keeping the cleared blocks ready to be allocated when the user requests
>> cleared memory. We observed that this improves performance in games and
>> resolves the stutter issues as well. I see the i915 active-fences part
>> does the same job for i915. Could we move this part into the i915 free
>> routine and set the DRM_BUDDY_CLEARED flag?
>>
>> On de-fragmentation, I have included a function which can be called
>> at places where we get -ENOSPC. This routine will merge the clear and
>> dirty blocks back together to form a larger block of the requested
>> size. I am wondering where we could use this routine, since for
>> non-contiguous memory we have the fallback method and for contiguous
>> memory we have the try-harder method which searches through the tree.
>
> Don't you also want to call it from your vram manager when the
> requested page size is something large, before trying to evict stuff?
> That could now fail due to fragmentation IIUC. Or am I misreading
> amdgpu_vram_mgr_new()?
Yes, you are right, we can call the defragmentation routine from the
VRAM manager when there is an allocation failure.
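
For completeness, the clear-on-free flow described above boils down to
something like this on the driver side (sketch only; the real amdgpu
wiring is in patch 2/2, and the "cleared" condition here just stands for
whatever mechanism the driver uses to know its clear job finished):

	if (cleared)
		drm_buddy_free_list(mm, &vres->blocks, DRM_BUDDY_CLEARED);
	else
		drm_buddy_free_list(mm, &vres->blocks, 0);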

Thanks,
Arun
>
>>
>> I agree we can have two lists (a clear list and a dirty list) and
>> this would reduce the search iterations. But we would need to handle
>> the two-list design in all the functions, which might require more
>> time for testing on all platforms. Could we just go ahead with one
>> list (the free list) for now? I am going to take up this work as my
>> next task.
>
> Sounds good.
>
>>
>> Thanks,
>> Arun.


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2024-02-08 15:48 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-14 13:42 [PATCH v3 1/2] drm/buddy: Implement tracking clear page feature Arunpravin Paneer Selvam
2023-12-14 13:42 ` [PATCH v3 2/2] drm/amdgpu: Enable clear page functionality Arunpravin Paneer Selvam
2023-12-14 20:11   ` Felix Kuehling
2023-12-17 19:13 ` [PATCH v3 1/2] drm/buddy: Implement tracking clear page feature kernel test robot
2023-12-17 19:13   ` kernel test robot
2023-12-20 19:21 ` Matthew Auld
2023-12-21 10:31   ` Arunpravin Paneer Selvam
2024-01-30 20:30   ` Arunpravin Paneer Selvam
2024-01-31 18:29     ` Matthew Auld
2024-02-08 15:47       ` Arunpravin Paneer Selvam
