* [PATCH 1/2] drm/buddy: Implement tracking clear page feature
@ 2023-12-07 15:11 Arunpravin Paneer Selvam
  2023-12-07 15:11 ` [PATCH 2/2] drm/amdgpu: Enable clear page functionality Arunpravin Paneer Selvam
  0 siblings, 1 reply; 14+ messages in thread
From: Arunpravin Paneer Selvam @ 2023-12-07 15:11 UTC (permalink / raw)
  To: dri-devel, amd-gfx
  Cc: alexander.deucher, Arunpravin Paneer Selvam, christian.koenig,
	matthew.auld

- Add a feature to track cleared pages.

- If the driver requests cleared memory we prefer cleared blocks,
  but fall back to uncleared blocks if none are available. When the
  driver requests uncleared memory we try to use uncleared blocks,
  but fall back to cleared memory if necessary.

- The driver should set the DRM_BUDDY_CLEARED flag if it
  successfully clears the blocks in the free path. In response,
  DRM buddy marks each of those blocks as cleared.

- When a block gets freed we clear it and mark it as cleared. If its
  buddy is cleared as well, the two can be merged; otherwise we
  prefer to keep the blocks separate.

- Track the size of the available cleared pages.
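
As an illustration, here is a minimal driver-side sketch of how the two
new flags are intended to be used (not part of the patch; "mm", "size"
and the surrounding error handling are assumed driver context):

	LIST_HEAD(blocks);
	int err;

	/* Prefer blocks that are already cleared; the allocator falls
	 * back to uncleared blocks if no cleared ones are available.
	 */
	err = drm_buddy_alloc_blocks(mm, 0, mm->size, size,
				     mm->chunk_size, &blocks,
				     DRM_BUDDY_CLEAR_ALLOCATION);
	if (err)
		return err;

	/* ... the driver uses the memory and clears it in its free path ... */

	/* Passing DRM_BUDDY_CLEARED tells the allocator that the blocks
	 * were cleared, so they can be handed out again as cleared memory.
	 */
	drm_buddy_free_list(mm, &blocks, DRM_BUDDY_CLEARED);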

Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Suggested-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
 drivers/gpu/drm/drm_buddy.c                   | 169 +++++++++++++++---
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
 drivers/gpu/drm/tests/drm_buddy_test.c        |  10 +-
 include/drm/drm_buddy.h                       |  18 +-
 5 files changed, 168 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 08916538a615..d0e199cc8f17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -556,7 +556,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	return 0;
 
 error_free_blocks:
-	drm_buddy_free_list(mm, &vres->blocks);
+	drm_buddy_free_list(mm, &vres->blocks, 0);
 	mutex_unlock(&mgr->lock);
 error_fini:
 	ttm_resource_fini(man, &vres->base);
@@ -589,7 +589,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
 
 	amdgpu_vram_mgr_do_reserve(man);
 
-	drm_buddy_free_list(mm, &vres->blocks);
+	drm_buddy_free_list(mm, &vres->blocks, 0);
 	mutex_unlock(&mgr->lock);
 
 	atomic64_sub(vis_usage, &mgr->vis_usage);
@@ -897,7 +897,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
 		kfree(rsv);
 
 	list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {
-		drm_buddy_free_list(&mgr->mm, &rsv->allocated);
+		drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
 		kfree(rsv);
 	}
 	if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..d44172f23f05 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
 	__list_add(&block->link, node->link.prev, &node->link);
 }
 
+static void clear_reset(struct drm_buddy_block *block)
+{
+	block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+	block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
 static void mark_allocated(struct drm_buddy_block *block)
 {
 	block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
 	mark_free(mm, block->left);
 	mark_free(mm, block->right);
 
+	if (drm_buddy_block_is_clear(block)) {
+		mark_cleared(block->left);
+		mark_cleared(block->right);
+		clear_reset(block);
+	}
+
 	mark_split(block);
 
 	return 0;
@@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
 		if (!drm_buddy_block_is_free(buddy))
 			break;
 
+		if (drm_buddy_block_is_clear(block) !=
+		    drm_buddy_block_is_clear(buddy))
+			break;
+
+		if (drm_buddy_block_is_clear(block))
+			mark_cleared(parent);
+
 		list_del(&buddy->link);
 
 		drm_block_free(mm, block);
@@ -295,6 +318,9 @@ void drm_buddy_free_block(struct drm_buddy *mm,
 {
 	BUG_ON(!drm_buddy_block_is_allocated(block));
 	mm->avail += drm_buddy_block_size(mm, block);
+	if (drm_buddy_block_is_clear(block))
+		mm->clear_avail += drm_buddy_block_size(mm, block);
+
 	__drm_buddy_free(mm, block);
 }
 EXPORT_SYMBOL(drm_buddy_free_block);
@@ -305,10 +331,20 @@ EXPORT_SYMBOL(drm_buddy_free_block);
  * @mm: DRM buddy manager
  * @objects: input list head to free blocks
  */
-void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects)
+void drm_buddy_free_list(struct drm_buddy *mm,
+			 struct list_head *objects,
+			 unsigned long flags)
 {
 	struct drm_buddy_block *block, *on;
 
+	if (flags & DRM_BUDDY_CLEARED) {
+		list_for_each_entry(block, objects, link)
+			mark_cleared(block);
+	} else {
+		list_for_each_entry(block, objects, link)
+			clear_reset(block);
+	}
+
 	list_for_each_entry_safe(block, on, objects, link) {
 		drm_buddy_free_block(mm, block);
 		cond_resched();
@@ -328,9 +364,11 @@ static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
 }
 
 static struct drm_buddy_block *
-alloc_range_bias(struct drm_buddy *mm,
-		 u64 start, u64 end,
-		 unsigned int order)
+__alloc_range_bias(struct drm_buddy *mm,
+		   u64 start, u64 end,
+		   unsigned int order,
+		   unsigned long flags,
+		   bool fallback)
 {
 	struct drm_buddy_block *block;
 	struct drm_buddy_block *buddy;
@@ -369,6 +407,15 @@ alloc_range_bias(struct drm_buddy *mm,
 
 		if (contains(start, end, block_start, block_end) &&
 		    order == drm_buddy_block_order(block)) {
+			if (!fallback) {
+				if (flags & DRM_BUDDY_CLEAR_ALLOCATION) {
+					if (!drm_buddy_block_is_clear(block))
+						continue;
+				} else {
+					if (drm_buddy_block_is_clear(block))
+						continue;
+				}
+			}
 			/*
 			 * Find the free block within the range.
 			 */
@@ -405,25 +452,58 @@ alloc_range_bias(struct drm_buddy *mm,
 }
 
 static struct drm_buddy_block *
-get_maxblock(struct drm_buddy *mm, unsigned int order)
+__drm_buddy_alloc_range_bias(struct drm_buddy *mm,
+			     u64 start, u64 end,
+			     unsigned int order,
+			     unsigned long flags)
+{
+	struct drm_buddy_block *block;
+	bool fallback = 0;
+
+	block = __alloc_range_bias(mm, start, end, order,
+				   flags, fallback);
+	if (IS_ERR(block))
+		return __alloc_range_bias(mm, start, end, order,
+					  flags, !fallback);
+
+	return block;
+}
+
+static struct drm_buddy_block *
+get_maxblock(struct drm_buddy *mm, unsigned int order,
+	     unsigned long flags)
 {
-	struct drm_buddy_block *max_block = NULL, *node;
+	struct drm_buddy_block *max_block = NULL, *block = NULL;
 	unsigned int i;
 
 	for (i = order; i <= mm->max_order; ++i) {
-		if (!list_empty(&mm->free_list[i])) {
-			node = list_last_entry(&mm->free_list[i],
-					       struct drm_buddy_block,
-					       link);
-			if (!max_block) {
-				max_block = node;
-				continue;
+		struct drm_buddy_block *tmp_block;
+
+		list_for_each_entry_reverse(tmp_block, &mm->free_list[i], link) {
+			if (flags & DRM_BUDDY_CLEAR_ALLOCATION) {
+				/* Find a cleared block */
+				if (!drm_buddy_block_is_clear(tmp_block))
+					continue;
+			} else {
+				if (drm_buddy_block_is_clear(tmp_block))
+					continue;
 			}
 
-			if (drm_buddy_block_offset(node) >
-			    drm_buddy_block_offset(max_block)) {
-				max_block = node;
-			}
+			block = tmp_block;
+			break;
+		}
+
+		if (!block)
+			continue;
+
+		if (!max_block) {
+			max_block = block;
+			continue;
+		}
+
+		if (drm_buddy_block_offset(block) >
+		    drm_buddy_block_offset(max_block)) {
+			max_block = block;
 		}
 	}
 
@@ -440,11 +520,35 @@ alloc_from_freelist(struct drm_buddy *mm,
 	int err;
 
 	if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
-		block = get_maxblock(mm, order);
+		block = get_maxblock(mm, order, flags);
 		if (block)
 			/* Store the obtained block order */
 			tmp = drm_buddy_block_order(block);
 	} else {
+		for (tmp = order; tmp <= mm->max_order; ++tmp) {
+			struct drm_buddy_block *tmp_block;
+
+			list_for_each_entry_reverse(tmp_block, &mm->free_list[tmp], link) {
+				if (flags & DRM_BUDDY_CLEAR_ALLOCATION) {
+					/* Find a cleared block */
+					if (!drm_buddy_block_is_clear(tmp_block))
+						continue;
+				} else {
+					if (drm_buddy_block_is_clear(tmp_block))
+						continue;
+				}
+
+				block = tmp_block;
+				break;
+			}
+
+			if (block)
+				break;
+		}
+	}
+
+	if (!block) {
+		/* Fallback method */
 		for (tmp = order; tmp <= mm->max_order; ++tmp) {
 			if (!list_empty(&mm->free_list[tmp])) {
 				block = list_last_entry(&mm->free_list[tmp],
@@ -454,10 +558,10 @@ alloc_from_freelist(struct drm_buddy *mm,
 					break;
 			}
 		}
-	}
 
-	if (!block)
-		return ERR_PTR(-ENOSPC);
+		if (!block)
+			return ERR_PTR(-ENOSPC);
+	}
 
 	BUG_ON(!drm_buddy_block_is_free(block));
 
@@ -524,6 +628,8 @@ static int __alloc_range(struct drm_buddy *mm,
 			mark_allocated(block);
 			total_allocated += drm_buddy_block_size(mm, block);
 			mm->avail -= drm_buddy_block_size(mm, block);
+			if (drm_buddy_block_is_clear(block))
+				mm->clear_avail -= drm_buddy_block_size(mm, block);
 			list_add_tail(&block->link, &allocated);
 			continue;
 		}
@@ -558,7 +664,7 @@ static int __alloc_range(struct drm_buddy *mm,
 		list_splice_tail(&allocated, blocks);
 		*total_allocated_on_err = total_allocated;
 	} else {
-		drm_buddy_free_list(mm, &allocated);
+		drm_buddy_free_list(mm, &allocated, 0);
 	}
 
 	return err;
@@ -624,11 +730,11 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
 			list_splice(&blocks_lhs, blocks);
 			return 0;
 		} else if (err != -ENOSPC) {
-			drm_buddy_free_list(mm, blocks);
+			drm_buddy_free_list(mm, blocks, 0);
 			return err;
 		}
 		/* Free blocks for the next iteration */
-		drm_buddy_free_list(mm, blocks);
+		drm_buddy_free_list(mm, blocks, 0);
 	}
 
 	return -ENOSPC;
@@ -684,6 +790,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
 	list_del(&block->link);
 	mark_free(mm, block);
 	mm->avail += drm_buddy_block_size(mm, block);
+	if (drm_buddy_block_is_clear(block))
+		mm->clear_avail += drm_buddy_block_size(mm, block);
 
 	/* Prevent recursively freeing this node */
 	parent = block->parent;
@@ -695,6 +803,8 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
 	if (err) {
 		mark_allocated(block);
 		mm->avail -= drm_buddy_block_size(mm, block);
+		if (drm_buddy_block_is_clear(block))
+			mm->clear_avail -= drm_buddy_block_size(mm, block);
 		list_add(&block->link, blocks);
 	}
 
@@ -782,7 +892,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 		do {
 			if (flags & DRM_BUDDY_RANGE_ALLOCATION)
 				/* Allocate traversing within the range */
-				block = alloc_range_bias(mm, start, end, order);
+				block = __drm_buddy_alloc_range_bias(mm, start, end,
+								     order, flags);
 			else
 				/* Allocate from freelist */
 				block = alloc_from_freelist(mm, order, flags);
@@ -808,6 +919,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 
 		mark_allocated(block);
 		mm->avail -= drm_buddy_block_size(mm, block);
+		if (drm_buddy_block_is_clear(block))
+			mm->clear_avail -= drm_buddy_block_size(mm, block);
 		kmemleak_update_trace(block);
 		list_add_tail(&block->link, &allocated);
 
@@ -846,7 +959,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 	return 0;
 
 err_free:
-	drm_buddy_free_list(mm, &allocated);
+	drm_buddy_free_list(mm, &allocated, 0);
 	return err;
 }
 EXPORT_SYMBOL(drm_buddy_alloc_blocks);
@@ -879,8 +992,8 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
 {
 	int order;
 
-	drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB\n",
-		   mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20);
+	drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
+		   mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
 
 	for (order = mm->max_order; order >= 0; order--) {
 		struct drm_buddy_block *block;
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index 0d735d5c2b35..942345548bc3 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -126,7 +126,7 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
 	return 0;
 
 err_free_blocks:
-	drm_buddy_free_list(mm, &bman_res->blocks);
+	drm_buddy_free_list(mm, &bman_res->blocks, 0);
 	mutex_unlock(&bman->lock);
 err_free_res:
 	ttm_resource_fini(man, &bman_res->base);
@@ -141,7 +141,7 @@ static void i915_ttm_buddy_man_free(struct ttm_resource_manager *man,
 	struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
 
 	mutex_lock(&bman->lock);
-	drm_buddy_free_list(&bman->mm, &bman_res->blocks);
+	drm_buddy_free_list(&bman->mm, &bman_res->blocks, 0);
 	bman->visible_avail += bman_res->used_visible_size;
 	mutex_unlock(&bman->lock);
 
@@ -345,7 +345,7 @@ int i915_ttm_buddy_man_fini(struct ttm_device *bdev, unsigned int type)
 	ttm_set_driver_manager(bdev, type, NULL);
 
 	mutex_lock(&bman->lock);
-	drm_buddy_free_list(mm, &bman->reserved);
+	drm_buddy_free_list(mm, &bman->reserved, 0);
 	drm_buddy_fini(mm);
 	bman->visible_avail += bman->visible_reserved;
 	WARN_ON_ONCE(bman->visible_avail != bman->visible_size);
diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c
index ea2af6bd9abe..e0860fce9ebd 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -83,7 +83,7 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test)
 							  top, max_order);
 	}
 
-	drm_buddy_free_list(&mm, &holes);
+	drm_buddy_free_list(&mm, &holes, 0);
 
 	/* Nothing larger than blocks of chunk_size now available */
 	for (order = 1; order <= max_order; order++) {
@@ -95,7 +95,7 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test)
 	}
 
 	list_splice_tail(&holes, &blocks);
-	drm_buddy_free_list(&mm, &blocks);
+	drm_buddy_free_list(&mm, &blocks, 0);
 	drm_buddy_fini(&mm);
 }
 
@@ -190,7 +190,7 @@ static void drm_test_buddy_alloc_pessimistic(struct kunit *test)
 
 	list_del(&block->link);
 	drm_buddy_free_block(&mm, block);
-	drm_buddy_free_list(&mm, &blocks);
+	drm_buddy_free_list(&mm, &blocks, 0);
 	drm_buddy_fini(&mm);
 }
 
@@ -236,7 +236,7 @@ static void drm_test_buddy_alloc_optimistic(struct kunit *test)
 							   size, size, &tmp, flags),
 						  "buddy_alloc unexpectedly succeeded, it should be full!");
 
-	drm_buddy_free_list(&mm, &blocks);
+	drm_buddy_free_list(&mm, &blocks, 0);
 	drm_buddy_fini(&mm);
 }
 
@@ -271,7 +271,7 @@ static void drm_test_buddy_alloc_limit(struct kunit *test)
 						drm_buddy_block_size(&mm, block),
 						BIT_ULL(mm.max_order) * PAGE_SIZE);
 
-	drm_buddy_free_list(&mm, &allocated);
+	drm_buddy_free_list(&mm, &allocated, 0);
 	drm_buddy_fini(&mm);
 }
 
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index a5b39fc01003..f7311b59f2b0 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -6,6 +6,7 @@
 #ifndef __DRM_BUDDY_H__
 #define __DRM_BUDDY_H__
 
+#include <linux/bitfield.h>
 #include <linux/bitops.h>
 #include <linux/list.h>
 #include <linux/slab.h>
@@ -25,15 +26,19 @@
 #define DRM_BUDDY_RANGE_ALLOCATION		BIT(0)
 #define DRM_BUDDY_TOPDOWN_ALLOCATION		BIT(1)
 #define DRM_BUDDY_CONTIGUOUS_ALLOCATION		BIT(2)
+#define DRM_BUDDY_CLEAR_ALLOCATION		BIT(3)
+#define DRM_BUDDY_CLEARED			BIT(4)
 
 struct drm_buddy_block {
 #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
 #define DRM_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
+#define DRM_BUDDY_HEADER_CLEAR  GENMASK_ULL(9, 9)
+
 #define   DRM_BUDDY_ALLOCATED	   (1 << 10)
 #define   DRM_BUDDY_FREE	   (2 << 10)
 #define   DRM_BUDDY_SPLIT	   (3 << 10)
 /* Free to be used, if needed in the future */
-#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(9, 6)
+#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
 #define DRM_BUDDY_HEADER_ORDER  GENMASK_ULL(5, 0)
 	u64 header;
 
@@ -86,6 +91,7 @@ struct drm_buddy {
 	u64 chunk_size;
 	u64 size;
 	u64 avail;
+	u64 clear_avail;
 };
 
 static inline u64
@@ -112,6 +118,12 @@ drm_buddy_block_is_allocated(struct drm_buddy_block *block)
 	return drm_buddy_block_state(block) == DRM_BUDDY_ALLOCATED;
 }
 
+static inline bool
+drm_buddy_block_is_clear(struct drm_buddy_block *block)
+{
+	return block->header & DRM_BUDDY_HEADER_CLEAR;
+}
+
 static inline bool
 drm_buddy_block_is_free(struct drm_buddy_block *block)
 {
@@ -150,7 +162,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
 
 void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block);
 
-void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects);
+void drm_buddy_free_list(struct drm_buddy *mm,
+			 struct list_head *objects,
+			 unsigned long flags);
 
 void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
 void drm_buddy_block_print(struct drm_buddy *mm,
-- 
2.25.1


* [PATCH 2/2] drm/amdgpu: Enable clear page functionality
  2023-12-07 15:11 [PATCH 1/2] drm/buddy: Implement tracking clear page feature Arunpravin Paneer Selvam
@ 2023-12-07 15:11 ` Arunpravin Paneer Selvam
  2023-12-07 16:46   ` Alex Deucher
  2023-12-08 10:00   ` Christian König
  0 siblings, 2 replies; 14+ messages in thread
From: Arunpravin Paneer Selvam @ 2023-12-07 15:11 UTC (permalink / raw)
  To: dri-devel, amd-gfx
  Cc: alexander.deucher, Arunpravin Paneer Selvam, christian.koenig,
	matthew.auld

Add clear page support to the VRAM memory region.
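
As a hedged illustration (not part of the patch) of how the new cursor
helper composes with the existing amdgpu_res_cursor API, a made-up
helper that counts how many bytes of a VRAM resource are already
tracked as cleared could look like this:

	static u64 example_cleared_bytes(struct ttm_resource *res, u64 size)
	{
		struct amdgpu_res_cursor cursor;
		u64 cleared = 0;

		amdgpu_res_first(res, 0, size, &cursor);
		while (cursor.remaining) {
			if (amdgpu_res_cleared(&cursor))
				cleared += cursor.size;
			amdgpu_res_next(&cursor, cursor.size);
		}

		return cleared;
	}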

Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 13 +++--
 .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    | 25 ++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 50 +++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |  4 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 14 +++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
 6 files changed, 105 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index cef920a93924..bc4ea87f8b5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
 
 /**
  * DOC: amdgpu_object
@@ -629,15 +630,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 
 	if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
 	    bo->tbo.resource->mem_type == TTM_PL_VRAM) {
-		struct dma_fence *fence;
+		struct dma_fence *fence = NULL;
 
-		r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
+		r = amdgpu_clear_buffer(bo, bo->tbo.base.resv, &fence, true);
 		if (unlikely(r))
 			goto fail_unreserve;
 
-		dma_resv_add_fence(bo->tbo.base.resv, fence,
-				   DMA_RESV_USAGE_KERNEL);
-		dma_fence_put(fence);
+		if (fence) {
+			dma_resv_add_fence(bo->tbo.base.resv, fence,
+					   DMA_RESV_USAGE_KERNEL);
+			dma_fence_put(fence);
+		}
 	}
 	if (!bp->resv)
 		amdgpu_bo_unreserve(bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
 	}
 }
 
+/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+	struct drm_buddy_block *block;
+
+	switch (cur->mem_type) {
+	case TTM_PL_VRAM:
+		block = cur->node;
+
+		if (!amdgpu_vram_mgr_is_cleared(block))
+			return false;
+		break;
+	default:
+		return false;
+	}
+
+	return true;
+}
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 05991c5c8ddb..6d7514e8f40c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -2222,6 +2222,56 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
 	return 0;
 }
 
+int amdgpu_clear_buffer(struct amdgpu_bo *bo,
+			struct dma_resv *resv,
+			struct dma_fence **fence,
+			bool delayed)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+	struct amdgpu_res_cursor cursor;
+	struct dma_fence *f = NULL;
+	u64 addr;
+	int r;
+
+	if (!adev->mman.buffer_funcs_enabled)
+		return -EINVAL;
+
+	amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
+
+	mutex_lock(&adev->mman.gtt_window_lock);
+	while (cursor.remaining) {
+		struct dma_fence *next = NULL;
+		u64 size;
+
+		/* Never clear more than 256MiB at once to avoid timeouts */
+		size = min(cursor.size, 256ULL << 20);
+
+		if (!amdgpu_res_cleared(&cursor)) {
+			r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
+						  1, ring, false, &size, &addr);
+			if (r)
+				goto err;
+
+			r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
+						&next, true, delayed);
+			if (r)
+				goto err;
+		}
+		dma_fence_put(f);
+		f = next;
+
+		amdgpu_res_next(&cursor, size);
+	}
+err:
+	mutex_unlock(&adev->mman.gtt_window_lock);
+	if (fence)
+		*fence = dma_fence_get(f);
+	dma_fence_put(f);
+
+	return r;
+}
+
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 			uint32_t src_data,
 			struct dma_resv *resv,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 65ec82141a8e..838251166883 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -155,6 +155,10 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 			       uint64_t size, bool tmz,
 			       struct dma_resv *resv,
 			       struct dma_fence **f);
+int amdgpu_clear_buffer(struct amdgpu_bo *bo,
+			struct dma_resv *resv,
+			struct dma_fence **fence,
+			bool delayed);
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 			uint32_t src_data,
 			struct dma_resv *resv,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index d0e199cc8f17..ff74c324b5b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -435,6 +435,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 {
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
 	struct amdgpu_device *adev = to_amdgpu_device(mgr);
+	struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
 	u64 vis_usage = 0, max_bytes, min_block_size;
 	struct amdgpu_vram_mgr_resource *vres;
 	u64 size, remaining_size, lpfn, fpfn;
@@ -486,6 +487,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
 		vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
 
+	if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
+		vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
+
 	if (fpfn || lpfn != mgr->mm.size)
 		/* Allocate blocks in desired range */
 		vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
@@ -579,7 +583,9 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
 	struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
 	struct amdgpu_device *adev = to_amdgpu_device(mgr);
+	struct amdgpu_bo *bo = ttm_to_amdgpu_bo(res->bo);
 	struct drm_buddy *mm = &mgr->mm;
+	struct dma_fence *fence = NULL;
 	struct drm_buddy_block *block;
 	uint64_t vis_usage = 0;
 
@@ -589,7 +595,13 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
 
 	amdgpu_vram_mgr_do_reserve(man);
 
-	drm_buddy_free_list(mm, &vres->blocks, 0);
+	/* Clear all the blocks in free path */
+	if (!amdgpu_fill_buffer(bo, 0, NULL, &fence, true)) {
+		vres->flags |= DRM_BUDDY_CLEARED;
+		dma_fence_put(fence);
+	}
+
+	drm_buddy_free_list(mm, &vres->blocks, vres->flags);
 	mutex_unlock(&mgr->lock);
 
 	atomic64_sub(vis_usage, &mgr->vis_usage);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
index 0e04e42cf809..8478522d7366 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
@@ -53,6 +53,11 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
 	return (u64)PAGE_SIZE << drm_buddy_block_order(block);
 }
 
+static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block)
+{
+	return drm_buddy_block_is_clear(block);
+}
+
 static inline struct amdgpu_vram_mgr_resource *
 to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
 {
-- 
2.25.1


* Re: [PATCH 2/2] drm/amdgpu: Enable clear page functionality
  2023-12-07 15:11 ` [PATCH 2/2] drm/amdgpu: Enable clear page functionality Arunpravin Paneer Selvam
@ 2023-12-07 16:46   ` Alex Deucher
  2023-12-08 10:00   ` Christian König
  1 sibling, 0 replies; 14+ messages in thread
From: Alex Deucher @ 2023-12-07 16:46 UTC (permalink / raw)
  To: Arunpravin Paneer Selvam
  Cc: alexander.deucher, matthew.auld, amd-gfx, dri-devel, christian.koenig

On Thu, Dec 7, 2023 at 10:12 AM Arunpravin Paneer Selvam
<Arunpravin.PaneerSelvam@amd.com> wrote:
>
> Add clear page support in vram memory region.
>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 13 +++--
>  .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    | 25 ++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 50 +++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |  4 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 14 +++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
>  6 files changed, 105 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index cef920a93924..bc4ea87f8b5e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -39,6 +39,7 @@
>  #include "amdgpu.h"
>  #include "amdgpu_trace.h"
>  #include "amdgpu_amdkfd.h"
> +#include "amdgpu_vram_mgr.h"
>
>  /**
>   * DOC: amdgpu_object
> @@ -629,15 +630,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>
>         if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
>             bo->tbo.resource->mem_type == TTM_PL_VRAM) {
> -               struct dma_fence *fence;
> +               struct dma_fence *fence = NULL;
>
> -               r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
> +               r = amdgpu_clear_buffer(bo, bo->tbo.base.resv, &fence, true);
>                 if (unlikely(r))
>                         goto fail_unreserve;
>
> -               dma_resv_add_fence(bo->tbo.base.resv, fence,
> -                                  DMA_RESV_USAGE_KERNEL);
> -               dma_fence_put(fence);
> +               if (fence) {
> +                       dma_resv_add_fence(bo->tbo.base.resv, fence,
> +                                          DMA_RESV_USAGE_KERNEL);
> +                       dma_fence_put(fence);
> +               }
>         }
>         if (!bp->resv)
>                 amdgpu_bo_unreserve(bo);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> index 381101d2bf05..50fcd86e1033 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> @@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
>         }
>  }
>
> +/**
> + * amdgpu_res_cleared - check if blocks are cleared
> + *
> + * @cur: the cursor to extract the block
> + *
> + * Check if the @cur block is cleared
> + */
> +static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
> +{
> +       struct drm_buddy_block *block;
> +
> +       switch (cur->mem_type) {
> +       case TTM_PL_VRAM:
> +               block = cur->node;
> +
> +               if (!amdgpu_vram_mgr_is_cleared(block))
> +                       return false;
> +               break;
> +       default:
> +               return false;
> +       }
> +
> +       return true;
> +}
> +
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 05991c5c8ddb..6d7514e8f40c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -2222,6 +2222,56 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
>         return 0;
>  }
>
> +int amdgpu_clear_buffer(struct amdgpu_bo *bo,

amdgpu_ttm_clear_buffer() for naming consistency.

Alex

> +                       struct dma_resv *resv,
> +                       struct dma_fence **fence,
> +                       bool delayed)
> +{
> +       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> +       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> +       struct amdgpu_res_cursor cursor;
> +       struct dma_fence *f = NULL;
> +       u64 addr;
> +       int r;
> +
> +       if (!adev->mman.buffer_funcs_enabled)
> +               return -EINVAL;
> +
> +       amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
> +
> +       mutex_lock(&adev->mman.gtt_window_lock);
> +       while (cursor.remaining) {
> +               struct dma_fence *next = NULL;
> +               u64 size;
> +
> +               /* Never clear more than 256MiB at once to avoid timeouts */
> +               size = min(cursor.size, 256ULL << 20);
> +
> +               if (!amdgpu_res_cleared(&cursor)) {
> +                       r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
> +                                                 1, ring, false, &size, &addr);
> +                       if (r)
> +                               goto err;
> +
> +                       r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
> +                                               &next, true, delayed);
> +                       if (r)
> +                               goto err;
> +               }
> +               dma_fence_put(f);
> +               f = next;
> +
> +               amdgpu_res_next(&cursor, size);
> +       }
> +err:
> +       mutex_unlock(&adev->mman.gtt_window_lock);
> +       if (fence)
> +               *fence = dma_fence_get(f);
> +       dma_fence_put(f);
> +
> +       return r;
> +}
> +
>  int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>                         uint32_t src_data,
>                         struct dma_resv *resv,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 65ec82141a8e..838251166883 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -155,6 +155,10 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>                                uint64_t size, bool tmz,
>                                struct dma_resv *resv,
>                                struct dma_fence **f);
> +int amdgpu_clear_buffer(struct amdgpu_bo *bo,
> +                       struct dma_resv *resv,
> +                       struct dma_fence **fence,
> +                       bool delayed);
>  int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>                         uint32_t src_data,
>                         struct dma_resv *resv,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> index d0e199cc8f17..ff74c324b5b5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> @@ -435,6 +435,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>  {
>         struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>         struct amdgpu_device *adev = to_amdgpu_device(mgr);
> +       struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
>         u64 vis_usage = 0, max_bytes, min_block_size;
>         struct amdgpu_vram_mgr_resource *vres;
>         u64 size, remaining_size, lpfn, fpfn;
> @@ -486,6 +487,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>         if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>                 vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
>
> +       if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
> +               vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
> +
>         if (fpfn || lpfn != mgr->mm.size)
>                 /* Allocate blocks in desired range */
>                 vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
> @@ -579,7 +583,9 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
>         struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
>         struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>         struct amdgpu_device *adev = to_amdgpu_device(mgr);
> +       struct amdgpu_bo *bo = ttm_to_amdgpu_bo(res->bo);
>         struct drm_buddy *mm = &mgr->mm;
> +       struct dma_fence *fence = NULL;
>         struct drm_buddy_block *block;
>         uint64_t vis_usage = 0;
>
> @@ -589,7 +595,13 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
>
>         amdgpu_vram_mgr_do_reserve(man);
>
> -       drm_buddy_free_list(mm, &vres->blocks, 0);
> +       /* Clear all the blocks in free path */
> +       if (!amdgpu_fill_buffer(bo, 0, NULL, &fence, true)) {
> +               vres->flags |= DRM_BUDDY_CLEARED;
> +               dma_fence_put(fence);
> +       }
> +
> +       drm_buddy_free_list(mm, &vres->blocks, vres->flags);
>         mutex_unlock(&mgr->lock);
>
>         atomic64_sub(vis_usage, &mgr->vis_usage);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> index 0e04e42cf809..8478522d7366 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> @@ -53,6 +53,11 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
>         return (u64)PAGE_SIZE << drm_buddy_block_order(block);
>  }
>
> +static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block)
> +{
> +       return drm_buddy_block_is_clear(block);
> +}
> +
>  static inline struct amdgpu_vram_mgr_resource *
>  to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
>  {
> --
> 2.25.1
>

* Re: [PATCH 2/2] drm/amdgpu: Enable clear page functionality
  2023-12-07 15:11 ` [PATCH 2/2] drm/amdgpu: Enable clear page functionality Arunpravin Paneer Selvam
  2023-12-07 16:46   ` Alex Deucher
@ 2023-12-08 10:00   ` Christian König
  2023-12-08 19:53     ` Alex Deucher
  1 sibling, 1 reply; 14+ messages in thread
From: Christian König @ 2023-12-08 10:00 UTC (permalink / raw)
  To: Arunpravin Paneer Selvam, dri-devel, amd-gfx
  Cc: alexander.deucher, matthew.auld

Am 07.12.23 um 16:11 schrieb Arunpravin Paneer Selvam:
> Add clear page support in vram memory region.

The first patch looks good, but this here needs quite some work.

>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 13 +++--
>   .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    | 25 ++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 50 +++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |  4 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 14 +++++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
>   6 files changed, 105 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index cef920a93924..bc4ea87f8b5e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -39,6 +39,7 @@
>   #include "amdgpu.h"
>   #include "amdgpu_trace.h"
>   #include "amdgpu_amdkfd.h"
> +#include "amdgpu_vram_mgr.h"
>   
>   /**
>    * DOC: amdgpu_object
> @@ -629,15 +630,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>   
>   	if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
>   	    bo->tbo.resource->mem_type == TTM_PL_VRAM) {
> -		struct dma_fence *fence;
> +		struct dma_fence *fence = NULL;
>   
> -		r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
> +		r = amdgpu_clear_buffer(bo, bo->tbo.base.resv, &fence, true);
>   		if (unlikely(r))
>   			goto fail_unreserve;
>   
> -		dma_resv_add_fence(bo->tbo.base.resv, fence,
> -				   DMA_RESV_USAGE_KERNEL);
> -		dma_fence_put(fence);
> +		if (fence) {
> +			dma_resv_add_fence(bo->tbo.base.resv, fence,
> +					   DMA_RESV_USAGE_KERNEL);
> +			dma_fence_put(fence);
> +		}
>   	}
>   	if (!bp->resv)
>   		amdgpu_bo_unreserve(bo);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> index 381101d2bf05..50fcd86e1033 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> @@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
>   	}
>   }
>   
> +/**
> + * amdgpu_res_cleared - check if blocks are cleared
> + *
> + * @cur: the cursor to extract the block
> + *
> + * Check if the @cur block is cleared
> + */
> +static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
> +{
> +	struct drm_buddy_block *block;
> +
> +	switch (cur->mem_type) {
> +	case TTM_PL_VRAM:
> +		block = cur->node;
> +
> +		if (!amdgpu_vram_mgr_is_cleared(block))
> +			return false;
> +		break;
> +	default:
> +		return false;
> +	}
> +
> +	return true;
> +}
> +
>   #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 05991c5c8ddb..6d7514e8f40c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -2222,6 +2222,56 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
>   	return 0;
>   }
>   
> +int amdgpu_clear_buffer(struct amdgpu_bo *bo,
> +			struct dma_resv *resv,
> +			struct dma_fence **fence,
> +			bool delayed)

Drop the delayed parameter, that doesn't make any sense here.

And as Alex said please use an amdgpu_ttm_ prefix for the function name.

> +{
> +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> +	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> +	struct amdgpu_res_cursor cursor;
> +	struct dma_fence *f = NULL;
> +	u64 addr;
> +	int r;
> +
> +	if (!adev->mman.buffer_funcs_enabled)
> +		return -EINVAL;
> +
> +	amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
> +
> +	mutex_lock(&adev->mman.gtt_window_lock);
> +	while (cursor.remaining) {
> +		struct dma_fence *next = NULL;
> +		u64 size;
> +
> +		/* Never clear more than 256MiB at once to avoid timeouts */
> +		size = min(cursor.size, 256ULL << 20);
> +
> +		if (!amdgpu_res_cleared(&cursor)) {

This needs to come before the min(cursor.size....) directly above. I
suggest handling it like this:

if (amdgpu_res_cleared(&cursor)) {
	amdgpu_res_next(&cursor, cursor.size);
	continue;
}

size = min(....

> +			r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
> +						  1, ring, false, &size, &addr);
> +			if (r)
> +				goto err;
> +
> +			r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
> +						&next, true, delayed);
> +			if (r)
> +				goto err;
> +		}
> +		dma_fence_put(f);
> +		f = next;
> +
> +		amdgpu_res_next(&cursor, size);
> +	}
> +err:
> +	mutex_unlock(&adev->mman.gtt_window_lock);
> +	if (fence)
> +		*fence = dma_fence_get(f);
> +	dma_fence_put(f);
> +
> +	return r;
> +}
> +
>   int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>   			uint32_t src_data,
>   			struct dma_resv *resv,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 65ec82141a8e..838251166883 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -155,6 +155,10 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>   			       uint64_t size, bool tmz,
>   			       struct dma_resv *resv,
>   			       struct dma_fence **f);
> +int amdgpu_clear_buffer(struct amdgpu_bo *bo,
> +			struct dma_resv *resv,
> +			struct dma_fence **fence,
> +			bool delayed);
>   int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>   			uint32_t src_data,
>   			struct dma_resv *resv,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> index d0e199cc8f17..ff74c324b5b5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> @@ -435,6 +435,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>   {
>   	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>   	struct amdgpu_device *adev = to_amdgpu_device(mgr);
> +	struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
>   	u64 vis_usage = 0, max_bytes, min_block_size;
>   	struct amdgpu_vram_mgr_resource *vres;
>   	u64 size, remaining_size, lpfn, fpfn;
> @@ -486,6 +487,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>   	if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>   		vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
>   
> +	if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
> +		vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
> +
>   	if (fpfn || lpfn != mgr->mm.size)
>   		/* Allocate blocks in desired range */
>   		vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
> @@ -579,7 +583,9 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
>   	struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
>   	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>   	struct amdgpu_device *adev = to_amdgpu_device(mgr);
> +	struct amdgpu_bo *bo = ttm_to_amdgpu_bo(res->bo);
>   	struct drm_buddy *mm = &mgr->mm;
> +	struct dma_fence *fence = NULL;
>   	struct drm_buddy_block *block;
>   	uint64_t vis_usage = 0;
>   
> @@ -589,7 +595,13 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
>   
>   	amdgpu_vram_mgr_do_reserve(man);
>   
> -	drm_buddy_free_list(mm, &vres->blocks, 0);
> +	/* Clear all the blocks in free path */
> +	if (!amdgpu_fill_buffer(bo, 0, NULL, &fence, true)) {
> +		vres->flags |= DRM_BUDDY_CLEARED;
> +		dma_fence_put(fence);
> +	}
> +

That's a pretty clear no-go. This is the backend and CS is done from the
front end. E.g. we can't properly wait for the fence here.

Instead use the AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE flag for this.

IIRC we already always set this flag when ras is enabled, just make it 
mandatory for now.

> +	drm_buddy_free_list(mm, &vres->blocks, vres->flags);
>   	mutex_unlock(&mgr->lock);
>   
>   	atomic64_sub(vis_usage, &mgr->vis_usage);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> index 0e04e42cf809..8478522d7366 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> @@ -53,6 +53,11 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
>   	return (u64)PAGE_SIZE << drm_buddy_block_order(block);
>   }
>   
> +static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block)
> +{
> +	return drm_buddy_block_is_clear(block);
> +}
> +

You also need functionality which resets all cleared blocks to
uncleared after suspend/resume.

No idea how to do this, maybe Alex knows offhand.

Regards,
Christian.

>   static inline struct amdgpu_vram_mgr_resource *
>   to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
>   {


* Re: [PATCH 2/2] drm/amdgpu: Enable clear page functionality
  2023-12-08 10:00   ` Christian König
@ 2023-12-08 19:53     ` Alex Deucher
  2023-12-11  9:50       ` Christian König
  0 siblings, 1 reply; 14+ messages in thread
From: Alex Deucher @ 2023-12-08 19:53 UTC (permalink / raw)
  To: Christian König
  Cc: alexander.deucher, matthew.auld, amd-gfx, dri-devel,
	Arunpravin Paneer Selvam

On Fri, Dec 8, 2023 at 5:07 AM Christian König <christian.koenig@amd.com> wrote:
>
> Am 07.12.23 um 16:11 schrieb Arunpravin Paneer Selvam:
> > Add clear page support in vram memory region.
>
> The first patch looks good, but this here needs quite some work.
>
> >
> > Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 13 +++--
> >   .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    | 25 ++++++++++
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 50 +++++++++++++++++++
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |  4 ++
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 14 +++++-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
> >   6 files changed, 105 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > index cef920a93924..bc4ea87f8b5e 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > @@ -39,6 +39,7 @@
> >   #include "amdgpu.h"
> >   #include "amdgpu_trace.h"
> >   #include "amdgpu_amdkfd.h"
> > +#include "amdgpu_vram_mgr.h"
> >
> >   /**
> >    * DOC: amdgpu_object
> > @@ -629,15 +630,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
> >
> >       if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
> >           bo->tbo.resource->mem_type == TTM_PL_VRAM) {
> > -             struct dma_fence *fence;
> > +             struct dma_fence *fence = NULL;
> >
> > -             r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
> > +             r = amdgpu_clear_buffer(bo, bo->tbo.base.resv, &fence, true);
> >               if (unlikely(r))
> >                       goto fail_unreserve;
> >
> > -             dma_resv_add_fence(bo->tbo.base.resv, fence,
> > -                                DMA_RESV_USAGE_KERNEL);
> > -             dma_fence_put(fence);
> > +             if (fence) {
> > +                     dma_resv_add_fence(bo->tbo.base.resv, fence,
> > +                                        DMA_RESV_USAGE_KERNEL);
> > +                     dma_fence_put(fence);
> > +             }
> >       }
> >       if (!bp->resv)
> >               amdgpu_bo_unreserve(bo);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> > index 381101d2bf05..50fcd86e1033 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> > @@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
> >       }
> >   }
> >
> > +/**
> > + * amdgpu_res_cleared - check if blocks are cleared
> > + *
> > + * @cur: the cursor to extract the block
> > + *
> > + * Check if the @cur block is cleared
> > + */
> > +static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
> > +{
> > +     struct drm_buddy_block *block;
> > +
> > +     switch (cur->mem_type) {
> > +     case TTM_PL_VRAM:
> > +             block = cur->node;
> > +
> > +             if (!amdgpu_vram_mgr_is_cleared(block))
> > +                     return false;
> > +             break;
> > +     default:
> > +             return false;
> > +     }
> > +
> > +     return true;
> > +}
> > +
> >   #endif
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > index 05991c5c8ddb..6d7514e8f40c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > @@ -2222,6 +2222,56 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
> >       return 0;
> >   }
> >
> > +int amdgpu_clear_buffer(struct amdgpu_bo *bo,
> > +                     struct dma_resv *resv,
> > +                     struct dma_fence **fence,
> > +                     bool delayed)
>
> Drop the delayed parameter, that doesn't make any sense here.
>
> And as Alex said please use an amdgpu_ttm_ prefix for the function name.
>
> > +{
> > +     struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> > +     struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> > +     struct amdgpu_res_cursor cursor;
> > +     struct dma_fence *f = NULL;
> > +     u64 addr;
> > +     int r;
> > +
> > +     if (!adev->mman.buffer_funcs_enabled)
> > +             return -EINVAL;
> > +
> > +     amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
> > +
> > +     mutex_lock(&adev->mman.gtt_window_lock);
> > +     while (cursor.remaining) {
> > +             struct dma_fence *next = NULL;
> > +             u64 size;
> > +
> > +             /* Never clear more than 256MiB at once to avoid timeouts */
> > +             size = min(cursor.size, 256ULL << 20);
> > +
> > +             if (!amdgpu_res_cleared(&cursor)) {
>
> This needs to come before the min(cursor.size....) directly above. I
> suggest a handling like this:
>
> if (amdgpu_res_cleared(&cursor)) {
>         amdgpu_res_next(&cursor, cursor.size);
>         continue;
> }
>
> size = min(....
>
> > +                     r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
> > +                                               1, ring, false, &size, &addr);
> > +                     if (r)
> > +                             goto err;
> > +
> > +                     r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
> > +                                             &next, true, delayed);
> > +                     if (r)
> > +                             goto err;
> > +             }
> > +             dma_fence_put(f);
> > +             f = next;
> > +
> > +             amdgpu_res_next(&cursor, size);
> > +     }
> > +err:
> > +     mutex_unlock(&adev->mman.gtt_window_lock);
> > +     if (fence)
> > +             *fence = dma_fence_get(f);
> > +     dma_fence_put(f);
> > +
> > +     return r;
> > +}
> > +
> >   int amdgpu_fill_buffer(struct amdgpu_bo *bo,
> >                       uint32_t src_data,
> >                       struct dma_resv *resv,
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> > index 65ec82141a8e..838251166883 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> > @@ -155,6 +155,10 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> >                              uint64_t size, bool tmz,
> >                              struct dma_resv *resv,
> >                              struct dma_fence **f);
> > +int amdgpu_clear_buffer(struct amdgpu_bo *bo,
> > +                     struct dma_resv *resv,
> > +                     struct dma_fence **fence,
> > +                     bool delayed);
> >   int amdgpu_fill_buffer(struct amdgpu_bo *bo,
> >                       uint32_t src_data,
> >                       struct dma_resv *resv,
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> > index d0e199cc8f17..ff74c324b5b5 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> > @@ -435,6 +435,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
> >   {
> >       struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
> >       struct amdgpu_device *adev = to_amdgpu_device(mgr);
> > +     struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
> >       u64 vis_usage = 0, max_bytes, min_block_size;
> >       struct amdgpu_vram_mgr_resource *vres;
> >       u64 size, remaining_size, lpfn, fpfn;
> > @@ -486,6 +487,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
> >       if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
> >               vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
> >
> > +     if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
> > +             vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
> > +
> >       if (fpfn || lpfn != mgr->mm.size)
> >               /* Allocate blocks in desired range */
> >               vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
> > @@ -579,7 +583,9 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
> >       struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
> >       struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
> >       struct amdgpu_device *adev = to_amdgpu_device(mgr);
> > +     struct amdgpu_bo *bo = ttm_to_amdgpu_bo(res->bo);
> >       struct drm_buddy *mm = &mgr->mm;
> > +     struct dma_fence *fence = NULL;
> >       struct drm_buddy_block *block;
> >       uint64_t vis_usage = 0;
> >
> > @@ -589,7 +595,13 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
> >
> >       amdgpu_vram_mgr_do_reserve(man);
> >
> > -     drm_buddy_free_list(mm, &vres->blocks, 0);
> > +     /* Clear all the blocks in free path */
> > +     if (!amdgpu_fill_buffer(bo, 0, NULL, &fence, true)) {
> > +             vres->flags |= DRM_BUDDY_CLEARED;
> > +             dma_fence_put(fence);
> > +     }
> > +
>
> That's a pretty clear no-go. This is the backend and CS is done from the
> front end. E.g. can't properly wait for the fence for example.
>
> Instead use the AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE flag for this.
>
> IIRC we already always set this flag when ras is enabled, just make it
> mandatory for now.
>
> > +     drm_buddy_free_list(mm, &vres->blocks, vres->flags);
> >       mutex_unlock(&mgr->lock);
> >
> >       atomic64_sub(vis_usage, &mgr->vis_usage);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> > index 0e04e42cf809..8478522d7366 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
> > @@ -53,6 +53,11 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
> >       return (u64)PAGE_SIZE << drm_buddy_block_order(block);
> >   }
> >
> > +static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block)
> > +{
> > +     return drm_buddy_block_is_clear(block);
> > +}
> > +
>
> You also need a functionality which resets all cleared blocks to
> uncleared after suspend/resume.
>
> No idea how to do this, maybe Alex knows of hand.

Since the buffers are cleared on creation, is there actually anything to do?

Alex

>
> Regards,
> Christian.
>
> >   static inline struct amdgpu_vram_mgr_resource *
> >   to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
> >   {
>

* Re: [PATCH 2/2] drm/amdgpu: Enable clear page functionality
  2023-12-08 19:53     ` Alex Deucher
@ 2023-12-11  9:50       ` Christian König
  2023-12-11 17:22         ` Alex Deucher
  2023-12-11 23:32         ` Felix Kuehling
  0 siblings, 2 replies; 14+ messages in thread
From: Christian König @ 2023-12-11  9:50 UTC (permalink / raw)
  To: Alex Deucher, Christian König
  Cc: alexander.deucher, dri-devel, matthew.auld, amd-gfx,
	Arunpravin Paneer Selvam

Am 08.12.23 um 20:53 schrieb Alex Deucher:
> [SNIP]
>> You also need a functionality which resets all cleared blocks to
>> uncleared after suspend/resume.
>>
>> No idea how to do this, maybe Alex knows of hand.
> Since the buffers are cleared on creation, is there actually anything to do?

Well, exactly that's the problem: the buffers are no longer always
cleared on creation with this patch.

Instead we clear on free, track which areas are cleared and clear only 
the ones which aren't cleared yet on creation.

So some cases need special handling, e.g. when the engine is not
initialized yet, or across suspend/resume.

In theory after a suspend/resume cycle the VRAM is cleared to zeros, but 
in practice that's not always true.

Christian.

>
> Alex

* Re: [PATCH 2/2] drm/amdgpu: Enable clear page functionality
  2023-12-11  9:50       ` Christian König
@ 2023-12-11 17:22         ` Alex Deucher
  2023-12-11 23:32         ` Felix Kuehling
  1 sibling, 0 replies; 14+ messages in thread
From: Alex Deucher @ 2023-12-11 17:22 UTC (permalink / raw)
  To: Christian König
  Cc: Arunpravin Paneer Selvam, dri-devel, amd-gfx, alexander.deucher,
	Christian König, matthew.auld

On Mon, Dec 11, 2023 at 4:50 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Am 08.12.23 um 20:53 schrieb Alex Deucher:
>
> [SNIP]
>
> You also need a functionality which resets all cleared blocks to
> uncleared after suspend/resume.
>
> No idea how to do this, maybe Alex knows offhand.
>
> Since the buffers are cleared on creation, is there actually anything to do?
>
>
> Well exactly that's the problem, the buffers are no longer always cleared on creation with this patch.
>
> Instead we clear on free, track which areas are cleared and clear only the ones which aren't cleared yet on creation.
>
> So some cases need special handling. E.g. when the engine is not initialized yet or suspend/resume.
>
> In theory after a suspend/resume cycle the VRAM is cleared to zeros, but in practice that's not always true.

The vbios asic_init table will clear vram on boards with RAS, but not on others.

Alex


* Re: [PATCH 2/2] drm/amdgpu: Enable clear page functionality
  2023-12-11  9:50       ` Christian König
  2023-12-11 17:22         ` Alex Deucher
@ 2023-12-11 23:32         ` Felix Kuehling
  2023-12-13 14:20           ` Christian König
  1 sibling, 1 reply; 14+ messages in thread
From: Felix Kuehling @ 2023-12-11 23:32 UTC (permalink / raw)
  To: Christian König, Alex Deucher, Christian König
  Cc: alexander.deucher, amd-gfx, matthew.auld, dri-devel,
	Arunpravin Paneer Selvam


On 2023-12-11 04:50, Christian König wrote:
> Am 08.12.23 um 20:53 schrieb Alex Deucher:
>> [SNIP]
>>> You also need a functionality which resets all cleared blocks to
>>> uncleared after suspend/resume.
>>>
>>> No idea how to do this, maybe Alex knows offhand.
>> Since the buffers are cleared on creation, is there actually anything to do?
>
> Well exactly that's the problem, the buffers are no longer always 
> cleared on creation with this patch.
>
> Instead we clear on free, track which areas are cleared and clear only 
> the ones which aren't cleared yet on creation.

The code I added for clearing-on-free a long time ago, does not clear to 
0, but to a non-0 poison value. That was meant to make it easier to 
catch applications incorrectly relying on 0-initialized memory. Is that 
being changed? I didn't see it in this patch series.

Regards,
   Felix


>
> So some cases need special handling. E.g. when the engine is not 
> initialized yet or suspend/resume.
>
> In theory after a suspend/resume cycle the VRAM is cleared to zeros, 
> but in practice that's not always true.
>
> Christian.
>
>> Alex
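
For context, a minimal sketch of the fill-value question: AMDGPU_POISON is the
existing debug pattern Felix refers to, and only a real zero fill could be
reported back to the buddy allocator as cleared. The helper is purely
illustrative:

/* Hypothetical sketch: pick the wipe pattern on free. Only a zero fill
 * may be advertised as "cleared" to drm_buddy; the poison pattern keeps
 * the debugging behaviour but must leave the blocks marked uncleared. */
static u32 amdgpu_wipe_fill_value(bool report_as_cleared)
{
	return report_as_cleared ? 0 : AMDGPU_POISON;
}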


* Re: [PATCH 2/2] drm/amdgpu: Enable clear page functionality
  2023-12-11 23:32         ` Felix Kuehling
@ 2023-12-13 14:20           ` Christian König
  2023-12-13 15:39             ` Felix Kuehling
  0 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2023-12-13 14:20 UTC (permalink / raw)
  To: Felix Kuehling, Christian König, Alex Deucher
  Cc: alexander.deucher, amd-gfx, matthew.auld, dri-devel,
	Arunpravin Paneer Selvam

Am 12.12.23 um 00:32 schrieb Felix Kuehling:
>
> On 2023-12-11 04:50, Christian König wrote:
>> Am 08.12.23 um 20:53 schrieb Alex Deucher:
>>> [SNIP]
>>>> You also need a functionality which resets all cleared blocks to
>>>> uncleared after suspend/resume.
>>>>
>>>> No idea how to do this, maybe Alex knows offhand.
>>> Since the buffers are cleared on creation, is there actually 
>>> anything to do?
>>
>> Well exactly that's the problem, the buffers are no longer always 
>> cleared on creation with this patch.
>>
>> Instead we clear on free, track which areas are cleared and clear 
>> only the ones which aren't cleared yet on creation.
>
> The code I added for clearing-on-free a long time ago, does not clear 
> to 0, but to a non-0 poison value. That was meant to make it easier to 
> catch applications incorrectly relying on 0-initialized memory. Is 
> that being changed? I didn't see it in this patch series.

Yeah, Arun stumbled over that as well. Any objections that we fill with 
zeros instead or is that poison value something necessary for debugging?

Regards,
Christian.

>
> Regards,
>   Felix
>
>
>>
>> So some cases need special handling. E.g. when the engine is not 
>> initialized yet or suspend/resume.
>>
>> In theory after a suspend/resume cycle the VRAM is cleared to zeros, 
>> but in practice that's not always true.
>>
>> Christian.
>>
>>> Alex



* Re: [PATCH 2/2] drm/amdgpu: Enable clear page functionality
  2023-12-13 14:20           ` Christian König
@ 2023-12-13 15:39             ` Felix Kuehling
  2023-12-13 15:46               ` Christian König
  2023-12-13 15:46               ` Michel Dänzer
  0 siblings, 2 replies; 14+ messages in thread
From: Felix Kuehling @ 2023-12-13 15:39 UTC (permalink / raw)
  To: Christian König, Christian König, Alex Deucher
  Cc: alexander.deucher, amd-gfx, matthew.auld, dri-devel,
	Arunpravin Paneer Selvam

On 2023-12-13 9:20, Christian König wrote:
> Am 12.12.23 um 00:32 schrieb Felix Kuehling:
>>
>> On 2023-12-11 04:50, Christian König wrote:
>>> Am 08.12.23 um 20:53 schrieb Alex Deucher:
>>>> [SNIP]
>>>>> You also need a functionality which resets all cleared blocks to
>>>>> uncleared after suspend/resume.
>>>>>
>>>>> No idea how to do this, maybe Alex knows offhand.
>>>> Since the buffers are cleared on creation, is there actually 
>>>> anything to do?
>>>
>>> Well exactly that's the problem, the buffers are no longer always 
>>> cleared on creation with this patch.
>>>
>>> Instead we clear on free, track which areas are cleared and clear 
>>> only the ones which aren't cleared yet on creation.
>>
>> The code I added for clearing-on-free a long time ago, does not clear 
>> to 0, but to a non-0 poison value. That was meant to make it easier 
>> to catch applications incorrectly relying on 0-initialized memory. Is 
>> that being changed? I didn't see it in this patch series.
>
> Yeah, Arun stumbled over that as well. Any objections that we fill 
> with zeros instead or is that poison value something necessary for 
> debugging?

I don't think it's strictly necessary. But it may encourage sloppy user 
mode programming to rely on 0-initialized memory that ends up breaking 
in corner cases or on older kernels.

That said, I see that this patch series adds clearing of memory in the 
VRAM manager, but it doesn't remove the clearing for 
AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE in amdgpu_bo_release_notify and 
amdgpu_move_blit. This will lead to duplicate work.

I'm also not sure how the clearing added in this patch series will 
affect free-latency observed in user mode. Will this be synchronous and 
cause the user mode thread to stall while the memory is being cleared?

Regards,
   Felix


>
> Regards,
> Christian.
>
>>
>> Regards,
>>   Felix
>>
>>
>>>
>>> So some cases need special handling. E.g. when the engine is not 
>>> initialized yet or suspend/resume.
>>>
>>> In theory after a suspend/resume cycle the VRAM is cleared to zeros, 
>>> but in practice that's not always true.
>>>
>>> Christian.
>>>
>>>> Alex
>
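
As an illustration of the duplicate-work concern, a minimal sketch of how the
release path could simply report an already-wiped BO to the VRAM manager
instead of clearing a second time; the flag plumbing and helper name are
assumptions, while DRM_BUDDY_CLEARED and vres->flags come from this series:

/* Hypothetical sketch: if the BO was already wiped on release
 * (AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE), just tag the resource so
 * drm_buddy_free_list() marks the blocks as cleared, rather than issuing
 * a second clear from the VRAM manager. */
static void amdgpu_vram_mgr_set_cleared(struct amdgpu_vram_mgr_resource *vres,
					bool wiped_on_release)
{
	if (wiped_on_release)
		vres->flags |= DRM_BUDDY_CLEARED;
}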


* Re: [PATCH 2/2] drm/amdgpu: Enable clear page functionality
  2023-12-13 15:39             ` Felix Kuehling
@ 2023-12-13 15:46               ` Christian König
  2023-12-13 15:46               ` Michel Dänzer
  1 sibling, 0 replies; 14+ messages in thread
From: Christian König @ 2023-12-13 15:46 UTC (permalink / raw)
  To: Felix Kuehling, Christian König, Alex Deucher
  Cc: alexander.deucher, amd-gfx, matthew.auld, dri-devel,
	Arunpravin Paneer Selvam

Am 13.12.23 um 16:39 schrieb Felix Kuehling:
> On 2023-12-13 9:20, Christian König wrote:
>> Am 12.12.23 um 00:32 schrieb Felix Kuehling:
>>>
>>> On 2023-12-11 04:50, Christian König wrote:
>>>> Am 08.12.23 um 20:53 schrieb Alex Deucher:
>>>>> [SNIP]
>>>>>> You also need a functionality which resets all cleared blocks to
>>>>>> uncleared after suspend/resume.
>>>>>>
>>>>>> No idea how to do this, maybe Alex knows offhand.
>>>>> Since the buffers are cleared on creation, is there actually 
>>>>> anything to do?
>>>>
>>>> Well exactly that's the problem, the buffers are no longer always 
>>>> cleared on creation with this patch.
>>>>
>>>> Instead we clear on free, track which areas are cleared and clear 
>>>> only the ones which aren't cleared yet on creation.
>>>
>>> The code I added for clearing-on-free a long time ago, does not 
>>> clear to 0, but to a non-0 poison value. That was meant to make it 
>>> easier to catch applications incorrectly relying on 0-initialized 
>>> memory. Is that being changed? I didn't see it in this patch series.
>>
>> Yeah, Arun stumbled over that as well. Any objections that we fill 
>> with zeros instead or is that poison value something necessary for 
>> debugging?
>
> I don't think it's strictly necessary. But it may encourage sloppy 
> user mode programming to rely on 0-initialized memory that ends up 
> breaking in corner cases or on older kernels.
>
> That said, I see that this patch series adds clearing of memory in the 
> VRAM manager, but it doesn't remove the clearing for 
> AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE in amdgpu_bo_release_notify and 
> amdgpu_move_blit. This will lead to duplicate work.
>
> I'm also not sure how the clearing added in this patch series will 
> affect free-latency observed in user mode. Will this be synchronous 
> and cause the user mode thread to stall while the memory is being 
> cleared?

Yeah, that's not fully working at the moment. I already pointed out as 
well that Arun should remove the clearing in the VRAM manager.

Regards,
Christian.

>
> Regards,
>   Felix
>
>
>>
>> Regards,
>> Christian.
>>
>>>
>>> Regards,
>>>   Felix
>>>
>>>
>>>>
>>>> So some cases need special handling. E.g. when the engine is not 
>>>> initialized yet or suspend/resume.
>>>>
>>>> In theory after a suspend/resume cycle the VRAM is cleared to 
>>>> zeros, but in practice that's not always true.
>>>>
>>>> Christian.
>>>>
>>>>> Alex
>>



* Re: [PATCH 2/2] drm/amdgpu: Enable clear page functionality
  2023-12-13 15:39             ` Felix Kuehling
  2023-12-13 15:46               ` Christian König
@ 2023-12-13 15:46               ` Michel Dänzer
  2023-12-14 10:31                 ` Christian König
  1 sibling, 1 reply; 14+ messages in thread
From: Michel Dänzer @ 2023-12-13 15:46 UTC (permalink / raw)
  To: Felix Kuehling, Christian König, Christian König, Alex Deucher
  Cc: alexander.deucher, dri-devel, matthew.auld, amd-gfx,
	Arunpravin Paneer Selvam

On 2023-12-13 16:39, Felix Kuehling wrote:
> On 2023-12-13 9:20, Christian König wrote:
>> Am 12.12.23 um 00:32 schrieb Felix Kuehling:
>>> On 2023-12-11 04:50, Christian König wrote:
>>>> Am 08.12.23 um 20:53 schrieb Alex Deucher:
>>>>> [SNIP]
>>>>>> You also need a functionality which resets all cleared blocks to
>>>>>> uncleared after suspend/resume.
>>>>>>
>>>>>> No idea how to do this, maybe Alex knows offhand.
>>>>> Since the buffers are cleared on creation, is there actually anything to do?
>>>>
>>>> Well exactly that's the problem, the buffers are no longer always cleared on creation with this patch.
>>>>
>>>> Instead we clear on free, track which areas are cleared and clear only the ones which aren't cleared yet on creation.
>>>
>>> The code I added for clearing-on-free a long time ago, does not clear to 0, but to a non-0 poison value. That was meant to make it easier to catch applications incorrectly relying on 0-initialized memory. Is that being changed? I didn't see it in this patch series.
>>
>> Yeah, Arun stumbled over that as well. Any objections that we fill with zeros instead or is that poison value something necessary for debugging?
> 
> I don't think it's strictly necessary. But it may encourage sloppy user mode programming to rely on 0-initialized memory that ends up breaking in corner cases or on older kernels.

From a security PoV, the kernel should never return uncleared memory to (at least unprivileged) user space. This series seems like a big step in that direction.


-- 
Earthling Michel Dänzer            |                  https://redhat.com
Libre software enthusiast          |         Mesa and Xwayland developer



* Re: [PATCH 2/2] drm/amdgpu: Enable clear page functionality
  2023-12-13 15:46               ` Michel Dänzer
@ 2023-12-14 10:31                 ` Christian König
  2023-12-14 11:14                   ` Michel Dänzer
  0 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2023-12-14 10:31 UTC (permalink / raw)
  To: Michel Dänzer, Felix Kuehling, Christian König, Alex Deucher
  Cc: alexander.deucher, dri-devel, matthew.auld, amd-gfx,
	Arunpravin Paneer Selvam


Am 13.12.23 um 16:46 schrieb Michel Dänzer:
>  From a security PoV, the kernel should never return uncleared memory to (at least unprivileged) user space. This series seems like a big step in that direction.

Well, please take a look at the MAP_UNINITIALIZED flag for mmap(). We 
even have the functionality to return uninitialized system memory when 
the kernel compile option for this is set, since this is an important 
optimization for many use cases.

Regards,
Christian.



* Re: [PATCH 2/2] drm/amdgpu: Enable clear page functionality
  2023-12-14 10:31                 ` Christian König
@ 2023-12-14 11:14                   ` Michel Dänzer
  0 siblings, 0 replies; 14+ messages in thread
From: Michel Dänzer @ 2023-12-14 11:14 UTC (permalink / raw)
  To: Christian König, Felix Kuehling, Christian König, Alex Deucher
  Cc: alexander.deucher, amd-gfx, matthew.auld, dri-devel,
	Arunpravin Paneer Selvam

On 2023-12-14 11:31, Christian König wrote:
> Am 13.12.23 um 16:46 schrieb Michel Dänzer:
>> From a security PoV, the kernel should never return uncleared memory to (at least unprivileged) user space. This series seems like a big step in that direction.
> 
> Well please take a look at the MAP_UNINITIALIZED flag for mmap().

       MAP_UNINITIALIZED (since Linux 2.6.33)
              Don't clear anonymous pages. This flag is intended to improve
              performance on embedded devices. This flag is honored only if
              the kernel was configured with the CONFIG_MMAP_ALLOW_UNINITIALIZED
              option. Because of the security implications, that option is
              normally enabled only on embedded devices (i.e., devices where
              one has complete control of the contents of user memory).


> We even have the functionality to return uninitialized system memory when the kernel compile option for this is set

From mm/Kconfig:

config MMAP_ALLOW_UNINITIALIZED 
        bool "Allow mmapped anonymous memory to be uninitialized"
        depends on EXPERT && !MMU
        default n
        help
          Normally, and according to the Linux spec, anonymous memory obtained
          from mmap() has its contents cleared before it is passed to
          userspace.  Enabling this config option allows you to request that
          mmap() skip that if it is given an MAP_UNINITIALIZED flag, thus
          providing a huge performance boost.  If this option is not enabled,
          then the flag will be ignored.
          
          This is taken advantage of by uClibc's malloc(), and also by
          ELF-FDPIC binfmt's brk and stack allocator.
          
          Because of the obvious security issues, this option should only be
          enabled on embedded devices where you control what is run in
          userspace.  Since that isn't generally a problem on no-MMU systems,
          it is normally safe to say Y here.
        
          See Documentation/admin-guide/mm/nommu-mmap.rst for more information.


Both look consistent with what I wrote.


> since this is an important optimization for many use cases.

Per above, it's available only on platforms without MMU.


-- 
Earthling Michel Dänzer            |                  https://redhat.com
Libre software enthusiast          |         Mesa and Xwayland developer



end of thread, other threads:[~2023-12-14 11:14 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-07 15:11 [PATCH 1/2] drm/buddy: Implement tracking clear page feature Arunpravin Paneer Selvam
2023-12-07 15:11 ` [PATCH 2/2] drm/amdgpu: Enable clear page functionality Arunpravin Paneer Selvam
2023-12-07 16:46   ` Alex Deucher
2023-12-08 10:00   ` Christian König
2023-12-08 19:53     ` Alex Deucher
2023-12-11  9:50       ` Christian König
2023-12-11 17:22         ` Alex Deucher
2023-12-11 23:32         ` Felix Kuehling
2023-12-13 14:20           ` Christian König
2023-12-13 15:39             ` Felix Kuehling
2023-12-13 15:46               ` Christian König
2023-12-13 15:46               ` Michel Dänzer
2023-12-14 10:31                 ` Christian König
2023-12-14 11:14                   ` Michel Dänzer
