All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: further lower VRAM allocation overhead
@ 2021-07-13 19:17 Eric Huang
  2021-07-13 19:19 ` Eric Huang
  0 siblings, 1 reply; 11+ messages in thread
From: Eric Huang @ 2021-07-13 19:17 UTC (permalink / raw)
  To: amd-gfx; +Cc: Eric Huang, Felix Kuehling, Christian König

For allocations larger than 48MiB we need more than a page for the
housekeeping in the worst case resulting in the usual vmalloc overhead.

Try to avoid this by assuming the good case and only falling back to the
worst case if this didn't work.

Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 71 +++++++++++++++-----
 1 file changed, 53 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index be4261c4512e..ecbe05e1db66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -361,9 +361,11 @@ static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
 static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 			       struct ttm_buffer_object *tbo,
 			       const struct ttm_place *place,
+			       unsigned long num_nodes,
+			       unsigned long pages_per_node,
 			       struct ttm_resource *mem)
 {
-	unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
+	unsigned long lpfn, pages_left, pages;
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
 	struct amdgpu_device *adev = to_amdgpu_device(mgr);
 	uint64_t vis_usage = 0, mem_bytes, max_bytes;
@@ -393,21 +395,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 		return -ENOSPC;
 	}
 
-	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
-		pages_per_node = ~0ul;
-		num_nodes = 1;
-	} else {
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		pages_per_node = HPAGE_PMD_NR;
-#else
-		/* default to 2MB */
-		pages_per_node = 2UL << (20UL - PAGE_SHIFT);
-#endif
-		pages_per_node = max_t(uint32_t, pages_per_node,
-				       mem->page_alignment);
-		num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
-	}
-
 	nodes = kvmalloc_array((uint32_t)num_nodes, sizeof(*nodes),
 			       GFP_KERNEL | __GFP_ZERO);
 	if (!nodes) {
@@ -435,7 +422,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	i = 0;
 	spin_lock(&mgr->lock);
 	while (pages_left) {
-		uint32_t alignment = mem->page_alignment;
+		unsigned long alignment = mem->page_alignment;
+
+		if (i >= num_nodes) {
+			r = -E2BIG;
+			goto error;
+		}
 
 		if (pages >= pages_per_node)
 			alignment = pages_per_node;
@@ -492,6 +484,49 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	return r;
 }
 
+/**
+ * amdgpu_vram_mgr_alloc - allocate new range
+ *
+ * @man: TTM memory type manager
+ * @tbo: TTM BO we need this range for
+ * @place: placement flags and restrictions
+ * @mem: the resulting mem object
+ *
+ * Allocate VRAM for the given BO.
+ */
+static int amdgpu_vram_mgr_alloc(struct ttm_resource_manager *man,
+				 struct ttm_buffer_object *tbo,
+				 const struct ttm_place *place,
+				 struct ttm_resource *mem)
+{
+	unsigned long num_nodes, pages_per_node;
+	int r;
+
+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
+		return amdgpu_vram_mgr_new(man, tbo, place, 1, ~0ul, mem);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	pages_per_node = HPAGE_PMD_NR;
+#else
+	/* default to 2MB */
+	pages_per_node = 2UL << (20UL - PAGE_SHIFT);
+#endif
+	pages_per_node = max_t(uint32_t, pages_per_node,
+			       mem->page_alignment);
+	num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
+
+	if (sizeof(struct drm_mm_node) * num_nodes > PAGE_SIZE) {
+		r = amdgpu_vram_mgr_new(man, tbo, place,
+				PAGE_SIZE / sizeof(struct drm_mm_node),
+				pages_per_node,	mem);
+		if (r != -E2BIG)
+			return r;
+	}
+
+	return amdgpu_vram_mgr_new(man, tbo, place, num_nodes, pages_per_node,
+				   mem);
+}
+
 /**
  * amdgpu_vram_mgr_del - free ranges
  *
@@ -693,7 +728,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
 }
 
 static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
-	.alloc	= amdgpu_vram_mgr_new,
+	.alloc	= amdgpu_vram_mgr_alloc,
 	.free	= amdgpu_vram_mgr_del,
 	.debug	= amdgpu_vram_mgr_debug
 };
-- 
2.25.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 11+ messages in thread
* [PATCH] drm/amdgpu: further lower VRAM allocation overhead
@ 2021-07-13 13:32 Christian König
  2021-07-13 16:11 ` Felix Kuehling
  0 siblings, 1 reply; 11+ messages in thread
From: Christian König @ 2021-07-13 13:32 UTC (permalink / raw)
  To: jinhuieric.huang
  Cc: felix.kuehling, amd-gfx, Luugi.Marsan, Jenny-Jing.Liu, Chris.Mason

For allocations larger than 48MiB we need more than a page for the
housekeeping in the worst case resulting in the usual vmalloc overhead.

Try to avoid this by assuming the good case and only falling back to the
worst case if this didn't work.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 80 +++++++++++++++-----
 1 file changed, 60 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 2fd77c36a1ff..ab8c5e28df7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -361,19 +361,23 @@ static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
  * @man: TTM memory type manager
  * @tbo: TTM BO we need this range for
  * @place: placement flags and restrictions
- * @mem: the resulting mem object
+ * @num_nodes: number of page nodes to use.
+ * @pages_per_node: number of pages per node to use.
+ * @res: the resulting mem object
  *
  * Allocate VRAM for the given BO.
  */
 static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 			       struct ttm_buffer_object *tbo,
 			       const struct ttm_place *place,
+			       unsigned long num_nodes,
+			       unsigned long pages_per_node,
 			       struct ttm_resource **res)
 {
-	unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
 	struct amdgpu_device *adev = to_amdgpu_device(mgr);
 	uint64_t vis_usage = 0, mem_bytes, max_bytes;
+	unsigned long lpfn, pages_left, pages;
 	struct ttm_range_mgr_node *node;
 	struct drm_mm *mm = &mgr->mm;
 	enum drm_mm_insert_mode mode;
@@ -395,21 +399,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 		goto error_sub;
 	}
 
-	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
-		pages_per_node = ~0ul;
-		num_nodes = 1;
-	} else {
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		pages_per_node = HPAGE_PMD_NR;
-#else
-		/* default to 2MB */
-		pages_per_node = 2UL << (20UL - PAGE_SHIFT);
-#endif
-		pages_per_node = max_t(uint32_t, pages_per_node,
-				       tbo->page_alignment);
-		num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
-	}
-
 	node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
 			GFP_KERNEL | __GFP_ZERO);
 	if (!node) {
@@ -431,10 +420,15 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	i = 0;
 	spin_lock(&mgr->lock);
 	while (pages_left) {
-		uint32_t alignment = tbo->page_alignment;
+		unsigned long alignment = tbo->page_alignment;
+
+		if (i >= num_nodes) {
+			r = -E2BIG;
+			goto error_free;
+		}
 
 		if (pages >= pages_per_node)
-			alignment = pages_per_node;
+			alignment = max(alignment, pages_per_node);
 
 		r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
 						alignment, 0, place->fpfn,
@@ -483,6 +477,52 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	return r;
 }
 
+/**
+ * amdgpu_vram_mgr_alloc - allocate new range
+ *
+ * @man: TTM memory type manager
+ * @tbo: TTM BO we need this range for
+ * @place: placement flags and restrictions
+ * @res: the resulting mem object
+ *
+ * Allocate VRAM for the given BO.
+ */
+static int amdgpu_vram_mgr_alloc(struct ttm_resource_manager *man,
+				 struct ttm_buffer_object *tbo,
+				 const struct ttm_place *place,
+				 struct ttm_resource **res)
+{
+	unsigned long num_nodes, pages_per_node;
+	struct ttm_range_mgr_node *node;
+	int r;
+
+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
+		return amdgpu_vram_mgr_new(man, tbo, place, 1, ~0ul, res);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	pages_per_node = HPAGE_PMD_NR;
+#else
+	/* default to 2MB */
+	pages_per_node = 2UL << (20UL - PAGE_SHIFT);
+#endif
+	pages_per_node = max_t(uint32_t, pages_per_node, tbo->page_alignment);
+	num_nodes = DIV_ROUND_UP_ULL(PFN_UP(tbo->base.size), pages_per_node);
+
+	if (struct_size(node, mm_nodes, num_nodes) > PAGE_SIZE) {
+		size_t size = PAGE_SIZE;
+
+		size -= sizeof(struct ttm_range_mgr_node);
+		size /= sizeof(struct drm_mm_node);
+		r = amdgpu_vram_mgr_new(man, tbo, place, size, pages_per_node,
+					res);
+		if (r != -E2BIG)
+			return r;
+	}
+
+	return amdgpu_vram_mgr_new(man, tbo, place, num_nodes, pages_per_node,
+				   res);
+}
+
 /**
  * amdgpu_vram_mgr_del - free ranges
  *
@@ -680,7 +720,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
 }
 
 static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
-	.alloc	= amdgpu_vram_mgr_new,
+	.alloc	= amdgpu_vram_mgr_alloc,
 	.free	= amdgpu_vram_mgr_del,
 	.debug	= amdgpu_vram_mgr_debug
 };
-- 
2.25.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2021-07-15 14:38 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-13 19:17 [PATCH] drm/amdgpu: further lower VRAM allocation overhead Eric Huang
2021-07-13 19:19 ` Eric Huang
2021-07-14  8:33   ` Christian König
2021-07-14  9:41     ` Pan, Xinhui
2021-07-14 12:17       ` Christian König
2021-07-15 13:50     ` Eric Huang
2021-07-15 14:38       ` Felix Kuehling
  -- strict thread matches above, loose matches on Subject: below --
2021-07-13 13:32 Christian König
2021-07-13 16:11 ` Felix Kuehling
2021-07-13 17:23   ` Eric Huang
2021-07-14 12:08   ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.