All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: further lower VRAM allocation overhead
@ 2021-07-13 19:17 Eric Huang
  2021-07-13 19:19 ` Eric Huang
  0 siblings, 1 reply; 11+ messages in thread
From: Eric Huang @ 2021-07-13 19:17 UTC (permalink / raw)
  To: amd-gfx; +Cc: Eric Huang, Felix Kuehling, Christian König

For allocations larger than 48MiB we need more than a page for the
housekeeping in the worst case resulting in the usual vmalloc overhead.

Try to avoid this by assuming the good case and only falling back to the
worst case if this didn't work.

Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 71 +++++++++++++++-----
 1 file changed, 53 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index be4261c4512e..ecbe05e1db66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -361,9 +361,11 @@ static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
 static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 			       struct ttm_buffer_object *tbo,
 			       const struct ttm_place *place,
+			       unsigned long num_nodes,
+			       unsigned long pages_per_node,
 			       struct ttm_resource *mem)
 {
-	unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
+	unsigned long lpfn, pages_left, pages;
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
 	struct amdgpu_device *adev = to_amdgpu_device(mgr);
 	uint64_t vis_usage = 0, mem_bytes, max_bytes;
@@ -393,21 +395,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 		return -ENOSPC;
 	}
 
-	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
-		pages_per_node = ~0ul;
-		num_nodes = 1;
-	} else {
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		pages_per_node = HPAGE_PMD_NR;
-#else
-		/* default to 2MB */
-		pages_per_node = 2UL << (20UL - PAGE_SHIFT);
-#endif
-		pages_per_node = max_t(uint32_t, pages_per_node,
-				       mem->page_alignment);
-		num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
-	}
-
 	nodes = kvmalloc_array((uint32_t)num_nodes, sizeof(*nodes),
 			       GFP_KERNEL | __GFP_ZERO);
 	if (!nodes) {
@@ -435,7 +422,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	i = 0;
 	spin_lock(&mgr->lock);
 	while (pages_left) {
-		uint32_t alignment = mem->page_alignment;
+		unsigned long alignment = mem->page_alignment;
+
+		if (i >= num_nodes) {
+			r = -E2BIG;
+			goto error;
+		}
 
 		if (pages >= pages_per_node)
 			alignment = pages_per_node;
@@ -492,6 +484,49 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	return r;
 }
 
+/**
+ * amdgpu_vram_mgr_alloc - allocate new range
+ *
+ * @man: TTM memory type manager
+ * @tbo: TTM BO we need this range for
+ * @place: placement flags and restrictions
+ * @mem: the resulting mem object
+ *
+ * Allocate VRAM for the given BO.
+ */
+static int amdgpu_vram_mgr_alloc(struct ttm_resource_manager *man,
+				 struct ttm_buffer_object *tbo,
+				 const struct ttm_place *place,
+				 struct ttm_resource *mem)
+{
+	unsigned long num_nodes, pages_per_node;
+	int r;
+
+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
+		return amdgpu_vram_mgr_new(man, tbo, place, 1, ~0ul, mem);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	pages_per_node = HPAGE_PMD_NR;
+#else
+	/* default to 2MB */
+	pages_per_node = 2UL << (20UL - PAGE_SHIFT);
+#endif
+	pages_per_node = max_t(uint32_t, pages_per_node,
+			       mem->page_alignment);
+	num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
+
+	if (sizeof(struct drm_mm_node) * num_nodes > PAGE_SIZE) {
+		r = amdgpu_vram_mgr_new(man, tbo, place,
+				PAGE_SIZE / sizeof(struct drm_mm_node),
+				pages_per_node,	mem);
+		if (r != -E2BIG)
+			return r;
+	}
+
+	return amdgpu_vram_mgr_new(man, tbo, place, num_nodes, pages_per_node,
+				   mem);
+}
+
 /**
  * amdgpu_vram_mgr_del - free ranges
  *
@@ -693,7 +728,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
 }
 
 static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
-	.alloc	= amdgpu_vram_mgr_new,
+	.alloc	= amdgpu_vram_mgr_alloc,
 	.free	= amdgpu_vram_mgr_del,
 	.debug	= amdgpu_vram_mgr_debug
 };
-- 
2.25.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 11+ messages in thread
* [PATCH] drm/amdgpu: further lower VRAM allocation overhead
@ 2021-07-13 13:32 Christian König
  2021-07-13 16:11 ` Felix Kuehling
  0 siblings, 1 reply; 11+ messages in thread
From: Christian König @ 2021-07-13 13:32 UTC (permalink / raw)
  To: jinhuieric.huang
  Cc: felix.kuehling, amd-gfx, Luugi.Marsan, Jenny-Jing.Liu, Chris.Mason

For allocations larger than 48MiB we need more than a page for the
housekeeping in the worst case resulting in the usual vmalloc overhead.

Try to avoid this by assuming the good case and only falling back to the
worst case if this didn't work.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 80 +++++++++++++++-----
 1 file changed, 60 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 2fd77c36a1ff..ab8c5e28df7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -361,19 +361,23 @@ static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
  * @man: TTM memory type manager
  * @tbo: TTM BO we need this range for
  * @place: placement flags and restrictions
- * @mem: the resulting mem object
+ * @num_nodes: number of page nodes to use.
+ * @pages_per_node: number of pages per node to use.
+ * @res: the resulting mem object
  *
  * Allocate VRAM for the given BO.
  */
 static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 			       struct ttm_buffer_object *tbo,
 			       const struct ttm_place *place,
+			       unsigned long num_nodes,
+			       unsigned long pages_per_node,
 			       struct ttm_resource **res)
 {
-	unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
 	struct amdgpu_device *adev = to_amdgpu_device(mgr);
 	uint64_t vis_usage = 0, mem_bytes, max_bytes;
+	unsigned long lpfn, pages_left, pages;
 	struct ttm_range_mgr_node *node;
 	struct drm_mm *mm = &mgr->mm;
 	enum drm_mm_insert_mode mode;
@@ -395,21 +399,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 		goto error_sub;
 	}
 
-	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
-		pages_per_node = ~0ul;
-		num_nodes = 1;
-	} else {
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		pages_per_node = HPAGE_PMD_NR;
-#else
-		/* default to 2MB */
-		pages_per_node = 2UL << (20UL - PAGE_SHIFT);
-#endif
-		pages_per_node = max_t(uint32_t, pages_per_node,
-				       tbo->page_alignment);
-		num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
-	}
-
 	node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
 			GFP_KERNEL | __GFP_ZERO);
 	if (!node) {
@@ -431,10 +420,15 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	i = 0;
 	spin_lock(&mgr->lock);
 	while (pages_left) {
-		uint32_t alignment = tbo->page_alignment;
+		unsigned long alignment = tbo->page_alignment;
+
+		if (i >= num_nodes) {
+			r = -E2BIG;
+			goto error_free;
+		}
 
 		if (pages >= pages_per_node)
-			alignment = pages_per_node;
+			alignment = max(alignment, pages_per_node);
 
 		r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
 						alignment, 0, place->fpfn,
@@ -483,6 +477,52 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	return r;
 }
 
+/**
+ * amdgpu_vram_mgr_alloc - allocate new range
+ *
+ * @man: TTM memory type manager
+ * @tbo: TTM BO we need this range for
+ * @place: placement flags and restrictions
+ * @res: the resulting mem object
+ *
+ * Allocate VRAM for the given BO.
+ */
+static int amdgpu_vram_mgr_alloc(struct ttm_resource_manager *man,
+				 struct ttm_buffer_object *tbo,
+				 const struct ttm_place *place,
+				 struct ttm_resource **res)
+{
+	unsigned long num_nodes, pages_per_node;
+	struct ttm_range_mgr_node *node;
+	int r;
+
+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
+		return amdgpu_vram_mgr_new(man, tbo, place, 1, ~0ul, res);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	pages_per_node = HPAGE_PMD_NR;
+#else
+	/* default to 2MB */
+	pages_per_node = 2UL << (20UL - PAGE_SHIFT);
+#endif
+	pages_per_node = max_t(uint32_t, pages_per_node, tbo->page_alignment);
+	num_nodes = DIV_ROUND_UP_ULL(PFN_UP(tbo->base.size), pages_per_node);
+
+	if (struct_size(node, mm_nodes, num_nodes) > PAGE_SIZE) {
+		size_t size = PAGE_SIZE;
+
+		size -= sizeof(struct ttm_range_mgr_node);
+		size /= sizeof(struct drm_mm_node);
+		r = amdgpu_vram_mgr_new(man, tbo, place, size, pages_per_node,
+					res);
+		if (r != -E2BIG)
+			return r;
+	}
+
+	return amdgpu_vram_mgr_new(man, tbo, place, num_nodes, pages_per_node,
+				   res);
+}
+
 /**
  * amdgpu_vram_mgr_del - free ranges
  *
@@ -680,7 +720,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
 }
 
 static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
-	.alloc	= amdgpu_vram_mgr_new,
+	.alloc	= amdgpu_vram_mgr_alloc,
 	.free	= amdgpu_vram_mgr_del,
 	.debug	= amdgpu_vram_mgr_debug
 };
-- 
2.25.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2021-07-15 14:38 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-13 19:17 [PATCH] drm/amdgpu: further lower VRAM allocation overhead Eric Huang
2021-07-13 19:19 ` Eric Huang
2021-07-14  8:33   ` Christian König
2021-07-14  9:41     ` Pan, Xinhui
2021-07-14 12:17       ` Christian König
2021-07-15 13:50     ` Eric Huang
2021-07-15 14:38       ` Felix Kuehling
  -- strict thread matches above, loose matches on Subject: below --
2021-07-13 13:32 Christian König
2021-07-13 16:11 ` Felix Kuehling
2021-07-13 17:23   ` Eric Huang
2021-07-14 12:08   ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.