* [PATCH] drm/amdgpu: further lower VRAM allocation overhead
@ 2021-07-13 13:32 Christian König
2021-07-13 16:11 ` Felix Kuehling
0 siblings, 1 reply; 11+ messages in thread
From: Christian König @ 2021-07-13 13:32 UTC (permalink / raw)
To: jinhuieric.huang
Cc: felix.kuehling, amd-gfx, Luugi.Marsan, Jenny-Jing.Liu, Chris.Mason
For allocations larger than 48MiB we need more than a page for the
housekeeping in the worst case resulting in the usual vmalloc overhead.
Try to avoid this by assuming the good case and only falling back to the
worst case if this didn't work.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 80 +++++++++++++++-----
1 file changed, 60 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 2fd77c36a1ff..ab8c5e28df7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -361,19 +361,23 @@ static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
* @man: TTM memory type manager
* @tbo: TTM BO we need this range for
* @place: placement flags and restrictions
- * @mem: the resulting mem object
+ * @num_nodes: number of page nodes to use.
+ * @pages_per_node: number of pages per node to use.
+ * @res: the resulting mem object
*
* Allocate VRAM for the given BO.
*/
static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
struct ttm_buffer_object *tbo,
const struct ttm_place *place,
+ unsigned long num_nodes,
+ unsigned long pages_per_node,
struct ttm_resource **res)
{
- unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
uint64_t vis_usage = 0, mem_bytes, max_bytes;
+ unsigned long lpfn, pages_left, pages;
struct ttm_range_mgr_node *node;
struct drm_mm *mm = &mgr->mm;
enum drm_mm_insert_mode mode;
@@ -395,21 +399,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
goto error_sub;
}
- if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
- pages_per_node = ~0ul;
- num_nodes = 1;
- } else {
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- pages_per_node = HPAGE_PMD_NR;
-#else
- /* default to 2MB */
- pages_per_node = 2UL << (20UL - PAGE_SHIFT);
-#endif
- pages_per_node = max_t(uint32_t, pages_per_node,
- tbo->page_alignment);
- num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
- }
-
node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
GFP_KERNEL | __GFP_ZERO);
if (!node) {
@@ -431,10 +420,15 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
i = 0;
spin_lock(&mgr->lock);
while (pages_left) {
- uint32_t alignment = tbo->page_alignment;
+ unsigned long alignment = tbo->page_alignment;
+
+ if (i >= num_nodes) {
+ r = -E2BIG;
+ goto error_free;
+ }
if (pages >= pages_per_node)
- alignment = pages_per_node;
+ alignment = max(alignment, pages_per_node);
r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
alignment, 0, place->fpfn,
@@ -483,6 +477,52 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
return r;
}
+/**
+ * amdgpu_vram_mgr_alloc - allocate new range
+ *
+ * @man: TTM memory type manager
+ * @tbo: TTM BO we need this range for
+ * @place: placement flags and restrictions
+ * @res: the resulting mem object
+ *
+ * Allocate VRAM for the given BO.
+ */
+static int amdgpu_vram_mgr_alloc(struct ttm_resource_manager *man,
+ struct ttm_buffer_object *tbo,
+ const struct ttm_place *place,
+ struct ttm_resource **res)
+{
+ unsigned long num_nodes, pages_per_node;
+ struct ttm_range_mgr_node *node;
+ int r;
+
+ if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
+ return amdgpu_vram_mgr_new(man, tbo, place, 1, ~0ul, res);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ pages_per_node = HPAGE_PMD_NR;
+#else
+ /* default to 2MB */
+ pages_per_node = 2UL << (20UL - PAGE_SHIFT);
+#endif
+ pages_per_node = max_t(uint32_t, pages_per_node, tbo->page_alignment);
+ num_nodes = DIV_ROUND_UP_ULL(PFN_UP(tbo->base.size), pages_per_node);
+
+ if (struct_size(node, mm_nodes, num_nodes) > PAGE_SIZE) {
+ size_t size = PAGE_SIZE;
+
+ size -= sizeof(struct ttm_range_mgr_node);
+ size /= sizeof(struct drm_mm_node);
+ r = amdgpu_vram_mgr_new(man, tbo, place, size, pages_per_node,
+ res);
+ if (r != -E2BIG)
+ return r;
+ }
+
+ return amdgpu_vram_mgr_new(man, tbo, place, num_nodes, pages_per_node,
+ res);
+}
+
/**
* amdgpu_vram_mgr_del - free ranges
*
@@ -680,7 +720,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
}
static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
- .alloc = amdgpu_vram_mgr_new,
+ .alloc = amdgpu_vram_mgr_alloc,
.free = amdgpu_vram_mgr_del,
.debug = amdgpu_vram_mgr_debug
};
--
2.25.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH] drm/amdgpu: further lower VRAM allocation overhead
2021-07-13 13:32 [PATCH] drm/amdgpu: further lower VRAM allocation overhead Christian König
@ 2021-07-13 16:11 ` Felix Kuehling
2021-07-13 17:23 ` Eric Huang
2021-07-14 12:08 ` Christian König
0 siblings, 2 replies; 11+ messages in thread
From: Felix Kuehling @ 2021-07-13 16:11 UTC (permalink / raw)
To: Christian König, jinhuieric.huang
Cc: amd-gfx, Luugi.Marsan, Jenny-Jing.Liu, Chris.Mason
Am 2021-07-13 um 9:32 a.m. schrieb Christian König:
> For allocations larger than 48MiB we need more than a page for the
> housekeeping in the worst case resulting in the usual vmalloc overhead.
>
> Try to avoid this by assuming the good case and only falling back to the
> worst case if this didn't worked.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 80 +++++++++++++++-----
> 1 file changed, 60 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> index 2fd77c36a1ff..ab8c5e28df7b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> @@ -361,19 +361,23 @@ static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
> * @man: TTM memory type manager
> * @tbo: TTM BO we need this range for
> * @place: placement flags and restrictions
> - * @mem: the resulting mem object
> + * @num_nodes: number of page nodes to use.
> + * @pages_per_node: number of pages per node to use.
> + * @res: the resulting mem object
> *
> * Allocate VRAM for the given BO.
> */
> static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
> struct ttm_buffer_object *tbo,
> const struct ttm_place *place,
> + unsigned long num_nodes,
> + unsigned long pages_per_node,
> struct ttm_resource **res)
> {
> - unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
> struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
> struct amdgpu_device *adev = to_amdgpu_device(mgr);
> uint64_t vis_usage = 0, mem_bytes, max_bytes;
> + unsigned long lpfn, pages_left, pages;
> struct ttm_range_mgr_node *node;
> struct drm_mm *mm = &mgr->mm;
> enum drm_mm_insert_mode mode;
> @@ -395,21 +399,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
> goto error_sub;
> }
>
> - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
> - pages_per_node = ~0ul;
> - num_nodes = 1;
> - } else {
> -#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> - pages_per_node = HPAGE_PMD_NR;
> -#else
> - /* default to 2MB */
> - pages_per_node = 2UL << (20UL - PAGE_SHIFT);
> -#endif
> - pages_per_node = max_t(uint32_t, pages_per_node,
> - tbo->page_alignment);
> - num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
> - }
> -
> node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
> GFP_KERNEL | __GFP_ZERO);
> if (!node) {
> @@ -431,10 +420,15 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
> i = 0;
> spin_lock(&mgr->lock);
> while (pages_left) {
> - uint32_t alignment = tbo->page_alignment;
> + unsigned long alignment = tbo->page_alignment;
> +
> + if (i >= num_nodes) {
> + r = -E2BIG;
> + goto error_free;
> + }
>
> if (pages >= pages_per_node)
> - alignment = pages_per_node;
> + alignment = max(alignment, pages_per_node);
I don't understand this change. Is this an unrelated fix? pages_per_node
is already bumped up to tbo->page_alignment in amdgpu_vram_mgr_alloc. So
this "max" operation here seems redundant.
Other than that, the patch is
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
@JinHuiEric, can you confirm the performance improvement?
Thanks,
Felix
>
> r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
> alignment, 0, place->fpfn,
> @@ -483,6 +477,52 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
> return r;
> }
>
> +/**
> + * amdgpu_vram_mgr_alloc - allocate new range
> + *
> + * @man: TTM memory type manager
> + * @tbo: TTM BO we need this range for
> + * @place: placement flags and restrictions
> + * @res: the resulting mem object
> + *
> + * Allocate VRAM for the given BO.
> + */
> +static int amdgpu_vram_mgr_alloc(struct ttm_resource_manager *man,
> + struct ttm_buffer_object *tbo,
> + const struct ttm_place *place,
> + struct ttm_resource **res)
> +{
> + unsigned long num_nodes, pages_per_node;
> + struct ttm_range_mgr_node *node;
> + int r;
> +
> + if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
> + return amdgpu_vram_mgr_new(man, tbo, place, 1, ~0ul, res);
> +
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> + pages_per_node = HPAGE_PMD_NR;
> +#else
> + /* default to 2MB */
> + pages_per_node = 2UL << (20UL - PAGE_SHIFT);
> +#endif
> + pages_per_node = max_t(uint32_t, pages_per_node, tbo->page_alignment);
> + num_nodes = DIV_ROUND_UP_ULL(PFN_UP(tbo->base.size), pages_per_node);
> +
> + if (struct_size(node, mm_nodes, num_nodes) > PAGE_SIZE) {
> + size_t size = PAGE_SIZE;
> +
> + size -= sizeof(struct ttm_range_mgr_node);
> + size /= sizeof(struct drm_mm_node);
> + r = amdgpu_vram_mgr_new(man, tbo, place, size, pages_per_node,
> + res);
> + if (r != -E2BIG)
> + return r;
> + }
> +
> + return amdgpu_vram_mgr_new(man, tbo, place, num_nodes, pages_per_node,
> + res);
> +}
> +
> /**
> * amdgpu_vram_mgr_del - free ranges
> *
> @@ -680,7 +720,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
> }
>
> static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
> - .alloc = amdgpu_vram_mgr_new,
> + .alloc = amdgpu_vram_mgr_alloc,
> .free = amdgpu_vram_mgr_del,
> .debug = amdgpu_vram_mgr_debug
> };
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] drm/amdgpu: further lower VRAM allocation overhead
2021-07-13 16:11 ` Felix Kuehling
@ 2021-07-13 17:23 ` Eric Huang
2021-07-14 12:08 ` Christian König
1 sibling, 0 replies; 11+ messages in thread
From: Eric Huang @ 2021-07-13 17:23 UTC (permalink / raw)
To: Felix Kuehling, Christian König
Cc: amd-gfx, Luugi.Marsan, Jenny-Jing.Liu, Chris.Mason
I am porting the code to amd-staging-drm-next. Theoretically it will
improve the latency a lot: with this patch the size of the allocated array
is 24 (PAGE_SIZE / struct drm_mm_node), whereas it was 8192 before.
So the latency should be reduced by about 98 us.
Regards,
Eric
On 2021-07-13 12:11 p.m., Felix Kuehling wrote:
> Am 2021-07-13 um 9:32 a.m. schrieb Christian König:
>> For allocations larger than 48MiB we need more than a page for the
>> housekeeping in the worst case resulting in the usual vmalloc overhead.
>>
>> Try to avoid this by assuming the good case and only falling back to the
>> worst case if this didn't worked.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 80 +++++++++++++++-----
>> 1 file changed, 60 insertions(+), 20 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> index 2fd77c36a1ff..ab8c5e28df7b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> @@ -361,19 +361,23 @@ static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
>> * @man: TTM memory type manager
>> * @tbo: TTM BO we need this range for
>> * @place: placement flags and restrictions
>> - * @mem: the resulting mem object
>> + * @num_nodes: number of page nodes to use.
>> + * @pages_per_node: number of pages per node to use.
>> + * @res: the resulting mem object
>> *
>> * Allocate VRAM for the given BO.
>> */
>> static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>> struct ttm_buffer_object *tbo,
>> const struct ttm_place *place,
>> + unsigned long num_nodes,
>> + unsigned long pages_per_node,
>> struct ttm_resource **res)
>> {
>> - unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
>> struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>> struct amdgpu_device *adev = to_amdgpu_device(mgr);
>> uint64_t vis_usage = 0, mem_bytes, max_bytes;
>> + unsigned long lpfn, pages_left, pages;
>> struct ttm_range_mgr_node *node;
>> struct drm_mm *mm = &mgr->mm;
>> enum drm_mm_insert_mode mode;
>> @@ -395,21 +399,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>> goto error_sub;
>> }
>>
>> - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>> - pages_per_node = ~0ul;
>> - num_nodes = 1;
>> - } else {
>> -#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> - pages_per_node = HPAGE_PMD_NR;
>> -#else
>> - /* default to 2MB */
>> - pages_per_node = 2UL << (20UL - PAGE_SHIFT);
>> -#endif
>> - pages_per_node = max_t(uint32_t, pages_per_node,
>> - tbo->page_alignment);
>> - num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
>> - }
>> -
>> node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
>> GFP_KERNEL | __GFP_ZERO);
>> if (!node) {
>> @@ -431,10 +420,15 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>> i = 0;
>> spin_lock(&mgr->lock);
>> while (pages_left) {
>> - uint32_t alignment = tbo->page_alignment;
>> + unsigned long alignment = tbo->page_alignment;
>> +
>> + if (i >= num_nodes) {
>> + r = -E2BIG;
>> + goto error_free;
>> + }
>>
>> if (pages >= pages_per_node)
>> - alignment = pages_per_node;
>> + alignment = max(alignment, pages_per_node);
> I don't understand this change. Is this an unrelated fix? pages_per_node
> is already bumped up to tbo->page_alignment in amdgpu_vram_mgr_alloc. So
> this "max" operation here seems redundant.
>
> Other than that, the patch is
>
> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>
> @JinHuiEric, can you confirm the performance improvement?
>
> Thanks,
> Felix
>
>
>>
>> r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
>> alignment, 0, place->fpfn,
>> @@ -483,6 +477,52 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>> return r;
>> }
>>
>> +/**
>> + * amdgpu_vram_mgr_alloc - allocate new range
>> + *
>> + * @man: TTM memory type manager
>> + * @tbo: TTM BO we need this range for
>> + * @place: placement flags and restrictions
>> + * @res: the resulting mem object
>> + *
>> + * Allocate VRAM for the given BO.
>> + */
>> +static int amdgpu_vram_mgr_alloc(struct ttm_resource_manager *man,
>> + struct ttm_buffer_object *tbo,
>> + const struct ttm_place *place,
>> + struct ttm_resource **res)
>> +{
>> + unsigned long num_nodes, pages_per_node;
>> + struct ttm_range_mgr_node *node;
>> + int r;
>> +
>> + if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>> + return amdgpu_vram_mgr_new(man, tbo, place, 1, ~0ul, res);
>> +
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> + pages_per_node = HPAGE_PMD_NR;
>> +#else
>> + /* default to 2MB */
>> + pages_per_node = 2UL << (20UL - PAGE_SHIFT);
>> +#endif
>> + pages_per_node = max_t(uint32_t, pages_per_node, tbo->page_alignment);
>> + num_nodes = DIV_ROUND_UP_ULL(PFN_UP(tbo->base.size), pages_per_node);
>> +
>> + if (struct_size(node, mm_nodes, num_nodes) > PAGE_SIZE) {
>> + size_t size = PAGE_SIZE;
>> +
>> + size -= sizeof(struct ttm_range_mgr_node);
>> + size /= sizeof(struct drm_mm_node);
>> + r = amdgpu_vram_mgr_new(man, tbo, place, size, pages_per_node,
>> + res);
>> + if (r != -E2BIG)
>> + return r;
>> + }
>> +
>> + return amdgpu_vram_mgr_new(man, tbo, place, num_nodes, pages_per_node,
>> + res);
>> +}
>> +
>> /**
>> * amdgpu_vram_mgr_del - free ranges
>> *
>> @@ -680,7 +720,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
>> }
>>
>> static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
>> - .alloc = amdgpu_vram_mgr_new,
>> + .alloc = amdgpu_vram_mgr_alloc,
>> .free = amdgpu_vram_mgr_del,
>> .debug = amdgpu_vram_mgr_debug
>> };
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] drm/amdgpu: further lower VRAM allocation overhead
2021-07-13 16:11 ` Felix Kuehling
2021-07-13 17:23 ` Eric Huang
@ 2021-07-14 12:08 ` Christian König
1 sibling, 0 replies; 11+ messages in thread
From: Christian König @ 2021-07-14 12:08 UTC (permalink / raw)
To: Felix Kuehling, jinhuieric.huang
Cc: amd-gfx, Luugi.Marsan, Jenny-Jing.Liu, Chris.Mason
Am 13.07.21 um 18:11 schrieb Felix Kuehling:
> Am 2021-07-13 um 9:32 a.m. schrieb Christian König:
>> For allocations larger than 48MiB we need more than a page for the
>> housekeeping in the worst case resulting in the usual vmalloc overhead.
>>
>> Try to avoid this by assuming the good case and only falling back to the
>> worst case if this didn't worked.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 80 +++++++++++++++-----
>> 1 file changed, 60 insertions(+), 20 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> index 2fd77c36a1ff..ab8c5e28df7b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> @@ -361,19 +361,23 @@ static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
>> * @man: TTM memory type manager
>> * @tbo: TTM BO we need this range for
>> * @place: placement flags and restrictions
>> - * @mem: the resulting mem object
>> + * @num_nodes: number of page nodes to use.
>> + * @pages_per_node: number of pages per node to use.
>> + * @res: the resulting mem object
>> *
>> * Allocate VRAM for the given BO.
>> */
>> static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>> struct ttm_buffer_object *tbo,
>> const struct ttm_place *place,
>> + unsigned long num_nodes,
>> + unsigned long pages_per_node,
>> struct ttm_resource **res)
>> {
>> - unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
>> struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>> struct amdgpu_device *adev = to_amdgpu_device(mgr);
>> uint64_t vis_usage = 0, mem_bytes, max_bytes;
>> + unsigned long lpfn, pages_left, pages;
>> struct ttm_range_mgr_node *node;
>> struct drm_mm *mm = &mgr->mm;
>> enum drm_mm_insert_mode mode;
>> @@ -395,21 +399,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>> goto error_sub;
>> }
>>
>> - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>> - pages_per_node = ~0ul;
>> - num_nodes = 1;
>> - } else {
>> -#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> - pages_per_node = HPAGE_PMD_NR;
>> -#else
>> - /* default to 2MB */
>> - pages_per_node = 2UL << (20UL - PAGE_SHIFT);
>> -#endif
>> - pages_per_node = max_t(uint32_t, pages_per_node,
>> - tbo->page_alignment);
>> - num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
>> - }
>> -
>> node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
>> GFP_KERNEL | __GFP_ZERO);
>> if (!node) {
>> @@ -431,10 +420,15 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>> i = 0;
>> spin_lock(&mgr->lock);
>> while (pages_left) {
>> - uint32_t alignment = tbo->page_alignment;
>> + unsigned long alignment = tbo->page_alignment;
>> +
>> + if (i >= num_nodes) {
>> + r = -E2BIG;
>> + goto error_free;
>> + }
>>
>> if (pages >= pages_per_node)
>> - alignment = pages_per_node;
>> + alignment = max(alignment, pages_per_node);
> I don't understand this change. Is this an unrelated fix? pages_per_node
> is already bumped up to tbo->page_alignment in amdgpu_vram_mgr_alloc. So
> this "max" operation here seems redundant.
Oh, yes good point. I've totally missed that and was wondering why this
here isn't a problem.
>
> Other than that, the patch is
>
> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Thanks,
Christian.
>
> @JinHuiEric, can you confirm the performance improvement?
>
> Thanks,
> Felix
>
>
>>
>> r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
>> alignment, 0, place->fpfn,
>> @@ -483,6 +477,52 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>> return r;
>> }
>>
>> +/**
>> + * amdgpu_vram_mgr_alloc - allocate new range
>> + *
>> + * @man: TTM memory type manager
>> + * @tbo: TTM BO we need this range for
>> + * @place: placement flags and restrictions
>> + * @res: the resulting mem object
>> + *
>> + * Allocate VRAM for the given BO.
>> + */
>> +static int amdgpu_vram_mgr_alloc(struct ttm_resource_manager *man,
>> + struct ttm_buffer_object *tbo,
>> + const struct ttm_place *place,
>> + struct ttm_resource **res)
>> +{
>> + unsigned long num_nodes, pages_per_node;
>> + struct ttm_range_mgr_node *node;
>> + int r;
>> +
>> + if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>> + return amdgpu_vram_mgr_new(man, tbo, place, 1, ~0ul, res);
>> +
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> + pages_per_node = HPAGE_PMD_NR;
>> +#else
>> + /* default to 2MB */
>> + pages_per_node = 2UL << (20UL - PAGE_SHIFT);
>> +#endif
>> + pages_per_node = max_t(uint32_t, pages_per_node, tbo->page_alignment);
>> + num_nodes = DIV_ROUND_UP_ULL(PFN_UP(tbo->base.size), pages_per_node);
>> +
>> + if (struct_size(node, mm_nodes, num_nodes) > PAGE_SIZE) {
>> + size_t size = PAGE_SIZE;
>> +
>> + size -= sizeof(struct ttm_range_mgr_node);
>> + size /= sizeof(struct drm_mm_node);
>> + r = amdgpu_vram_mgr_new(man, tbo, place, size, pages_per_node,
>> + res);
>> + if (r != -E2BIG)
>> + return r;
>> + }
>> +
>> + return amdgpu_vram_mgr_new(man, tbo, place, num_nodes, pages_per_node,
>> + res);
>> +}
>> +
>> /**
>> * amdgpu_vram_mgr_del - free ranges
>> *
>> @@ -680,7 +720,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
>> }
>>
>> static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
>> - .alloc = amdgpu_vram_mgr_new,
>> + .alloc = amdgpu_vram_mgr_alloc,
>> .free = amdgpu_vram_mgr_del,
>> .debug = amdgpu_vram_mgr_debug
>> };
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] drm/amdgpu: further lower VRAM allocation overhead
2021-07-15 13:50 ` Eric Huang
@ 2021-07-15 14:38 ` Felix Kuehling
0 siblings, 0 replies; 11+ messages in thread
From: Felix Kuehling @ 2021-07-15 14:38 UTC (permalink / raw)
To: Eric Huang, Christian König, amd-gfx
Cc: Christian König, Liu, Jenny (Jing), Marsan, Luugi
Looking at the ticket, it's getting a segfault in user mode. The only
possible mechanism I can see where this change causes that segfault, is
a badly handled memory allocation failure. But I don't have an
explanation for how this patch introduces a memory allocation failure
that wasn't there before (other than maybe some very obscure internal
detail of the drm_mm node manager).
I see one potential problem here on the error handling path that we're
more likely to go to now. But I think it should only affect 32-bit kernels:
> error:
>         while (i--)
>                 drm_mm_remove_node(&nodes[i]);
>         spin_unlock(&mgr->lock);
>         atomic64_sub(mem->num_pages << PAGE_SHIFT, &mgr->usage);
We need a (u64) cast here to avoid the left-shift overflowing.
mem->num_pages is unsigned long, so that's already 64 bits on a 64-bit
kernel.
>         kvfree(nodes);
>         return r;
> }
Regards,
Felix
Am 2021-07-15 um 9:50 a.m. schrieb Eric Huang:
> Hi Christian,
>
> I have pushed it into amd-staging-dkms-5.11, but it causes a
> regression with test TransferBench on MI200. Jira is here:
> https://ontrack-internal.amd.com/browse/SWDEV-295245
>
> Can you please take a look? Thanks!
>
> Regards,
> Eric
>
> On 2021-07-14 4:33 a.m., Christian König wrote:
>> Hi Eric,
>>
>> feel free to push into amd-staging-dkms-5.11, but please don't push
>> it into amd-staging-drm-next.
>>
>> The later will just cause a merge failure which Alex needs to resolve
>> manually.
>>
>> I can take care of pushing to amd-staging-drm-next as soon as that is
>> rebased on latest upstream.
>>
>> Regards,
>> Christian.
>>
>> Am 13.07.21 um 21:19 schrieb Eric Huang:
>>> Hi Christian/Felix,
>>>
>>> If you don't have objection, it will be pushed into
>>> amd-staging-dkms-5.11 and amd-staging-drm-next.
>>>
>>> Thanks,
>>> Eric
>>>
>>> On 2021-07-13 3:17 p.m., Eric Huang wrote:
>>>> For allocations larger than 48MiB we need more than a page for the
>>>> housekeeping in the worst case resulting in the usual vmalloc
>>>> overhead.
>>>>
>>>> Try to avoid this by assuming the good case and only falling back
>>>> to the
>>>> worst case if this didn't worked.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
>>>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>> ---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 71
>>>> +++++++++++++++-----
>>>> 1 file changed, 53 insertions(+), 18 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>> index be4261c4512e..ecbe05e1db66 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>> @@ -361,9 +361,11 @@ static void amdgpu_vram_mgr_virt_start(struct
>>>> ttm_resource *mem,
>>>> static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>> struct ttm_buffer_object *tbo,
>>>> const struct ttm_place *place,
>>>> + unsigned long num_nodes,
>>>> + unsigned long pages_per_node,
>>>> struct ttm_resource *mem)
>>>> {
>>>> - unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
>>>> + unsigned long lpfn, pages_left, pages;
>>>> struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>>>> struct amdgpu_device *adev = to_amdgpu_device(mgr);
>>>> uint64_t vis_usage = 0, mem_bytes, max_bytes;
>>>> @@ -393,21 +395,6 @@ static int amdgpu_vram_mgr_new(struct
>>>> ttm_resource_manager *man,
>>>> return -ENOSPC;
>>>> }
>>>> - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>>> - pages_per_node = ~0ul;
>>>> - num_nodes = 1;
>>>> - } else {
>>>> -#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>>> - pages_per_node = HPAGE_PMD_NR;
>>>> -#else
>>>> - /* default to 2MB */
>>>> - pages_per_node = 2UL << (20UL - PAGE_SHIFT);
>>>> -#endif
>>>> - pages_per_node = max_t(uint32_t, pages_per_node,
>>>> - mem->page_alignment);
>>>> - num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
>>>> - }
>>>> -
>>>> nodes = kvmalloc_array((uint32_t)num_nodes, sizeof(*nodes),
>>>> GFP_KERNEL | __GFP_ZERO);
>>>> if (!nodes) {
>>>> @@ -435,7 +422,12 @@ static int amdgpu_vram_mgr_new(struct
>>>> ttm_resource_manager *man,
>>>> i = 0;
>>>> spin_lock(&mgr->lock);
>>>> while (pages_left) {
>>>> - uint32_t alignment = mem->page_alignment;
>>>> + unsigned long alignment = mem->page_alignment;
>>>> +
>>>> + if (i >= num_nodes) {
>>>> + r = -E2BIG;
>>>> + goto error;
>>>> + }
>>>> if (pages >= pages_per_node)
>>>> alignment = pages_per_node;
>>>> @@ -492,6 +484,49 @@ static int amdgpu_vram_mgr_new(struct
>>>> ttm_resource_manager *man,
>>>> return r;
>>>> }
>>>> +/**
>>>> + * amdgpu_vram_mgr_alloc - allocate new range
>>>> + *
>>>> + * @man: TTM memory type manager
>>>> + * @tbo: TTM BO we need this range for
>>>> + * @place: placement flags and restrictions
>>>> + * @mem: the resulting mem object
>>>> + *
>>>> + * Allocate VRAM for the given BO.
>>>> + */
>>>> +static int amdgpu_vram_mgr_alloc(struct ttm_resource_manager *man,
>>>> + struct ttm_buffer_object *tbo,
>>>> + const struct ttm_place *place,
>>>> + struct ttm_resource *mem)
>>>> +{
>>>> + unsigned long num_nodes, pages_per_node;
>>>> + int r;
>>>> +
>>>> + if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>>>> + return amdgpu_vram_mgr_new(man, tbo, place, 1, ~0ul, mem);
>>>> +
>>>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>>> + pages_per_node = HPAGE_PMD_NR;
>>>> +#else
>>>> + /* default to 2MB */
>>>> + pages_per_node = 2UL << (20UL - PAGE_SHIFT);
>>>> +#endif
>>>> + pages_per_node = max_t(uint32_t, pages_per_node,
>>>> + mem->page_alignment);
>>>> + num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
>>>> +
>>>> + if (sizeof(struct drm_mm_node) * num_nodes > PAGE_SIZE) {
>>>> + r = amdgpu_vram_mgr_new(man, tbo, place,
>>>> + PAGE_SIZE / sizeof(struct drm_mm_node),
>>>> + pages_per_node, mem);
>>>> + if (r != -E2BIG)
>>>> + return r;
>>>> + }
>>>> +
>>>> + return amdgpu_vram_mgr_new(man, tbo, place, num_nodes,
>>>> pages_per_node,
>>>> + mem);
>>>> +}
>>>> +
>>>> /**
>>>> * amdgpu_vram_mgr_del - free ranges
>>>> *
>>>> @@ -693,7 +728,7 @@ static void amdgpu_vram_mgr_debug(struct
>>>> ttm_resource_manager *man,
>>>> }
>>>> static const struct ttm_resource_manager_func
>>>> amdgpu_vram_mgr_func = {
>>>> - .alloc = amdgpu_vram_mgr_new,
>>>> + .alloc = amdgpu_vram_mgr_alloc,
>>>> .free = amdgpu_vram_mgr_del,
>>>> .debug = amdgpu_vram_mgr_debug
>>>> };
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] drm/amdgpu: further lower VRAM allocation overhead
2021-07-14 8:33 ` Christian König
2021-07-14 9:41 ` Pan, Xinhui
@ 2021-07-15 13:50 ` Eric Huang
2021-07-15 14:38 ` Felix Kuehling
1 sibling, 1 reply; 11+ messages in thread
From: Eric Huang @ 2021-07-15 13:50 UTC (permalink / raw)
To: Christian König, amd-gfx
Cc: Felix Kuehling, Christian König, Liu, Jenny (Jing), Marsan, Luugi
Hi Christian,
I have pushed it into amd-staging-dkms-5.11, but it causes a regression
with test TransferBench on MI200. Jira is here:
https://ontrack-internal.amd.com/browse/SWDEV-295245
Can you please take a look? Thanks!
Regards,
Eric
On 2021-07-14 4:33 a.m., Christian König wrote:
> Hi Eric,
>
> feel free to push into amd-staging-dkms-5.11, but please don't push it
> into amd-staging-drm-next.
>
> The later will just cause a merge failure which Alex needs to resolve
> manually.
>
> I can take care of pushing to amd-staging-drm-next as soon as that is
> rebased on latest upstream.
>
> Regards,
> Christian.
>
> Am 13.07.21 um 21:19 schrieb Eric Huang:
>> Hi Christian/Felix,
>>
>> If you don't have objection, it will be pushed into
>> amd-staging-dkms-5.11 and amd-staging-drm-next.
>>
>> Thanks,
>> Eric
>>
>> On 2021-07-13 3:17 p.m., Eric Huang wrote:
>>> For allocations larger than 48MiB we need more than a page for the
>>> housekeeping in the worst case resulting in the usual vmalloc overhead.
>>>
>>> Try to avoid this by assuming the good case and only falling back to
>>> the
>>> worst case if this didn't worked.
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
>>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 71
>>> +++++++++++++++-----
>>> 1 file changed, 53 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>> index be4261c4512e..ecbe05e1db66 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>> @@ -361,9 +361,11 @@ static void amdgpu_vram_mgr_virt_start(struct
>>> ttm_resource *mem,
>>> static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>> struct ttm_buffer_object *tbo,
>>> const struct ttm_place *place,
>>> + unsigned long num_nodes,
>>> + unsigned long pages_per_node,
>>> struct ttm_resource *mem)
>>> {
>>> - unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
>>> + unsigned long lpfn, pages_left, pages;
>>> struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>>> struct amdgpu_device *adev = to_amdgpu_device(mgr);
>>> uint64_t vis_usage = 0, mem_bytes, max_bytes;
>>> @@ -393,21 +395,6 @@ static int amdgpu_vram_mgr_new(struct
>>> ttm_resource_manager *man,
>>> return -ENOSPC;
>>> }
>>> - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>> - pages_per_node = ~0ul;
>>> - num_nodes = 1;
>>> - } else {
>>> -#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>> - pages_per_node = HPAGE_PMD_NR;
>>> -#else
>>> - /* default to 2MB */
>>> - pages_per_node = 2UL << (20UL - PAGE_SHIFT);
>>> -#endif
>>> - pages_per_node = max_t(uint32_t, pages_per_node,
>>> - mem->page_alignment);
>>> - num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
>>> - }
>>> -
>>> nodes = kvmalloc_array((uint32_t)num_nodes, sizeof(*nodes),
>>> GFP_KERNEL | __GFP_ZERO);
>>> if (!nodes) {
>>> @@ -435,7 +422,12 @@ static int amdgpu_vram_mgr_new(struct
>>> ttm_resource_manager *man,
>>> i = 0;
>>> spin_lock(&mgr->lock);
>>> while (pages_left) {
>>> - uint32_t alignment = mem->page_alignment;
>>> + unsigned long alignment = mem->page_alignment;
>>> +
>>> + if (i >= num_nodes) {
>>> + r = -E2BIG;
>>> + goto error;
>>> + }
>>> if (pages >= pages_per_node)
>>> alignment = pages_per_node;
>>> @@ -492,6 +484,49 @@ static int amdgpu_vram_mgr_new(struct
>>> ttm_resource_manager *man,
>>> return r;
>>> }
>>> +/**
>>> + * amdgpu_vram_mgr_alloc - allocate new range
>>> + *
>>> + * @man: TTM memory type manager
>>> + * @tbo: TTM BO we need this range for
>>> + * @place: placement flags and restrictions
>>> + * @mem: the resulting mem object
>>> + *
>>> + * Allocate VRAM for the given BO.
>>> + */
>>> +static int amdgpu_vram_mgr_alloc(struct ttm_resource_manager *man,
>>> + struct ttm_buffer_object *tbo,
>>> + const struct ttm_place *place,
>>> + struct ttm_resource *mem)
>>> +{
>>> + unsigned long num_nodes, pages_per_node;
>>> + int r;
>>> +
>>> + if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>>> + return amdgpu_vram_mgr_new(man, tbo, place, 1, ~0ul, mem);
>>> +
>>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>> + pages_per_node = HPAGE_PMD_NR;
>>> +#else
>>> + /* default to 2MB */
>>> + pages_per_node = 2UL << (20UL - PAGE_SHIFT);
>>> +#endif
>>> + pages_per_node = max_t(uint32_t, pages_per_node,
>>> + mem->page_alignment);
>>> + num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
>>> +
>>> + if (sizeof(struct drm_mm_node) * num_nodes > PAGE_SIZE) {
>>> + r = amdgpu_vram_mgr_new(man, tbo, place,
>>> + PAGE_SIZE / sizeof(struct drm_mm_node),
>>> + pages_per_node, mem);
>>> + if (r != -E2BIG)
>>> + return r;
>>> + }
>>> +
>>> + return amdgpu_vram_mgr_new(man, tbo, place, num_nodes,
>>> pages_per_node,
>>> + mem);
>>> +}
>>> +
>>> /**
>>> * amdgpu_vram_mgr_del - free ranges
>>> *
>>> @@ -693,7 +728,7 @@ static void amdgpu_vram_mgr_debug(struct
>>> ttm_resource_manager *man,
>>> }
>>> static const struct ttm_resource_manager_func
>>> amdgpu_vram_mgr_func = {
>>> - .alloc = amdgpu_vram_mgr_new,
>>> + .alloc = amdgpu_vram_mgr_alloc,
>>> .free = amdgpu_vram_mgr_del,
>>> .debug = amdgpu_vram_mgr_debug
>>> };
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=04%7C01%7Cjinhuieric.huang%40amd.com%7C1f368defa88042677f4008d946a217c8%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637618484270197021%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=wvuHNuK1gQBYbErvhBO%2FambKpNzjHL2A9ea22fvQMkY%3D&reserved=0
>>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] drm/amdgpu: further lower VRAM allocation overhead
2021-07-14 9:41 ` Pan, Xinhui
@ 2021-07-14 12:17 ` Christian König
0 siblings, 0 replies; 11+ messages in thread
From: Christian König @ 2021-07-14 12:17 UTC (permalink / raw)
To: Pan, Xinhui
Cc: Huang, JinHuiEric, Kuehling, Felix, Koenig, Christian, amd-gfx
Am 14.07.21 um 11:41 schrieb Pan, Xinhui:
>> 2021年7月14日 16:33,Christian König <ckoenig.leichtzumerken@gmail.com> 写道:
>>
>> Hi Eric,
>>
>> feel free to push into amd-staging-dkms-5.11, but please don't push it into amd-staging-drm-next.
>>
>> The later will just cause a merge failure which Alex needs to resolve manually.
>>
>> I can take care of pushing to amd-staging-drm-next as soon as that is rebased on latest upstream.
>>
>> Regards,
>> Christian.
>>
>> Am 13.07.21 um 21:19 schrieb Eric Huang:
>>> Hi Christian/Felix,
>>>
>>> If you don't have objection, it will be pushed into amd-staging-dkms-5.11 and amd-staging-drm-next.
>>>
>>> Thanks,
>>> Eric
>>>
>>> On 2021-07-13 3:17 p.m., Eric Huang wrote:
>>>> For allocations larger than 48MiB we need more than a page for the
>>>> housekeeping in the worst case resulting in the usual vmalloc overhead.
>>>>
>>>> Try to avoid this by assuming the good case and only falling back to the
>>>> worst case if this didn't worked.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
>>>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>> ---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 71 +++++++++++++++-----
>>>> 1 file changed, 53 insertions(+), 18 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>> index be4261c4512e..ecbe05e1db66 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>>> @@ -361,9 +361,11 @@ static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
>>>> static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>> struct ttm_buffer_object *tbo,
>>>> const struct ttm_place *place,
>>>> + unsigned long num_nodes,
>>>> + unsigned long pages_per_node,
>>>> struct ttm_resource *mem)
>>>> {
>>>> - unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
>>>> + unsigned long lpfn, pages_left, pages;
>>>> struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>>>> struct amdgpu_device *adev = to_amdgpu_device(mgr);
>>>> uint64_t vis_usage = 0, mem_bytes, max_bytes;
>>>> @@ -393,21 +395,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>> return -ENOSPC;
>>>> }
>>>> - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>>> - pages_per_node = ~0ul;
>>>> - num_nodes = 1;
>>>> - } else {
>>>> -#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>>> - pages_per_node = HPAGE_PMD_NR;
>>>> -#else
>>>> - /* default to 2MB */
>>>> - pages_per_node = 2UL << (20UL - PAGE_SHIFT);
>>>> -#endif
>>>> - pages_per_node = max_t(uint32_t, pages_per_node,
>>>> - mem->page_alignment);
>>>> - num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
>>>> - }
>>>> -
>>>> nodes = kvmalloc_array((uint32_t)num_nodes, sizeof(*nodes),
>>>> GFP_KERNEL | __GFP_ZERO);
>>>> if (!nodes) {
>>>> @@ -435,7 +422,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>> i = 0;
>>>> spin_lock(&mgr->lock);
>>>> while (pages_left) {
>>>> - uint32_t alignment = mem->page_alignment;
>>>> + unsigned long alignment = mem->page_alignment;
>>>> +
>>>> + if (i >= num_nodes) {
>>>> + r = -E2BIG;
>>>> + goto error;
>>>> + }
>>>> if (pages >= pages_per_node)
>>>> alignment = pages_per_node;
>>>> @@ -492,6 +484,49 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>>> return r;
>>>> }
>>>> +/**
>>>> + * amdgpu_vram_mgr_alloc - allocate new range
>>>> + *
>>>> + * @man: TTM memory type manager
>>>> + * @tbo: TTM BO we need this range for
>>>> + * @place: placement flags and restrictions
>>>> + * @mem: the resulting mem object
>>>> + *
>>>> + * Allocate VRAM for the given BO.
>>>> + */
>>>> +static int amdgpu_vram_mgr_alloc(struct ttm_resource_manager *man,
>>>> + struct ttm_buffer_object *tbo,
>>>> + const struct ttm_place *place,
>>>> + struct ttm_resource *mem)
>>>> +{
>>>> + unsigned long num_nodes, pages_per_node;
>>>> + int r;
>>>> +
>>>> + if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>>>> + return amdgpu_vram_mgr_new(man, tbo, place, 1, ~0ul, mem);
>>>> +
>>>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>>> + pages_per_node = HPAGE_PMD_NR;
>>>> +#else
>>>> + /* default to 2MB */
>>>> + pages_per_node = 2UL << (20UL - PAGE_SHIFT);
>>>> +#endif
>>>> + pages_per_node = max_t(uint32_t, pages_per_node,
>>>> + mem->page_alignment);
>>>> + num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
>>>> +
>>>> + if (sizeof(struct drm_mm_node) * num_nodes > PAGE_SIZE) {
> I think this should be < PAGE_SIZE? Otherwise amdgpu_vram_mgr_new always return -E2BIG. Or I am missing something?
Sounds like you are missing something. The idea here is that we don't
always allocate for the worst case, but try with just one page first.
> But you want one page to save all drm mm nodes in the good case. What if user just create a bunch of small VRAM BO, say, 1 thound of 4KB VRAM BOs.
> the system memory usage would change from 24KB to 4MB. I have no strong objection to it as time is more expensive in reality.
The system memory usage will stay the same because we only apply this if
sizeof(struct drm_mm_node) * num_nodes is larger than a page size.
Regards,
Christian.
>
>
>
>>>> + r = amdgpu_vram_mgr_new(man, tbo, place,
>>>> + PAGE_SIZE / sizeof(struct drm_mm_node),
>>>> + pages_per_node, mem);
>>>> + if (r != -E2BIG)
>>>> + return r;
>>>> + }
>>>> +
>>>> + return amdgpu_vram_mgr_new(man, tbo, place, num_nodes, pages_per_node,
>>>> + mem);
>>>> +}
>>>> +
>>>> /**
>>>> * amdgpu_vram_mgr_del - free ranges
>>>> *
>>>> @@ -693,7 +728,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
>>>> }
>>>> static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
>>>> - .alloc = amdgpu_vram_mgr_new,
>>>> + .alloc = amdgpu_vram_mgr_alloc,
>>>> .free = amdgpu_vram_mgr_del,
>>>> .debug = amdgpu_vram_mgr_debug
>>>> };
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=04%7C01%7Cxinhui.pan%40amd.com%7C66465cc2d6b9468a2d8208d946a218ba%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637618484289336937%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=U1xfDEKdKmDr%2FeFF%2BXoLaZVcuTzcoFnoZOD%2Fwo4redo%3D&reserved=0
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=04%7C01%7Cxinhui.pan%40amd.com%7C66465cc2d6b9468a2d8208d946a218ba%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637618484289336937%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=U1xfDEKdKmDr%2FeFF%2BXoLaZVcuTzcoFnoZOD%2Fwo4redo%3D&reserved=0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] drm/amdgpu: further lower VRAM allocation overhead
2021-07-14 8:33 ` Christian König
@ 2021-07-14 9:41 ` Pan, Xinhui
2021-07-14 12:17 ` Christian König
2021-07-15 13:50 ` Eric Huang
1 sibling, 1 reply; 11+ messages in thread
From: Pan, Xinhui @ 2021-07-14 9:41 UTC (permalink / raw)
To: Christian König
Cc: Huang, JinHuiEric, Kuehling, Felix, Pan, Xinhui, Koenig,
Christian, amd-gfx
> 2021年7月14日 16:33,Christian König <ckoenig.leichtzumerken@gmail.com> 写道:
>
> Hi Eric,
>
> feel free to push into amd-staging-dkms-5.11, but please don't push it into amd-staging-drm-next.
>
> The later will just cause a merge failure which Alex needs to resolve manually.
>
> I can take care of pushing to amd-staging-drm-next as soon as that is rebased on latest upstream.
>
> Regards,
> Christian.
>
> Am 13.07.21 um 21:19 schrieb Eric Huang:
>> Hi Christian/Felix,
>>
>> If you don't have objection, it will be pushed into amd-staging-dkms-5.11 and amd-staging-drm-next.
>>
>> Thanks,
>> Eric
>>
>> On 2021-07-13 3:17 p.m., Eric Huang wrote:
>>> For allocations larger than 48MiB we need more than a page for the
>>> housekeeping in the worst case resulting in the usual vmalloc overhead.
>>>
>>> Try to avoid this by assuming the good case and only falling back to the
>>> worst case if this didn't worked.
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
>>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 71 +++++++++++++++-----
>>> 1 file changed, 53 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>> index be4261c4512e..ecbe05e1db66 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>>> @@ -361,9 +361,11 @@ static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
>>> static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>> struct ttm_buffer_object *tbo,
>>> const struct ttm_place *place,
>>> + unsigned long num_nodes,
>>> + unsigned long pages_per_node,
>>> struct ttm_resource *mem)
>>> {
>>> - unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
>>> + unsigned long lpfn, pages_left, pages;
>>> struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>>> struct amdgpu_device *adev = to_amdgpu_device(mgr);
>>> uint64_t vis_usage = 0, mem_bytes, max_bytes;
>>> @@ -393,21 +395,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>> return -ENOSPC;
>>> }
>>> - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>> - pages_per_node = ~0ul;
>>> - num_nodes = 1;
>>> - } else {
>>> -#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>> - pages_per_node = HPAGE_PMD_NR;
>>> -#else
>>> - /* default to 2MB */
>>> - pages_per_node = 2UL << (20UL - PAGE_SHIFT);
>>> -#endif
>>> - pages_per_node = max_t(uint32_t, pages_per_node,
>>> - mem->page_alignment);
>>> - num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
>>> - }
>>> -
>>> nodes = kvmalloc_array((uint32_t)num_nodes, sizeof(*nodes),
>>> GFP_KERNEL | __GFP_ZERO);
>>> if (!nodes) {
>>> @@ -435,7 +422,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>> i = 0;
>>> spin_lock(&mgr->lock);
>>> while (pages_left) {
>>> - uint32_t alignment = mem->page_alignment;
>>> + unsigned long alignment = mem->page_alignment;
>>> +
>>> + if (i >= num_nodes) {
>>> + r = -E2BIG;
>>> + goto error;
>>> + }
>>> if (pages >= pages_per_node)
>>> alignment = pages_per_node;
>>> @@ -492,6 +484,49 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>>> return r;
>>> }
>>> +/**
>>> + * amdgpu_vram_mgr_alloc - allocate new range
>>> + *
>>> + * @man: TTM memory type manager
>>> + * @tbo: TTM BO we need this range for
>>> + * @place: placement flags and restrictions
>>> + * @mem: the resulting mem object
>>> + *
>>> + * Allocate VRAM for the given BO.
>>> + */
>>> +static int amdgpu_vram_mgr_alloc(struct ttm_resource_manager *man,
>>> + struct ttm_buffer_object *tbo,
>>> + const struct ttm_place *place,
>>> + struct ttm_resource *mem)
>>> +{
>>> + unsigned long num_nodes, pages_per_node;
>>> + int r;
>>> +
>>> + if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>>> + return amdgpu_vram_mgr_new(man, tbo, place, 1, ~0ul, mem);
>>> +
>>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>> + pages_per_node = HPAGE_PMD_NR;
>>> +#else
>>> + /* default to 2MB */
>>> + pages_per_node = 2UL << (20UL - PAGE_SHIFT);
>>> +#endif
>>> + pages_per_node = max_t(uint32_t, pages_per_node,
>>> + mem->page_alignment);
>>> + num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
>>> +
>>> + if (sizeof(struct drm_mm_node) * num_nodes > PAGE_SIZE) {
I think this should be < PAGE_SIZE? Otherwise amdgpu_vram_mgr_new always return -E2BIG. Or I am missing something?
But you want one page to save all drm mm nodes in the good case. What if a user just creates a bunch of small VRAM BOs, say, 1 thousand of 4KB VRAM BOs?
The system memory usage would then change from 24KB to 4MB. I have no strong objection to it, as time is more expensive than memory in practice.
>>> + r = amdgpu_vram_mgr_new(man, tbo, place,
>>> + PAGE_SIZE / sizeof(struct drm_mm_node),
>>> + pages_per_node, mem);
>>> + if (r != -E2BIG)
>>> + return r;
>>> + }
>>> +
>>> + return amdgpu_vram_mgr_new(man, tbo, place, num_nodes, pages_per_node,
>>> + mem);
>>> +}
>>> +
>>> /**
>>> * amdgpu_vram_mgr_del - free ranges
>>> *
>>> @@ -693,7 +728,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
>>> }
>>> static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
>>> - .alloc = amdgpu_vram_mgr_new,
>>> + .alloc = amdgpu_vram_mgr_alloc,
>>> .free = amdgpu_vram_mgr_del,
>>> .debug = amdgpu_vram_mgr_debug
>>> };
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=04%7C01%7Cxinhui.pan%40amd.com%7C66465cc2d6b9468a2d8208d946a218ba%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637618484289336937%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=U1xfDEKdKmDr%2FeFF%2BXoLaZVcuTzcoFnoZOD%2Fwo4redo%3D&reserved=0
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=04%7C01%7Cxinhui.pan%40amd.com%7C66465cc2d6b9468a2d8208d946a218ba%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637618484289336937%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=U1xfDEKdKmDr%2FeFF%2BXoLaZVcuTzcoFnoZOD%2Fwo4redo%3D&reserved=0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] drm/amdgpu: further lower VRAM allocation overhead
2021-07-13 19:19 ` Eric Huang
@ 2021-07-14 8:33 ` Christian König
2021-07-14 9:41 ` Pan, Xinhui
2021-07-15 13:50 ` Eric Huang
0 siblings, 2 replies; 11+ messages in thread
From: Christian König @ 2021-07-14 8:33 UTC (permalink / raw)
To: Eric Huang, amd-gfx; +Cc: Felix Kuehling, Christian König
Hi Eric,
feel free to push into amd-staging-dkms-5.11, but please don't push it
into amd-staging-drm-next.
The latter will just cause a merge failure which Alex needs to resolve
manually.
I can take care of pushing to amd-staging-drm-next as soon as that is
rebased on latest upstream.
Regards,
Christian.
Am 13.07.21 um 21:19 schrieb Eric Huang:
> Hi Christian/Felix,
>
> If you don't have objection, it will be pushed into
> amd-staging-dkms-5.11 and amd-staging-drm-next.
>
> Thanks,
> Eric
>
> On 2021-07-13 3:17 p.m., Eric Huang wrote:
>> For allocations larger than 48MiB we need more than a page for the
>> housekeeping in the worst case resulting in the usual vmalloc overhead.
>>
>> Try to avoid this by assuming the good case and only falling back to the
>> worst case if this didn't worked.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 71 +++++++++++++++-----
>> 1 file changed, 53 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> index be4261c4512e..ecbe05e1db66 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
>> @@ -361,9 +361,11 @@ static void amdgpu_vram_mgr_virt_start(struct
>> ttm_resource *mem,
>> static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>> struct ttm_buffer_object *tbo,
>> const struct ttm_place *place,
>> + unsigned long num_nodes,
>> + unsigned long pages_per_node,
>> struct ttm_resource *mem)
>> {
>> - unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
>> + unsigned long lpfn, pages_left, pages;
>> struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>> struct amdgpu_device *adev = to_amdgpu_device(mgr);
>> uint64_t vis_usage = 0, mem_bytes, max_bytes;
>> @@ -393,21 +395,6 @@ static int amdgpu_vram_mgr_new(struct
>> ttm_resource_manager *man,
>> return -ENOSPC;
>> }
>> - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>> - pages_per_node = ~0ul;
>> - num_nodes = 1;
>> - } else {
>> -#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> - pages_per_node = HPAGE_PMD_NR;
>> -#else
>> - /* default to 2MB */
>> - pages_per_node = 2UL << (20UL - PAGE_SHIFT);
>> -#endif
>> - pages_per_node = max_t(uint32_t, pages_per_node,
>> - mem->page_alignment);
>> - num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
>> - }
>> -
>> nodes = kvmalloc_array((uint32_t)num_nodes, sizeof(*nodes),
>> GFP_KERNEL | __GFP_ZERO);
>> if (!nodes) {
>> @@ -435,7 +422,12 @@ static int amdgpu_vram_mgr_new(struct
>> ttm_resource_manager *man,
>> i = 0;
>> spin_lock(&mgr->lock);
>> while (pages_left) {
>> - uint32_t alignment = mem->page_alignment;
>> + unsigned long alignment = mem->page_alignment;
>> +
>> + if (i >= num_nodes) {
>> + r = -E2BIG;
>> + goto error;
>> + }
>> if (pages >= pages_per_node)
>> alignment = pages_per_node;
>> @@ -492,6 +484,49 @@ static int amdgpu_vram_mgr_new(struct
>> ttm_resource_manager *man,
>> return r;
>> }
>> +/**
>> + * amdgpu_vram_mgr_alloc - allocate new range
>> + *
>> + * @man: TTM memory type manager
>> + * @tbo: TTM BO we need this range for
>> + * @place: placement flags and restrictions
>> + * @mem: the resulting mem object
>> + *
>> + * Allocate VRAM for the given BO.
>> + */
>> +static int amdgpu_vram_mgr_alloc(struct ttm_resource_manager *man,
>> + struct ttm_buffer_object *tbo,
>> + const struct ttm_place *place,
>> + struct ttm_resource *mem)
>> +{
>> + unsigned long num_nodes, pages_per_node;
>> + int r;
>> +
>> + if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
>> + return amdgpu_vram_mgr_new(man, tbo, place, 1, ~0ul, mem);
>> +
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> + pages_per_node = HPAGE_PMD_NR;
>> +#else
>> + /* default to 2MB */
>> + pages_per_node = 2UL << (20UL - PAGE_SHIFT);
>> +#endif
>> + pages_per_node = max_t(uint32_t, pages_per_node,
>> + mem->page_alignment);
>> + num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
>> +
>> + if (sizeof(struct drm_mm_node) * num_nodes > PAGE_SIZE) {
>> + r = amdgpu_vram_mgr_new(man, tbo, place,
>> + PAGE_SIZE / sizeof(struct drm_mm_node),
>> + pages_per_node, mem);
>> + if (r != -E2BIG)
>> + return r;
>> + }
>> +
>> + return amdgpu_vram_mgr_new(man, tbo, place, num_nodes,
>> pages_per_node,
>> + mem);
>> +}
>> +
>> /**
>> * amdgpu_vram_mgr_del - free ranges
>> *
>> @@ -693,7 +728,7 @@ static void amdgpu_vram_mgr_debug(struct
>> ttm_resource_manager *man,
>> }
>> static const struct ttm_resource_manager_func
>> amdgpu_vram_mgr_func = {
>> - .alloc = amdgpu_vram_mgr_new,
>> + .alloc = amdgpu_vram_mgr_alloc,
>> .free = amdgpu_vram_mgr_del,
>> .debug = amdgpu_vram_mgr_debug
>> };
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] drm/amdgpu: further lower VRAM allocation overhead
2021-07-13 19:17 Eric Huang
@ 2021-07-13 19:19 ` Eric Huang
2021-07-14 8:33 ` Christian König
0 siblings, 1 reply; 11+ messages in thread
From: Eric Huang @ 2021-07-13 19:19 UTC (permalink / raw)
To: amd-gfx; +Cc: Felix Kuehling, Christian König
Hi Christian/Felix,
If you don't have objection, it will be pushed into
amd-staging-dkms-5.11 and amd-staging-drm-next.
Thanks,
Eric
On 2021-07-13 3:17 p.m., Eric Huang wrote:
> For allocations larger than 48MiB we need more than a page for the
> housekeeping in the worst case resulting in the usual vmalloc overhead.
>
> Try to avoid this by assuming the good case and only falling back to the
> worst case if this didn't worked.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 71 +++++++++++++++-----
> 1 file changed, 53 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> index be4261c4512e..ecbe05e1db66 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> @@ -361,9 +361,11 @@ static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
> static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
> struct ttm_buffer_object *tbo,
> const struct ttm_place *place,
> + unsigned long num_nodes,
> + unsigned long pages_per_node,
> struct ttm_resource *mem)
> {
> - unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
> + unsigned long lpfn, pages_left, pages;
> struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
> struct amdgpu_device *adev = to_amdgpu_device(mgr);
> uint64_t vis_usage = 0, mem_bytes, max_bytes;
> @@ -393,21 +395,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
> return -ENOSPC;
> }
>
> - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
> - pages_per_node = ~0ul;
> - num_nodes = 1;
> - } else {
> -#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> - pages_per_node = HPAGE_PMD_NR;
> -#else
> - /* default to 2MB */
> - pages_per_node = 2UL << (20UL - PAGE_SHIFT);
> -#endif
> - pages_per_node = max_t(uint32_t, pages_per_node,
> - mem->page_alignment);
> - num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
> - }
> -
> nodes = kvmalloc_array((uint32_t)num_nodes, sizeof(*nodes),
> GFP_KERNEL | __GFP_ZERO);
> if (!nodes) {
> @@ -435,7 +422,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
> i = 0;
> spin_lock(&mgr->lock);
> while (pages_left) {
> - uint32_t alignment = mem->page_alignment;
> + unsigned long alignment = mem->page_alignment;
> +
> + if (i >= num_nodes) {
> + r = -E2BIG;
> + goto error;
> + }
>
> if (pages >= pages_per_node)
> alignment = pages_per_node;
> @@ -492,6 +484,49 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
> return r;
> }
>
> +/**
> + * amdgpu_vram_mgr_alloc - allocate new range
> + *
> + * @man: TTM memory type manager
> + * @tbo: TTM BO we need this range for
> + * @place: placement flags and restrictions
> + * @mem: the resulting mem object
> + *
> + * Allocate VRAM for the given BO.
> + */
> +static int amdgpu_vram_mgr_alloc(struct ttm_resource_manager *man,
> + struct ttm_buffer_object *tbo,
> + const struct ttm_place *place,
> + struct ttm_resource *mem)
> +{
> + unsigned long num_nodes, pages_per_node;
> + int r;
> +
> + if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
> + return amdgpu_vram_mgr_new(man, tbo, place, 1, ~0ul, mem);
> +
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> + pages_per_node = HPAGE_PMD_NR;
> +#else
> + /* default to 2MB */
> + pages_per_node = 2UL << (20UL - PAGE_SHIFT);
> +#endif
> + pages_per_node = max_t(uint32_t, pages_per_node,
> + mem->page_alignment);
> + num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
> +
> + if (sizeof(struct drm_mm_node) * num_nodes > PAGE_SIZE) {
> + r = amdgpu_vram_mgr_new(man, tbo, place,
> + PAGE_SIZE / sizeof(struct drm_mm_node),
> + pages_per_node, mem);
> + if (r != -E2BIG)
> + return r;
> + }
> +
> + return amdgpu_vram_mgr_new(man, tbo, place, num_nodes, pages_per_node,
> + mem);
> +}
> +
> /**
> * amdgpu_vram_mgr_del - free ranges
> *
> @@ -693,7 +728,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
> }
>
> static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
> - .alloc = amdgpu_vram_mgr_new,
> + .alloc = amdgpu_vram_mgr_alloc,
> .free = amdgpu_vram_mgr_del,
> .debug = amdgpu_vram_mgr_debug
> };
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH] drm/amdgpu: further lower VRAM allocation overhead
@ 2021-07-13 19:17 Eric Huang
2021-07-13 19:19 ` Eric Huang
0 siblings, 1 reply; 11+ messages in thread
From: Eric Huang @ 2021-07-13 19:17 UTC (permalink / raw)
To: amd-gfx; +Cc: Eric Huang, Felix Kuehling, Christian König
For allocations larger than 48MiB we need more than a page for the
housekeeping in the worst case resulting in the usual vmalloc overhead.
Try to avoid this by assuming the good case and only falling back to the
worst case if this didn't work.
Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 71 +++++++++++++++-----
1 file changed, 53 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index be4261c4512e..ecbe05e1db66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -361,9 +361,11 @@ static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
struct ttm_buffer_object *tbo,
const struct ttm_place *place,
+ unsigned long num_nodes,
+ unsigned long pages_per_node,
struct ttm_resource *mem)
{
- unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
+ unsigned long lpfn, pages_left, pages;
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
uint64_t vis_usage = 0, mem_bytes, max_bytes;
@@ -393,21 +395,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
return -ENOSPC;
}
- if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
- pages_per_node = ~0ul;
- num_nodes = 1;
- } else {
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- pages_per_node = HPAGE_PMD_NR;
-#else
- /* default to 2MB */
- pages_per_node = 2UL << (20UL - PAGE_SHIFT);
-#endif
- pages_per_node = max_t(uint32_t, pages_per_node,
- mem->page_alignment);
- num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
- }
-
nodes = kvmalloc_array((uint32_t)num_nodes, sizeof(*nodes),
GFP_KERNEL | __GFP_ZERO);
if (!nodes) {
@@ -435,7 +422,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
i = 0;
spin_lock(&mgr->lock);
while (pages_left) {
- uint32_t alignment = mem->page_alignment;
+ unsigned long alignment = mem->page_alignment;
+
+ if (i >= num_nodes) {
+ r = -E2BIG;
+ goto error;
+ }
if (pages >= pages_per_node)
alignment = pages_per_node;
@@ -492,6 +484,49 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
return r;
}
+/**
+ * amdgpu_vram_mgr_alloc - allocate new range
+ *
+ * @man: TTM memory type manager
+ * @tbo: TTM BO we need this range for
+ * @place: placement flags and restrictions
+ * @mem: the resulting mem object
+ *
+ * Allocate VRAM for the given BO.
+ */
+static int amdgpu_vram_mgr_alloc(struct ttm_resource_manager *man,
+ struct ttm_buffer_object *tbo,
+ const struct ttm_place *place,
+ struct ttm_resource *mem)
+{
+ unsigned long num_nodes, pages_per_node;
+ int r;
+
+ if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
+ return amdgpu_vram_mgr_new(man, tbo, place, 1, ~0ul, mem);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ pages_per_node = HPAGE_PMD_NR;
+#else
+ /* default to 2MB */
+ pages_per_node = 2UL << (20UL - PAGE_SHIFT);
+#endif
+ pages_per_node = max_t(uint32_t, pages_per_node,
+ mem->page_alignment);
+ num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
+
+ if (sizeof(struct drm_mm_node) * num_nodes > PAGE_SIZE) {
+ r = amdgpu_vram_mgr_new(man, tbo, place,
+ PAGE_SIZE / sizeof(struct drm_mm_node),
+ pages_per_node, mem);
+ if (r != -E2BIG)
+ return r;
+ }
+
+ return amdgpu_vram_mgr_new(man, tbo, place, num_nodes, pages_per_node,
+ mem);
+}
+
/**
* amdgpu_vram_mgr_del - free ranges
*
@@ -693,7 +728,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
}
static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
- .alloc = amdgpu_vram_mgr_new,
+ .alloc = amdgpu_vram_mgr_alloc,
.free = amdgpu_vram_mgr_del,
.debug = amdgpu_vram_mgr_debug
};
--
2.25.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 11+ messages in thread
end of thread, other threads:[~2021-07-15 14:38 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-13 13:32 [PATCH] drm/amdgpu: further lower VRAM allocation overhead Christian König
2021-07-13 16:11 ` Felix Kuehling
2021-07-13 17:23 ` Eric Huang
2021-07-14 12:08 ` Christian König
2021-07-13 19:17 Eric Huang
2021-07-13 19:19 ` Eric Huang
2021-07-14 8:33 ` Christian König
2021-07-14 9:41 ` Pan, Xinhui
2021-07-14 12:17 ` Christian König
2021-07-15 13:50 ` Eric Huang
2021-07-15 14:38 ` Felix Kuehling
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.