* [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU
@ 2016-04-14 12:54 Christian König
2016-04-14 12:54 ` [PATCH 2/2] drm/amdgpu: remove sorting of CS BOs Christian König
2016-04-14 14:23 ` [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU Alex Deucher
0 siblings, 2 replies; 8+ messages in thread
From: Christian König @ 2016-04-14 12:54 UTC (permalink / raw)
To: alexdeucher; +Cc: dri-devel
From: Christian König <christian.koenig@amd.com>
This allows us to have small BOs on the LRU before big ones.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 11 ++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 59 +++++++++++++++++++++++++++++++--
2 files changed, 68 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c4a21c6..7b90323 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -391,6 +391,14 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
/*
* TTM.
*/
+
+#define AMDGPU_TTM_LRU_SIZE 20
+
+struct amdgpu_mman_lru {
+ struct list_head *lru[TTM_NUM_MEM_TYPES];
+ struct list_head *swap_lru;
+};
+
struct amdgpu_mman {
struct ttm_bo_global_ref bo_global_ref;
struct drm_global_reference mem_global_ref;
@@ -408,6 +416,9 @@ struct amdgpu_mman {
struct amdgpu_ring *buffer_funcs_ring;
/* Scheduler entity for buffer moves */
struct amd_sched_entity entity;
+
+ /* custom LRU management */
+ struct amdgpu_mman_lru log2_size[AMDGPU_TTM_LRU_SIZE];
};
int amdgpu_copy_buffer(struct amdgpu_ring *ring,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index fefaa9b..b58a445 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -910,6 +910,50 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
return flags;
}
+static void amdgpu_ttm_lru_removal(struct ttm_buffer_object *tbo)
+{
+ struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev);
+ unsigned i;
+
+ for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) {
+ struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i];
+
+ if (&tbo->lru == lru->lru[tbo->mem.mem_type])
+ lru->lru[tbo->mem.mem_type] = tbo->lru.prev;
+
+ if (&tbo->swap == lru->swap_lru)
+ lru->swap_lru = tbo->swap.prev;
+ }
+}
+
+static struct amdgpu_mman_lru *amdgpu_ttm_lru(struct ttm_buffer_object *tbo)
+{
+ struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev);
+ unsigned log2_size = min(ilog2(tbo->num_pages), AMDGPU_TTM_LRU_SIZE);
+
+ return &adev->mman.log2_size[log2_size];
+}
+
+static struct list_head *amdgpu_ttm_lru_tail(struct ttm_buffer_object *tbo)
+{
+ struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo);
+ struct list_head *res = lru->lru[tbo->mem.mem_type];
+
+ lru->lru[tbo->mem.mem_type] = &tbo->lru;
+
+ return res;
+}
+
+static struct list_head *amdgpu_ttm_swap_lru_tail(struct ttm_buffer_object *tbo)
+{
+ struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo);
+ struct list_head *res = lru->swap_lru;
+
+ lru->swap_lru = &tbo->swap;
+
+ return res;
+}
+
static struct ttm_bo_driver amdgpu_bo_driver = {
.ttm_tt_create = &amdgpu_ttm_tt_create,
.ttm_tt_populate = &amdgpu_ttm_tt_populate,
@@ -923,12 +967,14 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
.fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
.io_mem_free = &amdgpu_ttm_io_mem_free,
- .lru_tail = &ttm_bo_default_lru_tail,
- .swap_lru_tail = &ttm_bo_default_swap_lru_tail,
+ .lru_removal = &amdgpu_ttm_lru_removal,
+ .lru_tail = &amdgpu_ttm_lru_tail,
+ .swap_lru_tail = &amdgpu_ttm_swap_lru_tail,
};
int amdgpu_ttm_init(struct amdgpu_device *adev)
{
+ unsigned i, j;
int r;
r = amdgpu_ttm_global_init(adev);
@@ -946,6 +992,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
return r;
}
+
+ for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) {
+ struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i];
+
+ for (j = 0; j < TTM_NUM_MEM_TYPES; ++j)
+ lru->lru[j] = &adev->mman.bdev.man[j].lru;
+ lru->swap_lru = &adev->mman.bdev.glob->swap_lru;
+ }
+
adev->mman.initialized = true;
r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
adev->mc.real_vram_size >> PAGE_SHIFT);
--
2.5.0
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/2] drm/amdgpu: remove sorting of CS BOs
2016-04-14 12:54 [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU Christian König
@ 2016-04-14 12:54 ` Christian König
2016-04-15 5:21 ` Ayyappa Ch
2016-04-14 14:23 ` [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU Alex Deucher
1 sibling, 1 reply; 8+ messages in thread
From: Christian König @ 2016-04-14 12:54 UTC (permalink / raw)
To: alexdeucher; +Cc: dri-devel
From: Christian König <christian.koenig@amd.com>
Not needed any more.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 23 -----------------------
1 file changed, 23 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9392e50..00cf74a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -24,7 +24,6 @@
* Authors:
* Jerome Glisse <glisse@freedesktop.org>
*/
-#include <linux/list_sort.h>
#include <linux/pagemap.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
@@ -527,16 +526,6 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
return 0;
}
-static int cmp_size_smaller_first(void *priv, struct list_head *a,
- struct list_head *b)
-{
- struct amdgpu_bo_list_entry *la = list_entry(a, struct amdgpu_bo_list_entry, tv.head);
- struct amdgpu_bo_list_entry *lb = list_entry(b, struct amdgpu_bo_list_entry, tv.head);
-
- /* Sort A before B if A is smaller. */
- return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
-}
-
/**
* cs_parser_fini() - clean parser states
* @parser: parser structure holding parsing context.
@@ -553,18 +542,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
if (!error) {
amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
- /* Sort the buffer list from the smallest to largest buffer,
- * which affects the order of buffers in the LRU list.
- * This assures that the smallest buffers are added first
- * to the LRU list, so they are likely to be later evicted
- * first, instead of large buffers whose eviction is more
- * expensive.
- *
- * This slightly lowers the number of bytes moved by TTM
- * per frame under memory pressure.
- */
- list_sort(NULL, &parser->validated, cmp_size_smaller_first);
-
ttm_eu_fence_buffer_objects(&parser->ticket,
&parser->validated,
parser->fence);
--
2.5.0
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU
2016-04-14 12:54 [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU Christian König
2016-04-14 12:54 ` [PATCH 2/2] drm/amdgpu: remove sorting of CS BOs Christian König
@ 2016-04-14 14:23 ` Alex Deucher
2016-04-14 14:25 ` Christian König
1 sibling, 1 reply; 8+ messages in thread
From: Alex Deucher @ 2016-04-14 14:23 UTC (permalink / raw)
To: Christian König; +Cc: Maling list - DRI developers
On Thu, Apr 14, 2016 at 8:54 AM, Christian König
<deathsimple@vodafone.de> wrote:
> From: Christian König <christian.koenig@amd.com>
>
> This allows us to have small BOs on the LRU before big ones.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
Have you done any benchmarking to see how much this helps when there
is memory contention?
For the series:
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 11 ++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 59 +++++++++++++++++++++++++++++++--
> 2 files changed, 68 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index c4a21c6..7b90323 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -391,6 +391,14 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
> /*
> * TTM.
> */
> +
> +#define AMDGPU_TTM_LRU_SIZE 20
> +
> +struct amdgpu_mman_lru {
> + struct list_head *lru[TTM_NUM_MEM_TYPES];
> + struct list_head *swap_lru;
> +};
> +
> struct amdgpu_mman {
> struct ttm_bo_global_ref bo_global_ref;
> struct drm_global_reference mem_global_ref;
> @@ -408,6 +416,9 @@ struct amdgpu_mman {
> struct amdgpu_ring *buffer_funcs_ring;
> /* Scheduler entity for buffer moves */
> struct amd_sched_entity entity;
> +
> + /* custom LRU management */
> + struct amdgpu_mman_lru log2_size[AMDGPU_TTM_LRU_SIZE];
> };
>
> int amdgpu_copy_buffer(struct amdgpu_ring *ring,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index fefaa9b..b58a445 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -910,6 +910,50 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
> return flags;
> }
>
> +static void amdgpu_ttm_lru_removal(struct ttm_buffer_object *tbo)
> +{
> + struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev);
> + unsigned i;
> +
> + for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) {
> + struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i];
> +
> + if (&tbo->lru == lru->lru[tbo->mem.mem_type])
> + lru->lru[tbo->mem.mem_type] = tbo->lru.prev;
> +
> + if (&tbo->swap == lru->swap_lru)
> + lru->swap_lru = tbo->swap.prev;
> + }
> +}
> +
> +static struct amdgpu_mman_lru *amdgpu_ttm_lru(struct ttm_buffer_object *tbo)
> +{
> + struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev);
> + unsigned log2_size = min(ilog2(tbo->num_pages), AMDGPU_TTM_LRU_SIZE);
> +
> + return &adev->mman.log2_size[log2_size];
> +}
> +
> +static struct list_head *amdgpu_ttm_lru_tail(struct ttm_buffer_object *tbo)
> +{
> + struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo);
> + struct list_head *res = lru->lru[tbo->mem.mem_type];
> +
> + lru->lru[tbo->mem.mem_type] = &tbo->lru;
> +
> + return res;
> +}
> +
> +static struct list_head *amdgpu_ttm_swap_lru_tail(struct ttm_buffer_object *tbo)
> +{
> + struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo);
> + struct list_head *res = lru->swap_lru;
> +
> + lru->swap_lru = &tbo->swap;
> +
> + return res;
> +}
> +
> static struct ttm_bo_driver amdgpu_bo_driver = {
> .ttm_tt_create = &amdgpu_ttm_tt_create,
> .ttm_tt_populate = &amdgpu_ttm_tt_populate,
> @@ -923,12 +967,14 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
> .fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
> .io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
> .io_mem_free = &amdgpu_ttm_io_mem_free,
> - .lru_tail = &ttm_bo_default_lru_tail,
> - .swap_lru_tail = &ttm_bo_default_swap_lru_tail,
> + .lru_removal = &amdgpu_ttm_lru_removal,
> + .lru_tail = &amdgpu_ttm_lru_tail,
> + .swap_lru_tail = &amdgpu_ttm_swap_lru_tail,
> };
>
> int amdgpu_ttm_init(struct amdgpu_device *adev)
> {
> + unsigned i, j;
> int r;
>
> r = amdgpu_ttm_global_init(adev);
> @@ -946,6 +992,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
> DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
> return r;
> }
> +
> + for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) {
> + struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i];
> +
> + for (j = 0; j < TTM_NUM_MEM_TYPES; ++j)
> + lru->lru[j] = &adev->mman.bdev.man[j].lru;
> + lru->swap_lru = &adev->mman.bdev.glob->swap_lru;
> + }
> +
> adev->mman.initialized = true;
> r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
> adev->mc.real_vram_size >> PAGE_SHIFT);
> --
> 2.5.0
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU
2016-04-14 14:23 ` [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU Alex Deucher
@ 2016-04-14 14:25 ` Christian König
2016-04-14 15:53 ` Christian König
0 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2016-04-14 14:25 UTC (permalink / raw)
To: Alex Deucher; +Cc: Maling list - DRI developers
Am 14.04.2016 um 16:23 schrieb Alex Deucher:
> On Thu, Apr 14, 2016 at 8:54 AM, Christian König
> <deathsimple@vodafone.de> wrote:
>> From: Christian König <christian.koenig@amd.com>
>>
>> This allows us to have small BOs on the LRU before big ones.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
> Have you done any benchmarking to see how much this helps when there
> is memory contention?
Still working on this. Marek could you help with that? You usually have
the Unigin benchmarks ready at hand.
Christian.
>
> For the series:
> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 11 ++++++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 59 +++++++++++++++++++++++++++++++--
>> 2 files changed, 68 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index c4a21c6..7b90323 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -391,6 +391,14 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
>> /*
>> * TTM.
>> */
>> +
>> +#define AMDGPU_TTM_LRU_SIZE 20
>> +
>> +struct amdgpu_mman_lru {
>> + struct list_head *lru[TTM_NUM_MEM_TYPES];
>> + struct list_head *swap_lru;
>> +};
>> +
>> struct amdgpu_mman {
>> struct ttm_bo_global_ref bo_global_ref;
>> struct drm_global_reference mem_global_ref;
>> @@ -408,6 +416,9 @@ struct amdgpu_mman {
>> struct amdgpu_ring *buffer_funcs_ring;
>> /* Scheduler entity for buffer moves */
>> struct amd_sched_entity entity;
>> +
>> + /* custom LRU management */
>> + struct amdgpu_mman_lru log2_size[AMDGPU_TTM_LRU_SIZE];
>> };
>>
>> int amdgpu_copy_buffer(struct amdgpu_ring *ring,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index fefaa9b..b58a445 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -910,6 +910,50 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
>> return flags;
>> }
>>
>> +static void amdgpu_ttm_lru_removal(struct ttm_buffer_object *tbo)
>> +{
>> + struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev);
>> + unsigned i;
>> +
>> + for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) {
>> + struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i];
>> +
>> + if (&tbo->lru == lru->lru[tbo->mem.mem_type])
>> + lru->lru[tbo->mem.mem_type] = tbo->lru.prev;
>> +
>> + if (&tbo->swap == lru->swap_lru)
>> + lru->swap_lru = tbo->swap.prev;
>> + }
>> +}
>> +
>> +static struct amdgpu_mman_lru *amdgpu_ttm_lru(struct ttm_buffer_object *tbo)
>> +{
>> + struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev);
>> + unsigned log2_size = min(ilog2(tbo->num_pages), AMDGPU_TTM_LRU_SIZE);
>> +
>> + return &adev->mman.log2_size[log2_size];
>> +}
>> +
>> +static struct list_head *amdgpu_ttm_lru_tail(struct ttm_buffer_object *tbo)
>> +{
>> + struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo);
>> + struct list_head *res = lru->lru[tbo->mem.mem_type];
>> +
>> + lru->lru[tbo->mem.mem_type] = &tbo->lru;
>> +
>> + return res;
>> +}
>> +
>> +static struct list_head *amdgpu_ttm_swap_lru_tail(struct ttm_buffer_object *tbo)
>> +{
>> + struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo);
>> + struct list_head *res = lru->swap_lru;
>> +
>> + lru->swap_lru = &tbo->swap;
>> +
>> + return res;
>> +}
>> +
>> static struct ttm_bo_driver amdgpu_bo_driver = {
>> .ttm_tt_create = &amdgpu_ttm_tt_create,
>> .ttm_tt_populate = &amdgpu_ttm_tt_populate,
>> @@ -923,12 +967,14 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
>> .fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
>> .io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
>> .io_mem_free = &amdgpu_ttm_io_mem_free,
>> - .lru_tail = &ttm_bo_default_lru_tail,
>> - .swap_lru_tail = &ttm_bo_default_swap_lru_tail,
>> + .lru_removal = &amdgpu_ttm_lru_removal,
>> + .lru_tail = &amdgpu_ttm_lru_tail,
>> + .swap_lru_tail = &amdgpu_ttm_swap_lru_tail,
>> };
>>
>> int amdgpu_ttm_init(struct amdgpu_device *adev)
>> {
>> + unsigned i, j;
>> int r;
>>
>> r = amdgpu_ttm_global_init(adev);
>> @@ -946,6 +992,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>> DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
>> return r;
>> }
>> +
>> + for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) {
>> + struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i];
>> +
>> + for (j = 0; j < TTM_NUM_MEM_TYPES; ++j)
>> + lru->lru[j] = &adev->mman.bdev.man[j].lru;
>> + lru->swap_lru = &adev->mman.bdev.glob->swap_lru;
>> + }
>> +
>> adev->mman.initialized = true;
>> r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
>> adev->mc.real_vram_size >> PAGE_SHIFT);
>> --
>> 2.5.0
>>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU
2016-04-14 14:25 ` Christian König
@ 2016-04-14 15:53 ` Christian König
0 siblings, 0 replies; 8+ messages in thread
From: Christian König @ 2016-04-14 15:53 UTC (permalink / raw)
To: Alex Deucher; +Cc: Maling list - DRI developers
Am 14.04.2016 um 16:25 schrieb Christian König:
> Am 14.04.2016 um 16:23 schrieb Alex Deucher:
>> On Thu, Apr 14, 2016 at 8:54 AM, Christian König
>> <deathsimple@vodafone.de> wrote:
>>> From: Christian König <christian.koenig@amd.com>
>>>
>>> This allows us to have small BOs on the LRU before big ones.
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> Have you done any benchmarking to see how much this helps when there
>> is memory contention?
>
> Still working on this. Marek could you help with that? You usually
> have the Unigin benchmarks ready at hand.
Alex please wait before you merge them. Further testing with Unigin
Valey showed an interesting bug which needs to be fixed first.
Christian.
>
> Christian.
>
>>
>> For the series:
>> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
>>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 11 ++++++
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 59
>>> +++++++++++++++++++++++++++++++--
>>> 2 files changed, 68 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index c4a21c6..7b90323 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -391,6 +391,14 @@ unsigned amdgpu_fence_count_emitted(struct
>>> amdgpu_ring *ring);
>>> /*
>>> * TTM.
>>> */
>>> +
>>> +#define AMDGPU_TTM_LRU_SIZE 20
>>> +
>>> +struct amdgpu_mman_lru {
>>> + struct list_head *lru[TTM_NUM_MEM_TYPES];
>>> + struct list_head *swap_lru;
>>> +};
>>> +
>>> struct amdgpu_mman {
>>> struct ttm_bo_global_ref bo_global_ref;
>>> struct drm_global_reference mem_global_ref;
>>> @@ -408,6 +416,9 @@ struct amdgpu_mman {
>>> struct amdgpu_ring *buffer_funcs_ring;
>>> /* Scheduler entity for buffer moves */
>>> struct amd_sched_entity entity;
>>> +
>>> + /* custom LRU management */
>>> + struct amdgpu_mman_lru log2_size[AMDGPU_TTM_LRU_SIZE];
>>> };
>>>
>>> int amdgpu_copy_buffer(struct amdgpu_ring *ring,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> index fefaa9b..b58a445 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> @@ -910,6 +910,50 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct
>>> amdgpu_device *adev, struct ttm_tt *ttm,
>>> return flags;
>>> }
>>>
>>> +static void amdgpu_ttm_lru_removal(struct ttm_buffer_object *tbo)
>>> +{
>>> + struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev);
>>> + unsigned i;
>>> +
>>> + for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) {
>>> + struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i];
>>> +
>>> + if (&tbo->lru == lru->lru[tbo->mem.mem_type])
>>> + lru->lru[tbo->mem.mem_type] = tbo->lru.prev;
>>> +
>>> + if (&tbo->swap == lru->swap_lru)
>>> + lru->swap_lru = tbo->swap.prev;
>>> + }
>>> +}
>>> +
>>> +static struct amdgpu_mman_lru *amdgpu_ttm_lru(struct
>>> ttm_buffer_object *tbo)
>>> +{
>>> + struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev);
>>> + unsigned log2_size = min(ilog2(tbo->num_pages),
>>> AMDGPU_TTM_LRU_SIZE);
>>> +
>>> + return &adev->mman.log2_size[log2_size];
>>> +}
>>> +
>>> +static struct list_head *amdgpu_ttm_lru_tail(struct
>>> ttm_buffer_object *tbo)
>>> +{
>>> + struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo);
>>> + struct list_head *res = lru->lru[tbo->mem.mem_type];
>>> +
>>> + lru->lru[tbo->mem.mem_type] = &tbo->lru;
>>> +
>>> + return res;
>>> +}
>>> +
>>> +static struct list_head *amdgpu_ttm_swap_lru_tail(struct
>>> ttm_buffer_object *tbo)
>>> +{
>>> + struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo);
>>> + struct list_head *res = lru->swap_lru;
>>> +
>>> + lru->swap_lru = &tbo->swap;
>>> +
>>> + return res;
>>> +}
>>> +
>>> static struct ttm_bo_driver amdgpu_bo_driver = {
>>> .ttm_tt_create = &amdgpu_ttm_tt_create,
>>> .ttm_tt_populate = &amdgpu_ttm_tt_populate,
>>> @@ -923,12 +967,14 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
>>> .fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
>>> .io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
>>> .io_mem_free = &amdgpu_ttm_io_mem_free,
>>> - .lru_tail = &ttm_bo_default_lru_tail,
>>> - .swap_lru_tail = &ttm_bo_default_swap_lru_tail,
>>> + .lru_removal = &amdgpu_ttm_lru_removal,
>>> + .lru_tail = &amdgpu_ttm_lru_tail,
>>> + .swap_lru_tail = &amdgpu_ttm_swap_lru_tail,
>>> };
>>>
>>> int amdgpu_ttm_init(struct amdgpu_device *adev)
>>> {
>>> + unsigned i, j;
>>> int r;
>>>
>>> r = amdgpu_ttm_global_init(adev);
>>> @@ -946,6 +992,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>>> DRM_ERROR("failed initializing buffer object
>>> driver(%d).\n", r);
>>> return r;
>>> }
>>> +
>>> + for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) {
>>> + struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i];
>>> +
>>> + for (j = 0; j < TTM_NUM_MEM_TYPES; ++j)
>>> + lru->lru[j] = &adev->mman.bdev.man[j].lru;
>>> + lru->swap_lru = &adev->mman.bdev.glob->swap_lru;
>>> + }
>>> +
>>> adev->mman.initialized = true;
>>> r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
>>> adev->mc.real_vram_size >>
>>> PAGE_SHIFT);
>>> --
>>> 2.5.0
>>>
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/2] drm/amdgpu: remove sorting of CS BOs
2016-04-14 12:54 ` [PATCH 2/2] drm/amdgpu: remove sorting of CS BOs Christian König
@ 2016-04-15 5:21 ` Ayyappa Ch
0 siblings, 0 replies; 8+ messages in thread
From: Ayyappa Ch @ 2016-04-15 5:21 UTC (permalink / raw)
To: Christian König; +Cc: dri-devel
Hello Christian ,
As per below comment large buffer eviction is more expensive. So
removing this code will solve the same problem?
- /* Sort the buffer list from the smallest to largest buffer,
- * which affects the order of buffers in the LRU list.
- * This assures that the smallest buffers are added first
- * to the LRU list, so they are likely to be later evicted
- * first, instead of large buffers whose eviction is more
- * expensive.
- *
- * This slightly lowers the number of bytes moved by TTM
- * per frame under memory pressure.
- */
- list_sort(NULL, &parser->validated, cmp_size_smaller_first);
-
Thanks,
Ayyappa.
On Thu, Apr 14, 2016 at 6:24 PM, Christian König
<deathsimple@vodafone.de> wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Not needed any more.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 23 -----------------------
> 1 file changed, 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 9392e50..00cf74a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -24,7 +24,6 @@
> * Authors:
> * Jerome Glisse <glisse@freedesktop.org>
> */
> -#include <linux/list_sort.h>
> #include <linux/pagemap.h>
> #include <drm/drmP.h>
> #include <drm/amdgpu_drm.h>
> @@ -527,16 +526,6 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
> return 0;
> }
>
> -static int cmp_size_smaller_first(void *priv, struct list_head *a,
> - struct list_head *b)
> -{
> - struct amdgpu_bo_list_entry *la = list_entry(a, struct amdgpu_bo_list_entry, tv.head);
> - struct amdgpu_bo_list_entry *lb = list_entry(b, struct amdgpu_bo_list_entry, tv.head);
> -
> - /* Sort A before B if A is smaller. */
> - return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
> -}
> -
> /**
> * cs_parser_fini() - clean parser states
> * @parser: parser structure holding parsing context.
> @@ -553,18 +542,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
> if (!error) {
> amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
>
> - /* Sort the buffer list from the smallest to largest buffer,
> - * which affects the order of buffers in the LRU list.
> - * This assures that the smallest buffers are added first
> - * to the LRU list, so they are likely to be later evicted
> - * first, instead of large buffers whose eviction is more
> - * expensive.
> - *
> - * This slightly lowers the number of bytes moved by TTM
> - * per frame under memory pressure.
> - */
> - list_sort(NULL, &parser->validated, cmp_size_smaller_first);
> -
> ttm_eu_fence_buffer_objects(&parser->ticket,
> &parser->validated,
> parser->fence);
> --
> 2.5.0
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/2] drm/amdgpu: remove sorting of CS BOs
2016-04-15 15:19 ` [PATCH 2/2] drm/amdgpu: remove sorting of CS BOs Christian König
@ 2016-04-15 16:01 ` Alex Deucher
0 siblings, 0 replies; 8+ messages in thread
From: Alex Deucher @ 2016-04-15 16:01 UTC (permalink / raw)
To: Christian König; +Cc: Maling list - DRI developers
On Fri, Apr 15, 2016 at 11:19 AM, Christian König
<deathsimple@vodafone.de> wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Not needed any more.
Applied the series.
Alex
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 23 -----------------------
> 1 file changed, 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 9392e50..00cf74a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -24,7 +24,6 @@
> * Authors:
> * Jerome Glisse <glisse@freedesktop.org>
> */
> -#include <linux/list_sort.h>
> #include <linux/pagemap.h>
> #include <drm/drmP.h>
> #include <drm/amdgpu_drm.h>
> @@ -527,16 +526,6 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
> return 0;
> }
>
> -static int cmp_size_smaller_first(void *priv, struct list_head *a,
> - struct list_head *b)
> -{
> - struct amdgpu_bo_list_entry *la = list_entry(a, struct amdgpu_bo_list_entry, tv.head);
> - struct amdgpu_bo_list_entry *lb = list_entry(b, struct amdgpu_bo_list_entry, tv.head);
> -
> - /* Sort A before B if A is smaller. */
> - return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
> -}
> -
> /**
> * cs_parser_fini() - clean parser states
> * @parser: parser structure holding parsing context.
> @@ -553,18 +542,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
> if (!error) {
> amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
>
> - /* Sort the buffer list from the smallest to largest buffer,
> - * which affects the order of buffers in the LRU list.
> - * This assures that the smallest buffers are added first
> - * to the LRU list, so they are likely to be later evicted
> - * first, instead of large buffers whose eviction is more
> - * expensive.
> - *
> - * This slightly lowers the number of bytes moved by TTM
> - * per frame under memory pressure.
> - */
> - list_sort(NULL, &parser->validated, cmp_size_smaller_first);
> -
> ttm_eu_fence_buffer_objects(&parser->ticket,
> &parser->validated,
> parser->fence);
> --
> 2.5.0
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 2/2] drm/amdgpu: remove sorting of CS BOs
2016-04-15 15:19 [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU v2 Christian König
@ 2016-04-15 15:19 ` Christian König
2016-04-15 16:01 ` Alex Deucher
0 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2016-04-15 15:19 UTC (permalink / raw)
To: alexdeucher; +Cc: dri-devel
From: Christian König <christian.koenig@amd.com>
Not needed any more.
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 23 -----------------------
1 file changed, 23 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9392e50..00cf74a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -24,7 +24,6 @@
* Authors:
* Jerome Glisse <glisse@freedesktop.org>
*/
-#include <linux/list_sort.h>
#include <linux/pagemap.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
@@ -527,16 +526,6 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
return 0;
}
-static int cmp_size_smaller_first(void *priv, struct list_head *a,
- struct list_head *b)
-{
- struct amdgpu_bo_list_entry *la = list_entry(a, struct amdgpu_bo_list_entry, tv.head);
- struct amdgpu_bo_list_entry *lb = list_entry(b, struct amdgpu_bo_list_entry, tv.head);
-
- /* Sort A before B if A is smaller. */
- return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
-}
-
/**
* cs_parser_fini() - clean parser states
* @parser: parser structure holding parsing context.
@@ -553,18 +542,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
if (!error) {
amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
- /* Sort the buffer list from the smallest to largest buffer,
- * which affects the order of buffers in the LRU list.
- * This assures that the smallest buffers are added first
- * to the LRU list, so they are likely to be later evicted
- * first, instead of large buffers whose eviction is more
- * expensive.
- *
- * This slightly lowers the number of bytes moved by TTM
- * per frame under memory pressure.
- */
- list_sort(NULL, &parser->validated, cmp_size_smaller_first);
-
ttm_eu_fence_buffer_objects(&parser->ticket,
&parser->validated,
parser->fence);
--
2.5.0
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 8+ messages in thread
end of thread, other threads:[~2016-04-15 16:01 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-14 12:54 [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU Christian König
2016-04-14 12:54 ` [PATCH 2/2] drm/amdgpu: remove sorting of CS BOs Christian König
2016-04-15 5:21 ` Ayyappa Ch
2016-04-14 14:23 ` [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU Alex Deucher
2016-04-14 14:25 ` Christian König
2016-04-14 15:53 ` Christian König
2016-04-15 15:19 [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU v2 Christian König
2016-04-15 15:19 ` [PATCH 2/2] drm/amdgpu: remove sorting of CS BOs Christian König
2016-04-15 16:01 ` Alex Deucher
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.