All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU
@ 2016-04-14 12:54 Christian König
  2016-04-14 12:54 ` [PATCH 2/2] drm/amdgpu: remove sorting of CS BOs Christian König
  2016-04-14 14:23 ` [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU Alex Deucher
  0 siblings, 2 replies; 8+ messages in thread
From: Christian König @ 2016-04-14 12:54 UTC (permalink / raw)
  To: alexdeucher; +Cc: dri-devel

From: Christian König <christian.koenig@amd.com>

This allows us to have small BOs on the LRU before big ones.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 11 ++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 59 +++++++++++++++++++++++++++++++--
 2 files changed, 68 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c4a21c6..7b90323 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -391,6 +391,14 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
 /*
  * TTM.
  */
+
+#define AMDGPU_TTM_LRU_SIZE	20
+
+struct amdgpu_mman_lru {
+	struct list_head		*lru[TTM_NUM_MEM_TYPES];
+	struct list_head		*swap_lru;
+};
+
 struct amdgpu_mman {
 	struct ttm_bo_global_ref        bo_global_ref;
 	struct drm_global_reference	mem_global_ref;
@@ -408,6 +416,9 @@ struct amdgpu_mman {
 	struct amdgpu_ring			*buffer_funcs_ring;
 	/* Scheduler entity for buffer moves */
 	struct amd_sched_entity			entity;
+
+	/* custom LRU management */
+	struct amdgpu_mman_lru			log2_size[AMDGPU_TTM_LRU_SIZE];
 };
 
 int amdgpu_copy_buffer(struct amdgpu_ring *ring,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index fefaa9b..b58a445 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -910,6 +910,50 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
 	return flags;
 }
 
+static void amdgpu_ttm_lru_removal(struct ttm_buffer_object *tbo)
+{
+	struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev);
+	unsigned i;
+
+	for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) {
+		struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i];
+
+		if (&tbo->lru == lru->lru[tbo->mem.mem_type])
+			lru->lru[tbo->mem.mem_type] = tbo->lru.prev;
+
+		if (&tbo->swap == lru->swap_lru)
+			lru->swap_lru = tbo->swap.prev;
+	}
+}
+
+static struct amdgpu_mman_lru *amdgpu_ttm_lru(struct ttm_buffer_object *tbo)
+{
+	struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev);
+	unsigned log2_size = min(ilog2(tbo->num_pages), AMDGPU_TTM_LRU_SIZE);
+
+	return &adev->mman.log2_size[log2_size];
+}
+
+static struct list_head *amdgpu_ttm_lru_tail(struct ttm_buffer_object *tbo)
+{
+	struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo);
+	struct list_head *res = lru->lru[tbo->mem.mem_type];
+
+	lru->lru[tbo->mem.mem_type] = &tbo->lru;
+
+	return res;
+}
+
+static struct list_head *amdgpu_ttm_swap_lru_tail(struct ttm_buffer_object *tbo)
+{
+	struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo);
+	struct list_head *res = lru->swap_lru;
+
+	lru->swap_lru = &tbo->swap;
+
+	return res;
+}
+
 static struct ttm_bo_driver amdgpu_bo_driver = {
 	.ttm_tt_create = &amdgpu_ttm_tt_create,
 	.ttm_tt_populate = &amdgpu_ttm_tt_populate,
@@ -923,12 +967,14 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
 	.fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
 	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
 	.io_mem_free = &amdgpu_ttm_io_mem_free,
-	.lru_tail = &ttm_bo_default_lru_tail,
-	.swap_lru_tail = &ttm_bo_default_swap_lru_tail,
+	.lru_removal = &amdgpu_ttm_lru_removal,
+	.lru_tail = &amdgpu_ttm_lru_tail,
+	.swap_lru_tail = &amdgpu_ttm_swap_lru_tail,
 };
 
 int amdgpu_ttm_init(struct amdgpu_device *adev)
 {
+	unsigned i, j;
 	int r;
 
 	r = amdgpu_ttm_global_init(adev);
@@ -946,6 +992,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
 		return r;
 	}
+
+	for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) {
+		struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i];
+
+		for (j = 0; j < TTM_NUM_MEM_TYPES; ++j)
+			lru->lru[j] = &adev->mman.bdev.man[j].lru;
+		lru->swap_lru = &adev->mman.bdev.glob->swap_lru;
+	}
+
 	adev->mman.initialized = true;
 	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
 				adev->mc.real_vram_size >> PAGE_SHIFT);
-- 
2.5.0

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/2] drm/amdgpu: remove sorting of CS BOs
  2016-04-14 12:54 [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU Christian König
@ 2016-04-14 12:54 ` Christian König
  2016-04-15  5:21   ` Ayyappa Ch
  2016-04-14 14:23 ` [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU Alex Deucher
  1 sibling, 1 reply; 8+ messages in thread
From: Christian König @ 2016-04-14 12:54 UTC (permalink / raw)
  To: alexdeucher; +Cc: dri-devel

From: Christian König <christian.koenig@amd.com>

Not needed any more.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 23 -----------------------
 1 file changed, 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9392e50..00cf74a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -24,7 +24,6 @@
  * Authors:
  *    Jerome Glisse <glisse@freedesktop.org>
  */
-#include <linux/list_sort.h>
 #include <linux/pagemap.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
@@ -527,16 +526,6 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 	return 0;
 }
 
-static int cmp_size_smaller_first(void *priv, struct list_head *a,
-				  struct list_head *b)
-{
-	struct amdgpu_bo_list_entry *la = list_entry(a, struct amdgpu_bo_list_entry, tv.head);
-	struct amdgpu_bo_list_entry *lb = list_entry(b, struct amdgpu_bo_list_entry, tv.head);
-
-	/* Sort A before B if A is smaller. */
-	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
-}
-
 /**
  * cs_parser_fini() - clean parser states
  * @parser:	parser structure holding parsing context.
@@ -553,18 +542,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
 	if (!error) {
 		amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
 
-		/* Sort the buffer list from the smallest to largest buffer,
-		 * which affects the order of buffers in the LRU list.
-		 * This assures that the smallest buffers are added first
-		 * to the LRU list, so they are likely to be later evicted
-		 * first, instead of large buffers whose eviction is more
-		 * expensive.
-		 *
-		 * This slightly lowers the number of bytes moved by TTM
-		 * per frame under memory pressure.
-		 */
-		list_sort(NULL, &parser->validated, cmp_size_smaller_first);
-
 		ttm_eu_fence_buffer_objects(&parser->ticket,
 					    &parser->validated,
 					    parser->fence);
-- 
2.5.0

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU
  2016-04-14 12:54 [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU Christian König
  2016-04-14 12:54 ` [PATCH 2/2] drm/amdgpu: remove sorting of CS BOs Christian König
@ 2016-04-14 14:23 ` Alex Deucher
  2016-04-14 14:25   ` Christian König
  1 sibling, 1 reply; 8+ messages in thread
From: Alex Deucher @ 2016-04-14 14:23 UTC (permalink / raw)
  To: Christian König; +Cc: Maling list - DRI developers

On Thu, Apr 14, 2016 at 8:54 AM, Christian König
<deathsimple@vodafone.de> wrote:
> From: Christian König <christian.koenig@amd.com>
>
> This allows us to have small BOs on the LRU before big ones.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

Have you done any benchmarking to see how much this helps when there
is memory contention?

For the series:
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 11 ++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 59 +++++++++++++++++++++++++++++++--
>  2 files changed, 68 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index c4a21c6..7b90323 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -391,6 +391,14 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
>  /*
>   * TTM.
>   */
> +
> +#define AMDGPU_TTM_LRU_SIZE    20
> +
> +struct amdgpu_mman_lru {
> +       struct list_head                *lru[TTM_NUM_MEM_TYPES];
> +       struct list_head                *swap_lru;
> +};
> +
>  struct amdgpu_mman {
>         struct ttm_bo_global_ref        bo_global_ref;
>         struct drm_global_reference     mem_global_ref;
> @@ -408,6 +416,9 @@ struct amdgpu_mman {
>         struct amdgpu_ring                      *buffer_funcs_ring;
>         /* Scheduler entity for buffer moves */
>         struct amd_sched_entity                 entity;
> +
> +       /* custom LRU management */
> +       struct amdgpu_mman_lru                  log2_size[AMDGPU_TTM_LRU_SIZE];
>  };
>
>  int amdgpu_copy_buffer(struct amdgpu_ring *ring,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index fefaa9b..b58a445 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -910,6 +910,50 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
>         return flags;
>  }
>
> +static void amdgpu_ttm_lru_removal(struct ttm_buffer_object *tbo)
> +{
> +       struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev);
> +       unsigned i;
> +
> +       for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) {
> +               struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i];
> +
> +               if (&tbo->lru == lru->lru[tbo->mem.mem_type])
> +                       lru->lru[tbo->mem.mem_type] = tbo->lru.prev;
> +
> +               if (&tbo->swap == lru->swap_lru)
> +                       lru->swap_lru = tbo->swap.prev;
> +       }
> +}
> +
> +static struct amdgpu_mman_lru *amdgpu_ttm_lru(struct ttm_buffer_object *tbo)
> +{
> +       struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev);
> +       unsigned log2_size = min(ilog2(tbo->num_pages), AMDGPU_TTM_LRU_SIZE);
> +
> +       return &adev->mman.log2_size[log2_size];
> +}
> +
> +static struct list_head *amdgpu_ttm_lru_tail(struct ttm_buffer_object *tbo)
> +{
> +       struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo);
> +       struct list_head *res = lru->lru[tbo->mem.mem_type];
> +
> +       lru->lru[tbo->mem.mem_type] = &tbo->lru;
> +
> +       return res;
> +}
> +
> +static struct list_head *amdgpu_ttm_swap_lru_tail(struct ttm_buffer_object *tbo)
> +{
> +       struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo);
> +       struct list_head *res = lru->swap_lru;
> +
> +       lru->swap_lru = &tbo->swap;
> +
> +       return res;
> +}
> +
>  static struct ttm_bo_driver amdgpu_bo_driver = {
>         .ttm_tt_create = &amdgpu_ttm_tt_create,
>         .ttm_tt_populate = &amdgpu_ttm_tt_populate,
> @@ -923,12 +967,14 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
>         .fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
>         .io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
>         .io_mem_free = &amdgpu_ttm_io_mem_free,
> -       .lru_tail = &ttm_bo_default_lru_tail,
> -       .swap_lru_tail = &ttm_bo_default_swap_lru_tail,
> +       .lru_removal = &amdgpu_ttm_lru_removal,
> +       .lru_tail = &amdgpu_ttm_lru_tail,
> +       .swap_lru_tail = &amdgpu_ttm_swap_lru_tail,
>  };
>
>  int amdgpu_ttm_init(struct amdgpu_device *adev)
>  {
> +       unsigned i, j;
>         int r;
>
>         r = amdgpu_ttm_global_init(adev);
> @@ -946,6 +992,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>                 DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
>                 return r;
>         }
> +
> +       for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) {
> +               struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i];
> +
> +               for (j = 0; j < TTM_NUM_MEM_TYPES; ++j)
> +                       lru->lru[j] = &adev->mman.bdev.man[j].lru;
> +               lru->swap_lru = &adev->mman.bdev.glob->swap_lru;
> +       }
> +
>         adev->mman.initialized = true;
>         r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
>                                 adev->mc.real_vram_size >> PAGE_SHIFT);
> --
> 2.5.0
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU
  2016-04-14 14:23 ` [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU Alex Deucher
@ 2016-04-14 14:25   ` Christian König
  2016-04-14 15:53     ` Christian König
  0 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2016-04-14 14:25 UTC (permalink / raw)
  To: Alex Deucher; +Cc: Maling list - DRI developers

Am 14.04.2016 um 16:23 schrieb Alex Deucher:
> On Thu, Apr 14, 2016 at 8:54 AM, Christian König
> <deathsimple@vodafone.de> wrote:
>> From: Christian König <christian.koenig@amd.com>
>>
>> This allows us to have small BOs on the LRU before big ones.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
> Have you done any benchmarking to see how much this helps when there
> is memory contention?

Still working on this. Marek could you help with that? You usually have 
the Unigin benchmarks ready at hand.

Christian.

>
> For the series:
> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 11 ++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 59 +++++++++++++++++++++++++++++++--
>>   2 files changed, 68 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index c4a21c6..7b90323 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -391,6 +391,14 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
>>   /*
>>    * TTM.
>>    */
>> +
>> +#define AMDGPU_TTM_LRU_SIZE    20
>> +
>> +struct amdgpu_mman_lru {
>> +       struct list_head                *lru[TTM_NUM_MEM_TYPES];
>> +       struct list_head                *swap_lru;
>> +};
>> +
>>   struct amdgpu_mman {
>>          struct ttm_bo_global_ref        bo_global_ref;
>>          struct drm_global_reference     mem_global_ref;
>> @@ -408,6 +416,9 @@ struct amdgpu_mman {
>>          struct amdgpu_ring                      *buffer_funcs_ring;
>>          /* Scheduler entity for buffer moves */
>>          struct amd_sched_entity                 entity;
>> +
>> +       /* custom LRU management */
>> +       struct amdgpu_mman_lru                  log2_size[AMDGPU_TTM_LRU_SIZE];
>>   };
>>
>>   int amdgpu_copy_buffer(struct amdgpu_ring *ring,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index fefaa9b..b58a445 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -910,6 +910,50 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
>>          return flags;
>>   }
>>
>> +static void amdgpu_ttm_lru_removal(struct ttm_buffer_object *tbo)
>> +{
>> +       struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev);
>> +       unsigned i;
>> +
>> +       for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) {
>> +               struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i];
>> +
>> +               if (&tbo->lru == lru->lru[tbo->mem.mem_type])
>> +                       lru->lru[tbo->mem.mem_type] = tbo->lru.prev;
>> +
>> +               if (&tbo->swap == lru->swap_lru)
>> +                       lru->swap_lru = tbo->swap.prev;
>> +       }
>> +}
>> +
>> +static struct amdgpu_mman_lru *amdgpu_ttm_lru(struct ttm_buffer_object *tbo)
>> +{
>> +       struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev);
>> +       unsigned log2_size = min(ilog2(tbo->num_pages), AMDGPU_TTM_LRU_SIZE);
>> +
>> +       return &adev->mman.log2_size[log2_size];
>> +}
>> +
>> +static struct list_head *amdgpu_ttm_lru_tail(struct ttm_buffer_object *tbo)
>> +{
>> +       struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo);
>> +       struct list_head *res = lru->lru[tbo->mem.mem_type];
>> +
>> +       lru->lru[tbo->mem.mem_type] = &tbo->lru;
>> +
>> +       return res;
>> +}
>> +
>> +static struct list_head *amdgpu_ttm_swap_lru_tail(struct ttm_buffer_object *tbo)
>> +{
>> +       struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo);
>> +       struct list_head *res = lru->swap_lru;
>> +
>> +       lru->swap_lru = &tbo->swap;
>> +
>> +       return res;
>> +}
>> +
>>   static struct ttm_bo_driver amdgpu_bo_driver = {
>>          .ttm_tt_create = &amdgpu_ttm_tt_create,
>>          .ttm_tt_populate = &amdgpu_ttm_tt_populate,
>> @@ -923,12 +967,14 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
>>          .fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
>>          .io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
>>          .io_mem_free = &amdgpu_ttm_io_mem_free,
>> -       .lru_tail = &ttm_bo_default_lru_tail,
>> -       .swap_lru_tail = &ttm_bo_default_swap_lru_tail,
>> +       .lru_removal = &amdgpu_ttm_lru_removal,
>> +       .lru_tail = &amdgpu_ttm_lru_tail,
>> +       .swap_lru_tail = &amdgpu_ttm_swap_lru_tail,
>>   };
>>
>>   int amdgpu_ttm_init(struct amdgpu_device *adev)
>>   {
>> +       unsigned i, j;
>>          int r;
>>
>>          r = amdgpu_ttm_global_init(adev);
>> @@ -946,6 +992,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>>                  DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
>>                  return r;
>>          }
>> +
>> +       for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) {
>> +               struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i];
>> +
>> +               for (j = 0; j < TTM_NUM_MEM_TYPES; ++j)
>> +                       lru->lru[j] = &adev->mman.bdev.man[j].lru;
>> +               lru->swap_lru = &adev->mman.bdev.glob->swap_lru;
>> +       }
>> +
>>          adev->mman.initialized = true;
>>          r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
>>                                  adev->mc.real_vram_size >> PAGE_SHIFT);
>> --
>> 2.5.0
>>

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU
  2016-04-14 14:25   ` Christian König
@ 2016-04-14 15:53     ` Christian König
  0 siblings, 0 replies; 8+ messages in thread
From: Christian König @ 2016-04-14 15:53 UTC (permalink / raw)
  To: Alex Deucher; +Cc: Maling list - DRI developers

Am 14.04.2016 um 16:25 schrieb Christian König:
> Am 14.04.2016 um 16:23 schrieb Alex Deucher:
>> On Thu, Apr 14, 2016 at 8:54 AM, Christian König
>> <deathsimple@vodafone.de> wrote:
>>> From: Christian König <christian.koenig@amd.com>
>>>
>>> This allows us to have small BOs on the LRU before big ones.
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> Have you done any benchmarking to see how much this helps when there
>> is memory contention?
>
> Still working on this. Marek could you help with that? You usually 
> have the Unigin benchmarks ready at hand.

Alex please wait before you merge them. Further testing with Unigin 
Valey showed an interesting bug which needs to be fixed first.

Christian.

>
> Christian.
>
>>
>> For the series:
>> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
>>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 11 ++++++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 59 
>>> +++++++++++++++++++++++++++++++--
>>>   2 files changed, 68 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index c4a21c6..7b90323 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -391,6 +391,14 @@ unsigned amdgpu_fence_count_emitted(struct 
>>> amdgpu_ring *ring);
>>>   /*
>>>    * TTM.
>>>    */
>>> +
>>> +#define AMDGPU_TTM_LRU_SIZE    20
>>> +
>>> +struct amdgpu_mman_lru {
>>> +       struct list_head *lru[TTM_NUM_MEM_TYPES];
>>> +       struct list_head                *swap_lru;
>>> +};
>>> +
>>>   struct amdgpu_mman {
>>>          struct ttm_bo_global_ref        bo_global_ref;
>>>          struct drm_global_reference     mem_global_ref;
>>> @@ -408,6 +416,9 @@ struct amdgpu_mman {
>>>          struct amdgpu_ring *buffer_funcs_ring;
>>>          /* Scheduler entity for buffer moves */
>>>          struct amd_sched_entity                 entity;
>>> +
>>> +       /* custom LRU management */
>>> +       struct amdgpu_mman_lru log2_size[AMDGPU_TTM_LRU_SIZE];
>>>   };
>>>
>>>   int amdgpu_copy_buffer(struct amdgpu_ring *ring,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> index fefaa9b..b58a445 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> @@ -910,6 +910,50 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct 
>>> amdgpu_device *adev, struct ttm_tt *ttm,
>>>          return flags;
>>>   }
>>>
>>> +static void amdgpu_ttm_lru_removal(struct ttm_buffer_object *tbo)
>>> +{
>>> +       struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev);
>>> +       unsigned i;
>>> +
>>> +       for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) {
>>> +               struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i];
>>> +
>>> +               if (&tbo->lru == lru->lru[tbo->mem.mem_type])
>>> +                       lru->lru[tbo->mem.mem_type] = tbo->lru.prev;
>>> +
>>> +               if (&tbo->swap == lru->swap_lru)
>>> +                       lru->swap_lru = tbo->swap.prev;
>>> +       }
>>> +}
>>> +
>>> +static struct amdgpu_mman_lru *amdgpu_ttm_lru(struct 
>>> ttm_buffer_object *tbo)
>>> +{
>>> +       struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev);
>>> +       unsigned log2_size = min(ilog2(tbo->num_pages), 
>>> AMDGPU_TTM_LRU_SIZE);
>>> +
>>> +       return &adev->mman.log2_size[log2_size];
>>> +}
>>> +
>>> +static struct list_head *amdgpu_ttm_lru_tail(struct 
>>> ttm_buffer_object *tbo)
>>> +{
>>> +       struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo);
>>> +       struct list_head *res = lru->lru[tbo->mem.mem_type];
>>> +
>>> +       lru->lru[tbo->mem.mem_type] = &tbo->lru;
>>> +
>>> +       return res;
>>> +}
>>> +
>>> +static struct list_head *amdgpu_ttm_swap_lru_tail(struct 
>>> ttm_buffer_object *tbo)
>>> +{
>>> +       struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo);
>>> +       struct list_head *res = lru->swap_lru;
>>> +
>>> +       lru->swap_lru = &tbo->swap;
>>> +
>>> +       return res;
>>> +}
>>> +
>>>   static struct ttm_bo_driver amdgpu_bo_driver = {
>>>          .ttm_tt_create = &amdgpu_ttm_tt_create,
>>>          .ttm_tt_populate = &amdgpu_ttm_tt_populate,
>>> @@ -923,12 +967,14 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
>>>          .fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
>>>          .io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
>>>          .io_mem_free = &amdgpu_ttm_io_mem_free,
>>> -       .lru_tail = &ttm_bo_default_lru_tail,
>>> -       .swap_lru_tail = &ttm_bo_default_swap_lru_tail,
>>> +       .lru_removal = &amdgpu_ttm_lru_removal,
>>> +       .lru_tail = &amdgpu_ttm_lru_tail,
>>> +       .swap_lru_tail = &amdgpu_ttm_swap_lru_tail,
>>>   };
>>>
>>>   int amdgpu_ttm_init(struct amdgpu_device *adev)
>>>   {
>>> +       unsigned i, j;
>>>          int r;
>>>
>>>          r = amdgpu_ttm_global_init(adev);
>>> @@ -946,6 +992,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>>>                  DRM_ERROR("failed initializing buffer object 
>>> driver(%d).\n", r);
>>>                  return r;
>>>          }
>>> +
>>> +       for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) {
>>> +               struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i];
>>> +
>>> +               for (j = 0; j < TTM_NUM_MEM_TYPES; ++j)
>>> +                       lru->lru[j] = &adev->mman.bdev.man[j].lru;
>>> +               lru->swap_lru = &adev->mman.bdev.glob->swap_lru;
>>> +       }
>>> +
>>>          adev->mman.initialized = true;
>>>          r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
>>>                                  adev->mc.real_vram_size >> 
>>> PAGE_SHIFT);
>>> -- 
>>> 2.5.0
>>>
>

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: remove sorting of CS BOs
  2016-04-14 12:54 ` [PATCH 2/2] drm/amdgpu: remove sorting of CS BOs Christian König
@ 2016-04-15  5:21   ` Ayyappa Ch
  0 siblings, 0 replies; 8+ messages in thread
From: Ayyappa Ch @ 2016-04-15  5:21 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

Hello Christian ,

As per below comment  large buffer eviction is more expensive. So
removing this code will solve the same problem?

-               /* Sort the buffer list from the smallest to largest buffer,
-                * which affects the order of buffers in the LRU list.
-                * This assures that the smallest buffers are added first
-                * to the LRU list, so they are likely to be later evicted
-                * first, instead of large buffers whose eviction is more
-                * expensive.
-                *
-                * This slightly lowers the number of bytes moved by TTM
-                * per frame under memory pressure.
-                */
-               list_sort(NULL, &parser->validated, cmp_size_smaller_first);
-

Thanks,
Ayyappa.

On Thu, Apr 14, 2016 at 6:24 PM, Christian König
<deathsimple@vodafone.de> wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Not needed any more.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 23 -----------------------
>  1 file changed, 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 9392e50..00cf74a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -24,7 +24,6 @@
>   * Authors:
>   *    Jerome Glisse <glisse@freedesktop.org>
>   */
> -#include <linux/list_sort.h>
>  #include <linux/pagemap.h>
>  #include <drm/drmP.h>
>  #include <drm/amdgpu_drm.h>
> @@ -527,16 +526,6 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
>         return 0;
>  }
>
> -static int cmp_size_smaller_first(void *priv, struct list_head *a,
> -                                 struct list_head *b)
> -{
> -       struct amdgpu_bo_list_entry *la = list_entry(a, struct amdgpu_bo_list_entry, tv.head);
> -       struct amdgpu_bo_list_entry *lb = list_entry(b, struct amdgpu_bo_list_entry, tv.head);
> -
> -       /* Sort A before B if A is smaller. */
> -       return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
> -}
> -
>  /**
>   * cs_parser_fini() - clean parser states
>   * @parser:    parser structure holding parsing context.
> @@ -553,18 +542,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
>         if (!error) {
>                 amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
>
> -               /* Sort the buffer list from the smallest to largest buffer,
> -                * which affects the order of buffers in the LRU list.
> -                * This assures that the smallest buffers are added first
> -                * to the LRU list, so they are likely to be later evicted
> -                * first, instead of large buffers whose eviction is more
> -                * expensive.
> -                *
> -                * This slightly lowers the number of bytes moved by TTM
> -                * per frame under memory pressure.
> -                */
> -               list_sort(NULL, &parser->validated, cmp_size_smaller_first);
> -
>                 ttm_eu_fence_buffer_objects(&parser->ticket,
>                                             &parser->validated,
>                                             parser->fence);
> --
> 2.5.0
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: remove sorting of CS BOs
  2016-04-15 15:19 ` [PATCH 2/2] drm/amdgpu: remove sorting of CS BOs Christian König
@ 2016-04-15 16:01   ` Alex Deucher
  0 siblings, 0 replies; 8+ messages in thread
From: Alex Deucher @ 2016-04-15 16:01 UTC (permalink / raw)
  To: Christian König; +Cc: Maling list - DRI developers

On Fri, Apr 15, 2016 at 11:19 AM, Christian König
<deathsimple@vodafone.de> wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Not needed any more.

Applied the series.

Alex

>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 23 -----------------------
>  1 file changed, 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 9392e50..00cf74a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -24,7 +24,6 @@
>   * Authors:
>   *    Jerome Glisse <glisse@freedesktop.org>
>   */
> -#include <linux/list_sort.h>
>  #include <linux/pagemap.h>
>  #include <drm/drmP.h>
>  #include <drm/amdgpu_drm.h>
> @@ -527,16 +526,6 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
>         return 0;
>  }
>
> -static int cmp_size_smaller_first(void *priv, struct list_head *a,
> -                                 struct list_head *b)
> -{
> -       struct amdgpu_bo_list_entry *la = list_entry(a, struct amdgpu_bo_list_entry, tv.head);
> -       struct amdgpu_bo_list_entry *lb = list_entry(b, struct amdgpu_bo_list_entry, tv.head);
> -
> -       /* Sort A before B if A is smaller. */
> -       return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
> -}
> -
>  /**
>   * cs_parser_fini() - clean parser states
>   * @parser:    parser structure holding parsing context.
> @@ -553,18 +542,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
>         if (!error) {
>                 amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
>
> -               /* Sort the buffer list from the smallest to largest buffer,
> -                * which affects the order of buffers in the LRU list.
> -                * This assures that the smallest buffers are added first
> -                * to the LRU list, so they are likely to be later evicted
> -                * first, instead of large buffers whose eviction is more
> -                * expensive.
> -                *
> -                * This slightly lowers the number of bytes moved by TTM
> -                * per frame under memory pressure.
> -                */
> -               list_sort(NULL, &parser->validated, cmp_size_smaller_first);
> -
>                 ttm_eu_fence_buffer_objects(&parser->ticket,
>                                             &parser->validated,
>                                             parser->fence);
> --
> 2.5.0
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 2/2] drm/amdgpu: remove sorting of CS BOs
  2016-04-15 15:19 [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU v2 Christian König
@ 2016-04-15 15:19 ` Christian König
  2016-04-15 16:01   ` Alex Deucher
  0 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2016-04-15 15:19 UTC (permalink / raw)
  To: alexdeucher; +Cc: dri-devel

From: Christian König <christian.koenig@amd.com>

Not needed any more.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 23 -----------------------
 1 file changed, 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9392e50..00cf74a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -24,7 +24,6 @@
  * Authors:
  *    Jerome Glisse <glisse@freedesktop.org>
  */
-#include <linux/list_sort.h>
 #include <linux/pagemap.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
@@ -527,16 +526,6 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 	return 0;
 }
 
-static int cmp_size_smaller_first(void *priv, struct list_head *a,
-				  struct list_head *b)
-{
-	struct amdgpu_bo_list_entry *la = list_entry(a, struct amdgpu_bo_list_entry, tv.head);
-	struct amdgpu_bo_list_entry *lb = list_entry(b, struct amdgpu_bo_list_entry, tv.head);
-
-	/* Sort A before B if A is smaller. */
-	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
-}
-
 /**
  * cs_parser_fini() - clean parser states
  * @parser:	parser structure holding parsing context.
@@ -553,18 +542,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
 	if (!error) {
 		amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
 
-		/* Sort the buffer list from the smallest to largest buffer,
-		 * which affects the order of buffers in the LRU list.
-		 * This assures that the smallest buffers are added first
-		 * to the LRU list, so they are likely to be later evicted
-		 * first, instead of large buffers whose eviction is more
-		 * expensive.
-		 *
-		 * This slightly lowers the number of bytes moved by TTM
-		 * per frame under memory pressure.
-		 */
-		list_sort(NULL, &parser->validated, cmp_size_smaller_first);
-
 		ttm_eu_fence_buffer_objects(&parser->ticket,
 					    &parser->validated,
 					    parser->fence);
-- 
2.5.0

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2016-04-15 16:01 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-14 12:54 [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU Christian König
2016-04-14 12:54 ` [PATCH 2/2] drm/amdgpu: remove sorting of CS BOs Christian König
2016-04-15  5:21   ` Ayyappa Ch
2016-04-14 14:23 ` [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU Alex Deucher
2016-04-14 14:25   ` Christian König
2016-04-14 15:53     ` Christian König
2016-04-15 15:19 [PATCH 1/2] drm/amdgpu: group BOs by log2 of the size on the LRU v2 Christian König
2016-04-15 15:19 ` [PATCH 2/2] drm/amdgpu: remove sorting of CS BOs Christian König
2016-04-15 16:01   ` Alex Deucher

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.