All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/radeon: fix TOPDOWN handling for bo_create
@ 2015-03-11 15:44 Alex Deucher
  2015-03-11 18:21 ` Christian König
  0 siblings, 1 reply; 23+ messages in thread
From: Alex Deucher @ 2015-03-11 15:44 UTC (permalink / raw)
  To: dri-devel, oded.gabbay; +Cc: Alex Deucher, stable

radeon_bo_create() calls radeon_ttm_placement_from_domain()
before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
uses the ttm bo size to determine when to select top down
allocation but since the ttm bo is not initialized yet the
check is always false.

Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/radeon.h        |  3 ++-
 drivers/gpu/drm/radeon/radeon_gem.c    |  2 +-
 drivers/gpu/drm/radeon/radeon_mn.c     |  2 +-
 drivers/gpu/drm/radeon/radeon_object.c | 17 ++++++++++-------
 drivers/gpu/drm/radeon/radeon_ttm.c    | 12 ++++++++----
 5 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5587603..726e89f 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2970,7 +2970,8 @@ extern void radeon_surface_init(struct radeon_device *rdev);
 extern int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data);
 extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enable);
 extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable);
-extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain);
+extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
+					     u64 size);
 extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo);
 extern int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 				     uint32_t flags);
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index ac3c131..d613d0c 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -337,7 +337,7 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data,
 			goto release_object;
 		}
 
-		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT);
+		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT, bo->tbo.mem.size);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
 		radeon_bo_unreserve(bo);
 		up_read(&current->mm->mmap_sem);
diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
index a69bd44..e51f09b 100644
--- a/drivers/gpu/drm/radeon/radeon_mn.c
+++ b/drivers/gpu/drm/radeon/radeon_mn.c
@@ -141,7 +141,7 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
 				DRM_ERROR("(%d) failed to wait for user bo\n", r);
 		}
 
-		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU);
+		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU, bo->tbo.mem.size);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 		if (r)
 			DRM_ERROR("(%d) failed to validate user bo\n", r);
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 43e0994..07f8fd5 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -93,7 +93,8 @@ bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
 	return false;
 }
 
-void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
+void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
+				      u64 size)
 {
 	u32 c = 0, i;
 
@@ -179,7 +180,7 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
 	 * improve fragmentation quality.
 	 * 512kb was measured as the most optimal number.
 	 */
-	if (rbo->tbo.mem.size > 512 * 1024) {
+	if (size > 512 * 1024) {
 		for (i = 0; i < c; i++) {
 			rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
 		}
@@ -252,7 +253,7 @@ int radeon_bo_create(struct radeon_device *rdev,
 	bo->flags &= ~RADEON_GEM_GTT_WC;
 #endif
 
-	radeon_ttm_placement_from_domain(bo, domain);
+	radeon_ttm_placement_from_domain(bo, domain, size);
 	/* Kernel allocation are uninterruptible */
 	down_read(&rdev->pm.mclk_lock);
 	r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
@@ -350,7 +351,7 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
 
 		return 0;
 	}
-	radeon_ttm_placement_from_domain(bo, domain);
+	radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size);
 	for (i = 0; i < bo->placement.num_placement; i++) {
 		/* force to pin into visible video ram */
 		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
@@ -557,7 +558,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
 			}
 
 		retry:
-			radeon_ttm_placement_from_domain(bo, domain);
+			radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size);
 			if (ring == R600_RING_TYPE_UVD_INDEX)
 				radeon_uvd_force_into_uvd_segment(bo, allowed);
 
@@ -800,7 +801,8 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 		return 0;
 
 	/* hurrah the memory is not visible ! */
-	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
+	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM,
+					 rbo->tbo.mem.size);
 	lpfn =	rdev->mc.visible_vram_size >> PAGE_SHIFT;
 	for (i = 0; i < rbo->placement.num_placement; i++) {
 		/* Force into visible VRAM */
@@ -810,7 +812,8 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 	}
 	r = ttm_bo_validate(bo, &rbo->placement, false, false);
 	if (unlikely(r == -ENOMEM)) {
-		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
+		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT,
+						 rbo->tbo.mem.size);
 		return ttm_bo_validate(bo, &rbo->placement, false, false);
 	} else if (unlikely(r != 0)) {
 		return r;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index d02aa1d..ce8ed2d 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -197,7 +197,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 	switch (bo->mem.mem_type) {
 	case TTM_PL_VRAM:
 		if (rbo->rdev->ring[radeon_copy_ring_index(rbo->rdev)].ready == false)
-			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
+			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU,
+							 rbo->tbo.mem.size);
 		else if (rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size &&
 			 bo->mem.start < (rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT)) {
 			unsigned fpfn = rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
@@ -209,7 +210,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 			 * BOs to be evicted from VRAM
 			 */
 			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM |
-							 RADEON_GEM_DOMAIN_GTT);
+							 RADEON_GEM_DOMAIN_GTT,
+							 rbo->tbo.mem.size);
 			rbo->placement.num_busy_placement = 0;
 			for (i = 0; i < rbo->placement.num_placement; i++) {
 				if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) {
@@ -222,11 +224,13 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 				}
 			}
 		} else
-			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
+			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT,
+							 rbo->tbo.mem.size);
 		break;
 	case TTM_PL_TT:
 	default:
-		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
+		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU,
+						 rbo->tbo.mem.size);
 	}
 	*placement = rbo->placement;
 }
-- 
1.8.3.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create
  2015-03-11 15:44 [PATCH] drm/radeon: fix TOPDOWN handling for bo_create Alex Deucher
@ 2015-03-11 18:21 ` Christian König
  2015-03-11 20:51   ` Alex Deucher
  0 siblings, 1 reply; 23+ messages in thread
From: Christian König @ 2015-03-11 18:21 UTC (permalink / raw)
  To: Alex Deucher, dri-devel, oded.gabbay; +Cc: Alex Deucher, stable

On 11.03.2015 16:44, Alex Deucher wrote:
> radeon_bo_create() calls radeon_ttm_placement_from_domain()
> before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
> uses the ttm bo size to determine when to select top down
> allocation but since the ttm bo is not initialized yet the
> check is always false.
>
> Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> Cc: stable@vger.kernel.org

And I was already wondering why the heck the BOs always made this 
ping/pong in memory after creation.

Patch is Reviewed-by: Christian König <christian.koenig@amd.com>

Regards,
Christian.

> ---
>   drivers/gpu/drm/radeon/radeon.h        |  3 ++-
>   drivers/gpu/drm/radeon/radeon_gem.c    |  2 +-
>   drivers/gpu/drm/radeon/radeon_mn.c     |  2 +-
>   drivers/gpu/drm/radeon/radeon_object.c | 17 ++++++++++-------
>   drivers/gpu/drm/radeon/radeon_ttm.c    | 12 ++++++++----
>   5 files changed, 22 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 5587603..726e89f 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -2970,7 +2970,8 @@ extern void radeon_surface_init(struct radeon_device *rdev);
>   extern int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data);
>   extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enable);
>   extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable);
> -extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain);
> +extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
> +					     u64 size);
>   extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo);
>   extern int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
>   				     uint32_t flags);
> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
> index ac3c131..d613d0c 100644
> --- a/drivers/gpu/drm/radeon/radeon_gem.c
> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
> @@ -337,7 +337,7 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data,
>   			goto release_object;
>   		}
>   
> -		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT);
> +		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT, bo->tbo.mem.size);
>   		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
>   		radeon_bo_unreserve(bo);
>   		up_read(&current->mm->mmap_sem);
> diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
> index a69bd44..e51f09b 100644
> --- a/drivers/gpu/drm/radeon/radeon_mn.c
> +++ b/drivers/gpu/drm/radeon/radeon_mn.c
> @@ -141,7 +141,7 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
>   				DRM_ERROR("(%d) failed to wait for user bo\n", r);
>   		}
>   
> -		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU);
> +		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU, bo->tbo.mem.size);
>   		r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
>   		if (r)
>   			DRM_ERROR("(%d) failed to validate user bo\n", r);
> diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
> index 43e0994..07f8fd5 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.c
> +++ b/drivers/gpu/drm/radeon/radeon_object.c
> @@ -93,7 +93,8 @@ bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
>   	return false;
>   }
>   
> -void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
> +void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
> +				      u64 size)
>   {
>   	u32 c = 0, i;
>   
> @@ -179,7 +180,7 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
>   	 * improve fragmentation quality.
>   	 * 512kb was measured as the most optimal number.
>   	 */
> -	if (rbo->tbo.mem.size > 512 * 1024) {
> +	if (size > 512 * 1024) {
>   		for (i = 0; i < c; i++) {
>   			rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
>   		}
> @@ -252,7 +253,7 @@ int radeon_bo_create(struct radeon_device *rdev,
>   	bo->flags &= ~RADEON_GEM_GTT_WC;
>   #endif
>   
> -	radeon_ttm_placement_from_domain(bo, domain);
> +	radeon_ttm_placement_from_domain(bo, domain, size);
>   	/* Kernel allocation are uninterruptible */
>   	down_read(&rdev->pm.mclk_lock);
>   	r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
> @@ -350,7 +351,7 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
>   
>   		return 0;
>   	}
> -	radeon_ttm_placement_from_domain(bo, domain);
> +	radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size);
>   	for (i = 0; i < bo->placement.num_placement; i++) {
>   		/* force to pin into visible video ram */
>   		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
> @@ -557,7 +558,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
>   			}
>   
>   		retry:
> -			radeon_ttm_placement_from_domain(bo, domain);
> +			radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size);
>   			if (ring == R600_RING_TYPE_UVD_INDEX)
>   				radeon_uvd_force_into_uvd_segment(bo, allowed);
>   
> @@ -800,7 +801,8 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
>   		return 0;
>   
>   	/* hurrah the memory is not visible ! */
> -	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
> +	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM,
> +					 rbo->tbo.mem.size);
>   	lpfn =	rdev->mc.visible_vram_size >> PAGE_SHIFT;
>   	for (i = 0; i < rbo->placement.num_placement; i++) {
>   		/* Force into visible VRAM */
> @@ -810,7 +812,8 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
>   	}
>   	r = ttm_bo_validate(bo, &rbo->placement, false, false);
>   	if (unlikely(r == -ENOMEM)) {
> -		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
> +		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT,
> +						 rbo->tbo.mem.size);
>   		return ttm_bo_validate(bo, &rbo->placement, false, false);
>   	} else if (unlikely(r != 0)) {
>   		return r;
> diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
> index d02aa1d..ce8ed2d 100644
> --- a/drivers/gpu/drm/radeon/radeon_ttm.c
> +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
> @@ -197,7 +197,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
>   	switch (bo->mem.mem_type) {
>   	case TTM_PL_VRAM:
>   		if (rbo->rdev->ring[radeon_copy_ring_index(rbo->rdev)].ready == false)
> -			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
> +			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU,
> +							 rbo->tbo.mem.size);
>   		else if (rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size &&
>   			 bo->mem.start < (rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT)) {
>   			unsigned fpfn = rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
> @@ -209,7 +210,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
>   			 * BOs to be evicted from VRAM
>   			 */
>   			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM |
> -							 RADEON_GEM_DOMAIN_GTT);
> +							 RADEON_GEM_DOMAIN_GTT,
> +							 rbo->tbo.mem.size);
>   			rbo->placement.num_busy_placement = 0;
>   			for (i = 0; i < rbo->placement.num_placement; i++) {
>   				if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) {
> @@ -222,11 +224,13 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
>   				}
>   			}
>   		} else
> -			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
> +			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT,
> +							 rbo->tbo.mem.size);
>   		break;
>   	case TTM_PL_TT:
>   	default:
> -		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
> +		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU,
> +						 rbo->tbo.mem.size);
>   	}
>   	*placement = rbo->placement;
>   }

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create
  2015-03-11 18:21 ` Christian König
@ 2015-03-11 20:51   ` Alex Deucher
  2015-03-11 21:14     ` Alex Deucher
  0 siblings, 1 reply; 23+ messages in thread
From: Alex Deucher @ 2015-03-11 20:51 UTC (permalink / raw)
  To: Christian König
  Cc: Maling list - DRI developers, Oded Gabbay, Alex Deucher, for 3.8

[-- Attachment #1: Type: text/plain, Size: 9851 bytes --]

On Wed, Mar 11, 2015 at 2:21 PM, Christian König
<deathsimple@vodafone.de> wrote:
> On 11.03.2015 16:44, Alex Deucher wrote:
>>
>> radeon_bo_create() calls radeon_ttm_placement_from_domain()
>> before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
>> uses the ttm bo size to determine when to select top down
>> allocation but since the ttm bo is not initialized yet the
>> check is always false.
>>
>> Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>> Cc: stable@vger.kernel.org
>
>
> And I was already wondering why the heck the BOs always made this ping/pong
> in memory after creation.
>
> Patch is Reviewed-by: Christian König <christian.koenig@amd.com>

And fixing that promptly broke VCE due to vram location requirements.
Updated patch attached.  Thoughts?

Alex

>
> Regards,
> Christian.
>
>
>> ---
>>   drivers/gpu/drm/radeon/radeon.h        |  3 ++-
>>   drivers/gpu/drm/radeon/radeon_gem.c    |  2 +-
>>   drivers/gpu/drm/radeon/radeon_mn.c     |  2 +-
>>   drivers/gpu/drm/radeon/radeon_object.c | 17 ++++++++++-------
>>   drivers/gpu/drm/radeon/radeon_ttm.c    | 12 ++++++++----
>>   5 files changed, 22 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/radeon/radeon.h
>> b/drivers/gpu/drm/radeon/radeon.h
>> index 5587603..726e89f 100644
>> --- a/drivers/gpu/drm/radeon/radeon.h
>> +++ b/drivers/gpu/drm/radeon/radeon.h
>> @@ -2970,7 +2970,8 @@ extern void radeon_surface_init(struct radeon_device
>> *rdev);
>>   extern int radeon_cs_parser_init(struct radeon_cs_parser *p, void
>> *data);
>>   extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev,
>> int enable);
>>   extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int
>> enable);
>> -extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32
>> domain);
>> +extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32
>> domain,
>> +                                            u64 size);
>>   extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo);
>>   extern int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
>>                                      uint32_t flags);
>> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c
>> b/drivers/gpu/drm/radeon/radeon_gem.c
>> index ac3c131..d613d0c 100644
>> --- a/drivers/gpu/drm/radeon/radeon_gem.c
>> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
>> @@ -337,7 +337,7 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev,
>> void *data,
>>                         goto release_object;
>>                 }
>>   -             radeon_ttm_placement_from_domain(bo,
>> RADEON_GEM_DOMAIN_GTT);
>> +               radeon_ttm_placement_from_domain(bo,
>> RADEON_GEM_DOMAIN_GTT, bo->tbo.mem.size);
>>                 r = ttm_bo_validate(&bo->tbo, &bo->placement, true,
>> false);
>>                 radeon_bo_unreserve(bo);
>>                 up_read(&current->mm->mmap_sem);
>> diff --git a/drivers/gpu/drm/radeon/radeon_mn.c
>> b/drivers/gpu/drm/radeon/radeon_mn.c
>> index a69bd44..e51f09b 100644
>> --- a/drivers/gpu/drm/radeon/radeon_mn.c
>> +++ b/drivers/gpu/drm/radeon/radeon_mn.c
>> @@ -141,7 +141,7 @@ static void radeon_mn_invalidate_range_start(struct
>> mmu_notifier *mn,
>>                                 DRM_ERROR("(%d) failed to wait for user
>> bo\n", r);
>>                 }
>>   -             radeon_ttm_placement_from_domain(bo,
>> RADEON_GEM_DOMAIN_CPU);
>> +               radeon_ttm_placement_from_domain(bo,
>> RADEON_GEM_DOMAIN_CPU, bo->tbo.mem.size);
>>                 r = ttm_bo_validate(&bo->tbo, &bo->placement, false,
>> false);
>>                 if (r)
>>                         DRM_ERROR("(%d) failed to validate user bo\n", r);
>> diff --git a/drivers/gpu/drm/radeon/radeon_object.c
>> b/drivers/gpu/drm/radeon/radeon_object.c
>> index 43e0994..07f8fd5 100644
>> --- a/drivers/gpu/drm/radeon/radeon_object.c
>> +++ b/drivers/gpu/drm/radeon/radeon_object.c
>> @@ -93,7 +93,8 @@ bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object
>> *bo)
>>         return false;
>>   }
>>   -void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32
>> domain)
>> +void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
>> +                                     u64 size)
>>   {
>>         u32 c = 0, i;
>>   @@ -179,7 +180,7 @@ void radeon_ttm_placement_from_domain(struct
>> radeon_bo *rbo, u32 domain)
>>          * improve fragmentation quality.
>>          * 512kb was measured as the most optimal number.
>>          */
>> -       if (rbo->tbo.mem.size > 512 * 1024) {
>> +       if (size > 512 * 1024) {
>>                 for (i = 0; i < c; i++) {
>>                         rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
>>                 }
>> @@ -252,7 +253,7 @@ int radeon_bo_create(struct radeon_device *rdev,
>>         bo->flags &= ~RADEON_GEM_GTT_WC;
>>   #endif
>>   -     radeon_ttm_placement_from_domain(bo, domain);
>> +       radeon_ttm_placement_from_domain(bo, domain, size);
>>         /* Kernel allocation are uninterruptible */
>>         down_read(&rdev->pm.mclk_lock);
>>         r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
>> @@ -350,7 +351,7 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32
>> domain, u64 max_offset,
>>                 return 0;
>>         }
>> -       radeon_ttm_placement_from_domain(bo, domain);
>> +       radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size);
>>         for (i = 0; i < bo->placement.num_placement; i++) {
>>                 /* force to pin into visible video ram */
>>                 if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
>> @@ -557,7 +558,7 @@ int radeon_bo_list_validate(struct radeon_device
>> *rdev,
>>                         }
>>                 retry:
>> -                       radeon_ttm_placement_from_domain(bo, domain);
>> +                       radeon_ttm_placement_from_domain(bo, domain,
>> bo->tbo.mem.size);
>>                         if (ring == R600_RING_TYPE_UVD_INDEX)
>>                                 radeon_uvd_force_into_uvd_segment(bo,
>> allowed);
>>   @@ -800,7 +801,8 @@ int radeon_bo_fault_reserve_notify(struct
>> ttm_buffer_object *bo)
>>                 return 0;
>>         /* hurrah the memory is not visible ! */
>> -       radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
>> +       radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM,
>> +                                        rbo->tbo.mem.size);
>>         lpfn =  rdev->mc.visible_vram_size >> PAGE_SHIFT;
>>         for (i = 0; i < rbo->placement.num_placement; i++) {
>>                 /* Force into visible VRAM */
>> @@ -810,7 +812,8 @@ int radeon_bo_fault_reserve_notify(struct
>> ttm_buffer_object *bo)
>>         }
>>         r = ttm_bo_validate(bo, &rbo->placement, false, false);
>>         if (unlikely(r == -ENOMEM)) {
>> -               radeon_ttm_placement_from_domain(rbo,
>> RADEON_GEM_DOMAIN_GTT);
>> +               radeon_ttm_placement_from_domain(rbo,
>> RADEON_GEM_DOMAIN_GTT,
>> +                                                rbo->tbo.mem.size);
>>                 return ttm_bo_validate(bo, &rbo->placement, false, false);
>>         } else if (unlikely(r != 0)) {
>>                 return r;
>> diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c
>> b/drivers/gpu/drm/radeon/radeon_ttm.c
>> index d02aa1d..ce8ed2d 100644
>> --- a/drivers/gpu/drm/radeon/radeon_ttm.c
>> +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
>> @@ -197,7 +197,8 @@ static void radeon_evict_flags(struct
>> ttm_buffer_object *bo,
>>         switch (bo->mem.mem_type) {
>>         case TTM_PL_VRAM:
>>                 if
>> (rbo->rdev->ring[radeon_copy_ring_index(rbo->rdev)].ready == false)
>> -                       radeon_ttm_placement_from_domain(rbo,
>> RADEON_GEM_DOMAIN_CPU);
>> +                       radeon_ttm_placement_from_domain(rbo,
>> RADEON_GEM_DOMAIN_CPU,
>> +
>> rbo->tbo.mem.size);
>>                 else if (rbo->rdev->mc.visible_vram_size <
>> rbo->rdev->mc.real_vram_size &&
>>                          bo->mem.start < (rbo->rdev->mc.visible_vram_size
>> >> PAGE_SHIFT)) {
>>                         unsigned fpfn = rbo->rdev->mc.visible_vram_size >>
>> PAGE_SHIFT;
>> @@ -209,7 +210,8 @@ static void radeon_evict_flags(struct
>> ttm_buffer_object *bo,
>>                          * BOs to be evicted from VRAM
>>                          */
>>                         radeon_ttm_placement_from_domain(rbo,
>> RADEON_GEM_DOMAIN_VRAM |
>> -
>> RADEON_GEM_DOMAIN_GTT);
>> +
>> RADEON_GEM_DOMAIN_GTT,
>> +
>> rbo->tbo.mem.size);
>>                         rbo->placement.num_busy_placement = 0;
>>                         for (i = 0; i < rbo->placement.num_placement; i++)
>> {
>>                                 if (rbo->placements[i].flags &
>> TTM_PL_FLAG_VRAM) {
>> @@ -222,11 +224,13 @@ static void radeon_evict_flags(struct
>> ttm_buffer_object *bo,
>>                                 }
>>                         }
>>                 } else
>> -                       radeon_ttm_placement_from_domain(rbo,
>> RADEON_GEM_DOMAIN_GTT);
>> +                       radeon_ttm_placement_from_domain(rbo,
>> RADEON_GEM_DOMAIN_GTT,
>> +
>> rbo->tbo.mem.size);
>>                 break;
>>         case TTM_PL_TT:
>>         default:
>> -               radeon_ttm_placement_from_domain(rbo,
>> RADEON_GEM_DOMAIN_CPU);
>> +               radeon_ttm_placement_from_domain(rbo,
>> RADEON_GEM_DOMAIN_CPU,
>> +                                                rbo->tbo.mem.size);
>>         }
>>         *placement = rbo->placement;
>>   }
>
>

[-- Attachment #2: 0001-drm-radeon-fix-TOPDOWN-handling-for-bo_create-v2.patch --]
[-- Type: text/x-patch, Size: 9510 bytes --]

From aa93fb79095c76182952773836c1e6ed3af971fd Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 11 Mar 2015 11:27:26 -0400
Subject: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

radeon_bo_create() calls radeon_ttm_placement_from_domain()
before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
uses the ttm bo size to determine when to select top down
allocation but since the ttm bo is not initialized yet the
check is always false.

v2: only use topdown for vram if the user has not requested
CPU access explicitly.  Fixes VCE.

Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/radeon.h        |  3 ++-
 drivers/gpu/drm/radeon/radeon_gem.c    |  2 +-
 drivers/gpu/drm/radeon/radeon_mn.c     |  2 +-
 drivers/gpu/drm/radeon/radeon_object.c | 30 +++++++++++++++++++++---------
 drivers/gpu/drm/radeon/radeon_ttm.c    | 12 ++++++++----
 drivers/gpu/drm/radeon/radeon_uvd.c    |  2 +-
 drivers/gpu/drm/radeon/radeon_vce.c    |  2 +-
 7 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5587603..726e89f 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2970,7 +2970,8 @@ extern void radeon_surface_init(struct radeon_device *rdev);
 extern int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data);
 extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enable);
 extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable);
-extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain);
+extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
+					     u64 size);
 extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo);
 extern int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 				     uint32_t flags);
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index ac3c131..d613d0c 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -337,7 +337,7 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data,
 			goto release_object;
 		}
 
-		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT);
+		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT, bo->tbo.mem.size);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
 		radeon_bo_unreserve(bo);
 		up_read(&current->mm->mmap_sem);
diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
index a69bd44..e51f09b 100644
--- a/drivers/gpu/drm/radeon/radeon_mn.c
+++ b/drivers/gpu/drm/radeon/radeon_mn.c
@@ -141,7 +141,7 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
 				DRM_ERROR("(%d) failed to wait for user bo\n", r);
 		}
 
-		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU);
+		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU, bo->tbo.mem.size);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 		if (r)
 			DRM_ERROR("(%d) failed to validate user bo\n", r);
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 43e0994..eee1f9f 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -93,7 +93,8 @@ bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
 	return false;
 }
 
-void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
+void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
+				      u64 size)
 {
 	u32 c = 0, i;
 
@@ -179,9 +180,18 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
 	 * improve fragmentation quality.
 	 * 512kb was measured as the most optimal number.
 	 */
-	if (rbo->tbo.mem.size > 512 * 1024) {
-		for (i = 0; i < c; i++) {
-			rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
+	if (size > 512 * 1024) {
+		if (domain & RADEON_GEM_DOMAIN_VRAM) {
+			if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) ||
+			    !(rbo->flags & RADEON_GEM_CPU_ACCESS)) {
+				for (i = 0; i < c; i++) {
+					rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
+				}
+			}
+		} else {
+			for (i = 0; i < c; i++) {
+				rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
+			}
 		}
 	}
 }
@@ -252,7 +262,7 @@ int radeon_bo_create(struct radeon_device *rdev,
 	bo->flags &= ~RADEON_GEM_GTT_WC;
 #endif
 
-	radeon_ttm_placement_from_domain(bo, domain);
+	radeon_ttm_placement_from_domain(bo, domain, size);
 	/* Kernel allocation are uninterruptible */
 	down_read(&rdev->pm.mclk_lock);
 	r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
@@ -350,7 +360,7 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
 
 		return 0;
 	}
-	radeon_ttm_placement_from_domain(bo, domain);
+	radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size);
 	for (i = 0; i < bo->placement.num_placement; i++) {
 		/* force to pin into visible video ram */
 		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
@@ -557,7 +567,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
 			}
 
 		retry:
-			radeon_ttm_placement_from_domain(bo, domain);
+			radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size);
 			if (ring == R600_RING_TYPE_UVD_INDEX)
 				radeon_uvd_force_into_uvd_segment(bo, allowed);
 
@@ -800,7 +810,8 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 		return 0;
 
 	/* hurrah the memory is not visible ! */
-	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
+	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM,
+					 rbo->tbo.mem.size);
 	lpfn =	rdev->mc.visible_vram_size >> PAGE_SHIFT;
 	for (i = 0; i < rbo->placement.num_placement; i++) {
 		/* Force into visible VRAM */
@@ -810,7 +821,8 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 	}
 	r = ttm_bo_validate(bo, &rbo->placement, false, false);
 	if (unlikely(r == -ENOMEM)) {
-		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
+		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT,
+						 rbo->tbo.mem.size);
 		return ttm_bo_validate(bo, &rbo->placement, false, false);
 	} else if (unlikely(r != 0)) {
 		return r;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index d02aa1d..ce8ed2d 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -197,7 +197,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 	switch (bo->mem.mem_type) {
 	case TTM_PL_VRAM:
 		if (rbo->rdev->ring[radeon_copy_ring_index(rbo->rdev)].ready == false)
-			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
+			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU,
+							 rbo->tbo.mem.size);
 		else if (rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size &&
 			 bo->mem.start < (rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT)) {
 			unsigned fpfn = rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
@@ -209,7 +210,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 			 * BOs to be evicted from VRAM
 			 */
 			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM |
-							 RADEON_GEM_DOMAIN_GTT);
+							 RADEON_GEM_DOMAIN_GTT,
+							 rbo->tbo.mem.size);
 			rbo->placement.num_busy_placement = 0;
 			for (i = 0; i < rbo->placement.num_placement; i++) {
 				if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) {
@@ -222,11 +224,13 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 				}
 			}
 		} else
-			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
+			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT,
+							 rbo->tbo.mem.size);
 		break;
 	case TTM_PL_TT:
 	default:
-		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
+		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU,
+						 rbo->tbo.mem.size);
 	}
 	*placement = rbo->placement;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index c10b2ae..52b2682 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -141,7 +141,7 @@ int radeon_uvd_init(struct radeon_device *rdev)
 		  RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE +
 		  RADEON_GPU_PAGE_SIZE;
 	r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
-			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
+			     RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_CPU_ACCESS, NULL,
 			     NULL, &rdev->uvd.vcpu_bo);
 	if (r) {
 		dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c
index 976fe43..3d75502 100644
--- a/drivers/gpu/drm/radeon/radeon_vce.c
+++ b/drivers/gpu/drm/radeon/radeon_vce.c
@@ -126,7 +126,7 @@ int radeon_vce_init(struct radeon_device *rdev)
 	size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) +
 	       RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE;
 	r = radeon_bo_create(rdev, size, PAGE_SIZE, true,
-			     RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL,
+			     RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_CPU_ACCESS, NULL, NULL,
 			     &rdev->vce.vcpu_bo);
 	if (r) {
 		dev_err(rdev->dev, "(%d) failed to allocate VCE bo\n", r);
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create
  2015-03-11 20:51   ` Alex Deucher
@ 2015-03-11 21:14     ` Alex Deucher
  2015-03-12  9:02       ` Michel Dänzer
  0 siblings, 1 reply; 23+ messages in thread
From: Alex Deucher @ 2015-03-11 21:14 UTC (permalink / raw)
  To: Christian König
  Cc: Maling list - DRI developers, Oded Gabbay, Alex Deucher, for 3.8

[-- Attachment #1: Type: text/plain, Size: 10272 bytes --]

On Wed, Mar 11, 2015 at 4:51 PM, Alex Deucher <alexdeucher@gmail.com> wrote:
> On Wed, Mar 11, 2015 at 2:21 PM, Christian König
> <deathsimple@vodafone.de> wrote:
>> On 11.03.2015 16:44, Alex Deucher wrote:
>>>
>>> radeon_bo_create() calls radeon_ttm_placement_from_domain()
>>> before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
>>> uses the ttm bo size to determine when to select top down
>>> allocation but since the ttm bo is not initialized yet the
>>> check is always false.
>>>
>>> Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>> Cc: stable@vger.kernel.org
>>
>>
>> And I was already wondering why the heck the BOs always made this ping/pong
>> in memory after creation.
>>
>> Patch is Reviewed-by: Christian König <christian.koenig@amd.com>
>
> And fixing that promptly broke VCE due to vram location requirements.
> Updated patch attached.  Thoughts?

And one more take to make things a bit more explicit for static kernel
driver allocations.

Alex

>
> Alex
>
>>
>> Regards,
>> Christian.
>>
>>
>>> ---
>>>   drivers/gpu/drm/radeon/radeon.h        |  3 ++-
>>>   drivers/gpu/drm/radeon/radeon_gem.c    |  2 +-
>>>   drivers/gpu/drm/radeon/radeon_mn.c     |  2 +-
>>>   drivers/gpu/drm/radeon/radeon_object.c | 17 ++++++++++-------
>>>   drivers/gpu/drm/radeon/radeon_ttm.c    | 12 ++++++++----
>>>   5 files changed, 22 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/radeon/radeon.h
>>> b/drivers/gpu/drm/radeon/radeon.h
>>> index 5587603..726e89f 100644
>>> --- a/drivers/gpu/drm/radeon/radeon.h
>>> +++ b/drivers/gpu/drm/radeon/radeon.h
>>> @@ -2970,7 +2970,8 @@ extern void radeon_surface_init(struct radeon_device
>>> *rdev);
>>>   extern int radeon_cs_parser_init(struct radeon_cs_parser *p, void
>>> *data);
>>>   extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev,
>>> int enable);
>>>   extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int
>>> enable);
>>> -extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32
>>> domain);
>>> +extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32
>>> domain,
>>> +                                            u64 size);
>>>   extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo);
>>>   extern int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
>>>                                      uint32_t flags);
>>> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c
>>> b/drivers/gpu/drm/radeon/radeon_gem.c
>>> index ac3c131..d613d0c 100644
>>> --- a/drivers/gpu/drm/radeon/radeon_gem.c
>>> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
>>> @@ -337,7 +337,7 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev,
>>> void *data,
>>>                         goto release_object;
>>>                 }
>>>   -             radeon_ttm_placement_from_domain(bo,
>>> RADEON_GEM_DOMAIN_GTT);
>>> +               radeon_ttm_placement_from_domain(bo,
>>> RADEON_GEM_DOMAIN_GTT, bo->tbo.mem.size);
>>>                 r = ttm_bo_validate(&bo->tbo, &bo->placement, true,
>>> false);
>>>                 radeon_bo_unreserve(bo);
>>>                 up_read(&current->mm->mmap_sem);
>>> diff --git a/drivers/gpu/drm/radeon/radeon_mn.c
>>> b/drivers/gpu/drm/radeon/radeon_mn.c
>>> index a69bd44..e51f09b 100644
>>> --- a/drivers/gpu/drm/radeon/radeon_mn.c
>>> +++ b/drivers/gpu/drm/radeon/radeon_mn.c
>>> @@ -141,7 +141,7 @@ static void radeon_mn_invalidate_range_start(struct
>>> mmu_notifier *mn,
>>>                                 DRM_ERROR("(%d) failed to wait for user
>>> bo\n", r);
>>>                 }
>>>   -             radeon_ttm_placement_from_domain(bo,
>>> RADEON_GEM_DOMAIN_CPU);
>>> +               radeon_ttm_placement_from_domain(bo,
>>> RADEON_GEM_DOMAIN_CPU, bo->tbo.mem.size);
>>>                 r = ttm_bo_validate(&bo->tbo, &bo->placement, false,
>>> false);
>>>                 if (r)
>>>                         DRM_ERROR("(%d) failed to validate user bo\n", r);
>>> diff --git a/drivers/gpu/drm/radeon/radeon_object.c
>>> b/drivers/gpu/drm/radeon/radeon_object.c
>>> index 43e0994..07f8fd5 100644
>>> --- a/drivers/gpu/drm/radeon/radeon_object.c
>>> +++ b/drivers/gpu/drm/radeon/radeon_object.c
>>> @@ -93,7 +93,8 @@ bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object
>>> *bo)
>>>         return false;
>>>   }
>>>   -void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32
>>> domain)
>>> +void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
>>> +                                     u64 size)
>>>   {
>>>         u32 c = 0, i;
>>>   @@ -179,7 +180,7 @@ void radeon_ttm_placement_from_domain(struct
>>> radeon_bo *rbo, u32 domain)
>>>          * improve fragmentation quality.
>>>          * 512kb was measured as the most optimal number.
>>>          */
>>> -       if (rbo->tbo.mem.size > 512 * 1024) {
>>> +       if (size > 512 * 1024) {
>>>                 for (i = 0; i < c; i++) {
>>>                         rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
>>>                 }
>>> @@ -252,7 +253,7 @@ int radeon_bo_create(struct radeon_device *rdev,
>>>         bo->flags &= ~RADEON_GEM_GTT_WC;
>>>   #endif
>>>   -     radeon_ttm_placement_from_domain(bo, domain);
>>> +       radeon_ttm_placement_from_domain(bo, domain, size);
>>>         /* Kernel allocation are uninterruptible */
>>>         down_read(&rdev->pm.mclk_lock);
>>>         r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
>>> @@ -350,7 +351,7 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32
>>> domain, u64 max_offset,
>>>                 return 0;
>>>         }
>>> -       radeon_ttm_placement_from_domain(bo, domain);
>>> +       radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size);
>>>         for (i = 0; i < bo->placement.num_placement; i++) {
>>>                 /* force to pin into visible video ram */
>>>                 if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
>>> @@ -557,7 +558,7 @@ int radeon_bo_list_validate(struct radeon_device
>>> *rdev,
>>>                         }
>>>                 retry:
>>> -                       radeon_ttm_placement_from_domain(bo, domain);
>>> +                       radeon_ttm_placement_from_domain(bo, domain,
>>> bo->tbo.mem.size);
>>>                         if (ring == R600_RING_TYPE_UVD_INDEX)
>>>                                 radeon_uvd_force_into_uvd_segment(bo,
>>> allowed);
>>>   @@ -800,7 +801,8 @@ int radeon_bo_fault_reserve_notify(struct
>>> ttm_buffer_object *bo)
>>>                 return 0;
>>>         /* hurrah the memory is not visible ! */
>>> -       radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
>>> +       radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM,
>>> +                                        rbo->tbo.mem.size);
>>>         lpfn =  rdev->mc.visible_vram_size >> PAGE_SHIFT;
>>>         for (i = 0; i < rbo->placement.num_placement; i++) {
>>>                 /* Force into visible VRAM */
>>> @@ -810,7 +812,8 @@ int radeon_bo_fault_reserve_notify(struct
>>> ttm_buffer_object *bo)
>>>         }
>>>         r = ttm_bo_validate(bo, &rbo->placement, false, false);
>>>         if (unlikely(r == -ENOMEM)) {
>>> -               radeon_ttm_placement_from_domain(rbo,
>>> RADEON_GEM_DOMAIN_GTT);
>>> +               radeon_ttm_placement_from_domain(rbo,
>>> RADEON_GEM_DOMAIN_GTT,
>>> +                                                rbo->tbo.mem.size);
>>>                 return ttm_bo_validate(bo, &rbo->placement, false, false);
>>>         } else if (unlikely(r != 0)) {
>>>                 return r;
>>> diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c
>>> b/drivers/gpu/drm/radeon/radeon_ttm.c
>>> index d02aa1d..ce8ed2d 100644
>>> --- a/drivers/gpu/drm/radeon/radeon_ttm.c
>>> +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
>>> @@ -197,7 +197,8 @@ static void radeon_evict_flags(struct
>>> ttm_buffer_object *bo,
>>>         switch (bo->mem.mem_type) {
>>>         case TTM_PL_VRAM:
>>>                 if
>>> (rbo->rdev->ring[radeon_copy_ring_index(rbo->rdev)].ready == false)
>>> -                       radeon_ttm_placement_from_domain(rbo,
>>> RADEON_GEM_DOMAIN_CPU);
>>> +                       radeon_ttm_placement_from_domain(rbo,
>>> RADEON_GEM_DOMAIN_CPU,
>>> +
>>> rbo->tbo.mem.size);
>>>                 else if (rbo->rdev->mc.visible_vram_size <
>>> rbo->rdev->mc.real_vram_size &&
>>>                          bo->mem.start < (rbo->rdev->mc.visible_vram_size
>>> >> PAGE_SHIFT)) {
>>>                         unsigned fpfn = rbo->rdev->mc.visible_vram_size >>
>>> PAGE_SHIFT;
>>> @@ -209,7 +210,8 @@ static void radeon_evict_flags(struct
>>> ttm_buffer_object *bo,
>>>                          * BOs to be evicted from VRAM
>>>                          */
>>>                         radeon_ttm_placement_from_domain(rbo,
>>> RADEON_GEM_DOMAIN_VRAM |
>>> -
>>> RADEON_GEM_DOMAIN_GTT);
>>> +
>>> RADEON_GEM_DOMAIN_GTT,
>>> +
>>> rbo->tbo.mem.size);
>>>                         rbo->placement.num_busy_placement = 0;
>>>                         for (i = 0; i < rbo->placement.num_placement; i++)
>>> {
>>>                                 if (rbo->placements[i].flags &
>>> TTM_PL_FLAG_VRAM) {
>>> @@ -222,11 +224,13 @@ static void radeon_evict_flags(struct
>>> ttm_buffer_object *bo,
>>>                                 }
>>>                         }
>>>                 } else
>>> -                       radeon_ttm_placement_from_domain(rbo,
>>> RADEON_GEM_DOMAIN_GTT);
>>> +                       radeon_ttm_placement_from_domain(rbo,
>>> RADEON_GEM_DOMAIN_GTT,
>>> +
>>> rbo->tbo.mem.size);
>>>                 break;
>>>         case TTM_PL_TT:
>>>         default:
>>> -               radeon_ttm_placement_from_domain(rbo,
>>> RADEON_GEM_DOMAIN_CPU);
>>> +               radeon_ttm_placement_from_domain(rbo,
>>> RADEON_GEM_DOMAIN_CPU,
>>> +                                                rbo->tbo.mem.size);
>>>         }
>>>         *placement = rbo->placement;
>>>   }
>>
>>

[-- Attachment #2: 0001-drm-radeon-fix-TOPDOWN-handling-for-bo_create-v3.patch --]
[-- Type: text/x-patch, Size: 12758 bytes --]

From 019f96d56915c4ba02ad4bb25acefbea103a084e Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 11 Mar 2015 11:27:26 -0400
Subject: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create (v3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

radeon_bo_create() calls radeon_ttm_placement_from_domain()
before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
uses the ttm bo size to determine when to select top down
allocation but since the ttm bo is not initialized yet the
check is always false.

v2: only use topdown for vram if the user has not requested
CPU access explicitly.  Fixes VCE.

v3: explicitly set CPU access on kernel allocations where we
expect allocations to be at the start of vram to avoid
fragmentation and extra migration.

Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/evergreen.c     |  6 +++---
 drivers/gpu/drm/radeon/r600.c          |  3 ++-
 drivers/gpu/drm/radeon/radeon.h        |  3 ++-
 drivers/gpu/drm/radeon/radeon_gart.c   |  2 +-
 drivers/gpu/drm/radeon/radeon_gem.c    |  2 +-
 drivers/gpu/drm/radeon/radeon_mn.c     |  2 +-
 drivers/gpu/drm/radeon/radeon_object.c | 30 +++++++++++++++++++++---------
 drivers/gpu/drm/radeon/radeon_ttm.c    | 14 +++++++++-----
 drivers/gpu/drm/radeon/radeon_uvd.c    |  2 +-
 drivers/gpu/drm/radeon/radeon_vce.c    |  2 +-
 10 files changed, 42 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 973df06..e765632 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -4026,7 +4026,7 @@ int sumo_rlc_init(struct radeon_device *rdev)
 		/* save restore block */
 		if (rdev->rlc.save_restore_obj == NULL) {
 			r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
-					     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
+					     RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_CPU_ACCESS, NULL,
 					     NULL, &rdev->rlc.save_restore_obj);
 			if (r) {
 				dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
@@ -4105,7 +4105,7 @@ int sumo_rlc_init(struct radeon_device *rdev)
 
 		if (rdev->rlc.clear_state_obj == NULL) {
 			r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
-					     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
+					     RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_CPU_ACCESS, NULL,
 					     NULL, &rdev->rlc.clear_state_obj);
 			if (r) {
 				dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
@@ -4182,7 +4182,7 @@ int sumo_rlc_init(struct radeon_device *rdev)
 		if (rdev->rlc.cp_table_obj == NULL) {
 			r = radeon_bo_create(rdev, rdev->rlc.cp_table_size,
 					     PAGE_SIZE, true,
-					     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
+					     RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_CPU_ACCESS, NULL,
 					     NULL, &rdev->rlc.cp_table_obj);
 			if (r) {
 				dev_warn(rdev->dev, "(%d) create RLC cp table bo failed\n", r);
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 2fcad34..9e2f2fa 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1431,7 +1431,8 @@ int r600_vram_scratch_init(struct radeon_device *rdev)
 	if (rdev->vram_scratch.robj == NULL) {
 		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE,
 				     PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
-				     0, NULL, NULL, &rdev->vram_scratch.robj);
+				     RADEON_GEM_CPU_ACCESS,
+				     NULL, NULL, &rdev->vram_scratch.robj);
 		if (r) {
 			return r;
 		}
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5587603..726e89f 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2970,7 +2970,8 @@ extern void radeon_surface_init(struct radeon_device *rdev);
 extern int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data);
 extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enable);
 extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable);
-extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain);
+extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
+					     u64 size);
 extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo);
 extern int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 				     uint32_t flags);
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 5450fa9..fd1c778 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -128,7 +128,7 @@ int radeon_gart_table_vram_alloc(struct radeon_device *rdev)
 	if (rdev->gart.robj == NULL) {
 		r = radeon_bo_create(rdev, rdev->gart.table_size,
 				     PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
-				     0, NULL, NULL, &rdev->gart.robj);
+				     RADEON_GEM_CPU_ACCESS, NULL, NULL, &rdev->gart.robj);
 		if (r) {
 			return r;
 		}
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index ac3c131..d613d0c 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -337,7 +337,7 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data,
 			goto release_object;
 		}
 
-		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT);
+		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT, bo->tbo.mem.size);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
 		radeon_bo_unreserve(bo);
 		up_read(&current->mm->mmap_sem);
diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
index a69bd44..e51f09b 100644
--- a/drivers/gpu/drm/radeon/radeon_mn.c
+++ b/drivers/gpu/drm/radeon/radeon_mn.c
@@ -141,7 +141,7 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
 				DRM_ERROR("(%d) failed to wait for user bo\n", r);
 		}
 
-		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU);
+		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU, bo->tbo.mem.size);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 		if (r)
 			DRM_ERROR("(%d) failed to validate user bo\n", r);
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 43e0994..eee1f9f 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -93,7 +93,8 @@ bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
 	return false;
 }
 
-void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
+void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
+				      u64 size)
 {
 	u32 c = 0, i;
 
@@ -179,9 +180,18 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
 	 * improve fragmentation quality.
 	 * 512kb was measured as the most optimal number.
 	 */
-	if (rbo->tbo.mem.size > 512 * 1024) {
-		for (i = 0; i < c; i++) {
-			rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
+	if (size > 512 * 1024) {
+		if (domain & RADEON_GEM_DOMAIN_VRAM) {
+			if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) ||
+			    !(rbo->flags & RADEON_GEM_CPU_ACCESS)) {
+				for (i = 0; i < c; i++) {
+					rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
+				}
+			}
+		} else {
+			for (i = 0; i < c; i++) {
+				rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
+			}
 		}
 	}
 }
@@ -252,7 +262,7 @@ int radeon_bo_create(struct radeon_device *rdev,
 	bo->flags &= ~RADEON_GEM_GTT_WC;
 #endif
 
-	radeon_ttm_placement_from_domain(bo, domain);
+	radeon_ttm_placement_from_domain(bo, domain, size);
 	/* Kernel allocation are uninterruptible */
 	down_read(&rdev->pm.mclk_lock);
 	r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
@@ -350,7 +360,7 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
 
 		return 0;
 	}
-	radeon_ttm_placement_from_domain(bo, domain);
+	radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size);
 	for (i = 0; i < bo->placement.num_placement; i++) {
 		/* force to pin into visible video ram */
 		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
@@ -557,7 +567,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
 			}
 
 		retry:
-			radeon_ttm_placement_from_domain(bo, domain);
+			radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size);
 			if (ring == R600_RING_TYPE_UVD_INDEX)
 				radeon_uvd_force_into_uvd_segment(bo, allowed);
 
@@ -800,7 +810,8 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 		return 0;
 
 	/* hurrah the memory is not visible ! */
-	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
+	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM,
+					 rbo->tbo.mem.size);
 	lpfn =	rdev->mc.visible_vram_size >> PAGE_SHIFT;
 	for (i = 0; i < rbo->placement.num_placement; i++) {
 		/* Force into visible VRAM */
@@ -810,7 +821,8 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 	}
 	r = ttm_bo_validate(bo, &rbo->placement, false, false);
 	if (unlikely(r == -ENOMEM)) {
-		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
+		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT,
+						 rbo->tbo.mem.size);
 		return ttm_bo_validate(bo, &rbo->placement, false, false);
 	} else if (unlikely(r != 0)) {
 		return r;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index d02aa1d..8fbf784 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -197,7 +197,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 	switch (bo->mem.mem_type) {
 	case TTM_PL_VRAM:
 		if (rbo->rdev->ring[radeon_copy_ring_index(rbo->rdev)].ready == false)
-			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
+			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU,
+							 rbo->tbo.mem.size);
 		else if (rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size &&
 			 bo->mem.start < (rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT)) {
 			unsigned fpfn = rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
@@ -209,7 +210,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 			 * BOs to be evicted from VRAM
 			 */
 			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM |
-							 RADEON_GEM_DOMAIN_GTT);
+							 RADEON_GEM_DOMAIN_GTT,
+							 rbo->tbo.mem.size);
 			rbo->placement.num_busy_placement = 0;
 			for (i = 0; i < rbo->placement.num_placement; i++) {
 				if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) {
@@ -222,11 +224,13 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 				}
 			}
 		} else
-			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
+			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT,
+							 rbo->tbo.mem.size);
 		break;
 	case TTM_PL_TT:
 	default:
-		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
+		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU,
+						 rbo->tbo.mem.size);
 	}
 	*placement = rbo->placement;
 }
@@ -888,7 +892,7 @@ int radeon_ttm_init(struct radeon_device *rdev)
 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
 
 	r = radeon_bo_create(rdev, 256 * 1024, PAGE_SIZE, true,
-			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
+			     RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_CPU_ACCESS, NULL,
 			     NULL, &rdev->stollen_vga_memory);
 	if (r) {
 		return r;
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index c10b2ae..52b2682 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -141,7 +141,7 @@ int radeon_uvd_init(struct radeon_device *rdev)
 		  RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE +
 		  RADEON_GPU_PAGE_SIZE;
 	r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
-			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
+			     RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_CPU_ACCESS, NULL,
 			     NULL, &rdev->uvd.vcpu_bo);
 	if (r) {
 		dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c
index 976fe43..3d75502 100644
--- a/drivers/gpu/drm/radeon/radeon_vce.c
+++ b/drivers/gpu/drm/radeon/radeon_vce.c
@@ -126,7 +126,7 @@ int radeon_vce_init(struct radeon_device *rdev)
 	size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) +
 	       RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE;
 	r = radeon_bo_create(rdev, size, PAGE_SIZE, true,
-			     RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL,
+			     RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_CPU_ACCESS, NULL, NULL,
 			     &rdev->vce.vcpu_bo);
 	if (r) {
 		dev_err(rdev->dev, "(%d) failed to allocate VCE bo\n", r);
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create
  2015-03-11 21:14     ` Alex Deucher
@ 2015-03-12  9:02       ` Michel Dänzer
  2015-03-12  9:23         ` Christian König
                           ` (2 more replies)
  0 siblings, 3 replies; 23+ messages in thread
From: Michel Dänzer @ 2015-03-12  9:02 UTC (permalink / raw)
  To: Alex Deucher, Christian König
  Cc: Lauri Kasanen, Maling list - DRI developers

On 12.03.2015 06:14, Alex Deucher wrote:
> On Wed, Mar 11, 2015 at 4:51 PM, Alex Deucher <alexdeucher@gmail.com> wrote:
>> On Wed, Mar 11, 2015 at 2:21 PM, Christian König
>> <deathsimple@vodafone.de> wrote:
>>> On 11.03.2015 16:44, Alex Deucher wrote:
>>>>
>>>> radeon_bo_create() calls radeon_ttm_placement_from_domain()
>>>> before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
>>>> uses the ttm bo size to determine when to select top down
>>>> allocation but since the ttm bo is not initialized yet the
>>>> check is always false.
>>>>
>>>> Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
>>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>>> Cc: stable@vger.kernel.org
>>>
>>>
>>> And I was already wondering why the heck the BOs always made this ping/pong
>>> in memory after creation.
>>>
>>> Patch is Reviewed-by: Christian König <christian.koenig@amd.com>
>>
>> And fixing that promptly broke VCE due to vram location requirements.
>> Updated patch attached.  Thoughts?
> 
> And one more take to make things a bit more explicit for static kernel
> driver allocations.

struct ttm_place::lpfn is honoured even with TTM_PL_FLAG_TOPDOWN, so
latter should work with RADEON_GEM_CPU_ACCESS. It sounds like the
problem is really that some BOs are expected to be within a certain
range from the beginning of VRAM, but lpfn isn't set accordingly. It
would be better to fix that by setting lpfn directly than indirectly via
RADEON_GEM_CPU_ACCESS.


Anyway, since this isn't the first bug which prevents
TTM_PL_FLAG_TOPDOWN from working as intended in the radeon driver, I
wonder if its performance impact should be re-evaluated. Lauri?


-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create
  2015-03-12  9:02       ` Michel Dänzer
@ 2015-03-12  9:23         ` Christian König
  2015-03-12  9:30           ` Oded Gabbay
  2015-03-12 13:09           ` Alex Deucher
  2015-03-12 10:21         ` Lauri Kasanen
  2015-03-13  9:11         ` Daniel Vetter
  2 siblings, 2 replies; 23+ messages in thread
From: Christian König @ 2015-03-12  9:23 UTC (permalink / raw)
  To: Michel Dänzer, Alex Deucher
  Cc: Lauri Kasanen, Maling list - DRI developers

On 12.03.2015 10:02, Michel Dänzer wrote:
> On 12.03.2015 06:14, Alex Deucher wrote:
>> On Wed, Mar 11, 2015 at 4:51 PM, Alex Deucher <alexdeucher@gmail.com> wrote:
>>> On Wed, Mar 11, 2015 at 2:21 PM, Christian König
>>> <deathsimple@vodafone.de> wrote:
>>>> On 11.03.2015 16:44, Alex Deucher wrote:
>>>>> radeon_bo_create() calls radeon_ttm_placement_from_domain()
>>>>> before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
>>>>> uses the ttm bo size to determine when to select top down
>>>>> allocation but since the ttm bo is not initialized yet the
>>>>> check is always false.
>>>>>
>>>>> Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
>>>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>>>> Cc: stable@vger.kernel.org
>>>>
>>>> And I was already wondering why the heck the BOs always made this ping/pong
>>>> in memory after creation.
>>>>
>>>> Patch is Reviewed-by: Christian König <christian.koenig@amd.com>
>>> And fixing that promptly broke VCE due to vram location requirements.
>>> Updated patch attached.  Thoughts?
>> And one more take to make things a bit more explicit for static kernel
>> driver allocations.
> struct ttm_place::lpfn is honoured even with TTM_PL_FLAG_TOPDOWN, so
> latter should work with RADEON_GEM_CPU_ACCESS. It sounds like the
> problem is really that some BOs are expected to be within a certain
> range from the beginning of VRAM, but lpfn isn't set accordingly. It
> would be better to fix that by setting lpfn directly than indirectly via
> RADEON_GEM_CPU_ACCESS.

Yeah, agree. We should probably try to find the root cause of this instead.

As far as I know VCE has no documented limitation on where buffers are 
placed (unlike UVD). So this is a bit strange. Are you sure that it 
isn't UVD which breaks here?

Regards,
Christian.

>
>
> Anyway, since this isn't the first bug which prevents
> TTM_PL_FLAG_TOPDOWN from working as intended in the radeon driver, I
> wonder if its performance impact should be re-evaluated. Lauri?
>
>

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create
  2015-03-12  9:23         ` Christian König
@ 2015-03-12  9:30           ` Oded Gabbay
  2015-03-12  9:36             ` Christian König
  2015-03-12 13:09           ` Alex Deucher
  1 sibling, 1 reply; 23+ messages in thread
From: Oded Gabbay @ 2015-03-12  9:30 UTC (permalink / raw)
  To: Christian König, Michel Dänzer, Alex Deucher
  Cc: Lauri Kasanen, Maling list - DRI developers



On 03/12/2015 11:23 AM, Christian König wrote:
> On 12.03.2015 10:02, Michel Dänzer wrote:
>> On 12.03.2015 06:14, Alex Deucher wrote:
>>> On Wed, Mar 11, 2015 at 4:51 PM, Alex Deucher <alexdeucher@gmail.com> wrote:
>>>> On Wed, Mar 11, 2015 at 2:21 PM, Christian König
>>>> <deathsimple@vodafone.de> wrote:
>>>>> On 11.03.2015 16:44, Alex Deucher wrote:
>>>>>> radeon_bo_create() calls radeon_ttm_placement_from_domain()
>>>>>> before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
>>>>>> uses the ttm bo size to determine when to select top down
>>>>>> allocation but since the ttm bo is not initialized yet the
>>>>>> check is always false.
>>>>>>
>>>>>> Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
>>>>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>>>>> Cc: stable@vger.kernel.org
>>>>>
>>>>> And I was already wondering why the heck the BOs always made this
>>>>> ping/pong
>>>>> in memory after creation.
>>>>>
>>>>> Patch is Reviewed-by: Christian König <christian.koenig@amd.com>
>>>> And fixing that promptly broke VCE due to vram location requirements.
>>>> Updated patch attached.  Thoughts?
>>> And one more take to make things a bit more explicit for static kernel
>>> driver allocations.
>> struct ttm_place::lpfn is honoured even with TTM_PL_FLAG_TOPDOWN, so
>> latter should work with RADEON_GEM_CPU_ACCESS. It sounds like the
>> problem is really that some BOs are expected to be within a certain
>> range from the beginning of VRAM, but lpfn isn't set accordingly. It
>> would be better to fix that by setting lpfn directly than indirectly via
>> RADEON_GEM_CPU_ACCESS.
> 
> Yeah, agree. We should probably try to find the root cause of this instead.
> 
> As far as I know VCE has no documented limitation on where buffers are
> placed (unlike UVD). So this is a bit strange. Are you sure that it isn't
> UVD which breaks here?
> 
> Regards,
> Christian.
I noticed this bug when trying to allocate very large BOs (385MB) from the
other side of VRAM.
However, even with this fix, the following scenario still fails:
1. Allocate BO of 385MB on VRAM with no CPU access.
2. Map it to VRAM
3. Allocate second BO of 385MB on VRAM with no CPU access

The last step fails as TTM can't find a place to put this second BO. I
suspect the Top-Down thing isn't being respected at all by the
creation/pinning of the BO.

I think that what happens is that the first BO is pinned right after the
first 256 MB, instead of pinning it at the end of the VRAM.
Then, when trying to create the second BO, there is no room for it, as there
is only 256MB before the first BO, and 383MB after the first BO.

I need to debug it further, but will probably only do that on Sunday.

	Oded

> 
>>
>>
>> Anyway, since this isn't the first bug which prevents
>> TTM_PL_FLAG_TOPDOWN from working as intended in the radeon driver, I
>> wonder if its performance impact should be re-evaluated. Lauri?
>>
>>
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create
  2015-03-12  9:30           ` Oded Gabbay
@ 2015-03-12  9:36             ` Christian König
  2015-03-15 15:07               ` Oded Gabbay
  0 siblings, 1 reply; 23+ messages in thread
From: Christian König @ 2015-03-12  9:36 UTC (permalink / raw)
  To: Oded Gabbay, Michel Dänzer, Alex Deucher
  Cc: Lauri Kasanen, Maling list - DRI developers

On 12.03.2015 10:30, Oded Gabbay wrote:
>
> On 03/12/2015 11:23 AM, Christian König wrote:
>> On 12.03.2015 10:02, Michel Dänzer wrote:
>>> On 12.03.2015 06:14, Alex Deucher wrote:
>>>> On Wed, Mar 11, 2015 at 4:51 PM, Alex Deucher <alexdeucher@gmail.com> wrote:
>>>>> On Wed, Mar 11, 2015 at 2:21 PM, Christian König
>>>>> <deathsimple@vodafone.de> wrote:
>>>>>> On 11.03.2015 16:44, Alex Deucher wrote:
>>>>>>> radeon_bo_create() calls radeon_ttm_placement_from_domain()
>>>>>>> before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
>>>>>>> uses the ttm bo size to determine when to select top down
>>>>>>> allocation but since the ttm bo is not initialized yet the
>>>>>>> check is always false.
>>>>>>>
>>>>>>> Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
>>>>>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>>>>>> Cc: stable@vger.kernel.org
>>>>>> And I was already wondering why the heck the BOs always made this
>>>>>> ping/pong
>>>>>> in memory after creation.
>>>>>>
>>>>>> Patch is Reviewed-by: Christian König <christian.koenig@amd.com>
>>>>> And fixing that promptly broke VCE due to vram location requirements.
>>>>> Updated patch attached.  Thoughts?
>>>> And one more take to make things a bit more explicit for static kernel
>>>> driver allocations.
>>> struct ttm_place::lpfn is honoured even with TTM_PL_FLAG_TOPDOWN, so
>>> latter should work with RADEON_GEM_CPU_ACCESS. It sounds like the
>>> problem is really that some BOs are expected to be within a certain
>>> range from the beginning of VRAM, but lpfn isn't set accordingly. It
>>> would be better to fix that by setting lpfn directly than indirectly via
>>> RADEON_GEM_CPU_ACCESS.
>> Yeah, agree. We should probably try to find the root cause of this instead.
>>
>> As far as I know VCE has no documented limitation on where buffers are
>> placed (unlike UVD). So this is a bit strange. Are you sure that it isn't
>> UVD which breaks here?
>>
>> Regards,
>> Christian.
> I noticed this bug when trying to allocate very large BOs (385MB) from the
> other side of VRAM.
> However, even with this fix, the following scenario still fails:
> 1. Allocate BO of 385MB on VRAM with no CPU access.
> 2. Map it to VRAM
> 3. Allocate second BO of 385MB on VRAM with no CPU access
>
> The last step fails as the ttm can't find a place to put this second BO. I
> suspect the Top-Down thing isn't being respected at all by the
> creation/pinning of BO.
>
> I think that what happens is that the first BO is pinned right after the
> first 256 MB, instead of pinning it at the end of the VRAM.
> Then, when trying to create the second BO, there is no room for it, as there
> is only 256MB before the first BO, and 383MB after the first BO.
>
> I need to debug it further, but will probably only do that on Sunday.

What is the content of radeon_vram_mm (in debugfs) after you allocated 
the first BO?

The placement should be visible there pretty fine.

Regards,
Christian.

>
> 	Oded
>
>>>
>>> Anyway, since this isn't the first bug which prevents
>>> TTM_PL_FLAG_TOPDOWN from working as intended in the radeon driver, I
>>> wonder if its performance impact should be re-evaluated. Lauri?
>>>
>>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/dri-devel

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create
  2015-03-12  9:02       ` Michel Dänzer
  2015-03-12  9:23         ` Christian König
@ 2015-03-12 10:21         ` Lauri Kasanen
  2015-03-13  9:11         ` Daniel Vetter
  2 siblings, 0 replies; 23+ messages in thread
From: Lauri Kasanen @ 2015-03-12 10:21 UTC (permalink / raw)
  To: Michel Dänzer; +Cc: Lauri Kasanen, Maling list - DRI developers

On Thu, 12 Mar 2015 18:02:56 +0900
Michel Dänzer <michel@daenzer.net> wrote:

> struct ttm_place::lpfn is honoured even with TTM_PL_FLAG_TOPDOWN, so
> latter should work with RADEON_GEM_CPU_ACCESS. It sounds like the
> problem is really that some BOs are expected to be within a certain
> range from the beginning of VRAM, but lpfn isn't set accordingly. It
> would be better to fix that by setting lpfn directly than indirectly via
> RADEON_GEM_CPU_ACCESS.
> 
> 
> Anyway, since this isn't the first bug which prevents
> TTM_PL_FLAG_TOPDOWN from working as intended in the radeon driver, I
> wonder if its performance impact should be re-evaluated. Lauri?

I'm sorry, I'm not in a place where I could spend the time to redo the
benchmarks.

If it causes too many issues it is of course easy to disable, but so
far the issues shown have not been caused by it - it merely exposed
wrong settings/bugs elsewhere. From this POV I would say it's good to
have it enabled, to stress the various parts.

This doesn't warm the heart of the guy with flicker after suspend, so
perhaps a kernel module parameter to disable it (defaulting to enabled)?

- Lauri
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create
  2015-03-12  9:23         ` Christian König
  2015-03-12  9:30           ` Oded Gabbay
@ 2015-03-12 13:09           ` Alex Deucher
  2015-03-13  2:55             ` Michel Dänzer
  1 sibling, 1 reply; 23+ messages in thread
From: Alex Deucher @ 2015-03-12 13:09 UTC (permalink / raw)
  To: Christian König
  Cc: Michel Dänzer, Lauri Kasanen, Maling list - DRI developers

On Thu, Mar 12, 2015 at 5:23 AM, Christian König
<deathsimple@vodafone.de> wrote:
> On 12.03.2015 10:02, Michel Dänzer wrote:
>>
>> On 12.03.2015 06:14, Alex Deucher wrote:
>>>
>>> On Wed, Mar 11, 2015 at 4:51 PM, Alex Deucher <alexdeucher@gmail.com>
>>> wrote:
>>>>
>>>> On Wed, Mar 11, 2015 at 2:21 PM, Christian König
>>>> <deathsimple@vodafone.de> wrote:
>>>>>
>>>>> On 11.03.2015 16:44, Alex Deucher wrote:
>>>>>>
>>>>>> radeon_bo_create() calls radeon_ttm_placement_from_domain()
>>>>>> before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
>>>>>> uses the ttm bo size to determine when to select top down
>>>>>> allocation but since the ttm bo is not initialized yet the
>>>>>> check is always false.
>>>>>>
>>>>>> Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
>>>>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>>>>> Cc: stable@vger.kernel.org
>>>>>
>>>>>
>>>>> And I was already wondering why the heck the BOs always made this
>>>>> ping/pong
>>>>> in memory after creation.
>>>>>
>>>>> Patch is Reviewed-by: Christian König <christian.koenig@amd.com>
>>>>
>>>> And fixing that promptly broke VCE due to vram location requirements.
>>>> Updated patch attached.  Thoughts?
>>>
>>> And one more take to make things a bit more explicit for static kernel
>>> driver allocations.
>>
>> struct ttm_place::lpfn is honoured even with TTM_PL_FLAG_TOPDOWN, so
>> latter should work with RADEON_GEM_CPU_ACCESS. It sounds like the
>> problem is really that some BOs are expected to be within a certain
>> range from the beginning of VRAM, but lpfn isn't set accordingly. It
>> would be better to fix that by setting lpfn directly than indirectly via
>> RADEON_GEM_CPU_ACCESS.
>
>
> Yeah, agree. We should probably try to find the root cause of this instead.
>
> As far as I know VCE has no documented limitation on where buffers are
> placed (unlike UVD). So this is a bit strange. Are you sure that it isn't
> UVD which breaks here?

It's definitely VCE, I don't know why UVD didn't have a problem.  I
considered using pin_restricted to make sure it got pinned in the CPU
visible region, but that had two problems: 1. it would end up getting
migrated when pinned, 2. it would end up at the top of the restricted
region since the top down flag is set which would end up fragmenting
vram.

Alex

>
> Regards,
> Christian.
>
>
>>
>>
>> Anyway, since this isn't the first bug which prevents
>> TTM_PL_FLAG_TOPDOWN from working as intended in the radeon driver, I
>> wonder if its performance impact should be re-evaluated. Lauri?
>>
>>
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create
  2015-03-12 13:09           ` Alex Deucher
@ 2015-03-13  2:55             ` Michel Dänzer
  2015-03-16 22:32               ` Alex Deucher
  0 siblings, 1 reply; 23+ messages in thread
From: Michel Dänzer @ 2015-03-13  2:55 UTC (permalink / raw)
  To: Alex Deucher, Christian König; +Cc: Maling list - DRI developers

On 12.03.2015 22:09, Alex Deucher wrote:
> On Thu, Mar 12, 2015 at 5:23 AM, Christian König
> <deathsimple@vodafone.de> wrote:
>> On 12.03.2015 10:02, Michel Dänzer wrote:
>>>
>>> On 12.03.2015 06:14, Alex Deucher wrote:
>>>>
>>>> On Wed, Mar 11, 2015 at 4:51 PM, Alex Deucher <alexdeucher@gmail.com>
>>>> wrote:
>>>>>
>>>>> On Wed, Mar 11, 2015 at 2:21 PM, Christian König
>>>>> <deathsimple@vodafone.de> wrote:
>>>>>>
>>>>>> On 11.03.2015 16:44, Alex Deucher wrote:
>>>>>>>
>>>>>>> radeon_bo_create() calls radeon_ttm_placement_from_domain()
>>>>>>> before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
>>>>>>> uses the ttm bo size to determine when to select top down
>>>>>>> allocation but since the ttm bo is not initialized yet the
>>>>>>> check is always false.
>>>>>>>
>>>>>>> Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
>>>>>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>>>>>> Cc: stable@vger.kernel.org
>>>>>>
>>>>>>
>>>>>> And I was already wondering why the heck the BOs always made this
>>>>>> ping/pong
>>>>>> in memory after creation.
>>>>>>
>>>>>> Patch is Reviewed-by: Christian König <christian.koenig@amd.com>
>>>>>
>>>>> And fixing that promptly broke VCE due to vram location requirements.
>>>>> Updated patch attached.  Thoughts?
>>>>
>>>> And one more take to make things a bit more explicit for static kernel
>>>> driver allocations.
>>>
>>> struct ttm_place::lpfn is honoured even with TTM_PL_FLAG_TOPDOWN, so
>>> latter should work with RADEON_GEM_CPU_ACCESS. It sounds like the
>>> problem is really that some BOs are expected to be within a certain
>>> range from the beginning of VRAM, but lpfn isn't set accordingly. It
>>> would be better to fix that by setting lpfn directly than indirectly via
>>> RADEON_GEM_CPU_ACCESS.
>>
>>
>> Yeah, agree. We should probably try to find the root cause of this instead.
>>
>> As far as I know VCE has no documented limitation on where buffers are
>> placed (unlike UVD). So this is a bit strange. Are you sure that it isn't
>> UVD which breaks here?
> 
> It's definitely VCE, I don't know why UVD didn't have a problem.  I
> considered using pin_restricted to make sure it got pinned in the CPU
> visible region, but that had two problems: 1. it would end up getting
> migrated when pinned,

Maybe something like radeon_uvd_force_into_uvd_segment() is needed for
VCE as well?


> 2. it would end up at the top of the restricted
> region since the top down flag is set which would end up fragmenting
> vram.

If that's an issue (which outweighs the supposed benefit of
TTM_PL_FLAG_TOPDOWN), then again the proper solution would be not to set
TTM_PL_FLAG_TOPDOWN when rbo->placements[i].lpfn != 0 and smaller than
the whole available region, instead of checking for VRAM and
RADEON_GEM_CPU_ACCESS.


-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create
  2015-03-12  9:02       ` Michel Dänzer
  2015-03-12  9:23         ` Christian König
  2015-03-12 10:21         ` Lauri Kasanen
@ 2015-03-13  9:11         ` Daniel Vetter
  2015-03-13  9:46           ` Michel Dänzer
  2 siblings, 1 reply; 23+ messages in thread
From: Daniel Vetter @ 2015-03-13  9:11 UTC (permalink / raw)
  To: Michel Dänzer; +Cc: Lauri Kasanen, Maling list - DRI developers

On Thu, Mar 12, 2015 at 06:02:56PM +0900, Michel Dänzer wrote:
> On 12.03.2015 06:14, Alex Deucher wrote:
> > On Wed, Mar 11, 2015 at 4:51 PM, Alex Deucher <alexdeucher@gmail.com> wrote:
> >> On Wed, Mar 11, 2015 at 2:21 PM, Christian König
> >> <deathsimple@vodafone.de> wrote:
> >>> On 11.03.2015 16:44, Alex Deucher wrote:
> >>>>
> >>>> radeon_bo_create() calls radeon_ttm_placement_from_domain()
> >>>> before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
> >>>> uses the ttm bo size to determine when to select top down
> >>>> allocation but since the ttm bo is not initialized yet the
> >>>> check is always false.
> >>>>
> >>>> Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
> >>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> >>>> Cc: stable@vger.kernel.org
> >>>
> >>>
> >>> And I was already wondering why the heck the BOs always made this ping/pong
> >>> in memory after creation.
> >>>
> >>> Patch is Reviewed-by: Christian König <christian.koenig@amd.com>
> >>
> >> And fixing that promptly broke VCE due to vram location requirements.
> >> Updated patch attached.  Thoughts?
> > 
> > And one more take to make things a bit more explicit for static kernel
> > driver allocations.
> 
> struct ttm_place::lpfn is honoured even with TTM_PL_FLAG_TOPDOWN, so
> latter should work with RADEON_GEM_CPU_ACCESS. It sounds like the
> problem is really that some BOs are expected to be within a certain
> range from the beginning of VRAM, but lpfn isn't set accordingly. It
> would be better to fix that by setting lpfn directly than indirectly via
> RADEON_GEM_CPU_ACCESS.
> 
> 
> Anyway, since this isn't the first bug which prevents
> TTM_PL_FLAG_TOPDOWN from working as intended in the radeon driver, I
> wonder if its performance impact should be re-evaluated. Lauri?

Topdown allocation in drm_mm is just a hint/bias really, it won't try too
hard to segregate things. If you depend upon perfect topdown allocation
for correctness then this won't work well. The trouble starts once you've
split your free space up - it's not going to look for the topmost hole
first but still picks just the one on top of the stack.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create
  2015-03-13  9:11         ` Daniel Vetter
@ 2015-03-13  9:46           ` Michel Dänzer
  2015-03-13 16:36             ` Daniel Vetter
  0 siblings, 1 reply; 23+ messages in thread
From: Michel Dänzer @ 2015-03-13  9:46 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Lauri Kasanen, Maling list - DRI developers

On 13.03.2015 18:11, Daniel Vetter wrote:
> On Thu, Mar 12, 2015 at 06:02:56PM +0900, Michel Dänzer wrote:
>> On 12.03.2015 06:14, Alex Deucher wrote:
>>> On Wed, Mar 11, 2015 at 4:51 PM, Alex Deucher <alexdeucher@gmail.com> wrote:
>>>> On Wed, Mar 11, 2015 at 2:21 PM, Christian König
>>>> <deathsimple@vodafone.de> wrote:
>>>>> On 11.03.2015 16:44, Alex Deucher wrote:
>>>>>>
>>>>>> radeon_bo_create() calls radeon_ttm_placement_from_domain()
>>>>>> before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
>>>>>> uses the ttm bo size to determine when to select top down
>>>>>> allocation but since the ttm bo is not initialized yet the
>>>>>> check is always false.
>>>>>>
>>>>>> Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
>>>>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>>>>> Cc: stable@vger.kernel.org
>>>>>
>>>>>
>>>>> And I was already wondering why the heck the BOs always made this ping/pong
>>>>> in memory after creation.
>>>>>
>>>>> Patch is Reviewed-by: Christian König <christian.koenig@amd.com>
>>>>
>>>> And fixing that promptly broke VCE due to vram location requirements.
>>>> Updated patch attached.  Thoughts?
>>>
>>> And one more take to make things a bit more explicit for static kernel
>>> driver allocations.
>>
>> struct ttm_place::lpfn is honoured even with TTM_PL_FLAG_TOPDOWN, so
>> latter should work with RADEON_GEM_CPU_ACCESS. It sounds like the
>> problem is really that some BOs are expected to be within a certain
>> range from the beginning of VRAM, but lpfn isn't set accordingly. It
>> would be better to fix that by setting lpfn directly than indirectly via
>> RADEON_GEM_CPU_ACCESS.
>>
>>
>> Anyway, since this isn't the first bug which prevents
>> TTM_PL_FLAG_TOPDOWN from working as intended in the radeon driver, I
>> wonder if its performance impact should be re-evaluated. Lauri?
> 
> Topdown allocation in drm_mm is just a hint/bias really, it won't try too
> hard to segregate things. If you depend upon perfect topdown allocation
> for correctness then this won't work well. The trouble starts once you've
> split your free space up - it's not going to look for the topmost hole
> first but still picks just the one on top of the stack.

TTM_PL_FLAG_TOPDOWN sets DRM_MM_SEARCH_BELOW as well as
DRM_MM_CREATE_TOP. So it should traverse the list of holes in reverse
order, right?


-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create
  2015-03-13  9:46           ` Michel Dänzer
@ 2015-03-13 16:36             ` Daniel Vetter
  2015-03-13 17:57               ` Alex Deucher
  0 siblings, 1 reply; 23+ messages in thread
From: Daniel Vetter @ 2015-03-13 16:36 UTC (permalink / raw)
  To: Michel Dänzer; +Cc: Lauri Kasanen, Maling list - DRI developers

On Fri, Mar 13, 2015 at 06:46:33PM +0900, Michel Dänzer wrote:
> On 13.03.2015 18:11, Daniel Vetter wrote:
> > On Thu, Mar 12, 2015 at 06:02:56PM +0900, Michel Dänzer wrote:
> >> On 12.03.2015 06:14, Alex Deucher wrote:
> >>> On Wed, Mar 11, 2015 at 4:51 PM, Alex Deucher <alexdeucher@gmail.com> wrote:
> >>>> On Wed, Mar 11, 2015 at 2:21 PM, Christian König
> >>>> <deathsimple@vodafone.de> wrote:
> >>>>> On 11.03.2015 16:44, Alex Deucher wrote:
> >>>>>>
> >>>>>> radeon_bo_create() calls radeon_ttm_placement_from_domain()
> >>>>>> before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
> >>>>>> uses the ttm bo size to determine when to select top down
> >>>>>> allocation but since the ttm bo is not initialized yet the
> >>>>>> check is always false.
> >>>>>>
> >>>>>> Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
> >>>>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> >>>>>> Cc: stable@vger.kernel.org
> >>>>>
> >>>>>
> >>>>> And I was already wondering why the heck the BOs always made this ping/pong
> >>>>> in memory after creation.
> >>>>>
> >>>>> Patch is Reviewed-by: Christian König <christian.koenig@amd.com>
> >>>>
> >>>> And fixing that promptly broke VCE due to vram location requirements.
> >>>> Updated patch attached.  Thoughts?
> >>>
> >>> And one more take to make things a bit more explicit for static kernel
> >>> driver allocations.
> >>
> >> struct ttm_place::lpfn is honoured even with TTM_PL_FLAG_TOPDOWN, so
> >> latter should work with RADEON_GEM_CPU_ACCESS. It sounds like the
> >> problem is really that some BOs are expected to be within a certain
> >> range from the beginning of VRAM, but lpfn isn't set accordingly. It
> >> would be better to fix that by setting lpfn directly than indirectly via
> >> RADEON_GEM_CPU_ACCESS.
> >>
> >>
> >> Anyway, since this isn't the first bug which prevents
> >> TTM_PL_FLAG_TOPDOWN from working as intended in the radeon driver, I
> >> wonder if its performance impact should be re-evaluated. Lauri?
> > 
> > Topdown allocation in drm_mm is just a hint/bias really, it won't try too
> > hard to segregate things. If you depend upon perfect topdown allocation
> > for correctness then this won't work well. The trouble starts once you've
> > split your free space up - it's not going to look for the topmost hole
> > first but still picks just the one on top of the stack.
> 
> TTM_PL_FLAG_TOPDOWN sets DRM_MM_SEARCH_BELOW as well as
> DRM_MM_CREATE_TOP. So it should traverse the list of holes in reverse
> order, right?

Sure that additional segregation helps a bit more, but in the end if you
split things badly and are a bit unlucky then the buffer can end up pretty
much anywhere. Just wanted to mention that in case someone gets confused
when the buffers end up in unexpected places.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create
  2015-03-13 16:36             ` Daniel Vetter
@ 2015-03-13 17:57               ` Alex Deucher
  0 siblings, 0 replies; 23+ messages in thread
From: Alex Deucher @ 2015-03-13 17:57 UTC (permalink / raw)
  To: Daniel Vetter
  Cc: Michel Dänzer, Lauri Kasanen, Maling list - DRI developers

On Fri, Mar 13, 2015 at 12:36 PM, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Fri, Mar 13, 2015 at 06:46:33PM +0900, Michel Dänzer wrote:
>> On 13.03.2015 18:11, Daniel Vetter wrote:
>> > On Thu, Mar 12, 2015 at 06:02:56PM +0900, Michel Dänzer wrote:
>> >> On 12.03.2015 06:14, Alex Deucher wrote:
>> >>> On Wed, Mar 11, 2015 at 4:51 PM, Alex Deucher <alexdeucher@gmail.com> wrote:
>> >>>> On Wed, Mar 11, 2015 at 2:21 PM, Christian König
>> >>>> <deathsimple@vodafone.de> wrote:
>> >>>>> On 11.03.2015 16:44, Alex Deucher wrote:
>> >>>>>>
>> >>>>>> radeon_bo_create() calls radeon_ttm_placement_from_domain()
>> >>>>>> before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
>> >>>>>> uses the ttm bo size to determine when to select top down
>> >>>>>> allocation but since the ttm bo is not initialized yet the
>> >>>>>> check is always false.
>> >>>>>>
>> >>>>>> Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
>> >>>>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>> >>>>>> Cc: stable@vger.kernel.org
>> >>>>>
>> >>>>>
>> >>>>> And I was already wondering why the heck the BOs always made this ping/pong
>> >>>>> in memory after creation.
>> >>>>>
>> >>>>> Patch is Reviewed-by: Christian König <christian.koenig@amd.com>
>> >>>>
>> >>>> And fixing that promptly broke VCE due to vram location requirements.
>> >>>> Updated patch attached.  Thoughts?
>> >>>
>> >>> And one more take to make things a bit more explicit for static kernel
>> >>> driver allocations.
>> >>
>> >> struct ttm_place::lpfn is honoured even with TTM_PL_FLAG_TOPDOWN, so
>> >> latter should work with RADEON_GEM_CPU_ACCESS. It sounds like the
>> >> problem is really that some BOs are expected to be within a certain
>> >> range from the beginning of VRAM, but lpfn isn't set accordingly. It
>> >> would be better to fix that by setting lpfn directly than indirectly via
>> >> RADEON_GEM_CPU_ACCESS.
>> >>
>> >>
>> >> Anyway, since this isn't the first bug which prevents
>> >> TTM_PL_FLAG_TOPDOWN from working as intended in the radeon driver, I
>> >> wonder if its performance impact should be re-evaluated. Lauri?
>> >
>> > Topdown allocation in drm_mm is just a hint/bias really, it won't try too
>> > hard to segregate things. If you depend upon perfect topdown allocation
>> > for correctness then this won't work well. The trouble starts once you've
>> > split your free space up - it's not going to look for the topmost hole
>> > first but still picks just the one on top of the stack.
>>
>> TTM_PL_FLAG_TOPDOWN sets DRM_MM_SEARCH_BELOW as well as
>> DRM_MM_CREATE_TOP. So it should traverse the list of holes in reverse
>> order, right?
>
> Sure that additional segregation helps a bit more, but in the end if you
> split things badly and are a bit unlucky then the buffer can end up pretty
> much anywhere. Just wanted to mention that in case someone gets confused
> when the buffers end up in unexpected places.

There's no explicit requirement that they have to be at the top or
bottom per se, it's just that the buffers in question have a specific
restricted location requirement and they are set up at driver init
time and not moved for the life of the driver so I'd rather not put
them somewhere too sub-optimal.

Alex

> -Daniel
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create
  2015-03-12  9:36             ` Christian König
@ 2015-03-15 15:07               ` Oded Gabbay
  0 siblings, 0 replies; 23+ messages in thread
From: Oded Gabbay @ 2015-03-15 15:07 UTC (permalink / raw)
  To: Christian König, Michel Dänzer, Alex Deucher, Bridgman,
	John, Thangirala, Hari, Wicaksono, Besar
  Cc: Lauri Kasanen, Maling list - DRI developers



On 03/12/2015 11:36 AM, Christian König wrote:
> On 12.03.2015 10:30, Oded Gabbay wrote:
>>
>> On 03/12/2015 11:23 AM, Christian König wrote:
>>> On 12.03.2015 10:02, Michel Dänzer wrote:
>>>> On 12.03.2015 06:14, Alex Deucher wrote:
>>>>> On Wed, Mar 11, 2015 at 4:51 PM, Alex Deucher <alexdeucher@gmail.com> wrote:
>>>>>> On Wed, Mar 11, 2015 at 2:21 PM, Christian König
>>>>>> <deathsimple@vodafone.de> wrote:
>>>>>>> On 11.03.2015 16:44, Alex Deucher wrote:
>>>>>>>> radeon_bo_create() calls radeon_ttm_placement_from_domain()
>>>>>>>> before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
>>>>>>>> uses the ttm bo size to determine when to select top down
>>>>>>>> allocation but since the ttm bo is not initialized yet the
>>>>>>>> check is always false.
>>>>>>>>
>>>>>>>> Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
>>>>>>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>>>>>>> Cc: stable@vger.kernel.org
>>>>>>> And I was already wondering why the heck the BOs always made this
>>>>>>> ping/pong
>>>>>>> in memory after creation.
>>>>>>>
>>>>>>> Patch is Reviewed-by: Christian König <christian.koenig@amd.com>
>>>>>> And fixing that promptly broke VCE due to vram location requirements.
>>>>>> Updated patch attached.  Thoughts?
>>>>> And one more take to make things a bit more explicit for static kernel
>>>>> driver allocations.
>>>> struct ttm_place::lpfn is honoured even with TTM_PL_FLAG_TOPDOWN, so
>>>> latter should work with RADEON_GEM_CPU_ACCESS. It sounds like the
>>>> problem is really that some BOs are expected to be within a certain
>>>> range from the beginning of VRAM, but lpfn isn't set accordingly. It
>>>> would be better to fix that by setting lpfn directly than indirectly via
>>>> RADEON_GEM_CPU_ACCESS.
>>> Yeah, agree. We should probably try to find the root cause of this instead.
>>>
>>> As far as I know VCE has no documented limitation on where buffers are
>>> placed (unlike UVD). So this is a bit strange. Are you sure that it isn't
>>> UVD which breaks here?
>>>
>>> Regards,
>>> Christian.
>> I noticed this bug when trying to allocate very large BOs (385MB) from the
>> other side of VRAM.
>> However, even with this fix, the following scenario still fails:
>> 1. Allocate BO of 385MB on VRAM with no CPU access.
>> 2. Map it to VRAM
>> 3. Allocate second BO of 385MB on VRAM with no CPU access
>>
>> The last step fails as the ttm can't find a place to put this second BO. I
>> suspect the Top-Down thing isn't being respected at all by the
>> creation/pinning of BO.
>>
>> I think that what happens is that the first BO is pinned right after the
>> first 256 MB, instead of pinning it at the end of the VRAM.
>> Then, when trying to create the second BO, there is no room for it, as there
>> is only 256MB before the first BO, and 383MB after the first BO.
>>
>> I need to debug it further, but will probably only do that on Sunday.
>
> What is the content of radeon_vram_mm (in debugfs) after you allocated the first
> BO?
>
> The placement should be visible there pretty fine.
>
> Regards,
> Christian.
>
Here are the contents before the allocation:
root@odedg-test:/sys/kernel/debug/dri/0# cat radeon_vram_mm
0x00000000-0x00000040: 0x00000040: used
0x00000040-0x00000041: 0x00000001: used
0x00000041-0x00000042: 0x00000001: used
0x00000042-0x00000043: 0x00000001: used
0x00000043-0x00000044: 0x00000001: used
0x00000044-0x0000eab4: 0x0000ea70: free
0x0000eab4-0x0000edb4: 0x00000300: used
0x0000edb4-0x0000f3b4: 0x00000600: free
0x0000f3b4-0x0000f6b4: 0x00000300: used
0x0000f6b4-0x0000f8b4: 0x00000200: used
0x0000f8b4-0x0000fdc8: 0x00000514: used
0x0000fdc8-0x00010000: 0x00000238: used
0x00010000-0x00040000: 0x00030000: free
total: 262144, used 3984 free 258160

And here they are after the allocation of 385MB BO (not pinned yet):

root@odedg-test:/sys/kernel/debug/dri/0# cat radeon_vram_mm
0x00000000-0x00000040: 0x00000040: used
0x00000040-0x00000041: 0x00000001: used
0x00000041-0x00000042: 0x00000001: used
0x00000042-0x00000043: 0x00000001: used
0x00000043-0x00000044: 0x00000001: used
0x00000044-0x0000eab4: 0x0000ea70: free
0x0000eab4-0x0000edb4: 0x00000300: used
0x0000edb4-0x0000edb8: 0x00000004: free
0x0000edb8-0x0000edb9: 0x00000001: used
0x0000edb9-0x0000edc1: 0x00000008: used
0x0000edc1-0x0000edc9: 0x00000008: used
0x0000edc9-0x0000edd1: 0x00000008: used
0x0000edd1-0x0000edd9: 0x00000008: used
0x0000edd9-0x0000ede1: 0x00000008: used
0x0000ede1-0x0000ede9: 0x00000008: used
0x0000ede9-0x0000edf1: 0x00000008: used
0x0000edf1-0x0000edf9: 0x00000008: used
0x0000edf9-0x0000ee01: 0x00000008: used
0x0000ee01-0x0000ee09: 0x00000008: used
0x0000ee09-0x0000ee11: 0x00000008: used
0x0000ee11-0x0000ee19: 0x00000008: used
0x0000ee19-0x0000ee21: 0x00000008: used
0x0000ee21-0x0000ee29: 0x00000008: used
0x0000ee29-0x0000ee31: 0x00000008: used
0x0000ee31-0x0000ee39: 0x00000008: used
0x0000ee39-0x0000ee41: 0x00000008: used
0x0000ee41-0x0000ee49: 0x00000008: used
0x0000ee49-0x0000ee51: 0x00000008: used
0x0000ee51-0x0000ee59: 0x00000008: used
0x0000ee59-0x0000ee61: 0x00000008: used
0x0000ee61-0x0000ee69: 0x00000008: used
0x0000ee69-0x0000ee71: 0x00000008: used
0x0000ee71-0x0000ee79: 0x00000008: used
0x0000ee79-0x0000ee81: 0x00000008: used
0x0000ee81-0x0000f3b4: 0x00000533: free
0x0000f3b4-0x0000f6b4: 0x00000300: used
0x0000f6b4-0x0000f8b4: 0x00000200: used
0x0000f8b4-0x0000fdc8: 0x00000514: used
0x0000fdc8-0x00010000: 0x00000238: used
0x00010000-0x00027f00: 0x00017f00: free
0x00027f00-0x00040000: 0x00018100: used
total: 262144, used 102745 free 159399

So apparently ttm take into consideration the TTM_PL_FLAG_TOPDOWN flag.
However, because the rest of the memory is fragmented, I can't allocate more 
than 383MB (0x00010000-0x00027f00)

I assume the contents of 0-0x10000 are taken by the graphics stack and maybe 
some of them are pinned? Because there is a large free hole at 
0x00000044-0x0000eab4: 0x0000ea70: free

This is an example where dividing the allocation to multiple BOs (of 1-2MB) 
could overcome the fragmentation issue.


	Oded


>>
>>     Oded
>>
>>>>
>>>> Anyway, since this isn't the first bug which prevents
>>>> TTM_PL_FLAG_TOPDOWN from working as intended in the radeon driver, I
>>>> wonder if its performance impact should be re-evaluated. Lauri?
>>>>
>>>>
>>> _______________________________________________
>>> dri-devel mailing list
>>> dri-devel@lists.freedesktop.org
>>> http://lists.freedesktop.org/mailman/listinfo/dri-devel
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create
  2015-03-13  2:55             ` Michel Dänzer
@ 2015-03-16 22:32               ` Alex Deucher
  2015-03-17  3:48                 ` Michel Dänzer
  2015-03-17 11:40                 ` Christian König
  0 siblings, 2 replies; 23+ messages in thread
From: Alex Deucher @ 2015-03-16 22:32 UTC (permalink / raw)
  To: Michel Dänzer; +Cc: Maling list - DRI developers

[-- Attachment #1: Type: text/plain, Size: 3296 bytes --]

On Thu, Mar 12, 2015 at 10:55 PM, Michel Dänzer <michel@daenzer.net> wrote:
> On 12.03.2015 22:09, Alex Deucher wrote:
>> On Thu, Mar 12, 2015 at 5:23 AM, Christian König
>> <deathsimple@vodafone.de> wrote:
>>> On 12.03.2015 10:02, Michel Dänzer wrote:
>>>>
>>>> On 12.03.2015 06:14, Alex Deucher wrote:
>>>>>
>>>>> On Wed, Mar 11, 2015 at 4:51 PM, Alex Deucher <alexdeucher@gmail.com>
>>>>> wrote:
>>>>>>
>>>>>> On Wed, Mar 11, 2015 at 2:21 PM, Christian König
>>>>>> <deathsimple@vodafone.de> wrote:
>>>>>>>
>>>>>>> On 11.03.2015 16:44, Alex Deucher wrote:
>>>>>>>>
>>>>>>>> radeon_bo_create() calls radeon_ttm_placement_from_domain()
>>>>>>>> before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
>>>>>>>> uses the ttm bo size to determine when to select top down
>>>>>>>> allocation but since the ttm bo is not initialized yet the
>>>>>>>> check is always false.
>>>>>>>>
>>>>>>>> Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
>>>>>>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>>>>>>> Cc: stable@vger.kernel.org
>>>>>>>
>>>>>>>
>>>>>>> And I was already wondering why the heck the BOs always made this
>>>>>>> ping/pong
>>>>>>> in memory after creation.
>>>>>>>
>>>>>>> Patch is Reviewed-by: Christian König <christian.koenig@amd.com>
>>>>>>
>>>>>> And fixing that promptly broke VCE due to vram location requirements.
>>>>>> Updated patch attached.  Thoughts?
>>>>>
>>>>> And one more take to make things a bit more explicit for static kernel
>>>>> driver allocations.
>>>>
>>>> struct ttm_place::lpfn is honoured even with TTM_PL_FLAG_TOPDOWN, so
>>>> latter should work with RADEON_GEM_CPU_ACCESS. It sounds like the
>>>> problem is really that some BOs are expected to be within a certain
>>>> range from the beginning of VRAM, but lpfn isn't set accordingly. It
>>>> would be better to fix that by setting lpfn directly than indirectly via
>>>> RADEON_GEM_CPU_ACCESS.
>>>
>>>
>>> Yeah, agree. We should probably try to find the root cause of this instead.
>>>
>>> As far as I know VCE has no documented limitation on where buffers are
>>> placed (unlike UVD). So this is a bit strange. Are you sure that it isn't
>>> UVD which breaks here?
>>
>> It's definitely VCE, I don't know why UVD didn't have a problem.  I
>> considered using pin_restricted to make sure it got pinned in the CPU
>> visible region, but that had two problems: 1. it would end up getting
>> migrated when pinned,
>
> Maybe something like radeon_uvd_force_into_uvd_segment() is needed for
> VCE as well?
>
>
>> 2. it would end up at the top of the restricted
>> region since the top down flag is set which would end up fragmenting
>> vram.
>
> If that's an issue (which outweighs the supposed benefit of
> TTM_PL_FLAG_TOPDOWN), then again the proper solution would be not to set
> TTM_PL_FLAG_TOPDOWN when rbo->placements[i].lpfn != 0 and smaller than
> the whole available region, instead of checking for VRAM and
> RADEON_GEM_CPU_ACCESS.
>

How about something like the attached patch?  I'm not really sure
about the restrictions for the UVD and VCE fw and stack/heap buffers,
but this seems to work.  It seems like the current UVD/VCE code works
by accident since the check for TOPDOWN fails.

Alex

[-- Attachment #2: 0001-drm-radeon-handle-pfn-restrictions-and-TOPDOWN-in-ra.patch --]
[-- Type: text/x-patch, Size: 26208 bytes --]

From 304963717d0ad761fc860928c8b08df297635668 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 11 Mar 2015 11:27:26 -0400
Subject: [PATCH] drm/radeon: handle pfn restrictions and TOPDOWN in
 radeon_bo_create()

Explicitly set the pfn restrictions on bo_create. Previously
we were relying on bottom up behavior in a number of places.
Make it explicit.

Also pass the size explicitly since radeon_bo_create() calls
radeon_ttm_placement_from_domain() before ttm_bo_init() is called.
radeon_ttm_placement_from_domain() uses the ttm bo size to
determine when to select top down allocation but since the ttm bo
is not initialized yet the check is always false.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/cik.c              |  4 +-
 drivers/gpu/drm/radeon/evergreen.c        |  6 +--
 drivers/gpu/drm/radeon/r600.c             |  4 +-
 drivers/gpu/drm/radeon/radeon.h           |  3 +-
 drivers/gpu/drm/radeon/radeon_benchmark.c |  6 ++-
 drivers/gpu/drm/radeon/radeon_device.c    |  2 +-
 drivers/gpu/drm/radeon/radeon_gart.c      |  1 +
 drivers/gpu/drm/radeon/radeon_gem.c       |  5 +-
 drivers/gpu/drm/radeon/radeon_kfd.c       |  2 +-
 drivers/gpu/drm/radeon/radeon_mn.c        |  5 +-
 drivers/gpu/drm/radeon/radeon_object.c    | 86 ++++++++++++++++---------------
 drivers/gpu/drm/radeon/radeon_object.h    |  3 +-
 drivers/gpu/drm/radeon/radeon_prime.c     |  2 +-
 drivers/gpu/drm/radeon/radeon_ring.c      |  2 +-
 drivers/gpu/drm/radeon/radeon_sa.c        |  2 +-
 drivers/gpu/drm/radeon/radeon_test.c      |  4 +-
 drivers/gpu/drm/radeon/radeon_ttm.c       | 14 +++--
 drivers/gpu/drm/radeon/radeon_uvd.c       |  9 +---
 drivers/gpu/drm/radeon/radeon_vce.c       |  4 +-
 drivers/gpu/drm/radeon/radeon_vm.c        |  4 +-
 20 files changed, 87 insertions(+), 81 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 28faea9..8d70176 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -4756,7 +4756,7 @@ static int cik_mec_init(struct radeon_device *rdev)
 		r = radeon_bo_create(rdev,
 				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
 				     PAGE_SIZE, true,
-				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
+				     RADEON_GEM_DOMAIN_GTT, 0, 0, 0, NULL, NULL,
 				     &rdev->mec.hpd_eop_obj);
 		if (r) {
 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
@@ -4922,7 +4922,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev)
 			r = radeon_bo_create(rdev,
 					     sizeof(struct bonaire_mqd),
 					     PAGE_SIZE, true,
-					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
+					     RADEON_GEM_DOMAIN_GTT, 0, 0, 0, NULL,
 					     NULL, &rdev->ring[idx].mqd_obj);
 			if (r) {
 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index f848acf..02b5d2e 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -4054,7 +4054,7 @@ int sumo_rlc_init(struct radeon_device *rdev)
 		/* save restore block */
 		if (rdev->rlc.save_restore_obj == NULL) {
 			r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
-					     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
+					     RADEON_GEM_DOMAIN_VRAM, 0, 0, 0, NULL,
 					     NULL, &rdev->rlc.save_restore_obj);
 			if (r) {
 				dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
@@ -4133,7 +4133,7 @@ int sumo_rlc_init(struct radeon_device *rdev)
 
 		if (rdev->rlc.clear_state_obj == NULL) {
 			r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
-					     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
+					     RADEON_GEM_DOMAIN_VRAM, 0, 0, 0, NULL,
 					     NULL, &rdev->rlc.clear_state_obj);
 			if (r) {
 				dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
@@ -4210,7 +4210,7 @@ int sumo_rlc_init(struct radeon_device *rdev)
 		if (rdev->rlc.cp_table_obj == NULL) {
 			r = radeon_bo_create(rdev, rdev->rlc.cp_table_size,
 					     PAGE_SIZE, true,
-					     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
+					     RADEON_GEM_DOMAIN_VRAM, 0, 0, 0, NULL,
 					     NULL, &rdev->rlc.cp_table_obj);
 			if (r) {
 				dev_warn(rdev->dev, "(%d) create RLC cp table bo failed\n", r);
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 8f6d862..7249620 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1457,7 +1457,7 @@ int r600_vram_scratch_init(struct radeon_device *rdev)
 	if (rdev->vram_scratch.robj == NULL) {
 		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE,
 				     PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
-				     0, NULL, NULL, &rdev->vram_scratch.robj);
+				     0, 0, 0, NULL, NULL, &rdev->vram_scratch.robj);
 		if (r) {
 			return r;
 		}
@@ -3390,7 +3390,7 @@ int r600_ih_ring_alloc(struct radeon_device *rdev)
 	if (rdev->ih.ring_obj == NULL) {
 		r = radeon_bo_create(rdev, rdev->ih.ring_size,
 				     PAGE_SIZE, true,
-				     RADEON_GEM_DOMAIN_GTT, 0,
+				     RADEON_GEM_DOMAIN_GTT, 0, 0, 0,
 				     NULL, NULL, &rdev->ih.ring_obj);
 		if (r) {
 			DRM_ERROR("radeon: failed to create ih ring buffer (%d).\n", r);
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 35ab65d..809fc49 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2980,7 +2980,8 @@ extern void radeon_surface_init(struct radeon_device *rdev);
 extern int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data);
 extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enable);
 extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable);
-extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain);
+extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
+					     u64 size, unsigned fpfn, unsigned lpfn);
 extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo);
 extern int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 				     uint32_t flags);
diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c
index 87d5fb2..1caa887 100644
--- a/drivers/gpu/drm/radeon/radeon_benchmark.c
+++ b/drivers/gpu/drm/radeon/radeon_benchmark.c
@@ -94,7 +94,8 @@ static void radeon_benchmark_move(struct radeon_device *rdev, unsigned size,
 	int time;
 
 	n = RADEON_BENCHMARK_ITERATIONS;
-	r = radeon_bo_create(rdev, size, PAGE_SIZE, true, sdomain, 0, NULL, NULL, &sobj);
+	r = radeon_bo_create(rdev, size, PAGE_SIZE, true, sdomain, 0, 0, 0,
+			     NULL, NULL, &sobj);
 	if (r) {
 		goto out_cleanup;
 	}
@@ -106,7 +107,8 @@ static void radeon_benchmark_move(struct radeon_device *rdev, unsigned size,
 	if (r) {
 		goto out_cleanup;
 	}
-	r = radeon_bo_create(rdev, size, PAGE_SIZE, true, ddomain, 0, NULL, NULL, &dobj);
+	r = radeon_bo_create(rdev, size, PAGE_SIZE, true, ddomain, 0, 0, 0,
+			     NULL, NULL, &dobj);
 	if (r) {
 		goto out_cleanup;
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index b7ca4c5..d1cebfe 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -465,7 +465,7 @@ int radeon_wb_init(struct radeon_device *rdev)
 
 	if (rdev->wb.wb_obj == NULL) {
 		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
-				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
+				     RADEON_GEM_DOMAIN_GTT, 0, 0, 0, NULL, NULL,
 				     &rdev->wb.wb_obj);
 		if (r) {
 			dev_warn(rdev->dev, "(%d) create WB bo failed\n", r);
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 5450fa9..1513c1c 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -128,6 +128,7 @@ int radeon_gart_table_vram_alloc(struct radeon_device *rdev)
 	if (rdev->gart.robj == NULL) {
 		r = radeon_bo_create(rdev, rdev->gart.table_size,
 				     PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
+				     0, (rdev->mc.visible_vram_size >> PAGE_SHIFT),
 				     0, NULL, NULL, &rdev->gart.robj);
 		if (r) {
 			return r;
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index ac3c131..58f4556 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -67,7 +67,7 @@ int radeon_gem_object_create(struct radeon_device *rdev, unsigned long size,
 
 retry:
 	r = radeon_bo_create(rdev, size, alignment, kernel, initial_domain,
-			     flags, NULL, NULL, &robj);
+			     0, 0, flags, NULL, NULL, &robj);
 	if (r) {
 		if (r != -ERESTARTSYS) {
 			if (initial_domain == RADEON_GEM_DOMAIN_VRAM) {
@@ -337,7 +337,8 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data,
 			goto release_object;
 		}
 
-		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT);
+		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT, bo->tbo.mem.size,
+						 0, 0);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
 		radeon_bo_unreserve(bo);
 		up_read(&current->mm->mmap_sem);
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
index 061eaa9..e0caa6c 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -212,7 +212,7 @@ static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 		return -ENOMEM;
 
 	r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT,
-				RADEON_GEM_GTT_WC, NULL, NULL, &(*mem)->bo);
+			     0, 0, RADEON_GEM_GTT_WC, NULL, NULL, &(*mem)->bo);
 	if (r) {
 		dev_err(rdev->dev,
 			"failed to allocate BO for amdkfd (%d)\n", r);
diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
index a69bd44..7a8b0e5 100644
--- a/drivers/gpu/drm/radeon/radeon_mn.c
+++ b/drivers/gpu/drm/radeon/radeon_mn.c
@@ -141,14 +141,15 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
 				DRM_ERROR("(%d) failed to wait for user bo\n", r);
 		}
 
-		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU);
+		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU, bo->tbo.mem.size,
+						 0, 0);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 		if (r)
 			DRM_ERROR("(%d) failed to validate user bo\n", r);
 
 		radeon_bo_unreserve(bo);
 	}
-	
+
 	mutex_unlock(&rmn->lock);
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 43e0994..aa2f815 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -93,7 +93,8 @@ bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
 	return false;
 }
 
-void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
+void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
+				      u64 size, unsigned fpfn, unsigned lpfn)
 {
 	u32 c = 0, i;
 
@@ -105,14 +106,17 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
 		 */
 		if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
 		    rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) {
-			rbo->placements[c].fpfn =
-				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
+			if (fpfn > (rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT))
+				rbo->placements[c].fpfn = fpfn;
+			else
+				rbo->placements[c].fpfn =
+					rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
 			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
 						     TTM_PL_FLAG_UNCACHED |
 						     TTM_PL_FLAG_VRAM;
 		}
 
-		rbo->placements[c].fpfn = 0;
+		rbo->placements[c].fpfn = fpfn;
 		rbo->placements[c++].flags = TTM_PL_FLAG_WC |
 					     TTM_PL_FLAG_UNCACHED |
 					     TTM_PL_FLAG_VRAM;
@@ -120,18 +124,17 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
 
 	if (domain & RADEON_GEM_DOMAIN_GTT) {
 		if (rbo->flags & RADEON_GEM_GTT_UC) {
-			rbo->placements[c].fpfn = 0;
+			rbo->placements[c].fpfn = fpfn;
 			rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
 				TTM_PL_FLAG_TT;
-
 		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
 			   (rbo->rdev->flags & RADEON_IS_AGP)) {
-			rbo->placements[c].fpfn = 0;
+			rbo->placements[c].fpfn = fpfn;
 			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
 				TTM_PL_FLAG_UNCACHED |
 				TTM_PL_FLAG_TT;
 		} else {
-			rbo->placements[c].fpfn = 0;
+			rbo->placements[c].fpfn = fpfn;
 			rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
 						     TTM_PL_FLAG_TT;
 		}
@@ -139,24 +142,24 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
 
 	if (domain & RADEON_GEM_DOMAIN_CPU) {
 		if (rbo->flags & RADEON_GEM_GTT_UC) {
-			rbo->placements[c].fpfn = 0;
+			rbo->placements[c].fpfn = fpfn;
 			rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
 				TTM_PL_FLAG_SYSTEM;
 
 		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
 		    rbo->rdev->flags & RADEON_IS_AGP) {
-			rbo->placements[c].fpfn = 0;
+			rbo->placements[c].fpfn = fpfn;
 			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
 				TTM_PL_FLAG_UNCACHED |
 				TTM_PL_FLAG_SYSTEM;
 		} else {
-			rbo->placements[c].fpfn = 0;
+			rbo->placements[c].fpfn = fpfn;
 			rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
 						     TTM_PL_FLAG_SYSTEM;
 		}
 	}
 	if (!c) {
-		rbo->placements[c].fpfn = 0;
+		rbo->placements[c].fpfn = fpfn;
 		rbo->placements[c++].flags = TTM_PL_MASK_CACHING |
 					     TTM_PL_FLAG_SYSTEM;
 	}
@@ -171,7 +174,7 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
 			rbo->placements[i].lpfn =
 				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
 		else
-			rbo->placements[i].lpfn = 0;
+			rbo->placements[i].lpfn = lpfn;
 	}
 
 	/*
@@ -179,17 +182,18 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
 	 * improve fragmentation quality.
 	 * 512kb was measured as the most optimal number.
 	 */
-	if (rbo->tbo.mem.size > 512 * 1024) {
+	if (size > 512 * 1024) {
 		for (i = 0; i < c; i++) {
-			rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
+			if (rbo->placements[i].lpfn == 0)
+				rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
 		}
 	}
 }
 
 int radeon_bo_create(struct radeon_device *rdev,
 		     unsigned long size, int byte_align, bool kernel,
-		     u32 domain, u32 flags, struct sg_table *sg,
-		     struct reservation_object *resv,
+		     u32 domain, unsigned fpfn, unsigned lpfn, u32 flags,
+		     struct sg_table *sg, struct reservation_object *resv,
 		     struct radeon_bo **bo_ptr)
 {
 	struct radeon_bo *bo;
@@ -252,7 +256,7 @@ int radeon_bo_create(struct radeon_device *rdev,
 	bo->flags &= ~RADEON_GEM_GTT_WC;
 #endif
 
-	radeon_ttm_placement_from_domain(bo, domain);
+	radeon_ttm_placement_from_domain(bo, domain, size, fpfn, lpfn);
 	/* Kernel allocation are uninterruptible */
 	down_read(&rdev->pm.mclk_lock);
 	r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
@@ -328,6 +332,7 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
 			     u64 *gpu_addr)
 {
 	int r, i;
+	unsigned lpfn;
 
 	if (radeon_ttm_tt_has_userptr(bo->tbo.ttm))
 		return -EPERM;
@@ -350,19 +355,16 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
 
 		return 0;
 	}
-	radeon_ttm_placement_from_domain(bo, domain);
-	for (i = 0; i < bo->placement.num_placement; i++) {
-		/* force to pin into visible video ram */
-		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
-		    !(bo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
-		    (!max_offset || max_offset > bo->rdev->mc.visible_vram_size))
-			bo->placements[i].lpfn =
-				bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
-		else
-			bo->placements[i].lpfn = max_offset >> PAGE_SHIFT;
-
+	/* force to pin into visible video ram */
+	if ((domain == RADEON_GEM_DOMAIN_VRAM) &&
+	    !(bo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
+	    (!max_offset || max_offset > bo->rdev->mc.visible_vram_size))
+		lpfn = bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
+	else
+		lpfn = max_offset >> PAGE_SHIFT;
+	radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size, 0, lpfn);
+	for (i = 0; i < bo->placement.num_placement; i++)
 		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
-	}
 
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 	if (likely(r == 0)) {
@@ -557,9 +559,13 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
 			}
 
 		retry:
-			radeon_ttm_placement_from_domain(bo, domain);
-			if (ring == R600_RING_TYPE_UVD_INDEX)
+			if (ring == R600_RING_TYPE_UVD_INDEX) {
+				radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size,
+								 0, (256 * 1024 * 1024) >> PAGE_SHIFT);
 				radeon_uvd_force_into_uvd_segment(bo, allowed);
+			} else {
+				radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size, 0, 0);
+			}
 
 			initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
 			r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
@@ -784,7 +790,7 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 	struct radeon_device *rdev;
 	struct radeon_bo *rbo;
 	unsigned long offset, size, lpfn;
-	int i, r;
+	int r;
 
 	if (!radeon_ttm_bo_is_radeon_bo(bo))
 		return 0;
@@ -800,17 +806,13 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 		return 0;
 
 	/* hurrah the memory is not visible ! */
-	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
-	lpfn =	rdev->mc.visible_vram_size >> PAGE_SHIFT;
-	for (i = 0; i < rbo->placement.num_placement; i++) {
-		/* Force into visible VRAM */
-		if ((rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
-		    (!rbo->placements[i].lpfn || rbo->placements[i].lpfn > lpfn))
-			rbo->placements[i].lpfn = lpfn;
-	}
+	lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT;
+	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM,
+					 rbo->tbo.mem.size, 0, lpfn);
 	r = ttm_bo_validate(bo, &rbo->placement, false, false);
 	if (unlikely(r == -ENOMEM)) {
-		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
+		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT,
+						 rbo->tbo.mem.size, 0, 0);
 		return ttm_bo_validate(bo, &rbo->placement, false, false);
 	} else if (unlikely(r != 0)) {
 		return r;
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index d8d295e..806dc5c 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -124,7 +124,8 @@ extern int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
 
 extern int radeon_bo_create(struct radeon_device *rdev,
 			    unsigned long size, int byte_align,
-			    bool kernel, u32 domain, u32 flags,
+			    bool kernel, u32 domain,
+			    unsigned fpfn, unsigned lpfn, u32 flags,
 			    struct sg_table *sg,
 			    struct reservation_object *resv,
 			    struct radeon_bo **bo_ptr);
diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c
index f3609c9..d808d8f 100644
--- a/drivers/gpu/drm/radeon/radeon_prime.c
+++ b/drivers/gpu/drm/radeon/radeon_prime.c
@@ -68,7 +68,7 @@ struct drm_gem_object *radeon_gem_prime_import_sg_table(struct drm_device *dev,
 
 	ww_mutex_lock(&resv->lock, NULL);
 	ret = radeon_bo_create(rdev, attach->dmabuf->size, PAGE_SIZE, false,
-			       RADEON_GEM_DOMAIN_GTT, 0, sg, resv, &bo);
+			       RADEON_GEM_DOMAIN_GTT, 0, 0, 0, sg, resv, &bo);
 	ww_mutex_unlock(&resv->lock);
 	if (ret)
 		return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 2456f69..d9f0150 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -383,7 +383,7 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig
 	/* Allocate ring buffer */
 	if (ring->ring_obj == NULL) {
 		r = radeon_bo_create(rdev, ring->ring_size, PAGE_SIZE, true,
-				     RADEON_GEM_DOMAIN_GTT, 0, NULL,
+				     RADEON_GEM_DOMAIN_GTT, 0, 0, 0, NULL,
 				     NULL, &ring->ring_obj);
 		if (r) {
 			dev_err(rdev->dev, "(%d) ring create failed\n", r);
diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c
index c507896..3013fb18 100644
--- a/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/drivers/gpu/drm/radeon/radeon_sa.c
@@ -65,7 +65,7 @@ int radeon_sa_bo_manager_init(struct radeon_device *rdev,
 	}
 
 	r = radeon_bo_create(rdev, size, align, true,
-			     domain, flags, NULL, NULL, &sa_manager->bo);
+			     domain, 0, 0, flags, NULL, NULL, &sa_manager->bo);
 	if (r) {
 		dev_err(rdev->dev, "(%d) failed to allocate bo for manager\n", r);
 		return r;
diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c
index 79181816..073d6d2 100644
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -67,7 +67,7 @@ static void radeon_do_test_moves(struct radeon_device *rdev, int flag)
 	}
 
 	r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
-			     0, NULL, NULL, &vram_obj);
+			     0, 0, 0, NULL, NULL, &vram_obj);
 	if (r) {
 		DRM_ERROR("Failed to create VRAM object\n");
 		goto out_cleanup;
@@ -87,7 +87,7 @@ static void radeon_do_test_moves(struct radeon_device *rdev, int flag)
 		struct radeon_fence *fence = NULL;
 
 		r = radeon_bo_create(rdev, size, PAGE_SIZE, true,
-				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
+				     RADEON_GEM_DOMAIN_GTT, 0, 0, 0, NULL, NULL,
 				     gtt_obj + i);
 		if (r) {
 			DRM_ERROR("Failed to create GTT object %d\n", i);
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index d02aa1d..befb590 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -197,7 +197,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 	switch (bo->mem.mem_type) {
 	case TTM_PL_VRAM:
 		if (rbo->rdev->ring[radeon_copy_ring_index(rbo->rdev)].ready == false)
-			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
+			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU,
+							 rbo->tbo.mem.size, 0, 0);
 		else if (rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size &&
 			 bo->mem.start < (rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT)) {
 			unsigned fpfn = rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
@@ -209,7 +210,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 			 * BOs to be evicted from VRAM
 			 */
 			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM |
-							 RADEON_GEM_DOMAIN_GTT);
+							 RADEON_GEM_DOMAIN_GTT,
+							 rbo->tbo.mem.size, 0, 0);
 			rbo->placement.num_busy_placement = 0;
 			for (i = 0; i < rbo->placement.num_placement; i++) {
 				if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) {
@@ -222,11 +224,13 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 				}
 			}
 		} else
-			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
+			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT,
+							 rbo->tbo.mem.size, 0, 0);
 		break;
 	case TTM_PL_TT:
 	default:
-		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
+		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU,
+						 rbo->tbo.mem.size, 0, 0);
 	}
 	*placement = rbo->placement;
 }
@@ -888,7 +892,7 @@ int radeon_ttm_init(struct radeon_device *rdev)
 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
 
 	r = radeon_bo_create(rdev, 256 * 1024, PAGE_SIZE, true,
-			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
+			     RADEON_GEM_DOMAIN_VRAM, 0, (256 * 1024) >> PAGE_SHIFT, 0, NULL,
 			     NULL, &rdev->stollen_vga_memory);
 	if (r) {
 		return r;
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index c10b2ae..6261463 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -141,7 +141,7 @@ int radeon_uvd_init(struct radeon_device *rdev)
 		  RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE +
 		  RADEON_GPU_PAGE_SIZE;
 	r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
-			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
+			     RADEON_GEM_DOMAIN_VRAM, 0, (256 * 1024 * 1024) >> PAGE_SHIFT, 0, NULL,
 			     NULL, &rdev->uvd.vcpu_bo);
 	if (r) {
 		dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
@@ -259,13 +259,6 @@ int radeon_uvd_resume(struct radeon_device *rdev)
 void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo,
 				       uint32_t allowed_domains)
 {
-	int i;
-
-	for (i = 0; i < rbo->placement.num_placement; ++i) {
-		rbo->placements[i].fpfn = 0 >> PAGE_SHIFT;
-		rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
-	}
-
 	/* If it must be in VRAM it must be in the first segment as well */
 	if (allowed_domains == RADEON_GEM_DOMAIN_VRAM)
 		return;
diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c
index 976fe43..c35f0a9 100644
--- a/drivers/gpu/drm/radeon/radeon_vce.c
+++ b/drivers/gpu/drm/radeon/radeon_vce.c
@@ -126,8 +126,8 @@ int radeon_vce_init(struct radeon_device *rdev)
 	size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) +
 	       RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE;
 	r = radeon_bo_create(rdev, size, PAGE_SIZE, true,
-			     RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL,
-			     &rdev->vce.vcpu_bo);
+			     RADEON_GEM_DOMAIN_VRAM, 0, (256 * 1024 * 1024) >> PAGE_SHIFT, 0,
+			     NULL, NULL, &rdev->vce.vcpu_bo);
 	if (r) {
 		dev_err(rdev->dev, "(%d) failed to allocate VCE bo\n", r);
 		return r;
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index 2a5a4a9..e845d4c 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -542,7 +542,7 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
 
 		r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8,
 				     RADEON_GPU_PAGE_SIZE, true,
-				     RADEON_GEM_DOMAIN_VRAM, 0,
+				     RADEON_GEM_DOMAIN_VRAM, 0, 0, 0,
 				     NULL, NULL, &pt);
 		if (r)
 			return r;
@@ -1186,7 +1186,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 	}
 
 	r = radeon_bo_create(rdev, pd_size, align, true,
-			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
+			     RADEON_GEM_DOMAIN_VRAM, 0, 0, 0, NULL,
 			     NULL, &vm->page_directory);
 	if (r)
 		return r;
-- 
1.8.3.1


[-- Attachment #3: Type: text/plain, Size: 159 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create
  2015-03-16 22:32               ` Alex Deucher
@ 2015-03-17  3:48                 ` Michel Dänzer
  2015-03-17 15:19                   ` Alex Deucher
  2015-03-17 11:40                 ` Christian König
  1 sibling, 1 reply; 23+ messages in thread
From: Michel Dänzer @ 2015-03-17  3:48 UTC (permalink / raw)
  To: Alex Deucher; +Cc: Maling list - DRI developers

On 17.03.2015 07:32, Alex Deucher wrote:
> On Thu, Mar 12, 2015 at 10:55 PM, Michel Dänzer <michel@daenzer.net> wrote:
>> On 12.03.2015 22:09, Alex Deucher wrote:
>>> On Thu, Mar 12, 2015 at 5:23 AM, Christian König
>>> <deathsimple@vodafone.de> wrote:
>>>> On 12.03.2015 10:02, Michel Dänzer wrote:
>>>>>
>>>>> On 12.03.2015 06:14, Alex Deucher wrote:
>>>>>>
>>>>>> On Wed, Mar 11, 2015 at 4:51 PM, Alex Deucher <alexdeucher@gmail.com>
>>>>>> wrote:
>>>>>>>
>>>>>>> On Wed, Mar 11, 2015 at 2:21 PM, Christian König
>>>>>>> <deathsimple@vodafone.de> wrote:
>>>>>>>>
>>>>>>>> On 11.03.2015 16:44, Alex Deucher wrote:
>>>>>>>>>
>>>>>>>>> radeon_bo_create() calls radeon_ttm_placement_from_domain()
>>>>>>>>> before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
>>>>>>>>> uses the ttm bo size to determine when to select top down
>>>>>>>>> allocation but since the ttm bo is not initialized yet the
>>>>>>>>> check is always false.
>>>>>>>>>
>>>>>>>>> Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
>>>>>>>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>>>>>>>> Cc: stable@vger.kernel.org
>>>>>>>>
>>>>>>>>
>>>>>>>> And I was already wondering why the heck the BOs always made this
>>>>>>>> ping/pong
>>>>>>>> in memory after creation.
>>>>>>>>
>>>>>>>> Patch is Reviewed-by: Christian König <christian.koenig@amd.com>
>>>>>>>
>>>>>>> And fixing that promptly broke VCE due to vram location requirements.
>>>>>>> Updated patch attached.  Thoughts?
>>>>>>
>>>>>> And one more take to make things a bit more explicit for static kernel
>>>>>> driver allocations.
>>>>>
>>>>> struct ttm_place::lpfn is honoured even with TTM_PL_FLAG_TOPDOWN, so
>>>>> latter should work with RADEON_GEM_CPU_ACCESS. It sounds like the
>>>>> problem is really that some BOs are expected to be within a certain
>>>>> range from the beginning of VRAM, but lpfn isn't set accordingly. It
>>>>> would be better to fix that by setting lpfn directly than indirectly via
>>>>> RADEON_GEM_CPU_ACCESS.
>>>>
>>>>
>>>> Yeah, agree. We should probably try to find the root cause of this instead.
>>>>
>>>> As far as I know VCE has no documented limitation on where buffers are
>>>> placed (unlike UVD). So this is a bit strange. Are you sure that it isn't
>>>> UVD which breaks here?
>>>
>>> It's definitely VCE, I don't know why UVD didn't have a problem.  I
>>> considered using pin_restricted to make sure it got pinned in the CPU
>>> visible region, but that had two problems: 1. it would end up getting
>>> migrated when pinned,
>>
>> Maybe something like radeon_uvd_force_into_uvd_segment() is needed for
>> VCE as well?
>>
>>
>>> 2. it would end up at the top of the restricted
>>> region since the top down flag is set which would end up fragmenting
>>> vram.
>>
>> If that's an issue (which outweighs the supposed benefit of
>> TTM_PL_FLAG_TOPDOWN), then again the proper solution would be not to set
>> TTM_PL_FLAG_TOPDOWN when rbo->placements[i].lpfn != 0 and smaller than
>> the whole available region, instead of checking for VRAM and
>> RADEON_GEM_CPU_ACCESS.
>>
> 
> How about something like the attached patch?  I'm not really sure
> about the restrictions for the UVD and VCE fw and stack/heap buffers,
> but this seems to work.  It seems like the current UVD/VCE code works
> by accident since the check for TOPDOWN fails.

This patch is getting a bit messy, mixing several logically separate
changes. Can you split it up accordingly? E.g. one patch just adding the
new fpfn and lpfn function parameters but passing 0 for them (so no
functional change), then one or several patches with the corresponding
functional changes, and finally one patch adding the new size parameter
(and thus making TTM_PL_FLAG_TOPDOWN actually used for newly allocated
BOs). I think that would help for reviewing and generally understanding
the changes.


> @@ -105,14 +106,17 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
>  		 */
>  		if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
>  		    rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) {
> -			rbo->placements[c].fpfn =
> -				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
> +			if (fpfn > (rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT))
> +				rbo->placements[c].fpfn = fpfn;
> +			else
> +				rbo->placements[c].fpfn =
> +					rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
>  			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
>  						     TTM_PL_FLAG_UNCACHED |
>  						     TTM_PL_FLAG_VRAM;
>  		}

If (fpfn >= rbo->rdev->mc.visible_vram_size), this whole block can be
skipped, since the next placement will be identical.

OTOH, fpfn is currently always 0 anyway, so maybe it's better not to add
that parameter in the first place.


Other than that, looks good to me.


-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create
  2015-03-16 22:32               ` Alex Deucher
  2015-03-17  3:48                 ` Michel Dänzer
@ 2015-03-17 11:40                 ` Christian König
  2015-03-17 13:49                   ` Alex Deucher
  1 sibling, 1 reply; 23+ messages in thread
From: Christian König @ 2015-03-17 11:40 UTC (permalink / raw)
  To: Alex Deucher, Michel Dänzer; +Cc: Maling list - DRI developers

On 16.03.2015 23:32, Alex Deucher wrote:
> On Thu, Mar 12, 2015 at 10:55 PM, Michel Dänzer <michel@daenzer.net> wrote:
>> On 12.03.2015 22:09, Alex Deucher wrote:
>>> On Thu, Mar 12, 2015 at 5:23 AM, Christian König
>>> <deathsimple@vodafone.de> wrote:
>>>> On 12.03.2015 10:02, Michel Dänzer wrote:
>>>>> On 12.03.2015 06:14, Alex Deucher wrote:
>>>>>> On Wed, Mar 11, 2015 at 4:51 PM, Alex Deucher <alexdeucher@gmail.com>
>>>>>> wrote:
>>>>>>> On Wed, Mar 11, 2015 at 2:21 PM, Christian König
>>>>>>> <deathsimple@vodafone.de> wrote:
>>>>>>>> On 11.03.2015 16:44, Alex Deucher wrote:
>>>>>>>>> radeon_bo_create() calls radeon_ttm_placement_from_domain()
>>>>>>>>> before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
>>>>>>>>> uses the ttm bo size to determine when to select top down
>>>>>>>>> allocation but since the ttm bo is not initialized yet the
>>>>>>>>> check is always false.
>>>>>>>>>
>>>>>>>>> Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
>>>>>>>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>>>>>>>> Cc: stable@vger.kernel.org
>>>>>>>>
>>>>>>>> And I was already wondering why the heck the BOs always made this
>>>>>>>> ping/pong
>>>>>>>> in memory after creation.
>>>>>>>>
>>>>>>>> Patch is Reviewed-by: Christian König <christian.koenig@amd.com>
>>>>>>> And fixing that promptly broke VCE due to vram location requirements.
>>>>>>> Updated patch attached.  Thoughts?
>>>>>> And one more take to make things a bit more explicit for static kernel
>>>>>> driver allocations.
>>>>> struct ttm_place::lpfn is honoured even with TTM_PL_FLAG_TOPDOWN, so
>>>>> latter should work with RADEON_GEM_CPU_ACCESS. It sounds like the
>>>>> problem is really that some BOs are expected to be within a certain
>>>>> range from the beginning of VRAM, but lpfn isn't set accordingly. It
>>>>> would be better to fix that by setting lpfn directly than indirectly via
>>>>> RADEON_GEM_CPU_ACCESS.
>>>>
>>>> Yeah, agree. We should probably try to find the root cause of this instead.
>>>>
>>>> As far as I know VCE has no documented limitation on where buffers are
>>>> placed (unlike UVD). So this is a bit strange. Are you sure that it isn't
>>>> UVD which breaks here?
>>> It's definitely VCE, I don't know why UVD didn't have a problem.  I
>>> considered using pin_restricted to make sure it got pinned in the CPU
>>> visible region, but that had two problems: 1. it would end up getting
>>> migrated when pinned,
>> Maybe something like radeon_uvd_force_into_uvd_segment() is needed for
>> VCE as well?
>>
>>
>>> 2. it would end up at the top of the restricted
>>> region since the top down flag is set which would end up fragmenting
>>> vram.
>> If that's an issue (which outweighs the supposed benefit of
>> TTM_PL_FLAG_TOPDOWN), then again the proper solution would be not to set
>> TTM_PL_FLAG_TOPDOWN when rbo->placements[i].lpfn != 0 and smaller than
>> the whole available region, instead of checking for VRAM and
>> RADEON_GEM_CPU_ACCESS.
>>
> How about something like the attached patch?  I'm not really sure
> about the restrictions for the UVD and VCE fw and stack/heap buffers,
> but this seems to work.  It seems like the current UVD/VCE code works
> by accident since the check for TOPDOWN fails.

Well, I would still rather find the bug in the VCE code, because 
there shouldn't be a hardware limitation like this, and that has already 
hurt us with UVD.

What system do you use to test this? How much address space do you have 
for VRAM/GART? Is it possible that we get over a 4GB boundary with that?

Regards,
Christian.

>
> Alex

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create
  2015-03-17 11:40                 ` Christian König
@ 2015-03-17 13:49                   ` Alex Deucher
  0 siblings, 0 replies; 23+ messages in thread
From: Alex Deucher @ 2015-03-17 13:49 UTC (permalink / raw)
  To: Christian König; +Cc: Michel Dänzer, Maling list - DRI developers

On Tue, Mar 17, 2015 at 7:40 AM, Christian König
<deathsimple@vodafone.de> wrote:
> On 16.03.2015 23:32, Alex Deucher wrote:
>>
>> On Thu, Mar 12, 2015 at 10:55 PM, Michel Dänzer <michel@daenzer.net>
>> wrote:
>>>
>>> On 12.03.2015 22:09, Alex Deucher wrote:
>>>>
>>>> On Thu, Mar 12, 2015 at 5:23 AM, Christian König
>>>> <deathsimple@vodafone.de> wrote:
>>>>>
>>>>> On 12.03.2015 10:02, Michel Dänzer wrote:
>>>>>>
>>>>>> On 12.03.2015 06:14, Alex Deucher wrote:
>>>>>>>
>>>>>>> On Wed, Mar 11, 2015 at 4:51 PM, Alex Deucher <alexdeucher@gmail.com>
>>>>>>> wrote:
>>>>>>>>
>>>>>>>> On Wed, Mar 11, 2015 at 2:21 PM, Christian König
>>>>>>>> <deathsimple@vodafone.de> wrote:
>>>>>>>>>
>>>>>>>>> On 11.03.2015 16:44, Alex Deucher wrote:
>>>>>>>>>>
>>>>>>>>>> radeon_bo_create() calls radeon_ttm_placement_from_domain()
>>>>>>>>>> before ttm_bo_init() is called.
>>>>>>>>>> radeon_ttm_placement_from_domain()
>>>>>>>>>> uses the ttm bo size to determine when to select top down
>>>>>>>>>> allocation but since the ttm bo is not initialized yet the
>>>>>>>>>> check is always false.
>>>>>>>>>>
>>>>>>>>>> Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
>>>>>>>>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>>>>>>>>> Cc: stable@vger.kernel.org
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> And I was already wondering why the heck the BOs always made this
>>>>>>>>> ping/pong
>>>>>>>>> in memory after creation.
>>>>>>>>>
>>>>>>>>> Patch is Reviewed-by: Christian König <christian.koenig@amd.com>
>>>>>>>>
>>>>>>>> And fixing that promptly broke VCE due to vram location
>>>>>>>> requirements.
>>>>>>>> Updated patch attached.  Thoughts?
>>>>>>>
>>>>>>> And one more take to make things a bit more explicit for static
>>>>>>> kernel
>>>>>>> driver allocations.
>>>>>>
>>>>>> struct ttm_place::lpfn is honoured even with TTM_PL_FLAG_TOPDOWN, so
>>>>>> latter should work with RADEON_GEM_CPU_ACCESS. It sounds like the
>>>>>> problem is really that some BOs are expected to be within a certain
>>>>>> range from the beginning of VRAM, but lpfn isn't set accordingly. It
>>>>>> would be better to fix that by setting lpfn directly than indirectly
>>>>>> via
>>>>>> RADEON_GEM_CPU_ACCESS.
>>>>>
>>>>>
>>>>> Yeah, agree. We should probably try to find the root cause of this
>>>>> instead.
>>>>>
>>>>> As far as I know VCE has no documented limitation on where buffers are
>>>>> placed (unlike UVD). So this is a bit strange. Are you sure that it
>>>>> isn't
>>>>> UVD which breaks here?
>>>>
>>>> It's definitely VCE, I don't know why UVD didn't have a problem.  I
>>>> considered using pin_restricted to make sure it got pinned in the CPU
>>>> visible region, but that had two problems: 1. it would end up getting
>>>> migrated when pinned,
>>>
>>> Maybe something like radeon_uvd_force_into_uvd_segment() is needed for
>>> VCE as well?
>>>
>>>
>>>> 2. it would end up at the top of the restricted
>>>> region since the top down flag is set which would end up fragmenting
>>>> vram.
>>>
>>> If that's an issue (which outweighs the supposed benefit of
>>> TTM_PL_FLAG_TOPDOWN), then again the proper solution would be not to set
>>> TTM_PL_FLAG_TOPDOWN when rbo->placements[i].lpfn != 0 and smaller than
>>> the whole available region, instead of checking for VRAM and
>>> RADEON_GEM_CPU_ACCESS.
>>>
>> How about something like the attached patch?  I'm not really sure
>> about the restrictions for the UVD and VCE fw and stack/heap buffers,
>> but this seems to work.  It seems like the current UVD/VCE code works
>> by accident since the check for TOPDOWN fails.
>
>
> Well, I would still like to rather find the bug in the VCE code cause there
> shouldn't be a hardware limitation like this and that really hurt us already
> with UVD.
>
> What system do you use to test this? How much address space do you have for
> VRAM/GART? Is it possible that we get over a 4GB boundary with that?


I was testing on a KV with 15GB of ram, 1 GB of vram carve out, 1 GB
of gart.  If I just apply the original patch to fix TOPDOWN, VCE
breaks.  ecpu failures.  If I limit the vce fw/stack/heap buffer to
the first 256 MB, all is well.  UVD seems to work fine either way.

Alex

>
> Regards,
> Christian.
>
>>
>> Alex
>
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create
  2015-03-17  3:48                 ` Michel Dänzer
@ 2015-03-17 15:19                   ` Alex Deucher
  2015-03-17 15:43                     ` Christian König
  0 siblings, 1 reply; 23+ messages in thread
From: Alex Deucher @ 2015-03-17 15:19 UTC (permalink / raw)
  To: Michel Dänzer; +Cc: Maling list - DRI developers

[-- Attachment #1: Type: text/plain, Size: 5511 bytes --]

On Mon, Mar 16, 2015 at 11:48 PM, Michel Dänzer <michel@daenzer.net> wrote:
> On 17.03.2015 07:32, Alex Deucher wrote:
>> On Thu, Mar 12, 2015 at 10:55 PM, Michel Dänzer <michel@daenzer.net> wrote:
>>> On 12.03.2015 22:09, Alex Deucher wrote:
>>>> On Thu, Mar 12, 2015 at 5:23 AM, Christian König
>>>> <deathsimple@vodafone.de> wrote:
>>>>> On 12.03.2015 10:02, Michel Dänzer wrote:
>>>>>>
>>>>>> On 12.03.2015 06:14, Alex Deucher wrote:
>>>>>>>
>>>>>>> On Wed, Mar 11, 2015 at 4:51 PM, Alex Deucher <alexdeucher@gmail.com>
>>>>>>> wrote:
>>>>>>>>
>>>>>>>> On Wed, Mar 11, 2015 at 2:21 PM, Christian König
>>>>>>>> <deathsimple@vodafone.de> wrote:
>>>>>>>>>
>>>>>>>>> On 11.03.2015 16:44, Alex Deucher wrote:
>>>>>>>>>>
>>>>>>>>>> radeon_bo_create() calls radeon_ttm_placement_from_domain()
>>>>>>>>>> before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
>>>>>>>>>> uses the ttm bo size to determine when to select top down
>>>>>>>>>> allocation but since the ttm bo is not initialized yet the
>>>>>>>>>> check is always false.
>>>>>>>>>>
>>>>>>>>>> Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
>>>>>>>>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>>>>>>>>> Cc: stable@vger.kernel.org
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> And I was already wondering why the heck the BOs always made this
>>>>>>>>> ping/pong
>>>>>>>>> in memory after creation.
>>>>>>>>>
>>>>>>>>> Patch is Reviewed-by: Christian König <christian.koenig@amd.com>
>>>>>>>>
>>>>>>>> And fixing that promptly broke VCE due to vram location requirements.
>>>>>>>> Updated patch attached.  Thoughts?
>>>>>>>
>>>>>>> And one more take to make things a bit more explicit for static kernel
>>>>>>> driver allocations.
>>>>>>
>>>>>> struct ttm_place::lpfn is honoured even with TTM_PL_FLAG_TOPDOWN, so
>>>>>> latter should work with RADEON_GEM_CPU_ACCESS. It sounds like the
>>>>>> problem is really that some BOs are expected to be within a certain
>>>>>> range from the beginning of VRAM, but lpfn isn't set accordingly. It
>>>>>> would be better to fix that by setting lpfn directly than indirectly via
>>>>>> RADEON_GEM_CPU_ACCESS.
>>>>>
>>>>>
>>>>> Yeah, agree. We should probably try to find the root cause of this instead.
>>>>>
>>>>> As far as I know VCE has no documented limitation on where buffers are
>>>>> placed (unlike UVD). So this is a bit strange. Are you sure that it isn't
>>>>> UVD which breaks here?
>>>>
>>>> It's definitely VCE, I don't know why UVD didn't have a problem.  I
>>>> considered using pin_restricted to make sure it got pinned in the CPU
>>>> visible region, but that had two problems: 1. it would end up getting
>>>> migrated when pinned,
>>>
>>> Maybe something like radeon_uvd_force_into_uvd_segment() is needed for
>>> VCE as well?
>>>
>>>
>>>> 2. it would end up at the top of the restricted
>>>> region since the top down flag is set which would end up fragmenting
>>>> vram.
>>>
>>> If that's an issue (which outweighs the supposed benefit of
>>> TTM_PL_FLAG_TOPDOWN), then again the proper solution would be not to set
>>> TTM_PL_FLAG_TOPDOWN when rbo->placements[i].lpfn != 0 and smaller than
>>> the whole available region, instead of checking for VRAM and
>>> RADEON_GEM_CPU_ACCESS.
>>>
>>
>> How about something like the attached patch?  I'm not really sure
>> about the restrictions for the UVD and VCE fw and stack/heap buffers,
>> but this seems to work.  It seems like the current UVD/VCE code works
>> by accident since the check for TOPDOWN fails.
>
> This patch is getting a bit messy, mixing several logically separate
> changes. Can you split it up accordingly? E.g. one patch just adding the
> new fpfn and lpfn function parameters but passing 0 for them (so no
> functional change), then one or several patches with the corresponding
> functional changes, and finally one patch adding the new size parameter
> (and thus making TTM_PL_FLAG_TOPDOWN actually used for newly allocated
> BOs). I think that would help for reviewing and generally understanding
> the changes.
>
>
>> @@ -105,14 +106,17 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
>>                */
>>               if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
>>                   rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) {
>> -                     rbo->placements[c].fpfn =
>> -                             rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
>> +                     if (fpfn > (rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT))
>> +                             rbo->placements[c].fpfn = fpfn;
>> +                     else
>> +                             rbo->placements[c].fpfn =
>> +                                     rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
>>                       rbo->placements[c++].flags = TTM_PL_FLAG_WC |
>>                                                    TTM_PL_FLAG_UNCACHED |
>>                                                    TTM_PL_FLAG_VRAM;
>>               }
>
> If (fpfn >= rbo->rdev->mc.visible_vram_size), this whole block can be
> skipped, since the next placement will be identical.
>
> OTOH, fpfn is currently always 0 anyway, so maybe it's better not to add
> that parameter in the first place.
>
>
> Other than that, looks good to me.

Broken out patches attached.  Also available here:
http://cgit.freedesktop.org/~agd5f/linux/log/?h=topdown-fixes

Alex

[-- Attachment #2: 0005-drm-radeon-optimize-radeon_uvd_force_into_uvd_segmen.patch --]
[-- Type: text/x-patch, Size: 2123 bytes --]

From 2166db91448750a91c0f19fa57f9f6fc6302ad19 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 17 Mar 2015 10:46:08 -0400
Subject: [PATCH 05/10] drm/radeon: optimize
 radeon_uvd_force_into_uvd_segment()

Use the new lpfn parameter to radeon_ttm_placement_from_domain()
to set the restriction upfront rather than adjusting in
radeon_uvd_force_into_uvd_segment().

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_object.c | 8 ++++++--
 drivers/gpu/drm/radeon/radeon_uvd.c    | 7 -------
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index af166e5..b1e429a 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -557,9 +557,13 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
 			}
 
 		retry:
-			radeon_ttm_placement_from_domain(bo, domain, 0, 0);
-			if (ring == R600_RING_TYPE_UVD_INDEX)
+			if (ring == R600_RING_TYPE_UVD_INDEX) {
+				radeon_ttm_placement_from_domain(bo, domain,
+								 0, (256 * 1024 * 1024) >> PAGE_SHIFT);
 				radeon_uvd_force_into_uvd_segment(bo, allowed);
+			} else {
+				radeon_ttm_placement_from_domain(bo, domain, 0, 0);
+			}
 
 			initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
 			r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index e98e4c6..cea3eb1 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -259,13 +259,6 @@ int radeon_uvd_resume(struct radeon_device *rdev)
 void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo,
 				       uint32_t allowed_domains)
 {
-	int i;
-
-	for (i = 0; i < rbo->placement.num_placement; ++i) {
-		rbo->placements[i].fpfn = 0 >> PAGE_SHIFT;
-		rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
-	}
-
 	/* If it must be in VRAM it must be in the first segment as well */
 	if (allowed_domains == RADEON_GEM_DOMAIN_VRAM)
 		return;
-- 
1.8.3.1


[-- Attachment #3: 0010-drm-radeon-fix-TOPDOWN-handling-for-bo_create-v4.patch --]
[-- Type: text/x-patch, Size: 8021 bytes --]

From db0be21d83078b2fe4cc6e9115d0b63a72a7e505 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 17 Mar 2015 11:13:03 -0400
Subject: [PATCH 10/10] drm/radeon: fix TOPDOWN handling for bo_create (v4)

radeon_bo_create() calls radeon_ttm_placement_from_domain()
before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
uses the ttm bo size to determine when to select top down
allocation but since the ttm bo is not initialized yet the
check is always false.

v2: only use topdown for vram if the user has not requested
CPU access explicitly.  Fixes VCE.

v3: explicitly set CPU access on kernel allocations where we
expect allocations to be at the start of vram to avoid
fragmentation and extra migration.

v4: drop v2/v3 changes, rebase on top of pfn changes

Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon.h        |  2 +-
 drivers/gpu/drm/radeon/radeon_gem.c    |  3 ++-
 drivers/gpu/drm/radeon/radeon_mn.c     |  3 ++-
 drivers/gpu/drm/radeon/radeon_object.c | 19 +++++++++++--------
 drivers/gpu/drm/radeon/radeon_ttm.c    | 12 ++++++++----
 5 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 7de3e21..809fc49 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2981,7 +2981,7 @@ extern int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data);
 extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enable);
 extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable);
 extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
-					     unsigned fpfn, unsigned lpfn);
+					     u64 size, unsigned fpfn, unsigned lpfn);
 extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo);
 extern int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 				     uint32_t flags);
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 0175296..3e785647 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -337,7 +337,8 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data,
 			goto release_object;
 		}
 
-		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT, 0, 0);
+		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT,
+						 bo->tbo.mem.size, 0, 0);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
 		radeon_bo_unreserve(bo);
 		up_read(&current->mm->mmap_sem);
diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
index 359f1f2..0def222 100644
--- a/drivers/gpu/drm/radeon/radeon_mn.c
+++ b/drivers/gpu/drm/radeon/radeon_mn.c
@@ -141,7 +141,8 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
 				DRM_ERROR("(%d) failed to wait for user bo\n", r);
 		}
 
-		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU, 0, 0);
+		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU,
+						 bo->tbo.mem.size, 0, 0);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 		if (r)
 			DRM_ERROR("(%d) failed to validate user bo\n", r);
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 205dfb8..44b594f 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -94,7 +94,7 @@ bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
 }
 
 void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
-				      unsigned fpfn, unsigned lpfn)
+				      u64 size, unsigned fpfn, unsigned lpfn)
 {
 	u32 c = 0, i;
 
@@ -181,9 +181,10 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
 	 * improve fragmentation quality.
 	 * 512kb was measured as the most optimal number.
 	 */
-	if (rbo->tbo.mem.size > 512 * 1024) {
+	if (size > 512 * 1024) {
 		for (i = 0; i < c; i++) {
-			rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
+			if (rbo->placements[i].lpfn == 0)
+				rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
 		}
 	}
 }
@@ -254,7 +255,7 @@ int radeon_bo_create(struct radeon_device *rdev,
 	bo->flags &= ~RADEON_GEM_GTT_WC;
 #endif
 
-	radeon_ttm_placement_from_domain(bo, domain, fpfn, lpfn);
+	radeon_ttm_placement_from_domain(bo, domain, size, fpfn, lpfn);
 	/* Kernel allocation are uninterruptible */
 	down_read(&rdev->pm.mclk_lock);
 	r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
@@ -360,7 +361,7 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
 		lpfn = bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
 	else
 		lpfn = max_offset >> PAGE_SHIFT;
-	radeon_ttm_placement_from_domain(bo, domain, 0, lpfn);
+	radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size, 0, lpfn);
 	for (i = 0; i < bo->placement.num_placement; i++)
 		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
 
@@ -558,11 +559,11 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
 
 		retry:
 			if (ring == R600_RING_TYPE_UVD_INDEX) {
-				radeon_ttm_placement_from_domain(bo, domain,
+				radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size,
 								 0, (256 * 1024 * 1024) >> PAGE_SHIFT);
 				radeon_uvd_force_into_uvd_segment(bo, allowed);
 			} else {
-				radeon_ttm_placement_from_domain(bo, domain, 0, 0);
+				radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size, 0, 0);
 			}
 
 			initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
@@ -805,10 +806,12 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 
 	/* hurrah the memory is not visible ! */
 	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM,
+					 rbo->tbo.mem.size,
 					 0, rdev->mc.visible_vram_size >> PAGE_SHIFT);
 	r = ttm_bo_validate(bo, &rbo->placement, false, false);
 	if (unlikely(r == -ENOMEM)) {
-		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT, 0, 0);
+		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT,
+						 rbo->tbo.mem.size, 0, 0);
 		return ttm_bo_validate(bo, &rbo->placement, false, false);
 	} else if (unlikely(r != 0)) {
 		return r;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 49d00d8..befb590 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -197,7 +197,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 	switch (bo->mem.mem_type) {
 	case TTM_PL_VRAM:
 		if (rbo->rdev->ring[radeon_copy_ring_index(rbo->rdev)].ready == false)
-			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU, 0, 0);
+			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU,
+							 rbo->tbo.mem.size, 0, 0);
 		else if (rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size &&
 			 bo->mem.start < (rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT)) {
 			unsigned fpfn = rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
@@ -209,7 +210,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 			 * BOs to be evicted from VRAM
 			 */
 			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM |
-							 RADEON_GEM_DOMAIN_GTT, 0, 0);
+							 RADEON_GEM_DOMAIN_GTT,
+							 rbo->tbo.mem.size, 0, 0);
 			rbo->placement.num_busy_placement = 0;
 			for (i = 0; i < rbo->placement.num_placement; i++) {
 				if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) {
@@ -222,11 +224,13 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 				}
 			}
 		} else
-			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT, 0, 0);
+			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT,
+							 rbo->tbo.mem.size, 0, 0);
 		break;
 	case TTM_PL_TT:
 	default:
-		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU, 0, 0);
+		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU,
+						 rbo->tbo.mem.size, 0, 0);
 	}
 	*placement = rbo->placement;
 }
-- 
1.8.3.1


[-- Attachment #4: 0009-drm-radeon-force-the-vce-allocation-into-the-first-2.patch --]
[-- Type: text/x-patch, Size: 1219 bytes --]

From b3cb5971185373df7156216dfc1280c8dcbe378a Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 17 Mar 2015 11:03:37 -0400
Subject: [PATCH 09/10] drm/radeon: force the vce allocation into the first 256
 MB

Use the new lpfn parameter to limit the vce fw/stack/heap
buffer to the first 256 MB of vram.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_vce.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c
index 8febf6f..c35f0a9 100644
--- a/drivers/gpu/drm/radeon/radeon_vce.c
+++ b/drivers/gpu/drm/radeon/radeon_vce.c
@@ -126,8 +126,8 @@ int radeon_vce_init(struct radeon_device *rdev)
 	size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) +
 	       RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE;
 	r = radeon_bo_create(rdev, size, PAGE_SIZE, true,
-			     RADEON_GEM_DOMAIN_VRAM, 0, 0, 0, NULL, NULL,
-			     &rdev->vce.vcpu_bo);
+			     RADEON_GEM_DOMAIN_VRAM, 0, (256 * 1024 * 1024) >> PAGE_SHIFT, 0,
+			     NULL, NULL, &rdev->vce.vcpu_bo);
 	if (r) {
 		dev_err(rdev->dev, "(%d) failed to allocate VCE bo\n", r);
 		return r;
-- 
1.8.3.1


[-- Attachment #5: 0008-drm-radeon-force-the-uvd-allocation-into-the-first-2.patch --]
[-- Type: text/x-patch, Size: 1193 bytes --]

From cab91cc5e2e39554e33dd0c4fff97d5824d0c48d Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 17 Mar 2015 11:00:58 -0400
Subject: [PATCH 08/10] drm/radeon: force the uvd allocation into the first 256
 MB

Use the new lpfn parameter to limit the uvd fw/stack/heap
buffer to the first 256 MB of vram.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_uvd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index cea3eb1..2b47f1a 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -141,8 +141,8 @@ int radeon_uvd_init(struct radeon_device *rdev)
 		  RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE +
 		  RADEON_GPU_PAGE_SIZE;
 	r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
-			     RADEON_GEM_DOMAIN_VRAM, 0, 0, 0, NULL,
-			     NULL, &rdev->uvd.vcpu_bo);
+			     RADEON_GEM_DOMAIN_VRAM, 0, (256 * 1024 * 1024) >> PAGE_SHIFT, 0,
+			     NULL, NULL, &rdev->uvd.vcpu_bo);
 	if (r) {
 		dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
 		return r;
-- 
1.8.3.1


[-- Attachment #6: 0007-drm-radeon-limit-the-vbios-scratch-area-to-the-first.patch --]
[-- Type: text/x-patch, Size: 1107 bytes --]

From ba56096114c7e900b83f03e60bf308575661469b Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 17 Mar 2015 10:55:47 -0400
Subject: [PATCH 07/10] drm/radeon: limit the vbios scratch area to the first
 256k.

Todo: use the atom VRAM_UsageByFirmware data table to determine
the exact location and size and use that instead.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_ttm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index fb138b7..49d00d8 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -888,7 +888,7 @@ int radeon_ttm_init(struct radeon_device *rdev)
 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
 
 	r = radeon_bo_create(rdev, 256 * 1024, PAGE_SIZE, true,
-			     RADEON_GEM_DOMAIN_VRAM, 0, 0, 0, NULL,
+			     RADEON_GEM_DOMAIN_VRAM, 0, (256 * 1024) >> PAGE_SHIFT, 0, NULL,
 			     NULL, &rdev->stollen_vga_memory);
 	if (r) {
 		return r;
-- 
1.8.3.1


[-- Attachment #7: 0006-drm-radeon-use-lpfn-in-radeon_bo_fault_reserve_notif.patch --]
[-- Type: text/x-patch, Size: 1789 bytes --]

From 3e1340f7bc0a33560ebe18b028ced1e341e4c672 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 17 Mar 2015 10:50:40 -0400
Subject: [PATCH 06/10] drm/radeon: use lpfn in
 radeon_bo_fault_reserve_notify()

Rather than fixing it after calling radeon_ttm_placement_from_domain().

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_object.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index b1e429a..205dfb8 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -787,8 +787,8 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 {
 	struct radeon_device *rdev;
 	struct radeon_bo *rbo;
-	unsigned long offset, size, lpfn;
-	int i, r;
+	unsigned long offset, size;
+	int r;
 
 	if (!radeon_ttm_bo_is_radeon_bo(bo))
 		return 0;
@@ -804,14 +804,8 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 		return 0;
 
 	/* hurrah the memory is not visible ! */
-	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM, 0, 0);
-	lpfn =	rdev->mc.visible_vram_size >> PAGE_SHIFT;
-	for (i = 0; i < rbo->placement.num_placement; i++) {
-		/* Force into visible VRAM */
-		if ((rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
-		    (!rbo->placements[i].lpfn || rbo->placements[i].lpfn > lpfn))
-			rbo->placements[i].lpfn = lpfn;
-	}
+	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM,
+					 0, rdev->mc.visible_vram_size >> PAGE_SHIFT);
 	r = ttm_bo_validate(bo, &rbo->placement, false, false);
 	if (unlikely(r == -ENOMEM)) {
 		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT, 0, 0);
-- 
1.8.3.1


[-- Attachment #8: 0004-drm-radeon-use-new-lpfn-parameter-for-radeon_bo_pin_.patch --]
[-- Type: text/x-patch, Size: 2104 bytes --]

From 8fa0ae78b3c5e0fdd832fee00ed9c1ed4a9fa826 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 17 Mar 2015 10:33:09 -0400
Subject: [PATCH 04/10] drm/radeon: use new lpfn parameter for
 radeon_bo_pin_restricted()

Rather than adjusting it after calling
radeon_ttm_placement_from_domain().

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_object.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index cd7f80b..af166e5 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -330,6 +330,7 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
 			     u64 *gpu_addr)
 {
 	int r, i;
+	unsigned lpfn;
 
 	if (radeon_ttm_tt_has_userptr(bo->tbo.ttm))
 		return -EPERM;
@@ -352,19 +353,16 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
 
 		return 0;
 	}
-	radeon_ttm_placement_from_domain(bo, domain, 0, 0);
-	for (i = 0; i < bo->placement.num_placement; i++) {
-		/* force to pin into visible video ram */
-		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
-		    !(bo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
-		    (!max_offset || max_offset > bo->rdev->mc.visible_vram_size))
-			bo->placements[i].lpfn =
-				bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
-		else
-			bo->placements[i].lpfn = max_offset >> PAGE_SHIFT;
-
+	/* force to pin into visible video ram */
+	if ((domain == RADEON_GEM_DOMAIN_VRAM) &&
+	    !(bo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
+	    (!max_offset || max_offset > bo->rdev->mc.visible_vram_size))
+		lpfn = bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
+	else
+		lpfn = max_offset >> PAGE_SHIFT;
+	radeon_ttm_placement_from_domain(bo, domain, 0, lpfn);
+	for (i = 0; i < bo->placement.num_placement; i++)
 		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
-	}
 
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 	if (likely(r == 0)) {
-- 
1.8.3.1


[-- Attachment #9: 0003-drm-radeon-limit-the-gart-vram-page-table-allocation.patch --]
[-- Type: text/x-patch, Size: 1067 bytes --]

From 941978a1d5c67d14929f3d21c5d732cd7e9edd55 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 17 Mar 2015 10:26:16 -0400
Subject: [PATCH 03/10] drm/radeon: limit the gart vram page table allocation
 to visible vram

We update the gart page table with the CPU.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_gart.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 271d6a1..30c21d4 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -128,7 +128,8 @@ int radeon_gart_table_vram_alloc(struct radeon_device *rdev)
 	if (rdev->gart.robj == NULL) {
 		r = radeon_bo_create(rdev, rdev->gart.table_size,
 				     PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
-				     0, 0, 0, NULL, NULL, &rdev->gart.robj);
+				     0, rdev->mc.visible_vram_size >> PAGE_SHIFT,
+				     0, NULL, NULL, &rdev->gart.robj);
 		if (r) {
 			return r;
 		}
-- 
1.8.3.1


[-- Attachment #10: 0002-drm-radeon-pass-fpfn-and-lpfn-to-radeon_bo_create.patch --]
[-- Type: text/x-patch, Size: 14835 bytes --]

From 3486a47d59d1881307a119c01919a0469f3a04a0 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 17 Mar 2015 10:21:59 -0400
Subject: [PATCH 02/10] drm/radeon: pass fpfn and lpfn to radeon_bo_create()

Allow the driver to specify the page range when allocating
buffers.  Currently set to 0, 0 so no functional change.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/cik.c              | 4 ++--
 drivers/gpu/drm/radeon/evergreen.c        | 6 +++---
 drivers/gpu/drm/radeon/r600.c             | 4 ++--
 drivers/gpu/drm/radeon/radeon_benchmark.c | 6 ++++--
 drivers/gpu/drm/radeon/radeon_device.c    | 2 +-
 drivers/gpu/drm/radeon/radeon_gart.c      | 2 +-
 drivers/gpu/drm/radeon/radeon_gem.c       | 2 +-
 drivers/gpu/drm/radeon/radeon_kfd.c       | 2 +-
 drivers/gpu/drm/radeon/radeon_object.c    | 6 +++---
 drivers/gpu/drm/radeon/radeon_object.h    | 3 ++-
 drivers/gpu/drm/radeon/radeon_prime.c     | 2 +-
 drivers/gpu/drm/radeon/radeon_ring.c      | 2 +-
 drivers/gpu/drm/radeon/radeon_sa.c        | 2 +-
 drivers/gpu/drm/radeon/radeon_test.c      | 4 ++--
 drivers/gpu/drm/radeon/radeon_ttm.c       | 2 +-
 drivers/gpu/drm/radeon/radeon_uvd.c       | 2 +-
 drivers/gpu/drm/radeon/radeon_vce.c       | 2 +-
 drivers/gpu/drm/radeon/radeon_vm.c        | 4 ++--
 18 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 28faea9..8d70176 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -4756,7 +4756,7 @@ static int cik_mec_init(struct radeon_device *rdev)
 		r = radeon_bo_create(rdev,
 				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
 				     PAGE_SIZE, true,
-				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
+				     RADEON_GEM_DOMAIN_GTT, 0, 0, 0, NULL, NULL,
 				     &rdev->mec.hpd_eop_obj);
 		if (r) {
 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
@@ -4922,7 +4922,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev)
 			r = radeon_bo_create(rdev,
 					     sizeof(struct bonaire_mqd),
 					     PAGE_SIZE, true,
-					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
+					     RADEON_GEM_DOMAIN_GTT, 0, 0, 0, NULL,
 					     NULL, &rdev->ring[idx].mqd_obj);
 			if (r) {
 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index f848acf..02b5d2e 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -4054,7 +4054,7 @@ int sumo_rlc_init(struct radeon_device *rdev)
 		/* save restore block */
 		if (rdev->rlc.save_restore_obj == NULL) {
 			r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
-					     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
+					     RADEON_GEM_DOMAIN_VRAM, 0, 0, 0, NULL,
 					     NULL, &rdev->rlc.save_restore_obj);
 			if (r) {
 				dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
@@ -4133,7 +4133,7 @@ int sumo_rlc_init(struct radeon_device *rdev)
 
 		if (rdev->rlc.clear_state_obj == NULL) {
 			r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
-					     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
+					     RADEON_GEM_DOMAIN_VRAM, 0, 0, 0, NULL,
 					     NULL, &rdev->rlc.clear_state_obj);
 			if (r) {
 				dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
@@ -4210,7 +4210,7 @@ int sumo_rlc_init(struct radeon_device *rdev)
 		if (rdev->rlc.cp_table_obj == NULL) {
 			r = radeon_bo_create(rdev, rdev->rlc.cp_table_size,
 					     PAGE_SIZE, true,
-					     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
+					     RADEON_GEM_DOMAIN_VRAM, 0, 0, 0, NULL,
 					     NULL, &rdev->rlc.cp_table_obj);
 			if (r) {
 				dev_warn(rdev->dev, "(%d) create RLC cp table bo failed\n", r);
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 8f6d862..7249620 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1457,7 +1457,7 @@ int r600_vram_scratch_init(struct radeon_device *rdev)
 	if (rdev->vram_scratch.robj == NULL) {
 		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE,
 				     PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
-				     0, NULL, NULL, &rdev->vram_scratch.robj);
+				     0, 0, 0, NULL, NULL, &rdev->vram_scratch.robj);
 		if (r) {
 			return r;
 		}
@@ -3390,7 +3390,7 @@ int r600_ih_ring_alloc(struct radeon_device *rdev)
 	if (rdev->ih.ring_obj == NULL) {
 		r = radeon_bo_create(rdev, rdev->ih.ring_size,
 				     PAGE_SIZE, true,
-				     RADEON_GEM_DOMAIN_GTT, 0,
+				     RADEON_GEM_DOMAIN_GTT, 0, 0, 0,
 				     NULL, NULL, &rdev->ih.ring_obj);
 		if (r) {
 			DRM_ERROR("radeon: failed to create ih ring buffer (%d).\n", r);
diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c
index 87d5fb2..1caa887 100644
--- a/drivers/gpu/drm/radeon/radeon_benchmark.c
+++ b/drivers/gpu/drm/radeon/radeon_benchmark.c
@@ -94,7 +94,8 @@ static void radeon_benchmark_move(struct radeon_device *rdev, unsigned size,
 	int time;
 
 	n = RADEON_BENCHMARK_ITERATIONS;
-	r = radeon_bo_create(rdev, size, PAGE_SIZE, true, sdomain, 0, NULL, NULL, &sobj);
+	r = radeon_bo_create(rdev, size, PAGE_SIZE, true, sdomain, 0, 0, 0,
+			     NULL, NULL, &sobj);
 	if (r) {
 		goto out_cleanup;
 	}
@@ -106,7 +107,8 @@ static void radeon_benchmark_move(struct radeon_device *rdev, unsigned size,
 	if (r) {
 		goto out_cleanup;
 	}
-	r = radeon_bo_create(rdev, size, PAGE_SIZE, true, ddomain, 0, NULL, NULL, &dobj);
+	r = radeon_bo_create(rdev, size, PAGE_SIZE, true, ddomain, 0, 0, 0,
+			     NULL, NULL, &dobj);
 	if (r) {
 		goto out_cleanup;
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index b7ca4c5..d1cebfe 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -465,7 +465,7 @@ int radeon_wb_init(struct radeon_device *rdev)
 
 	if (rdev->wb.wb_obj == NULL) {
 		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
-				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
+				     RADEON_GEM_DOMAIN_GTT, 0, 0, 0, NULL, NULL,
 				     &rdev->wb.wb_obj);
 		if (r) {
 			dev_warn(rdev->dev, "(%d) create WB bo failed\n", r);
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 5450fa9..271d6a1 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -128,7 +128,7 @@ int radeon_gart_table_vram_alloc(struct radeon_device *rdev)
 	if (rdev->gart.robj == NULL) {
 		r = radeon_bo_create(rdev, rdev->gart.table_size,
 				     PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
-				     0, NULL, NULL, &rdev->gart.robj);
+				     0, 0, 0, NULL, NULL, &rdev->gart.robj);
 		if (r) {
 			return r;
 		}
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index fb50ea9..0175296 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -67,7 +67,7 @@ int radeon_gem_object_create(struct radeon_device *rdev, unsigned long size,
 
 retry:
 	r = radeon_bo_create(rdev, size, alignment, kernel, initial_domain,
-			     flags, NULL, NULL, &robj);
+			     0, 0, flags, NULL, NULL, &robj);
 	if (r) {
 		if (r != -ERESTARTSYS) {
 			if (initial_domain == RADEON_GEM_DOMAIN_VRAM) {
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
index 061eaa9..e0caa6c 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -212,7 +212,7 @@ static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 		return -ENOMEM;
 
 	r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT,
-				RADEON_GEM_GTT_WC, NULL, NULL, &(*mem)->bo);
+			     0, 0, RADEON_GEM_GTT_WC, NULL, NULL, &(*mem)->bo);
 	if (r) {
 		dev_err(rdev->dev,
 			"failed to allocate BO for amdkfd (%d)\n", r);
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index a439bcf..cd7f80b 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -190,8 +190,8 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
 
 int radeon_bo_create(struct radeon_device *rdev,
 		     unsigned long size, int byte_align, bool kernel,
-		     u32 domain, u32 flags, struct sg_table *sg,
-		     struct reservation_object *resv,
+		     u32 domain, unsigned fpfn, unsigned lpfn, u32 flags,
+		     struct sg_table *sg, struct reservation_object *resv,
 		     struct radeon_bo **bo_ptr)
 {
 	struct radeon_bo *bo;
@@ -254,7 +254,7 @@ int radeon_bo_create(struct radeon_device *rdev,
 	bo->flags &= ~RADEON_GEM_GTT_WC;
 #endif
 
-	radeon_ttm_placement_from_domain(bo, domain, 0, 0);
+	radeon_ttm_placement_from_domain(bo, domain, fpfn, lpfn);
 	/* Kernel allocation are uninterruptible */
 	down_read(&rdev->pm.mclk_lock);
 	r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index d8d295e..3d07f87 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -124,7 +124,8 @@ extern int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
 
 extern int radeon_bo_create(struct radeon_device *rdev,
 			    unsigned long size, int byte_align,
-			    bool kernel, u32 domain, u32 flags,
+			    bool kernel, u32 domain, unsigned fpfn,
+			    unsigned lpfn, u32 flags,
 			    struct sg_table *sg,
 			    struct reservation_object *resv,
 			    struct radeon_bo **bo_ptr);
diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c
index f3609c9..d808d8f 100644
--- a/drivers/gpu/drm/radeon/radeon_prime.c
+++ b/drivers/gpu/drm/radeon/radeon_prime.c
@@ -68,7 +68,7 @@ struct drm_gem_object *radeon_gem_prime_import_sg_table(struct drm_device *dev,
 
 	ww_mutex_lock(&resv->lock, NULL);
 	ret = radeon_bo_create(rdev, attach->dmabuf->size, PAGE_SIZE, false,
-			       RADEON_GEM_DOMAIN_GTT, 0, sg, resv, &bo);
+			       RADEON_GEM_DOMAIN_GTT, 0, 0, 0, sg, resv, &bo);
 	ww_mutex_unlock(&resv->lock);
 	if (ret)
 		return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 2456f69..d9f0150 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -383,7 +383,7 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig
 	/* Allocate ring buffer */
 	if (ring->ring_obj == NULL) {
 		r = radeon_bo_create(rdev, ring->ring_size, PAGE_SIZE, true,
-				     RADEON_GEM_DOMAIN_GTT, 0, NULL,
+				     RADEON_GEM_DOMAIN_GTT, 0, 0, 0, NULL,
 				     NULL, &ring->ring_obj);
 		if (r) {
 			dev_err(rdev->dev, "(%d) ring create failed\n", r);
diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c
index c507896..3013fb18 100644
--- a/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/drivers/gpu/drm/radeon/radeon_sa.c
@@ -65,7 +65,7 @@ int radeon_sa_bo_manager_init(struct radeon_device *rdev,
 	}
 
 	r = radeon_bo_create(rdev, size, align, true,
-			     domain, flags, NULL, NULL, &sa_manager->bo);
+			     domain, 0, 0, flags, NULL, NULL, &sa_manager->bo);
 	if (r) {
 		dev_err(rdev->dev, "(%d) failed to allocate bo for manager\n", r);
 		return r;
diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c
index 79181816..073d6d2 100644
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -67,7 +67,7 @@ static void radeon_do_test_moves(struct radeon_device *rdev, int flag)
 	}
 
 	r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
-			     0, NULL, NULL, &vram_obj);
+			     0, 0, 0, NULL, NULL, &vram_obj);
 	if (r) {
 		DRM_ERROR("Failed to create VRAM object\n");
 		goto out_cleanup;
@@ -87,7 +87,7 @@ static void radeon_do_test_moves(struct radeon_device *rdev, int flag)
 		struct radeon_fence *fence = NULL;
 
 		r = radeon_bo_create(rdev, size, PAGE_SIZE, true,
-				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
+				     RADEON_GEM_DOMAIN_GTT, 0, 0, 0, NULL, NULL,
 				     gtt_obj + i);
 		if (r) {
 			DRM_ERROR("Failed to create GTT object %d\n", i);
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 2994bed..fb138b7 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -888,7 +888,7 @@ int radeon_ttm_init(struct radeon_device *rdev)
 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
 
 	r = radeon_bo_create(rdev, 256 * 1024, PAGE_SIZE, true,
-			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
+			     RADEON_GEM_DOMAIN_VRAM, 0, 0, 0, NULL,
 			     NULL, &rdev->stollen_vga_memory);
 	if (r) {
 		return r;
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index c10b2ae..e98e4c6 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -141,7 +141,7 @@ int radeon_uvd_init(struct radeon_device *rdev)
 		  RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE +
 		  RADEON_GPU_PAGE_SIZE;
 	r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
-			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
+			     RADEON_GEM_DOMAIN_VRAM, 0, 0, 0, NULL,
 			     NULL, &rdev->uvd.vcpu_bo);
 	if (r) {
 		dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c
index 976fe43..8febf6f 100644
--- a/drivers/gpu/drm/radeon/radeon_vce.c
+++ b/drivers/gpu/drm/radeon/radeon_vce.c
@@ -126,7 +126,7 @@ int radeon_vce_init(struct radeon_device *rdev)
 	size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) +
 	       RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE;
 	r = radeon_bo_create(rdev, size, PAGE_SIZE, true,
-			     RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL,
+			     RADEON_GEM_DOMAIN_VRAM, 0, 0, 0, NULL, NULL,
 			     &rdev->vce.vcpu_bo);
 	if (r) {
 		dev_err(rdev->dev, "(%d) failed to allocate VCE bo\n", r);
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index 2a5a4a9..e845d4c 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -542,7 +542,7 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
 
 		r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8,
 				     RADEON_GPU_PAGE_SIZE, true,
-				     RADEON_GEM_DOMAIN_VRAM, 0,
+				     RADEON_GEM_DOMAIN_VRAM, 0, 0, 0,
 				     NULL, NULL, &pt);
 		if (r)
 			return r;
@@ -1186,7 +1186,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 	}
 
 	r = radeon_bo_create(rdev, pd_size, align, true,
-			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
+			     RADEON_GEM_DOMAIN_VRAM, 0, 0, 0, NULL,
 			     NULL, &vm->page_directory);
 	if (r)
 		return r;
-- 
1.8.3.1


[-- Attachment #11: 0001-drm-radeon-add-fpfn-lpfn-to-radeon_ttm_placement_fro.patch --]
[-- Type: text/x-patch, Size: 9850 bytes --]

From 7af0b96d2d6983044e914e481754afa6232e3d05 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 17 Mar 2015 10:10:11 -0400
Subject: [PATCH 01/10] drm/radeon: add fpfn, lpfn to
 radeon_ttm_placement_from_domain()

This lets us set the valid page range for buffers at allocation
time.  Currently set to 0, 0 so no functional change.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon.h        |  3 ++-
 drivers/gpu/drm/radeon/radeon_gem.c    |  2 +-
 drivers/gpu/drm/radeon/radeon_mn.c     |  4 ++--
 drivers/gpu/drm/radeon/radeon_object.c | 36 ++++++++++++++++++----------------
 drivers/gpu/drm/radeon/radeon_ttm.c    |  8 ++++----
 5 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 35ab65d..7de3e21 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2980,7 +2980,8 @@ extern void radeon_surface_init(struct radeon_device *rdev);
 extern int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data);
 extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enable);
 extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable);
-extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain);
+extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
+					     unsigned fpfn, unsigned lpfn);
 extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo);
 extern int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 				     uint32_t flags);
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index ac3c131..fb50ea9 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -337,7 +337,7 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data,
 			goto release_object;
 		}
 
-		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT);
+		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT, 0, 0);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
 		radeon_bo_unreserve(bo);
 		up_read(&current->mm->mmap_sem);
diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
index a69bd44..359f1f2 100644
--- a/drivers/gpu/drm/radeon/radeon_mn.c
+++ b/drivers/gpu/drm/radeon/radeon_mn.c
@@ -141,14 +141,14 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
 				DRM_ERROR("(%d) failed to wait for user bo\n", r);
 		}
 
-		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU);
+		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU, 0, 0);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 		if (r)
 			DRM_ERROR("(%d) failed to validate user bo\n", r);
 
 		radeon_bo_unreserve(bo);
 	}
-	
+
 	mutex_unlock(&rmn->lock);
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 43e0994..a439bcf 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -93,7 +93,8 @@ bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
 	return false;
 }
 
-void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
+void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
+				      unsigned fpfn, unsigned lpfn)
 {
 	u32 c = 0, i;
 
@@ -103,8 +104,9 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
 		/* Try placing BOs which don't need CPU access outside of the
 		 * CPU accessible part of VRAM
 		 */
-		if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
-		    rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) {
+		if (fpfn >= rbo->rdev->mc.visible_vram_size ||
+		    ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
+		     rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size)) {
 			rbo->placements[c].fpfn =
 				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
 			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
@@ -112,7 +114,7 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
 						     TTM_PL_FLAG_VRAM;
 		}
 
-		rbo->placements[c].fpfn = 0;
+		rbo->placements[c].fpfn = fpfn;
 		rbo->placements[c++].flags = TTM_PL_FLAG_WC |
 					     TTM_PL_FLAG_UNCACHED |
 					     TTM_PL_FLAG_VRAM;
@@ -120,18 +122,18 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
 
 	if (domain & RADEON_GEM_DOMAIN_GTT) {
 		if (rbo->flags & RADEON_GEM_GTT_UC) {
-			rbo->placements[c].fpfn = 0;
+			rbo->placements[c].fpfn = fpfn;
 			rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
 				TTM_PL_FLAG_TT;
 
 		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
 			   (rbo->rdev->flags & RADEON_IS_AGP)) {
-			rbo->placements[c].fpfn = 0;
+			rbo->placements[c].fpfn = fpfn;
 			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
 				TTM_PL_FLAG_UNCACHED |
 				TTM_PL_FLAG_TT;
 		} else {
-			rbo->placements[c].fpfn = 0;
+			rbo->placements[c].fpfn = fpfn;
 			rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
 						     TTM_PL_FLAG_TT;
 		}
@@ -139,24 +141,24 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
 
 	if (domain & RADEON_GEM_DOMAIN_CPU) {
 		if (rbo->flags & RADEON_GEM_GTT_UC) {
-			rbo->placements[c].fpfn = 0;
+			rbo->placements[c].fpfn = fpfn;
 			rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
 				TTM_PL_FLAG_SYSTEM;
 
 		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
 		    rbo->rdev->flags & RADEON_IS_AGP) {
-			rbo->placements[c].fpfn = 0;
+			rbo->placements[c].fpfn = fpfn;
 			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
 				TTM_PL_FLAG_UNCACHED |
 				TTM_PL_FLAG_SYSTEM;
 		} else {
-			rbo->placements[c].fpfn = 0;
+			rbo->placements[c].fpfn = fpfn;
 			rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
 						     TTM_PL_FLAG_SYSTEM;
 		}
 	}
 	if (!c) {
-		rbo->placements[c].fpfn = 0;
+		rbo->placements[c].fpfn = fpfn;
 		rbo->placements[c++].flags = TTM_PL_MASK_CACHING |
 					     TTM_PL_FLAG_SYSTEM;
 	}
@@ -171,7 +173,7 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
 			rbo->placements[i].lpfn =
 				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
 		else
-			rbo->placements[i].lpfn = 0;
+			rbo->placements[i].lpfn = lpfn;
 	}
 
 	/*
@@ -252,7 +254,7 @@ int radeon_bo_create(struct radeon_device *rdev,
 	bo->flags &= ~RADEON_GEM_GTT_WC;
 #endif
 
-	radeon_ttm_placement_from_domain(bo, domain);
+	radeon_ttm_placement_from_domain(bo, domain, 0, 0);
 	/* Kernel allocation are uninterruptible */
 	down_read(&rdev->pm.mclk_lock);
 	r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
@@ -350,7 +352,7 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
 
 		return 0;
 	}
-	radeon_ttm_placement_from_domain(bo, domain);
+	radeon_ttm_placement_from_domain(bo, domain, 0, 0);
 	for (i = 0; i < bo->placement.num_placement; i++) {
 		/* force to pin into visible video ram */
 		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
@@ -557,7 +559,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
 			}
 
 		retry:
-			radeon_ttm_placement_from_domain(bo, domain);
+			radeon_ttm_placement_from_domain(bo, domain, 0, 0);
 			if (ring == R600_RING_TYPE_UVD_INDEX)
 				radeon_uvd_force_into_uvd_segment(bo, allowed);
 
@@ -800,7 +802,7 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 		return 0;
 
 	/* hurrah the memory is not visible ! */
-	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
+	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM, 0, 0);
 	lpfn =	rdev->mc.visible_vram_size >> PAGE_SHIFT;
 	for (i = 0; i < rbo->placement.num_placement; i++) {
 		/* Force into visible VRAM */
@@ -810,7 +812,7 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 	}
 	r = ttm_bo_validate(bo, &rbo->placement, false, false);
 	if (unlikely(r == -ENOMEM)) {
-		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
+		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT, 0, 0);
 		return ttm_bo_validate(bo, &rbo->placement, false, false);
 	} else if (unlikely(r != 0)) {
 		return r;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index d02aa1d..2994bed 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -197,7 +197,7 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 	switch (bo->mem.mem_type) {
 	case TTM_PL_VRAM:
 		if (rbo->rdev->ring[radeon_copy_ring_index(rbo->rdev)].ready == false)
-			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
+			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU, 0, 0);
 		else if (rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size &&
 			 bo->mem.start < (rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT)) {
 			unsigned fpfn = rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
@@ -209,7 +209,7 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 			 * BOs to be evicted from VRAM
 			 */
 			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM |
-							 RADEON_GEM_DOMAIN_GTT);
+							 RADEON_GEM_DOMAIN_GTT, 0, 0);
 			rbo->placement.num_busy_placement = 0;
 			for (i = 0; i < rbo->placement.num_placement; i++) {
 				if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) {
@@ -222,11 +222,11 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 				}
 			}
 		} else
-			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
+			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT, 0, 0);
 		break;
 	case TTM_PL_TT:
 	default:
-		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
+		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU, 0, 0);
 	}
 	*placement = rbo->placement;
 }
-- 
1.8.3.1


[-- Attachment #12: Type: text/plain, Size: 159 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create
  2015-03-17 15:19                   ` Alex Deucher
@ 2015-03-17 15:43                     ` Christian König
  2015-03-17 15:50                       ` Alex Deucher
  0 siblings, 1 reply; 23+ messages in thread
From: Christian König @ 2015-03-17 15:43 UTC (permalink / raw)
  To: Alex Deucher, Michel Dänzer; +Cc: Maling list - DRI developers


[-- Attachment #1.1: Type: text/plain, Size: 6373 bytes --]

On 17.03.2015 16:19, Alex Deucher wrote:
> On Mon, Mar 16, 2015 at 11:48 PM, Michel Dänzer <michel@daenzer.net> wrote:
>> On 17.03.2015 07:32, Alex Deucher wrote:
>>> On Thu, Mar 12, 2015 at 10:55 PM, Michel Dänzer <michel@daenzer.net> wrote:
>>>> On 12.03.2015 22:09, Alex Deucher wrote:
>>>>> On Thu, Mar 12, 2015 at 5:23 AM, Christian König
>>>>> <deathsimple@vodafone.de> wrote:
>>>>>> On 12.03.2015 10:02, Michel Dänzer wrote:
>>>>>>> On 12.03.2015 06:14, Alex Deucher wrote:
>>>>>>>> On Wed, Mar 11, 2015 at 4:51 PM, Alex Deucher <alexdeucher@gmail.com>
>>>>>>>> wrote:
>>>>>>>>> On Wed, Mar 11, 2015 at 2:21 PM, Christian König
>>>>>>>>> <deathsimple@vodafone.de> wrote:
>>>>>>>>>> On 11.03.2015 16:44, Alex Deucher wrote:
>>>>>>>>>>> radeon_bo_create() calls radeon_ttm_placement_from_domain()
>>>>>>>>>>> before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
>>>>>>>>>>> uses the ttm bo size to determine when to select top down
>>>>>>>>>>> allocation but since the ttm bo is not initialized yet the
>>>>>>>>>>> check is always false.
>>>>>>>>>>>
>>>>>>>>>>> Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
>>>>>>>>>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>>>>>>>>>> Cc: stable@vger.kernel.org
>>>>>>>>>>
>>>>>>>>>> And I was already wondering why the heck the BOs always made this
>>>>>>>>>> ping/pong
>>>>>>>>>> in memory after creation.
>>>>>>>>>>
>>>>>>>>>> Patch is Reviewed-by: Christian König <christian.koenig@amd.com>
>>>>>>>>> And fixing that promptly broke VCE due to vram location requirements.
>>>>>>>>> Updated patch attached.  Thoughts?
>>>>>>>> And one more take to make things a bit more explicit for static kernel
>>>>>>>> driver allocations.
>>>>>>> struct ttm_place::lpfn is honoured even with TTM_PL_FLAG_TOPDOWN, so
>>>>>>> latter should work with RADEON_GEM_CPU_ACCESS. It sounds like the
>>>>>>> problem is really that some BOs are expected to be within a certain
>>>>>>> range from the beginning of VRAM, but lpfn isn't set accordingly. It
>>>>>>> would be better to fix that by setting lpfn directly than indirectly via
>>>>>>> RADEON_GEM_CPU_ACCESS.
>>>>>>
>>>>>> Yeah, agree. We should probably try to find the root cause of this instead.
>>>>>>
>>>>>> As far as I know VCE has no documented limitation on where buffers are
>>>>>> placed (unlike UVD). So this is a bit strange. Are you sure that it isn't
>>>>>> UVD which breaks here?
>>>>> It's definitely VCE, I don't know why UVD didn't have a problem.  I
>>>>> considered using pin_restricted to make sure it got pinned in the CPU
>>>>> visible region, but that had two problems: 1. it would end up getting
>>>>> migrated when pinned,
>>>> Maybe something like radeon_uvd_force_into_uvd_segment() is needed for
>>>> VCE as well?
>>>>
>>>>
>>>>> 2. it would end up at the top of the restricted
>>>>> region since the top down flag is set which would end up fragmenting
>>>>> vram.
>>>> If that's an issue (which outweighs the supposed benefit of
>>>> TTM_PL_FLAG_TOPDOWN), then again the proper solution would be not to set
>>>> TTM_PL_FLAG_TOPDOWN when rbo->placements[i].lpfn != 0 and smaller than
>>>> the whole available region, instead of checking for VRAM and
>>>> RADEON_GEM_CPU_ACCESS.
>>>>
>>> How about something like the attached patch?  I'm not really sure
>>> about the restrictions for the UVD and VCE fw and stack/heap buffers,
>>> but this seems to work.  It seems like the current UVD/VCE code works
>>> by accident since the check for TOPDOWN fails.
>> This patch is getting a bit messy, mixing several logically separate
>> changes. Can you split it up accordingly? E.g. one patch just adding the
>> new fpfn and lpfn function parameters but passing 0 for them (so no
>> functional change), then one or several patches with the corresponding
>> functional changes, and finally one patch adding the new size parameter
>> (and thus making TTM_PL_FLAG_TOPDOWN actually used for newly allocated
>> BOs). I think that would help for reviewing and generally understanding
>> the changes.
>>
>>
>>> @@ -105,14 +106,17 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
>>>                 */
>>>                if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
>>>                    rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) {
>>> -                     rbo->placements[c].fpfn =
>>> -                             rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
>>> +                     if (fpfn > (rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT))
>>> +                             rbo->placements[c].fpfn = fpfn;
>>> +                     else
>>> +                             rbo->placements[c].fpfn =
>>> +                                     rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
>>>                        rbo->placements[c++].flags = TTM_PL_FLAG_WC |
>>>                                                     TTM_PL_FLAG_UNCACHED |
>>>                                                     TTM_PL_FLAG_VRAM;
>>>                }
>> If (fpfn >= rbo->rdev->mc.visible_vram_size), this whole block can be
>> skipped, since the next placement will be identical.
>>
>> OTOH, fpfn is currently always 0 anyway, so maybe it's better not to add
>> that parameter in the first place.
>>
>>
>> Other than that, looks good to me.
> Broken out patches attached.  Also available here:
> http://cgit.freedesktop.org/~agd5f/linux/log/?h=topdown-fixes
Thinking more about it, that approach is a NAK. For limiting a BO into 
visible VRAM we want the limit to apply only to the VRAM domain 
entry; doing it this way it applies to GTT as well, which is really bad 
for handling page faults.

I would rather say let us completely nuke 
radeon_ttm_placement_from_domain for internal allocations and give 
radeon_bo_create a ttm_placement pointer to use.

Driver internal allocations would then have a couple of predefined 
placements for their buffers. We might need to make a few ttm_placement 
pointers const for this, but I think that this is the better approach.

Regards,
Christian.


>
> Alex
>
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel


[-- Attachment #1.2: Type: text/html, Size: 9343 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create
  2015-03-17 15:43                     ` Christian König
@ 2015-03-17 15:50                       ` Alex Deucher
  0 siblings, 0 replies; 23+ messages in thread
From: Alex Deucher @ 2015-03-17 15:50 UTC (permalink / raw)
  To: Christian König; +Cc: Michel Dänzer, Maling list - DRI developers

On Tue, Mar 17, 2015 at 11:43 AM, Christian König
<deathsimple@vodafone.de> wrote:
> On 17.03.2015 16:19, Alex Deucher wrote:
>
> On Mon, Mar 16, 2015 at 11:48 PM, Michel Dänzer <michel@daenzer.net> wrote:
>
> On 17.03.2015 07:32, Alex Deucher wrote:
>
> On Thu, Mar 12, 2015 at 10:55 PM, Michel Dänzer <michel@daenzer.net> wrote:
>
> On 12.03.2015 22:09, Alex Deucher wrote:
>
> On Thu, Mar 12, 2015 at 5:23 AM, Christian König
> <deathsimple@vodafone.de> wrote:
>
> On 12.03.2015 10:02, Michel Dänzer wrote:
>
> On 12.03.2015 06:14, Alex Deucher wrote:
>
> On Wed, Mar 11, 2015 at 4:51 PM, Alex Deucher <alexdeucher@gmail.com>
> wrote:
>
> On Wed, Mar 11, 2015 at 2:21 PM, Christian König
> <deathsimple@vodafone.de> wrote:
>
> On 11.03.2015 16:44, Alex Deucher wrote:
>
> radeon_bo_create() calls radeon_ttm_placement_from_domain()
> before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
> uses the ttm bo size to determine when to select top down
> allocation but since the ttm bo is not initialized yet the
> check is always false.
>
> Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> Cc: stable@vger.kernel.org
>
> And I was already wondering why the heck the BOs always made this
> ping/pong
> in memory after creation.
>
> Patch is Reviewed-by: Christian König <christian.koenig@amd.com>
>
> And fixing that promptly broke VCE due to vram location requirements.
> Updated patch attached.  Thoughts?
>
> And one more take to make things a bit more explicit for static kernel
> driver allocations.
>
> struct ttm_place::lpfn is honoured even with TTM_PL_FLAG_TOPDOWN, so
> latter should work with RADEON_GEM_CPU_ACCESS. It sounds like the
> problem is really that some BOs are expected to be within a certain
> range from the beginning of VRAM, but lpfn isn't set accordingly. It
> would be better to fix that by setting lpfn directly than indirectly via
> RADEON_GEM_CPU_ACCESS.
>
> Yeah, agree. We should probably try to find the root cause of this instead.
>
> As far as I know VCE has no documented limitation on where buffers are
> placed (unlike UVD). So this is a bit strange. Are you sure that it isn't
> UVD which breaks here?
>
> It's definitely VCE, I don't know why UVD didn't have a problem.  I
> considered using pin_restricted to make sure it got pinned in the CPU
> visible region, but that had two problems: 1. it would end up getting
> migrated when pinned,
>
> Maybe something like radeon_uvd_force_into_uvd_segment() is needed for
> VCE as well?
>
>
> 2. it would end up at the top of the restricted
> region since the top down flag is set which would end up fragmenting
> vram.
>
> If that's an issue (which outweighs the supposed benefit of
> TTM_PL_FLAG_TOPDOWN), then again the proper solution would be not to set
> TTM_PL_FLAG_TOPDOWN when rbo->placements[i].lpfn != 0 and smaller than
> the whole available region, instead of checking for VRAM and
> RADEON_GEM_CPU_ACCESS.
>
> How about something like the attached patch?  I'm not really sure
> about the restrictions for the UVD and VCE fw and stack/heap buffers,
> but this seems to work.  It seems like the current UVD/VCE code works
> by accident since the check for TOPDOWN fails.
>
> This patch is getting a bit messy, mixing several logically separate
> changes. Can you split it up accordingly? E.g. one patch just adding the
> new fpfn and lpfn function parameters but passing 0 for them (so no
> functional change), then one or several patches with the corresponding
> functional changes, and finally one patch adding the new size parameter
> (and thus making TTM_PL_FLAG_TOPDOWN actually used for newly allocated
> BOs). I think that would help for reviewing and generally understanding
> the changes.
>
>
> @@ -105,14 +106,17 @@ void radeon_ttm_placement_from_domain(struct radeon_bo
> *rbo, u32 domain)
>                */
>               if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
>                   rbo->rdev->mc.visible_vram_size <
> rbo->rdev->mc.real_vram_size) {
> -                     rbo->placements[c].fpfn =
> -                             rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
> +                     if (fpfn > (rbo->rdev->mc.visible_vram_size >>
> PAGE_SHIFT))
> +                             rbo->placements[c].fpfn = fpfn;
> +                     else
> +                             rbo->placements[c].fpfn =
> +                                     rbo->rdev->mc.visible_vram_size >>
> PAGE_SHIFT;
>                       rbo->placements[c++].flags = TTM_PL_FLAG_WC |
>                                                    TTM_PL_FLAG_UNCACHED |
>                                                    TTM_PL_FLAG_VRAM;
>               }
>
> If (fpfn >= rbo->rdev->mc.visible_vram_size), this whole block can be
> skipped, since the next placement will be identical.
>
> OTOH, fpfn is currently always 0 anyway, so maybe it's better not to add
> that parameter in the first place.
>
>
> Other than that, looks good to me.
>
> Broken out patches attached.  Also available here:
> http://cgit.freedesktop.org/~agd5f/linux/log/?h=topdown-fixes
>
> Thinking more about it, that approach is a NAK. For limiting a BO into
> visible VRAM we want the limit to apply only to the VRAM domain entry;
> doing it this way it applies to GTT as well, which is really bad for handling
> page faults.
>
> I would rather say let us completely nuke radeon_ttm_placement_from_domain
> for internal allocations and give radeon_bo_create a ttm_placement pointer
> to use.
>
> Driver internal allocations would then have a couple of predefined
> placements for their buffers. We might need to make a few ttm_placement
> pointers const for this, but I think that this is the better approach.

Yeah, the more I think about it, the more I'm starting to agree.
Maybe for now we just drop the topdown thing.  It seems like it can
only cause needless migration in its current state.  It also reportedly
breaks suspend/resume on some systems.

Alex

>
> Regards,
> Christian.
>
>
>
> Alex
>
>
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
>
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2015-03-17 15:50 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-11 15:44 [PATCH] drm/radeon: fix TOPDOWN handling for bo_create Alex Deucher
2015-03-11 18:21 ` Christian König
2015-03-11 20:51   ` Alex Deucher
2015-03-11 21:14     ` Alex Deucher
2015-03-12  9:02       ` Michel Dänzer
2015-03-12  9:23         ` Christian König
2015-03-12  9:30           ` Oded Gabbay
2015-03-12  9:36             ` Christian König
2015-03-15 15:07               ` Oded Gabbay
2015-03-12 13:09           ` Alex Deucher
2015-03-13  2:55             ` Michel Dänzer
2015-03-16 22:32               ` Alex Deucher
2015-03-17  3:48                 ` Michel Dänzer
2015-03-17 15:19                   ` Alex Deucher
2015-03-17 15:43                     ` Christian König
2015-03-17 15:50                       ` Alex Deucher
2015-03-17 11:40                 ` Christian König
2015-03-17 13:49                   ` Alex Deucher
2015-03-12 10:21         ` Lauri Kasanen
2015-03-13  9:11         ` Daniel Vetter
2015-03-13  9:46           ` Michel Dänzer
2015-03-13 16:36             ` Daniel Vetter
2015-03-13 17:57               ` Alex Deucher

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.