All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/amdgpu: fix GTT offset handling
@ 2016-09-06  9:41 Christian König
       [not found] ` <1473154866-2448-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Christian König @ 2016-09-06  9:41 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

Otherwise we run into problems on 32bit systems with more than 4GB GART.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 7417610..4337b3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -649,7 +649,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
 			return r;
 		}
 	}
-	gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT);
+	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
 	if (!ttm->num_pages) {
 		WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
 		     ttm->num_pages, bo_mem, ttm);
@@ -664,8 +664,8 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
 		ttm->pages, gtt->ttm.dma_address, flags);
 
 	if (r) {
-		DRM_ERROR("failed to bind %lu pages at 0x%08X\n",
-			  ttm->num_pages, (unsigned)gtt->offset);
+		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
+			  ttm->num_pages, gtt->offset);
 		return r;
 	}
 	spin_lock(&gtt->adev->gtt_list_lock);
@@ -690,8 +690,8 @@ int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
 				     flags);
 		if (r) {
 			spin_unlock(&adev->gtt_list_lock);
-			DRM_ERROR("failed to bind %lu pages at 0x%08X\n",
-				  gtt->ttm.ttm.num_pages, (unsigned)gtt->offset);
+			DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
+				  gtt->ttm.ttm.num_pages, gtt->offset);
 			return r;
 		}
 	}
-- 
2.5.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/2] drm/amdgpu: bind GTT on demand
       [not found] ` <1473154866-2448-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2016-09-06  9:41   ` Christian König
       [not found]     ` <1473154866-2448-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Christian König @ 2016-09-06  9:41 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

We don't really need the GTT table any more most of the time. So bind it
only on demand.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 34 ++++++++++++++++++++++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  7 ++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 33 ++++++++++++++++++++++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h    |  3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c    |  4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c    |  6 +++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     |  3 ++-
 8 files changed, 84 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9d39fa8..28d4a67 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -2521,6 +2521,7 @@ static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
 struct amdgpu_bo_va_mapping *
 amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 		       uint64_t addr, struct amdgpu_bo **bo);
+int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
 
 #if defined(CONFIG_DRM_AMD_DAL)
 int amdgpu_dm_display_resume(struct amdgpu_device *adev );
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 20a1962..e069978 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -640,8 +640,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		}
 	}
 
-	if (p->uf_entry.robj)
-		p->job->uf_addr += amdgpu_bo_gpu_offset(p->uf_entry.robj);
+	if (!r && p->uf_entry.robj) {
+		struct amdgpu_bo *uf = p->uf_entry.robj;
+
+		r = amdgpu_ttm_bind(uf->tbo.ttm, &uf->tbo.mem);
+		p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
+	}
 
 error_validate:
 	if (r) {
@@ -1156,3 +1160,29 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 
 	return NULL;
 }
+
+/**
+ * amdgpu_cs_sysvm_access_required - make BOs accessible by the system VM
+ *
+ * @parser: command submission parser context
+ *
+ * Helper for UVD/VCE VM emulation, make sure BOs are accessible by the system VM.
+ */
+int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
+{
+	unsigned i;
+	int r;
+
+	if (!parser->bo_list)
+		return 0;
+
+	for (i = 0; i < parser->bo_list->num_entries; i++) {
+		struct amdgpu_bo *bo = parser->bo_list->array[i].robj;
+
+		r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
+		if (unlikely(r))
+			return r;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index b17734e..dc73f11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -646,6 +646,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
 		dev_err(bo->adev->dev, "%p pin failed\n", bo);
 		goto error;
 	}
+	r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
+	if (unlikely(r)) {
+		dev_err(bo->adev->dev, "%p bind failed\n", bo);
+		goto error;
+	}
 
 	bo->pin_count = 1;
 	if (gpu_addr != NULL)
@@ -918,6 +923,8 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct fence *fence,
 u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
 {
 	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
+	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
+		     !amdgpu_ttm_is_bound(bo->tbo.ttm));
 	WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
 		     !bo->pin_count);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 4337b3a..c294aa9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -256,8 +256,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 	new_start = (u64)new_mem->start << PAGE_SHIFT;
 
 	switch (old_mem->mem_type) {
-	case TTM_PL_VRAM:
 	case TTM_PL_TT:
+		r = amdgpu_ttm_bind(bo->ttm, old_mem);
+		if (r)
+			return r;
+
+	case TTM_PL_VRAM:
 		old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
 		break;
 	default:
@@ -265,8 +269,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 		return -EINVAL;
 	}
 	switch (new_mem->mem_type) {
-	case TTM_PL_VRAM:
 	case TTM_PL_TT:
+		r = amdgpu_ttm_bind(bo->ttm, new_mem);
+		if (r)
+			return r;
+
+	case TTM_PL_VRAM:
 		new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
 		break;
 	default:
@@ -639,7 +647,6 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
 				   struct ttm_mem_reg *bo_mem)
 {
 	struct amdgpu_ttm_tt *gtt = (void*)ttm;
-	uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
 	int r;
 
 	if (gtt->userptr) {
@@ -660,6 +667,26 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
 	    bo_mem->mem_type == AMDGPU_PL_OA)
 		return -EINVAL;
 
+	return 0;
+}
+
+bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
+{
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+
+	return gtt && !list_empty(&gtt->list);
+}
+
+int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
+{
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	uint32_t flags;
+	int r;
+
+	if (!ttm || amdgpu_ttm_is_bound(ttm))
+		return 0;
+
+	flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
 	r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
 		ttm->pages, gtt->ttm.dma_address, flags);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 72f6bfc..214bae9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -77,4 +77,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 			struct fence **fence);
 
 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
+bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
+int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 5888e8a..fab7bb1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -891,6 +891,10 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
 		return -EINVAL;
 	}
 
+	r = amdgpu_cs_sysvm_access_required(parser);
+	if (r)
+		return r;
+
 	ctx.parser = parser;
 	ctx.buf_sizes = buf_sizes;
 	ctx.ib_idx = ib_idx;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 9b71d6c..b0c6702 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -634,7 +634,11 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 	uint32_t allocated = 0;
 	uint32_t tmp, handle = 0;
 	uint32_t *size = &tmp;
-	int i, r = 0, idx = 0;
+	int i, r, idx = 0;
+
+	r = amdgpu_cs_sysvm_access_required(p);
+	if (r)
+		return r;
 
 	while (idx < ib->length_dw) {
 		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 7660f82..a549abd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1162,7 +1162,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	}
 
 	flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
-	gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;
+	gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
+		adev == bo_va->bo->adev) ? flags : 0;
 
 	spin_lock(&vm->status_lock);
 	if (!list_empty(&bo_va->vm_status))
-- 
2.5.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: bind GTT on demand
       [not found]     ` <1473154866-2448-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2016-09-06  9:53       ` zhoucm1
       [not found]         ` <57CE9220.8060403-5C7GfCeVMHo@public.gmane.org>
  2016-09-07 14:32       ` Deucher, Alexander
  2016-09-17 21:14       ` Marek Olšák
  2 siblings, 1 reply; 16+ messages in thread
From: zhoucm1 @ 2016-09-06  9:53 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



On 2016年09月06日 17:41, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> We don't really need the GTT table any more most of the time.
Why? I thought GTT bo is always needed to be bound when GPU is trying to 
access it, doesn't it?

Regards,
David Zhou
>   So bind it
> only on demand.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 34 ++++++++++++++++++++++++++++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  7 ++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 33 ++++++++++++++++++++++++++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h    |  3 +++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c    |  4 ++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c    |  6 +++++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     |  3 ++-
>   8 files changed, 84 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 9d39fa8..28d4a67 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -2521,6 +2521,7 @@ static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
>   struct amdgpu_bo_va_mapping *
>   amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>   		       uint64_t addr, struct amdgpu_bo **bo);
> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
>   
>   #if defined(CONFIG_DRM_AMD_DAL)
>   int amdgpu_dm_display_resume(struct amdgpu_device *adev );
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 20a1962..e069978 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -640,8 +640,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>   		}
>   	}
>   
> -	if (p->uf_entry.robj)
> -		p->job->uf_addr += amdgpu_bo_gpu_offset(p->uf_entry.robj);
> +	if (!r && p->uf_entry.robj) {
> +		struct amdgpu_bo *uf = p->uf_entry.robj;
> +
> +		r = amdgpu_ttm_bind(uf->tbo.ttm, &uf->tbo.mem);
> +		p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
> +	}
>   
>   error_validate:
>   	if (r) {
> @@ -1156,3 +1160,29 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>   
>   	return NULL;
>   }
> +
> +/**
> + * amdgpu_cs_sysvm_access_required - make BOs accessible by the system VM
> + *
> + * @parser: command submission parser context
> + *
> + * Helper for UVD/VCE VM emulation, make sure BOs are accessible by the system VM.
> + */
> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
> +{
> +	unsigned i;
> +	int r;
> +
> +	if (!parser->bo_list)
> +		return 0;
> +
> +	for (i = 0; i < parser->bo_list->num_entries; i++) {
> +		struct amdgpu_bo *bo = parser->bo_list->array[i].robj;
> +
> +		r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
> +		if (unlikely(r))
> +			return r;
> +	}
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index b17734e..dc73f11 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -646,6 +646,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
>   		dev_err(bo->adev->dev, "%p pin failed\n", bo);
>   		goto error;
>   	}
> +	r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
> +	if (unlikely(r)) {
> +		dev_err(bo->adev->dev, "%p bind failed\n", bo);
> +		goto error;
> +	}
>   
>   	bo->pin_count = 1;
>   	if (gpu_addr != NULL)
> @@ -918,6 +923,8 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct fence *fence,
>   u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
>   {
>   	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
> +	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
> +		     !amdgpu_ttm_is_bound(bo->tbo.ttm));
>   	WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
>   		     !bo->pin_count);
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 4337b3a..c294aa9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -256,8 +256,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
>   	new_start = (u64)new_mem->start << PAGE_SHIFT;
>   
>   	switch (old_mem->mem_type) {
> -	case TTM_PL_VRAM:
>   	case TTM_PL_TT:
> +		r = amdgpu_ttm_bind(bo->ttm, old_mem);
> +		if (r)
> +			return r;
> +
> +	case TTM_PL_VRAM:
>   		old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
>   		break;
>   	default:
> @@ -265,8 +269,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
>   		return -EINVAL;
>   	}
>   	switch (new_mem->mem_type) {
> -	case TTM_PL_VRAM:
>   	case TTM_PL_TT:
> +		r = amdgpu_ttm_bind(bo->ttm, new_mem);
> +		if (r)
> +			return r;
> +
> +	case TTM_PL_VRAM:
>   		new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
>   		break;
>   	default:
> @@ -639,7 +647,6 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
>   				   struct ttm_mem_reg *bo_mem)
>   {
>   	struct amdgpu_ttm_tt *gtt = (void*)ttm;
> -	uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>   	int r;
>   
>   	if (gtt->userptr) {
> @@ -660,6 +667,26 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
>   	    bo_mem->mem_type == AMDGPU_PL_OA)
>   		return -EINVAL;
>   
> +	return 0;
> +}
> +
> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
> +{
> +	struct amdgpu_ttm_tt *gtt = (void *)ttm;
> +
> +	return gtt && !list_empty(&gtt->list);
> +}
> +
> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
> +{
> +	struct amdgpu_ttm_tt *gtt = (void *)ttm;
> +	uint32_t flags;
> +	int r;
> +
> +	if (!ttm || amdgpu_ttm_is_bound(ttm))
> +		return 0;
> +
> +	flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>   	r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
>   		ttm->pages, gtt->ttm.dma_address, flags);
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 72f6bfc..214bae9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -77,4 +77,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>   			struct fence **fence);
>   
>   int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
> +
>   #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> index 5888e8a..fab7bb1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> @@ -891,6 +891,10 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
>   		return -EINVAL;
>   	}
>   
> +	r = amdgpu_cs_sysvm_access_required(parser);
> +	if (r)
> +		return r;
> +
>   	ctx.parser = parser;
>   	ctx.buf_sizes = buf_sizes;
>   	ctx.ib_idx = ib_idx;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> index 9b71d6c..b0c6702 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> @@ -634,7 +634,11 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
>   	uint32_t allocated = 0;
>   	uint32_t tmp, handle = 0;
>   	uint32_t *size = &tmp;
> -	int i, r = 0, idx = 0;
> +	int i, r, idx = 0;
> +
> +	r = amdgpu_cs_sysvm_access_required(p);
> +	if (r)
> +		return r;
>   
>   	while (idx < ib->length_dw) {
>   		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 7660f82..a549abd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1162,7 +1162,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>   	}
>   
>   	flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
> -	gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;
> +	gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
> +		adev == bo_va->bo->adev) ? flags : 0;
>   
>   	spin_lock(&vm->status_lock);
>   	if (!list_empty(&bo_va->vm_status))

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: bind GTT on demand
       [not found]         ` <57CE9220.8060403-5C7GfCeVMHo@public.gmane.org>
@ 2016-09-06 10:48           ` Christian König
       [not found]             ` <e3ec968a-32b0-cfdd-36d3-6f07b0f5e14b-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Christian König @ 2016-09-06 10:48 UTC (permalink / raw)
  To: zhoucm1, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 06.09.2016 um 11:53 schrieb zhoucm1:
>
>
> On 2016年09月06日 17:41, Christian König wrote:
>> From: Christian König <christian.koenig@amd.com>
>>
>> We don't really need the GTT table any more most of the time.
> Why? I thought GTT bo is always needed to be bound when GPU is trying 
> to access it, doesn't it?

We only need it to be bound when we try to access it from the system VM 
(UVD/VCE, ring buffers, fences etc...).

The VMs for each process can work without it and it is just overhead to 
bind it all the time.

Regards,
Christian.


>
> Regards,
> David Zhou
>>   So bind it
>> only on demand.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 34 
>> ++++++++++++++++++++++++++++--
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  7 ++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 33 
>> ++++++++++++++++++++++++++---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h    |  3 +++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c    |  4 ++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c    |  6 +++++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     |  3 ++-
>>   8 files changed, 84 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 9d39fa8..28d4a67 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -2521,6 +2521,7 @@ static inline void amdgpu_acpi_fini(struct 
>> amdgpu_device *adev) { }
>>   struct amdgpu_bo_va_mapping *
>>   amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>>                  uint64_t addr, struct amdgpu_bo **bo);
>> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
>>     #if defined(CONFIG_DRM_AMD_DAL)
>>   int amdgpu_dm_display_resume(struct amdgpu_device *adev );
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index 20a1962..e069978 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -640,8 +640,12 @@ static int amdgpu_cs_parser_bos(struct 
>> amdgpu_cs_parser *p,
>>           }
>>       }
>>   -    if (p->uf_entry.robj)
>> -        p->job->uf_addr += amdgpu_bo_gpu_offset(p->uf_entry.robj);
>> +    if (!r && p->uf_entry.robj) {
>> +        struct amdgpu_bo *uf = p->uf_entry.robj;
>> +
>> +        r = amdgpu_ttm_bind(uf->tbo.ttm, &uf->tbo.mem);
>> +        p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
>> +    }
>>     error_validate:
>>       if (r) {
>> @@ -1156,3 +1160,29 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser 
>> *parser,
>>         return NULL;
>>   }
>> +
>> +/**
>> + * amdgpu_cs_sysvm_access_required - make BOs accessible by the 
>> system VM
>> + *
>> + * @parser: command submission parser context
>> + *
>> + * Helper for UVD/VCE VM emulation, make sure BOs are accessible by 
>> the system VM.
>> + */
>> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
>> +{
>> +    unsigned i;
>> +    int r;
>> +
>> +    if (!parser->bo_list)
>> +        return 0;
>> +
>> +    for (i = 0; i < parser->bo_list->num_entries; i++) {
>> +        struct amdgpu_bo *bo = parser->bo_list->array[i].robj;
>> +
>> +        r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
>> +        if (unlikely(r))
>> +            return r;
>> +    }
>> +
>> +    return 0;
>> +}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index b17734e..dc73f11 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -646,6 +646,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo 
>> *bo, u32 domain,
>>           dev_err(bo->adev->dev, "%p pin failed\n", bo);
>>           goto error;
>>       }
>> +    r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
>> +    if (unlikely(r)) {
>> +        dev_err(bo->adev->dev, "%p bind failed\n", bo);
>> +        goto error;
>> +    }
>>         bo->pin_count = 1;
>>       if (gpu_addr != NULL)
>> @@ -918,6 +923,8 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct 
>> fence *fence,
>>   u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
>>   {
>>       WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
>> +    WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
>> +             !amdgpu_ttm_is_bound(bo->tbo.ttm));
>> WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
>>                !bo->pin_count);
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 4337b3a..c294aa9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -256,8 +256,12 @@ static int amdgpu_move_blit(struct 
>> ttm_buffer_object *bo,
>>       new_start = (u64)new_mem->start << PAGE_SHIFT;
>>         switch (old_mem->mem_type) {
>> -    case TTM_PL_VRAM:
>>       case TTM_PL_TT:
>> +        r = amdgpu_ttm_bind(bo->ttm, old_mem);
>> +        if (r)
>> +            return r;
>> +
>> +    case TTM_PL_VRAM:
>>           old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
>>           break;
>>       default:
>> @@ -265,8 +269,12 @@ static int amdgpu_move_blit(struct 
>> ttm_buffer_object *bo,
>>           return -EINVAL;
>>       }
>>       switch (new_mem->mem_type) {
>> -    case TTM_PL_VRAM:
>>       case TTM_PL_TT:
>> +        r = amdgpu_ttm_bind(bo->ttm, new_mem);
>> +        if (r)
>> +            return r;
>> +
>> +    case TTM_PL_VRAM:
>>           new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
>>           break;
>>       default:
>> @@ -639,7 +647,6 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt 
>> *ttm,
>>                      struct ttm_mem_reg *bo_mem)
>>   {
>>       struct amdgpu_ttm_tt *gtt = (void*)ttm;
>> -    uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>>       int r;
>>         if (gtt->userptr) {
>> @@ -660,6 +667,26 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt 
>> *ttm,
>>           bo_mem->mem_type == AMDGPU_PL_OA)
>>           return -EINVAL;
>>   +    return 0;
>> +}
>> +
>> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
>> +{
>> +    struct amdgpu_ttm_tt *gtt = (void *)ttm;
>> +
>> +    return gtt && !list_empty(&gtt->list);
>> +}
>> +
>> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
>> +{
>> +    struct amdgpu_ttm_tt *gtt = (void *)ttm;
>> +    uint32_t flags;
>> +    int r;
>> +
>> +    if (!ttm || amdgpu_ttm_is_bound(ttm))
>> +        return 0;
>> +
>> +    flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>>       r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
>>           ttm->pages, gtt->ttm.dma_address, flags);
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> index 72f6bfc..214bae9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> @@ -77,4 +77,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>               struct fence **fence);
>>     int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
>> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
>> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
>> +
>>   #endif
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>> index 5888e8a..fab7bb1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>> @@ -891,6 +891,10 @@ int amdgpu_uvd_ring_parse_cs(struct 
>> amdgpu_cs_parser *parser, uint32_t ib_idx)
>>           return -EINVAL;
>>       }
>>   +    r = amdgpu_cs_sysvm_access_required(parser);
>> +    if (r)
>> +        return r;
>> +
>>       ctx.parser = parser;
>>       ctx.buf_sizes = buf_sizes;
>>       ctx.ib_idx = ib_idx;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>> index 9b71d6c..b0c6702 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>> @@ -634,7 +634,11 @@ int amdgpu_vce_ring_parse_cs(struct 
>> amdgpu_cs_parser *p, uint32_t ib_idx)
>>       uint32_t allocated = 0;
>>       uint32_t tmp, handle = 0;
>>       uint32_t *size = &tmp;
>> -    int i, r = 0, idx = 0;
>> +    int i, r, idx = 0;
>> +
>> +    r = amdgpu_cs_sysvm_access_required(p);
>> +    if (r)
>> +        return r;
>>         while (idx < ib->length_dw) {
>>           uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 7660f82..a549abd 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1162,7 +1162,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device 
>> *adev,
>>       }
>>         flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
>> -    gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;
>> +    gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
>> +        adev == bo_va->bo->adev) ? flags : 0;
>>         spin_lock(&vm->status_lock);
>>       if (!list_empty(&bo_va->vm_status))
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: bind GTT on demand
       [not found]             ` <e3ec968a-32b0-cfdd-36d3-6f07b0f5e14b-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2016-09-07  7:18               ` zhoucm1
       [not found]                 ` <57CFBF51.3000506-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: zhoucm1 @ 2016-09-07  7:18 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



On 2016年09月06日 18:48, Christian König wrote:
> Am 06.09.2016 um 11:53 schrieb zhoucm1:
>>
>>
>> On 2016年09月06日 17:41, Christian König wrote:
>>> From: Christian König <christian.koenig@amd.com>
>>>
>>> We don't really need the GTT table any more most of the time.
>> Why? I thought GTT bo is always needed to be bound when GPU is trying 
>> to access it, doesn't it?
>
> We only need it to be bound when we try to access it from the system 
> VM (UVD/VCE, ring buffers, fences etc...).
>
> The VMs for each process can work without it and it is just overhead 
> to bind it all the time.
Yes, I got your mean, copying pte from GART to VM confused me, I thought 
binding GTT table is a must, that's wrong.
Go though vm code again, pte can be created by amdgpu_vm_map_gart if no 
GTT pte source when we don't bind it at all under VM process, am I right 
now?

Regards,
David Zhou

>
> Regards,
> Christian.
>
>
>>
>> Regards,
>> David Zhou
>>>   So bind it
>>> only on demand.
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 34 
>>> ++++++++++++++++++++++++++++--
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  7 ++++++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 33 
>>> ++++++++++++++++++++++++++---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h    |  3 +++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c    |  4 ++++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c    |  6 +++++-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     |  3 ++-
>>>   8 files changed, 84 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index 9d39fa8..28d4a67 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -2521,6 +2521,7 @@ static inline void amdgpu_acpi_fini(struct 
>>> amdgpu_device *adev) { }
>>>   struct amdgpu_bo_va_mapping *
>>>   amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>>>                  uint64_t addr, struct amdgpu_bo **bo);
>>> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
>>>     #if defined(CONFIG_DRM_AMD_DAL)
>>>   int amdgpu_dm_display_resume(struct amdgpu_device *adev );
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> index 20a1962..e069978 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> @@ -640,8 +640,12 @@ static int amdgpu_cs_parser_bos(struct 
>>> amdgpu_cs_parser *p,
>>>           }
>>>       }
>>>   -    if (p->uf_entry.robj)
>>> -        p->job->uf_addr += amdgpu_bo_gpu_offset(p->uf_entry.robj);
>>> +    if (!r && p->uf_entry.robj) {
>>> +        struct amdgpu_bo *uf = p->uf_entry.robj;
>>> +
>>> +        r = amdgpu_ttm_bind(uf->tbo.ttm, &uf->tbo.mem);
>>> +        p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
>>> +    }
>>>     error_validate:
>>>       if (r) {
>>> @@ -1156,3 +1160,29 @@ amdgpu_cs_find_mapping(struct 
>>> amdgpu_cs_parser *parser,
>>>         return NULL;
>>>   }
>>> +
>>> +/**
>>> + * amdgpu_cs_sysvm_access_required - make BOs accessible by the 
>>> system VM
>>> + *
>>> + * @parser: command submission parser context
>>> + *
>>> + * Helper for UVD/VCE VM emulation, make sure BOs are accessible by 
>>> the system VM.
>>> + */
>>> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
>>> +{
>>> +    unsigned i;
>>> +    int r;
>>> +
>>> +    if (!parser->bo_list)
>>> +        return 0;
>>> +
>>> +    for (i = 0; i < parser->bo_list->num_entries; i++) {
>>> +        struct amdgpu_bo *bo = parser->bo_list->array[i].robj;
>>> +
>>> +        r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
>>> +        if (unlikely(r))
>>> +            return r;
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> index b17734e..dc73f11 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> @@ -646,6 +646,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo 
>>> *bo, u32 domain,
>>>           dev_err(bo->adev->dev, "%p pin failed\n", bo);
>>>           goto error;
>>>       }
>>> +    r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
>>> +    if (unlikely(r)) {
>>> +        dev_err(bo->adev->dev, "%p bind failed\n", bo);
>>> +        goto error;
>>> +    }
>>>         bo->pin_count = 1;
>>>       if (gpu_addr != NULL)
>>> @@ -918,6 +923,8 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, 
>>> struct fence *fence,
>>>   u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
>>>   {
>>>       WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
>>> +    WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
>>> +             !amdgpu_ttm_is_bound(bo->tbo.ttm));
>>> WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
>>>                !bo->pin_count);
>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> index 4337b3a..c294aa9 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> @@ -256,8 +256,12 @@ static int amdgpu_move_blit(struct 
>>> ttm_buffer_object *bo,
>>>       new_start = (u64)new_mem->start << PAGE_SHIFT;
>>>         switch (old_mem->mem_type) {
>>> -    case TTM_PL_VRAM:
>>>       case TTM_PL_TT:
>>> +        r = amdgpu_ttm_bind(bo->ttm, old_mem);
>>> +        if (r)
>>> +            return r;
>>> +
>>> +    case TTM_PL_VRAM:
>>>           old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
>>>           break;
>>>       default:
>>> @@ -265,8 +269,12 @@ static int amdgpu_move_blit(struct 
>>> ttm_buffer_object *bo,
>>>           return -EINVAL;
>>>       }
>>>       switch (new_mem->mem_type) {
>>> -    case TTM_PL_VRAM:
>>>       case TTM_PL_TT:
>>> +        r = amdgpu_ttm_bind(bo->ttm, new_mem);
>>> +        if (r)
>>> +            return r;
>>> +
>>> +    case TTM_PL_VRAM:
>>>           new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
>>>           break;
>>>       default:
>>> @@ -639,7 +647,6 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt 
>>> *ttm,
>>>                      struct ttm_mem_reg *bo_mem)
>>>   {
>>>       struct amdgpu_ttm_tt *gtt = (void*)ttm;
>>> -    uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>>>       int r;
>>>         if (gtt->userptr) {
>>> @@ -660,6 +667,26 @@ static int amdgpu_ttm_backend_bind(struct 
>>> ttm_tt *ttm,
>>>           bo_mem->mem_type == AMDGPU_PL_OA)
>>>           return -EINVAL;
>>>   +    return 0;
>>> +}
>>> +
>>> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
>>> +{
>>> +    struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>> +
>>> +    return gtt && !list_empty(&gtt->list);
>>> +}
>>> +
>>> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
>>> +{
>>> +    struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>> +    uint32_t flags;
>>> +    int r;
>>> +
>>> +    if (!ttm || amdgpu_ttm_is_bound(ttm))
>>> +        return 0;
>>> +
>>> +    flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>>>       r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
>>>           ttm->pages, gtt->ttm.dma_address, flags);
>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> index 72f6bfc..214bae9 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> @@ -77,4 +77,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>>               struct fence **fence);
>>>     int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
>>> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
>>> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
>>> +
>>>   #endif
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>> index 5888e8a..fab7bb1 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>> @@ -891,6 +891,10 @@ int amdgpu_uvd_ring_parse_cs(struct 
>>> amdgpu_cs_parser *parser, uint32_t ib_idx)
>>>           return -EINVAL;
>>>       }
>>>   +    r = amdgpu_cs_sysvm_access_required(parser);
>>> +    if (r)
>>> +        return r;
>>> +
>>>       ctx.parser = parser;
>>>       ctx.buf_sizes = buf_sizes;
>>>       ctx.ib_idx = ib_idx;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>> index 9b71d6c..b0c6702 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>> @@ -634,7 +634,11 @@ int amdgpu_vce_ring_parse_cs(struct 
>>> amdgpu_cs_parser *p, uint32_t ib_idx)
>>>       uint32_t allocated = 0;
>>>       uint32_t tmp, handle = 0;
>>>       uint32_t *size = &tmp;
>>> -    int i, r = 0, idx = 0;
>>> +    int i, r, idx = 0;
>>> +
>>> +    r = amdgpu_cs_sysvm_access_required(p);
>>> +    if (r)
>>> +        return r;
>>>         while (idx < ib->length_dw) {
>>>           uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 7660f82..a549abd 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -1162,7 +1162,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device 
>>> *adev,
>>>       }
>>>         flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
>>> -    gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;
>>> +    gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
>>> +        adev == bo_va->bo->adev) ? flags : 0;
>>>         spin_lock(&vm->status_lock);
>>>       if (!list_empty(&bo_va->vm_status))
>>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: bind GTT on demand
       [not found]                 ` <57CFBF51.3000506-5C7GfCeVMHo@public.gmane.org>
@ 2016-09-07  8:18                   ` Christian König
       [not found]                     ` <24352e53-0c1c-8b55-0b39-35b3e5d350ff-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Christian König @ 2016-09-07  8:18 UTC (permalink / raw)
  To: zhoucm1, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 07.09.2016 um 09:18 schrieb zhoucm1:
>
>
> On 2016年09月06日 18:48, Christian König wrote:
>> Am 06.09.2016 um 11:53 schrieb zhoucm1:
>>>
>>>
>>> On 2016年09月06日 17:41, Christian König wrote:
>>>> From: Christian König <christian.koenig@amd.com>
>>>>
>>>> We don't really need the GTT table any more most of the time.
>>> Why? I thought GTT bo is always needed to be bound when GPU is 
>>> trying to access it, doesn't it?
>>
>> We only need it to be bound when we try to access it from the system 
>> VM (UVD/VCE, ring buffers, fences etc...).
>>
>> The VMs for each process can work without it and it is just overhead 
>> to bind it all the time.
> Yes, I got your mean, copying pte from GART to VM confused me, I 
> thought binding GTT table is a must, that's wrong.
> Go though vm code again, pte can be created by amdgpu_vm_map_gart if 
> no GTT pte source when we don't bind it at all under VM process, am I 
> right now?

Yes exactly. I still need to measure the performance impact of that, 
adding the copy from the GTT made quite a difference when I introduced that.

I hope that doing the write when the update the VM is still faster than 
doing the GTT write.

Regards,
Christian.

>
> Regards,
> David Zhou
>
>>
>> Regards,
>> Christian.
>>
>>
>>>
>>> Regards,
>>> David Zhou
>>>>   So bind it
>>>> only on demand.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 34 
>>>> ++++++++++++++++++++++++++++--
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  7 ++++++
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 33 
>>>> ++++++++++++++++++++++++++---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h    |  3 +++
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c    |  4 ++++
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c    |  6 +++++-
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     |  3 ++-
>>>>   8 files changed, 84 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> index 9d39fa8..28d4a67 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> @@ -2521,6 +2521,7 @@ static inline void amdgpu_acpi_fini(struct 
>>>> amdgpu_device *adev) { }
>>>>   struct amdgpu_bo_va_mapping *
>>>>   amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>>>>                  uint64_t addr, struct amdgpu_bo **bo);
>>>> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
>>>>     #if defined(CONFIG_DRM_AMD_DAL)
>>>>   int amdgpu_dm_display_resume(struct amdgpu_device *adev );
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> index 20a1962..e069978 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> @@ -640,8 +640,12 @@ static int amdgpu_cs_parser_bos(struct 
>>>> amdgpu_cs_parser *p,
>>>>           }
>>>>       }
>>>>   -    if (p->uf_entry.robj)
>>>> -        p->job->uf_addr += amdgpu_bo_gpu_offset(p->uf_entry.robj);
>>>> +    if (!r && p->uf_entry.robj) {
>>>> +        struct amdgpu_bo *uf = p->uf_entry.robj;
>>>> +
>>>> +        r = amdgpu_ttm_bind(uf->tbo.ttm, &uf->tbo.mem);
>>>> +        p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
>>>> +    }
>>>>     error_validate:
>>>>       if (r) {
>>>> @@ -1156,3 +1160,29 @@ amdgpu_cs_find_mapping(struct 
>>>> amdgpu_cs_parser *parser,
>>>>         return NULL;
>>>>   }
>>>> +
>>>> +/**
>>>> + * amdgpu_cs_sysvm_access_required - make BOs accessible by the 
>>>> system VM
>>>> + *
>>>> + * @parser: command submission parser context
>>>> + *
>>>> + * Helper for UVD/VCE VM emulation, make sure BOs are accessible 
>>>> by the system VM.
>>>> + */
>>>> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
>>>> +{
>>>> +    unsigned i;
>>>> +    int r;
>>>> +
>>>> +    if (!parser->bo_list)
>>>> +        return 0;
>>>> +
>>>> +    for (i = 0; i < parser->bo_list->num_entries; i++) {
>>>> +        struct amdgpu_bo *bo = parser->bo_list->array[i].robj;
>>>> +
>>>> +        r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
>>>> +        if (unlikely(r))
>>>> +            return r;
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> index b17734e..dc73f11 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> @@ -646,6 +646,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo 
>>>> *bo, u32 domain,
>>>>           dev_err(bo->adev->dev, "%p pin failed\n", bo);
>>>>           goto error;
>>>>       }
>>>> +    r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
>>>> +    if (unlikely(r)) {
>>>> +        dev_err(bo->adev->dev, "%p bind failed\n", bo);
>>>> +        goto error;
>>>> +    }
>>>>         bo->pin_count = 1;
>>>>       if (gpu_addr != NULL)
>>>> @@ -918,6 +923,8 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, 
>>>> struct fence *fence,
>>>>   u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
>>>>   {
>>>>       WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
>>>> +    WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
>>>> +             !amdgpu_ttm_is_bound(bo->tbo.ttm));
>>>> WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
>>>>                !bo->pin_count);
>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> index 4337b3a..c294aa9 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> @@ -256,8 +256,12 @@ static int amdgpu_move_blit(struct 
>>>> ttm_buffer_object *bo,
>>>>       new_start = (u64)new_mem->start << PAGE_SHIFT;
>>>>         switch (old_mem->mem_type) {
>>>> -    case TTM_PL_VRAM:
>>>>       case TTM_PL_TT:
>>>> +        r = amdgpu_ttm_bind(bo->ttm, old_mem);
>>>> +        if (r)
>>>> +            return r;
>>>> +
>>>> +    case TTM_PL_VRAM:
>>>>           old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
>>>>           break;
>>>>       default:
>>>> @@ -265,8 +269,12 @@ static int amdgpu_move_blit(struct 
>>>> ttm_buffer_object *bo,
>>>>           return -EINVAL;
>>>>       }
>>>>       switch (new_mem->mem_type) {
>>>> -    case TTM_PL_VRAM:
>>>>       case TTM_PL_TT:
>>>> +        r = amdgpu_ttm_bind(bo->ttm, new_mem);
>>>> +        if (r)
>>>> +            return r;
>>>> +
>>>> +    case TTM_PL_VRAM:
>>>>           new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
>>>>           break;
>>>>       default:
>>>> @@ -639,7 +647,6 @@ static int amdgpu_ttm_backend_bind(struct 
>>>> ttm_tt *ttm,
>>>>                      struct ttm_mem_reg *bo_mem)
>>>>   {
>>>>       struct amdgpu_ttm_tt *gtt = (void*)ttm;
>>>> -    uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>>>>       int r;
>>>>         if (gtt->userptr) {
>>>> @@ -660,6 +667,26 @@ static int amdgpu_ttm_backend_bind(struct 
>>>> ttm_tt *ttm,
>>>>           bo_mem->mem_type == AMDGPU_PL_OA)
>>>>           return -EINVAL;
>>>>   +    return 0;
>>>> +}
>>>> +
>>>> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
>>>> +{
>>>> +    struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>>> +
>>>> +    return gtt && !list_empty(&gtt->list);
>>>> +}
>>>> +
>>>> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
>>>> +{
>>>> +    struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>>> +    uint32_t flags;
>>>> +    int r;
>>>> +
>>>> +    if (!ttm || amdgpu_ttm_is_bound(ttm))
>>>> +        return 0;
>>>> +
>>>> +    flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>>>>       r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
>>>>           ttm->pages, gtt->ttm.dma_address, flags);
>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>> index 72f6bfc..214bae9 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>> @@ -77,4 +77,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>>>               struct fence **fence);
>>>>     int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
>>>> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
>>>> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
>>>> +
>>>>   #endif
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>> index 5888e8a..fab7bb1 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>> @@ -891,6 +891,10 @@ int amdgpu_uvd_ring_parse_cs(struct 
>>>> amdgpu_cs_parser *parser, uint32_t ib_idx)
>>>>           return -EINVAL;
>>>>       }
>>>>   +    r = amdgpu_cs_sysvm_access_required(parser);
>>>> +    if (r)
>>>> +        return r;
>>>> +
>>>>       ctx.parser = parser;
>>>>       ctx.buf_sizes = buf_sizes;
>>>>       ctx.ib_idx = ib_idx;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>> index 9b71d6c..b0c6702 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>> @@ -634,7 +634,11 @@ int amdgpu_vce_ring_parse_cs(struct 
>>>> amdgpu_cs_parser *p, uint32_t ib_idx)
>>>>       uint32_t allocated = 0;
>>>>       uint32_t tmp, handle = 0;
>>>>       uint32_t *size = &tmp;
>>>> -    int i, r = 0, idx = 0;
>>>> +    int i, r, idx = 0;
>>>> +
>>>> +    r = amdgpu_cs_sysvm_access_required(p);
>>>> +    if (r)
>>>> +        return r;
>>>>         while (idx < ib->length_dw) {
>>>>           uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index 7660f82..a549abd 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -1162,7 +1162,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device 
>>>> *adev,
>>>>       }
>>>>         flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, 
>>>> mem);
>>>> -    gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;
>>>> +    gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
>>>> +        adev == bo_va->bo->adev) ? flags : 0;
>>>>         spin_lock(&vm->status_lock);
>>>>       if (!list_empty(&bo_va->vm_status))
>>>
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH 2/2] drm/amdgpu: bind GTT on demand
       [not found]     ` <1473154866-2448-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2016-09-06  9:53       ` zhoucm1
@ 2016-09-07 14:32       ` Deucher, Alexander
  2016-09-17 21:14       ` Marek Olšák
  2 siblings, 0 replies; 16+ messages in thread
From: Deucher, Alexander @ 2016-09-07 14:32 UTC (permalink / raw)
  To: 'Christian König', amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Christian König
> Sent: Tuesday, September 06, 2016 5:41 AM
> To: amd-gfx@lists.freedesktop.org
> Subject: [PATCH 2/2] drm/amdgpu: bind GTT on demand
> 
> From: Christian König <christian.koenig@amd.com>
> 
> We don't really need the GTT table any more most of the time. So bind it
> only on demand.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

For the series:
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 34
> ++++++++++++++++++++++++++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  7 ++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 33
> ++++++++++++++++++++++++++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h    |  3 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c    |  4 ++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c    |  6 +++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     |  3 ++-
>  8 files changed, 84 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 9d39fa8..28d4a67 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -2521,6 +2521,7 @@ static inline void amdgpu_acpi_fini(struct
> amdgpu_device *adev) { }
>  struct amdgpu_bo_va_mapping *
>  amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>  		       uint64_t addr, struct amdgpu_bo **bo);
> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
> 
>  #if defined(CONFIG_DRM_AMD_DAL)
>  int amdgpu_dm_display_resume(struct amdgpu_device *adev );
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 20a1962..e069978 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -640,8 +640,12 @@ static int amdgpu_cs_parser_bos(struct
> amdgpu_cs_parser *p,
>  		}
>  	}
> 
> -	if (p->uf_entry.robj)
> -		p->job->uf_addr += amdgpu_bo_gpu_offset(p-
> >uf_entry.robj);
> +	if (!r && p->uf_entry.robj) {
> +		struct amdgpu_bo *uf = p->uf_entry.robj;
> +
> +		r = amdgpu_ttm_bind(uf->tbo.ttm, &uf->tbo.mem);
> +		p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
> +	}
> 
>  error_validate:
>  	if (r) {
> @@ -1156,3 +1160,29 @@ amdgpu_cs_find_mapping(struct
> amdgpu_cs_parser *parser,
> 
>  	return NULL;
>  }
> +
> +/**
> + * amdgpu_cs_sysvm_access_required - make BOs accessible by the
> system VM
> + *
> + * @parser: command submission parser context
> + *
> + * Helper for UVD/VCE VM emulation, make sure BOs are accessible by the
> system VM.
> + */
> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
> +{
> +	unsigned i;
> +	int r;
> +
> +	if (!parser->bo_list)
> +		return 0;
> +
> +	for (i = 0; i < parser->bo_list->num_entries; i++) {
> +		struct amdgpu_bo *bo = parser->bo_list->array[i].robj;
> +
> +		r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
> +		if (unlikely(r))
> +			return r;
> +	}
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index b17734e..dc73f11 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -646,6 +646,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo
> *bo, u32 domain,
>  		dev_err(bo->adev->dev, "%p pin failed\n", bo);
>  		goto error;
>  	}
> +	r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
> +	if (unlikely(r)) {
> +		dev_err(bo->adev->dev, "%p bind failed\n", bo);
> +		goto error;
> +	}
> 
>  	bo->pin_count = 1;
>  	if (gpu_addr != NULL)
> @@ -918,6 +923,8 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo,
> struct fence *fence,
>  u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
>  {
>  	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
> +	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
> +		     !amdgpu_ttm_is_bound(bo->tbo.ttm));
>  	WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
>  		     !bo->pin_count);
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 4337b3a..c294aa9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -256,8 +256,12 @@ static int amdgpu_move_blit(struct
> ttm_buffer_object *bo,
>  	new_start = (u64)new_mem->start << PAGE_SHIFT;
> 
>  	switch (old_mem->mem_type) {
> -	case TTM_PL_VRAM:
>  	case TTM_PL_TT:
> +		r = amdgpu_ttm_bind(bo->ttm, old_mem);
> +		if (r)
> +			return r;
> +
> +	case TTM_PL_VRAM:
>  		old_start += bo->bdev->man[old_mem-
> >mem_type].gpu_offset;
>  		break;
>  	default:
> @@ -265,8 +269,12 @@ static int amdgpu_move_blit(struct
> ttm_buffer_object *bo,
>  		return -EINVAL;
>  	}
>  	switch (new_mem->mem_type) {
> -	case TTM_PL_VRAM:
>  	case TTM_PL_TT:
> +		r = amdgpu_ttm_bind(bo->ttm, new_mem);
> +		if (r)
> +			return r;
> +
> +	case TTM_PL_VRAM:
>  		new_start += bo->bdev->man[new_mem-
> >mem_type].gpu_offset;
>  		break;
>  	default:
> @@ -639,7 +647,6 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt
> *ttm,
>  				   struct ttm_mem_reg *bo_mem)
>  {
>  	struct amdgpu_ttm_tt *gtt = (void*)ttm;
> -	uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm,
> bo_mem);
>  	int r;
> 
>  	if (gtt->userptr) {
> @@ -660,6 +667,26 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt
> *ttm,
>  	    bo_mem->mem_type == AMDGPU_PL_OA)
>  		return -EINVAL;
> 
> +	return 0;
> +}
> +
> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
> +{
> +	struct amdgpu_ttm_tt *gtt = (void *)ttm;
> +
> +	return gtt && !list_empty(&gtt->list);
> +}
> +
> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
> +{
> +	struct amdgpu_ttm_tt *gtt = (void *)ttm;
> +	uint32_t flags;
> +	int r;
> +
> +	if (!ttm || amdgpu_ttm_is_bound(ttm))
> +		return 0;
> +
> +	flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>  	r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
>  		ttm->pages, gtt->ttm.dma_address, flags);
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 72f6bfc..214bae9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -77,4 +77,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>  			struct fence **fence);
> 
>  int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
> +
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> index 5888e8a..fab7bb1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> @@ -891,6 +891,10 @@ int amdgpu_uvd_ring_parse_cs(struct
> amdgpu_cs_parser *parser, uint32_t ib_idx)
>  		return -EINVAL;
>  	}
> 
> +	r = amdgpu_cs_sysvm_access_required(parser);
> +	if (r)
> +		return r;
> +
>  	ctx.parser = parser;
>  	ctx.buf_sizes = buf_sizes;
>  	ctx.ib_idx = ib_idx;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> index 9b71d6c..b0c6702 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> @@ -634,7 +634,11 @@ int amdgpu_vce_ring_parse_cs(struct
> amdgpu_cs_parser *p, uint32_t ib_idx)
>  	uint32_t allocated = 0;
>  	uint32_t tmp, handle = 0;
>  	uint32_t *size = &tmp;
> -	int i, r = 0, idx = 0;
> +	int i, r, idx = 0;
> +
> +	r = amdgpu_cs_sysvm_access_required(p);
> +	if (r)
> +		return r;
> 
>  	while (idx < ib->length_dw) {
>  		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 7660f82..a549abd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1162,7 +1162,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device
> *adev,
>  	}
> 
>  	flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
> -	gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;
> +	gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
> +		adev == bo_va->bo->adev) ? flags : 0;
> 
>  	spin_lock(&vm->status_lock);
>  	if (!list_empty(&bo_va->vm_status))
> --
> 2.5.0
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: bind GTT on demand
       [not found]                     ` <24352e53-0c1c-8b55-0b39-35b3e5d350ff-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2016-09-07 16:07                       ` Felix Kuehling
       [not found]                         ` <2a4f24fa-6873-7c7f-486f-ef4f29efa63a-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Felix Kuehling @ 2016-09-07 16:07 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Yeah, you save yourself the GTT update, which always uses the CPU.
Instead you write the page table entries to IBs using the CPU. Will be
interesting to see if that's a net speed-up or slow-down, if the
difference is significant at all.

The patch looks good to me. Thanks for tackling this. I didn't review it
too thoroughly, and if you forgot to map to GART somewhere important, I
would probably have missed it.

Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>

Once we pull this change into the KFD branch, we can probably revert our
ridiculous GART size without losing the ability to map lots of system
memory to GPUVM. :)

Regards,
  Felix

On 16-09-07 04:18 AM, Christian König wrote:
> Am 07.09.2016 um 09:18 schrieb zhoucm1:
>>
>>
>> On 2016年09月06日 18:48, Christian König wrote:
>>> Am 06.09.2016 um 11:53 schrieb zhoucm1:
>>>>
>>>>
>>>> On 2016年09月06日 17:41, Christian König wrote:
>>>>> From: Christian König <christian.koenig@amd.com>
>>>>>
>>>>> We don't really need the GTT table any more most of the time.
>>>> Why? I thought GTT bo is always needed to be bound when GPU is
>>>> trying to access it, doesn't it?
>>>
>>> We only need it to be bound when we try to access it from the system
>>> VM (UVD/VCE, ring buffers, fences etc...).
>>>
>>> The VMs for each process can work without it and it is just overhead
>>> to bind it all the time.
>> Yes, I got your mean, copying pte from GART to VM confused me, I
>> thought binding GTT table is a must, that's wrong.
>> Go though vm code again, pte can be created by amdgpu_vm_map_gart if
>> no GTT pte source when we don't bind it at all under VM process, am I
>> right now?
>
> Yes exactly. I still need to measure the performance impact of that,
> adding the copy from the GTT made quite a difference when I introduced
> that.
>
> I hope that doing the write when the update the VM is still faster
> than doing the GTT write.
>
> Regards,
> Christian.
>
>>
>> Regards,
>> David Zhou
>>
>>>
>>> Regards,
>>> Christian.
>>>
>>>
>>>>
>>>> Regards,
>>>> David Zhou
>>>>>   So bind it
>>>>> only on demand.
>>>>>
>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>> ---
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 34
>>>>> ++++++++++++++++++++++++++++--
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  7 ++++++
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 33
>>>>> ++++++++++++++++++++++++++---
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h    |  3 +++
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c    |  4 ++++
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c    |  6 +++++-
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     |  3 ++-
>>>>>   8 files changed, 84 insertions(+), 7 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> index 9d39fa8..28d4a67 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> @@ -2521,6 +2521,7 @@ static inline void amdgpu_acpi_fini(struct
>>>>> amdgpu_device *adev) { }
>>>>>   struct amdgpu_bo_va_mapping *
>>>>>   amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>>>>>                  uint64_t addr, struct amdgpu_bo **bo);
>>>>> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser
>>>>> *parser);
>>>>>     #if defined(CONFIG_DRM_AMD_DAL)
>>>>>   int amdgpu_dm_display_resume(struct amdgpu_device *adev );
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>> index 20a1962..e069978 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>> @@ -640,8 +640,12 @@ static int amdgpu_cs_parser_bos(struct
>>>>> amdgpu_cs_parser *p,
>>>>>           }
>>>>>       }
>>>>>   -    if (p->uf_entry.robj)
>>>>> -        p->job->uf_addr += amdgpu_bo_gpu_offset(p->uf_entry.robj);
>>>>> +    if (!r && p->uf_entry.robj) {
>>>>> +        struct amdgpu_bo *uf = p->uf_entry.robj;
>>>>> +
>>>>> +        r = amdgpu_ttm_bind(uf->tbo.ttm, &uf->tbo.mem);
>>>>> +        p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
>>>>> +    }
>>>>>     error_validate:
>>>>>       if (r) {
>>>>> @@ -1156,3 +1160,29 @@ amdgpu_cs_find_mapping(struct
>>>>> amdgpu_cs_parser *parser,
>>>>>         return NULL;
>>>>>   }
>>>>> +
>>>>> +/**
>>>>> + * amdgpu_cs_sysvm_access_required - make BOs accessible by the
>>>>> system VM
>>>>> + *
>>>>> + * @parser: command submission parser context
>>>>> + *
>>>>> + * Helper for UVD/VCE VM emulation, make sure BOs are accessible
>>>>> by the system VM.
>>>>> + */
>>>>> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
>>>>> +{
>>>>> +    unsigned i;
>>>>> +    int r;
>>>>> +
>>>>> +    if (!parser->bo_list)
>>>>> +        return 0;
>>>>> +
>>>>> +    for (i = 0; i < parser->bo_list->num_entries; i++) {
>>>>> +        struct amdgpu_bo *bo = parser->bo_list->array[i].robj;
>>>>> +
>>>>> +        r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
>>>>> +        if (unlikely(r))
>>>>> +            return r;
>>>>> +    }
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>>> index b17734e..dc73f11 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>>> @@ -646,6 +646,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo
>>>>> *bo, u32 domain,
>>>>>           dev_err(bo->adev->dev, "%p pin failed\n", bo);
>>>>>           goto error;
>>>>>       }
>>>>> +    r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
>>>>> +    if (unlikely(r)) {
>>>>> +        dev_err(bo->adev->dev, "%p bind failed\n", bo);
>>>>> +        goto error;
>>>>> +    }
>>>>>         bo->pin_count = 1;
>>>>>       if (gpu_addr != NULL)
>>>>> @@ -918,6 +923,8 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo,
>>>>> struct fence *fence,
>>>>>   u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
>>>>>   {
>>>>>       WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
>>>>> +    WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
>>>>> +             !amdgpu_ttm_is_bound(bo->tbo.ttm));
>>>>> WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
>>>>>                !bo->pin_count);
>>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>>> index 4337b3a..c294aa9 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>>> @@ -256,8 +256,12 @@ static int amdgpu_move_blit(struct
>>>>> ttm_buffer_object *bo,
>>>>>       new_start = (u64)new_mem->start << PAGE_SHIFT;
>>>>>         switch (old_mem->mem_type) {
>>>>> -    case TTM_PL_VRAM:
>>>>>       case TTM_PL_TT:
>>>>> +        r = amdgpu_ttm_bind(bo->ttm, old_mem);
>>>>> +        if (r)
>>>>> +            return r;
>>>>> +
>>>>> +    case TTM_PL_VRAM:
>>>>>           old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
>>>>>           break;
>>>>>       default:
>>>>> @@ -265,8 +269,12 @@ static int amdgpu_move_blit(struct
>>>>> ttm_buffer_object *bo,
>>>>>           return -EINVAL;
>>>>>       }
>>>>>       switch (new_mem->mem_type) {
>>>>> -    case TTM_PL_VRAM:
>>>>>       case TTM_PL_TT:
>>>>> +        r = amdgpu_ttm_bind(bo->ttm, new_mem);
>>>>> +        if (r)
>>>>> +            return r;
>>>>> +
>>>>> +    case TTM_PL_VRAM:
>>>>>           new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
>>>>>           break;
>>>>>       default:
>>>>> @@ -639,7 +647,6 @@ static int amdgpu_ttm_backend_bind(struct
>>>>> ttm_tt *ttm,
>>>>>                      struct ttm_mem_reg *bo_mem)
>>>>>   {
>>>>>       struct amdgpu_ttm_tt *gtt = (void*)ttm;
>>>>> -    uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm,
>>>>> bo_mem);
>>>>>       int r;
>>>>>         if (gtt->userptr) {
>>>>> @@ -660,6 +667,26 @@ static int amdgpu_ttm_backend_bind(struct
>>>>> ttm_tt *ttm,
>>>>>           bo_mem->mem_type == AMDGPU_PL_OA)
>>>>>           return -EINVAL;
>>>>>   +    return 0;
>>>>> +}
>>>>> +
>>>>> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
>>>>> +{
>>>>> +    struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>>>> +
>>>>> +    return gtt && !list_empty(&gtt->list);
>>>>> +}
>>>>> +
>>>>> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
>>>>> +{
>>>>> +    struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>>>> +    uint32_t flags;
>>>>> +    int r;
>>>>> +
>>>>> +    if (!ttm || amdgpu_ttm_is_bound(ttm))
>>>>> +        return 0;
>>>>> +
>>>>> +    flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>>>>>       r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
>>>>>           ttm->pages, gtt->ttm.dma_address, flags);
>>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>>> index 72f6bfc..214bae9 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>>> @@ -77,4 +77,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>>>>               struct fence **fence);
>>>>>     int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
>>>>> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
>>>>> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
>>>>> +
>>>>>   #endif
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>>> index 5888e8a..fab7bb1 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>>> @@ -891,6 +891,10 @@ int amdgpu_uvd_ring_parse_cs(struct
>>>>> amdgpu_cs_parser *parser, uint32_t ib_idx)
>>>>>           return -EINVAL;
>>>>>       }
>>>>>   +    r = amdgpu_cs_sysvm_access_required(parser);
>>>>> +    if (r)
>>>>> +        return r;
>>>>> +
>>>>>       ctx.parser = parser;
>>>>>       ctx.buf_sizes = buf_sizes;
>>>>>       ctx.ib_idx = ib_idx;
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>>> index 9b71d6c..b0c6702 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>>> @@ -634,7 +634,11 @@ int amdgpu_vce_ring_parse_cs(struct
>>>>> amdgpu_cs_parser *p, uint32_t ib_idx)
>>>>>       uint32_t allocated = 0;
>>>>>       uint32_t tmp, handle = 0;
>>>>>       uint32_t *size = &tmp;
>>>>> -    int i, r = 0, idx = 0;
>>>>> +    int i, r, idx = 0;
>>>>> +
>>>>> +    r = amdgpu_cs_sysvm_access_required(p);
>>>>> +    if (r)
>>>>> +        return r;
>>>>>         while (idx < ib->length_dw) {
>>>>>           uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> index 7660f82..a549abd 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> @@ -1162,7 +1162,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>>>> *adev,
>>>>>       }
>>>>>         flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm,
>>>>> mem);
>>>>> -    gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;
>>>>> +    gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
>>>>> +        adev == bo_va->bo->adev) ? flags : 0;
>>>>>         spin_lock(&vm->status_lock);
>>>>>       if (!list_empty(&bo_va->vm_status))
>>>>
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: bind GTT on demand
       [not found]                         ` <2a4f24fa-6873-7c7f-486f-ef4f29efa63a-5C7GfCeVMHo@public.gmane.org>
@ 2016-09-07 16:26                           ` Christian König
  0 siblings, 0 replies; 16+ messages in thread
From: Christian König @ 2016-09-07 16:26 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

> The patch looks good to me. Thanks for tackling this. I didn't review it
> too thoroughly, and if you forgot to map to GART somewhere important, I
> would probably have missed it.

Yeah that's also my biggest concern, but that should at least raise a 
WARN_ON().

> Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Thanks.

> Once we pull this change into the KFD branch, we can probably revert our
> ridiculous GART size without losing the ability to map lots of system
> memory to GPUVM. :)
Wait a second with that. You also need my second part of that work where 
I also stop assigning address space to all those buffers which aren't 
mapped.

But I'm still working on that.

Regards,
Christian.

Am 07.09.2016 um 18:07 schrieb Felix Kuehling:
> Yeah, you save yourself the GTT update, which always uses the CPU.
> Instead you write the page table entries to IBs using the CPU. Will be
> interesting to see if that's a net speed-up or slow-down, if the
> difference is significant at all.
>
> The patch looks good to me. Thanks for tackling this. I didn't review it
> too thoroughly, and if you forgot to map to GART somewhere important, I
> would probably have missed it.
>
> Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
>
> Once we pull this change into the KFD branch, we can probably revert our
> ridiculous GART size without losing the ability to map lots of system
> memory to GPUVM. :)
>
> Regards,
>    Felix
>
> On 16-09-07 04:18 AM, Christian König wrote:
>> Am 07.09.2016 um 09:18 schrieb zhoucm1:
>>>
>>> On 2016年09月06日 18:48, Christian König wrote:
>>>> Am 06.09.2016 um 11:53 schrieb zhoucm1:
>>>>>
>>>>> On 2016年09月06日 17:41, Christian König wrote:
>>>>>> From: Christian König <christian.koenig@amd.com>
>>>>>>
>>>>>> We don't really need the GTT table any more most of the time.
>>>>> Why? I thought GTT bo is always needed to be bound when GPU is
>>>>> trying to access it, doesn't it?
>>>> We only need it to be bound when we try to access it from the system
>>>> VM (UVD/VCE, ring buffers, fences etc...).
>>>>
>>>> The VMs for each process can work without it and it is just overhead
>>>> to bind it all the time.
>>> Yes, I got your mean, copying pte from GART to VM confused me, I
>>> thought binding GTT table is a must, that's wrong.
>>> Go though vm code again, pte can be created by amdgpu_vm_map_gart if
>>> no GTT pte source when we don't bind it at all under VM process, am I
>>> right now?
>> Yes exactly. I still need to measure the performance impact of that,
>> adding the copy from the GTT made quite a difference when I introduced
>> that.
>>
>> I hope that doing the write when the update the VM is still faster
>> than doing the GTT write.
>>
>> Regards,
>> Christian.
>>
>>> Regards,
>>> David Zhou
>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>
>>>>> Regards,
>>>>> David Zhou
>>>>>>    So bind it
>>>>>> only on demand.
>>>>>>
>>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>>> ---
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 34
>>>>>> ++++++++++++++++++++++++++++--
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  7 ++++++
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 33
>>>>>> ++++++++++++++++++++++++++---
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h    |  3 +++
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c    |  4 ++++
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c    |  6 +++++-
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     |  3 ++-
>>>>>>    8 files changed, 84 insertions(+), 7 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>> index 9d39fa8..28d4a67 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>> @@ -2521,6 +2521,7 @@ static inline void amdgpu_acpi_fini(struct
>>>>>> amdgpu_device *adev) { }
>>>>>>    struct amdgpu_bo_va_mapping *
>>>>>>    amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>>>>>>                   uint64_t addr, struct amdgpu_bo **bo);
>>>>>> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser
>>>>>> *parser);
>>>>>>      #if defined(CONFIG_DRM_AMD_DAL)
>>>>>>    int amdgpu_dm_display_resume(struct amdgpu_device *adev );
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>>> index 20a1962..e069978 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>>> @@ -640,8 +640,12 @@ static int amdgpu_cs_parser_bos(struct
>>>>>> amdgpu_cs_parser *p,
>>>>>>            }
>>>>>>        }
>>>>>>    -    if (p->uf_entry.robj)
>>>>>> -        p->job->uf_addr += amdgpu_bo_gpu_offset(p->uf_entry.robj);
>>>>>> +    if (!r && p->uf_entry.robj) {
>>>>>> +        struct amdgpu_bo *uf = p->uf_entry.robj;
>>>>>> +
>>>>>> +        r = amdgpu_ttm_bind(uf->tbo.ttm, &uf->tbo.mem);
>>>>>> +        p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
>>>>>> +    }
>>>>>>      error_validate:
>>>>>>        if (r) {
>>>>>> @@ -1156,3 +1160,29 @@ amdgpu_cs_find_mapping(struct
>>>>>> amdgpu_cs_parser *parser,
>>>>>>          return NULL;
>>>>>>    }
>>>>>> +
>>>>>> +/**
>>>>>> + * amdgpu_cs_sysvm_access_required - make BOs accessible by the
>>>>>> system VM
>>>>>> + *
>>>>>> + * @parser: command submission parser context
>>>>>> + *
>>>>>> + * Helper for UVD/VCE VM emulation, make sure BOs are accessible
>>>>>> by the system VM.
>>>>>> + */
>>>>>> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
>>>>>> +{
>>>>>> +    unsigned i;
>>>>>> +    int r;
>>>>>> +
>>>>>> +    if (!parser->bo_list)
>>>>>> +        return 0;
>>>>>> +
>>>>>> +    for (i = 0; i < parser->bo_list->num_entries; i++) {
>>>>>> +        struct amdgpu_bo *bo = parser->bo_list->array[i].robj;
>>>>>> +
>>>>>> +        r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
>>>>>> +        if (unlikely(r))
>>>>>> +            return r;
>>>>>> +    }
>>>>>> +
>>>>>> +    return 0;
>>>>>> +}
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>>>> index b17734e..dc73f11 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>>>> @@ -646,6 +646,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo
>>>>>> *bo, u32 domain,
>>>>>>            dev_err(bo->adev->dev, "%p pin failed\n", bo);
>>>>>>            goto error;
>>>>>>        }
>>>>>> +    r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
>>>>>> +    if (unlikely(r)) {
>>>>>> +        dev_err(bo->adev->dev, "%p bind failed\n", bo);
>>>>>> +        goto error;
>>>>>> +    }
>>>>>>          bo->pin_count = 1;
>>>>>>        if (gpu_addr != NULL)
>>>>>> @@ -918,6 +923,8 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo,
>>>>>> struct fence *fence,
>>>>>>    u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
>>>>>>    {
>>>>>>        WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
>>>>>> +    WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
>>>>>> +             !amdgpu_ttm_is_bound(bo->tbo.ttm));
>>>>>> WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
>>>>>>                 !bo->pin_count);
>>>>>>    diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>>>> index 4337b3a..c294aa9 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>>>> @@ -256,8 +256,12 @@ static int amdgpu_move_blit(struct
>>>>>> ttm_buffer_object *bo,
>>>>>>        new_start = (u64)new_mem->start << PAGE_SHIFT;
>>>>>>          switch (old_mem->mem_type) {
>>>>>> -    case TTM_PL_VRAM:
>>>>>>        case TTM_PL_TT:
>>>>>> +        r = amdgpu_ttm_bind(bo->ttm, old_mem);
>>>>>> +        if (r)
>>>>>> +            return r;
>>>>>> +
>>>>>> +    case TTM_PL_VRAM:
>>>>>>            old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
>>>>>>            break;
>>>>>>        default:
>>>>>> @@ -265,8 +269,12 @@ static int amdgpu_move_blit(struct
>>>>>> ttm_buffer_object *bo,
>>>>>>            return -EINVAL;
>>>>>>        }
>>>>>>        switch (new_mem->mem_type) {
>>>>>> -    case TTM_PL_VRAM:
>>>>>>        case TTM_PL_TT:
>>>>>> +        r = amdgpu_ttm_bind(bo->ttm, new_mem);
>>>>>> +        if (r)
>>>>>> +            return r;
>>>>>> +
>>>>>> +    case TTM_PL_VRAM:
>>>>>>            new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
>>>>>>            break;
>>>>>>        default:
>>>>>> @@ -639,7 +647,6 @@ static int amdgpu_ttm_backend_bind(struct
>>>>>> ttm_tt *ttm,
>>>>>>                       struct ttm_mem_reg *bo_mem)
>>>>>>    {
>>>>>>        struct amdgpu_ttm_tt *gtt = (void*)ttm;
>>>>>> -    uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm,
>>>>>> bo_mem);
>>>>>>        int r;
>>>>>>          if (gtt->userptr) {
>>>>>> @@ -660,6 +667,26 @@ static int amdgpu_ttm_backend_bind(struct
>>>>>> ttm_tt *ttm,
>>>>>>            bo_mem->mem_type == AMDGPU_PL_OA)
>>>>>>            return -EINVAL;
>>>>>>    +    return 0;
>>>>>> +}
>>>>>> +
>>>>>> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
>>>>>> +{
>>>>>> +    struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>>>>> +
>>>>>> +    return gtt && !list_empty(&gtt->list);
>>>>>> +}
>>>>>> +
>>>>>> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
>>>>>> +{
>>>>>> +    struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>>>>> +    uint32_t flags;
>>>>>> +    int r;
>>>>>> +
>>>>>> +    if (!ttm || amdgpu_ttm_is_bound(ttm))
>>>>>> +        return 0;
>>>>>> +
>>>>>> +    flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>>>>>>        r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
>>>>>>            ttm->pages, gtt->ttm.dma_address, flags);
>>>>>>    diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>>>> index 72f6bfc..214bae9 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>>>>> @@ -77,4 +77,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>>>>>                struct fence **fence);
>>>>>>      int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
>>>>>> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
>>>>>> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
>>>>>> +
>>>>>>    #endif
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>>>> index 5888e8a..fab7bb1 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>>>>> @@ -891,6 +891,10 @@ int amdgpu_uvd_ring_parse_cs(struct
>>>>>> amdgpu_cs_parser *parser, uint32_t ib_idx)
>>>>>>            return -EINVAL;
>>>>>>        }
>>>>>>    +    r = amdgpu_cs_sysvm_access_required(parser);
>>>>>> +    if (r)
>>>>>> +        return r;
>>>>>> +
>>>>>>        ctx.parser = parser;
>>>>>>        ctx.buf_sizes = buf_sizes;
>>>>>>        ctx.ib_idx = ib_idx;
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>>>> index 9b71d6c..b0c6702 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>>>>>> @@ -634,7 +634,11 @@ int amdgpu_vce_ring_parse_cs(struct
>>>>>> amdgpu_cs_parser *p, uint32_t ib_idx)
>>>>>>        uint32_t allocated = 0;
>>>>>>        uint32_t tmp, handle = 0;
>>>>>>        uint32_t *size = &tmp;
>>>>>> -    int i, r = 0, idx = 0;
>>>>>> +    int i, r, idx = 0;
>>>>>> +
>>>>>> +    r = amdgpu_cs_sysvm_access_required(p);
>>>>>> +    if (r)
>>>>>> +        return r;
>>>>>>          while (idx < ib->length_dw) {
>>>>>>            uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> index 7660f82..a549abd 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> @@ -1162,7 +1162,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>>>>> *adev,
>>>>>>        }
>>>>>>          flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm,
>>>>>> mem);
>>>>>> -    gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;
>>>>>> +    gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
>>>>>> +        adev == bo_va->bo->adev) ? flags : 0;
>>>>>>          spin_lock(&vm->status_lock);
>>>>>>        if (!list_empty(&bo_va->vm_status))
>>>> _______________________________________________
>>>> amd-gfx mailing list
>>>> amd-gfx@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: bind GTT on demand
       [not found]     ` <1473154866-2448-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2016-09-06  9:53       ` zhoucm1
  2016-09-07 14:32       ` Deucher, Alexander
@ 2016-09-17 21:14       ` Marek Olšák
       [not found]         ` <CAAxE2A4p=fnpwRsXDPhNW518uQqFoDzTxxhYg4_+yzJvhWOoBQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2 siblings, 1 reply; 16+ messages in thread
From: Marek Olšák @ 2016-09-17 21:14 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx mailing list

This breaks Tonga such that it hangs. Reproducible quickly with:

R600_DEBUG=testdma glxgears

It's a randomized test that runs forever. It should hang within 2 seconds.

Marek

On Tue, Sep 6, 2016 at 11:41 AM, Christian König
<deathsimple@vodafone.de> wrote:
> From: Christian König <christian.koenig@amd.com>
>
> We don't really need the GTT table any more most of the time. So bind it
> only on demand.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 34 ++++++++++++++++++++++++++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  7 ++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 33 ++++++++++++++++++++++++++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h    |  3 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c    |  4 ++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c    |  6 +++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     |  3 ++-
>  8 files changed, 84 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 9d39fa8..28d4a67 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -2521,6 +2521,7 @@ static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
>  struct amdgpu_bo_va_mapping *
>  amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>                        uint64_t addr, struct amdgpu_bo **bo);
> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
>
>  #if defined(CONFIG_DRM_AMD_DAL)
>  int amdgpu_dm_display_resume(struct amdgpu_device *adev );
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 20a1962..e069978 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -640,8 +640,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>                 }
>         }
>
> -       if (p->uf_entry.robj)
> -               p->job->uf_addr += amdgpu_bo_gpu_offset(p->uf_entry.robj);
> +       if (!r && p->uf_entry.robj) {
> +               struct amdgpu_bo *uf = p->uf_entry.robj;
> +
> +               r = amdgpu_ttm_bind(uf->tbo.ttm, &uf->tbo.mem);
> +               p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
> +       }
>
>  error_validate:
>         if (r) {
> @@ -1156,3 +1160,29 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>
>         return NULL;
>  }
> +
> +/**
> + * amdgpu_cs_sysvm_access_required - make BOs accessible by the system VM
> + *
> + * @parser: command submission parser context
> + *
> + * Helper for UVD/VCE VM emulation, make sure BOs are accessible by the system VM.
> + */
> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
> +{
> +       unsigned i;
> +       int r;
> +
> +       if (!parser->bo_list)
> +               return 0;
> +
> +       for (i = 0; i < parser->bo_list->num_entries; i++) {
> +               struct amdgpu_bo *bo = parser->bo_list->array[i].robj;
> +
> +               r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
> +               if (unlikely(r))
> +                       return r;
> +       }
> +
> +       return 0;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index b17734e..dc73f11 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -646,6 +646,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
>                 dev_err(bo->adev->dev, "%p pin failed\n", bo);
>                 goto error;
>         }
> +       r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
> +       if (unlikely(r)) {
> +               dev_err(bo->adev->dev, "%p bind failed\n", bo);
> +               goto error;
> +       }
>
>         bo->pin_count = 1;
>         if (gpu_addr != NULL)
> @@ -918,6 +923,8 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct fence *fence,
>  u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
>  {
>         WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
> +       WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
> +                    !amdgpu_ttm_is_bound(bo->tbo.ttm));
>         WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
>                      !bo->pin_count);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 4337b3a..c294aa9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -256,8 +256,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
>         new_start = (u64)new_mem->start << PAGE_SHIFT;
>
>         switch (old_mem->mem_type) {
> -       case TTM_PL_VRAM:
>         case TTM_PL_TT:
> +               r = amdgpu_ttm_bind(bo->ttm, old_mem);
> +               if (r)
> +                       return r;
> +
> +       case TTM_PL_VRAM:
>                 old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
>                 break;
>         default:
> @@ -265,8 +269,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
>                 return -EINVAL;
>         }
>         switch (new_mem->mem_type) {
> -       case TTM_PL_VRAM:
>         case TTM_PL_TT:
> +               r = amdgpu_ttm_bind(bo->ttm, new_mem);
> +               if (r)
> +                       return r;
> +
> +       case TTM_PL_VRAM:
>                 new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
>                 break;
>         default:
> @@ -639,7 +647,6 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
>                                    struct ttm_mem_reg *bo_mem)
>  {
>         struct amdgpu_ttm_tt *gtt = (void*)ttm;
> -       uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>         int r;
>
>         if (gtt->userptr) {
> @@ -660,6 +667,26 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
>             bo_mem->mem_type == AMDGPU_PL_OA)
>                 return -EINVAL;
>
> +       return 0;
> +}
> +
> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
> +{
> +       struct amdgpu_ttm_tt *gtt = (void *)ttm;
> +
> +       return gtt && !list_empty(&gtt->list);
> +}
> +
> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
> +{
> +       struct amdgpu_ttm_tt *gtt = (void *)ttm;
> +       uint32_t flags;
> +       int r;
> +
> +       if (!ttm || amdgpu_ttm_is_bound(ttm))
> +               return 0;
> +
> +       flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>         r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
>                 ttm->pages, gtt->ttm.dma_address, flags);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 72f6bfc..214bae9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -77,4 +77,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>                         struct fence **fence);
>
>  int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
> +
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> index 5888e8a..fab7bb1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> @@ -891,6 +891,10 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
>                 return -EINVAL;
>         }
>
> +       r = amdgpu_cs_sysvm_access_required(parser);
> +       if (r)
> +               return r;
> +
>         ctx.parser = parser;
>         ctx.buf_sizes = buf_sizes;
>         ctx.ib_idx = ib_idx;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> index 9b71d6c..b0c6702 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> @@ -634,7 +634,11 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
>         uint32_t allocated = 0;
>         uint32_t tmp, handle = 0;
>         uint32_t *size = &tmp;
> -       int i, r = 0, idx = 0;
> +       int i, r, idx = 0;
> +
> +       r = amdgpu_cs_sysvm_access_required(p);
> +       if (r)
> +               return r;
>
>         while (idx < ib->length_dw) {
>                 uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 7660f82..a549abd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1162,7 +1162,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>         }
>
>         flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
> -       gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;
> +       gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
> +               adev == bo_va->bo->adev) ? flags : 0;
>
>         spin_lock(&vm->status_lock);
>         if (!list_empty(&bo_va->vm_status))
> --
> 2.5.0
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: bind GTT on demand
       [not found]         ` <CAAxE2A4p=fnpwRsXDPhNW518uQqFoDzTxxhYg4_+yzJvhWOoBQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2016-09-18  8:21           ` Christian König
  2016-09-22 21:54           ` Andy Furniss
  1 sibling, 0 replies; 16+ messages in thread
From: Christian König @ 2016-09-18  8:21 UTC (permalink / raw)
  To: Marek Olšák; +Cc: amd-gfx mailing list

Thanks for the note, going to take a look tomorrow.

Probably just another case I've missed.

Christian.

Am 17.09.2016 um 23:14 schrieb Marek Olšák:
> This breaks Tonga such that it hangs. Reproducible quickly with:
>
> R600_DEBUG=testdma glxgears
>
> It's a randomized test that runs forever. It should hang within 2 seconds.
>
> Marek
>
> On Tue, Sep 6, 2016 at 11:41 AM, Christian König
> <deathsimple@vodafone.de> wrote:
>> From: Christian König <christian.koenig@amd.com>
>>
>> We don't really need the GTT table any more most of the time. So bind it
>> only on demand.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 34 ++++++++++++++++++++++++++++--
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  7 ++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 33 ++++++++++++++++++++++++++---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h    |  3 +++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c    |  4 ++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c    |  6 +++++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     |  3 ++-
>>   8 files changed, 84 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 9d39fa8..28d4a67 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -2521,6 +2521,7 @@ static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
>>   struct amdgpu_bo_va_mapping *
>>   amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>>                         uint64_t addr, struct amdgpu_bo **bo);
>> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
>>
>>   #if defined(CONFIG_DRM_AMD_DAL)
>>   int amdgpu_dm_display_resume(struct amdgpu_device *adev );
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index 20a1962..e069978 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -640,8 +640,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>>                  }
>>          }
>>
>> -       if (p->uf_entry.robj)
>> -               p->job->uf_addr += amdgpu_bo_gpu_offset(p->uf_entry.robj);
>> +       if (!r && p->uf_entry.robj) {
>> +               struct amdgpu_bo *uf = p->uf_entry.robj;
>> +
>> +               r = amdgpu_ttm_bind(uf->tbo.ttm, &uf->tbo.mem);
>> +               p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
>> +       }
>>
>>   error_validate:
>>          if (r) {
>> @@ -1156,3 +1160,29 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>>
>>          return NULL;
>>   }
>> +
>> +/**
>> + * amdgpu_cs_sysvm_access_required - make BOs accessible by the system VM
>> + *
>> + * @parser: command submission parser context
>> + *
>> + * Helper for UVD/VCE VM emulation, make sure BOs are accessible by the system VM.
>> + */
>> +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
>> +{
>> +       unsigned i;
>> +       int r;
>> +
>> +       if (!parser->bo_list)
>> +               return 0;
>> +
>> +       for (i = 0; i < parser->bo_list->num_entries; i++) {
>> +               struct amdgpu_bo *bo = parser->bo_list->array[i].robj;
>> +
>> +               r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
>> +               if (unlikely(r))
>> +                       return r;
>> +       }
>> +
>> +       return 0;
>> +}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index b17734e..dc73f11 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -646,6 +646,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
>>                  dev_err(bo->adev->dev, "%p pin failed\n", bo);
>>                  goto error;
>>          }
>> +       r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem);
>> +       if (unlikely(r)) {
>> +               dev_err(bo->adev->dev, "%p bind failed\n", bo);
>> +               goto error;
>> +       }
>>
>>          bo->pin_count = 1;
>>          if (gpu_addr != NULL)
>> @@ -918,6 +923,8 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct fence *fence,
>>   u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
>>   {
>>          WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
>> +       WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
>> +                    !amdgpu_ttm_is_bound(bo->tbo.ttm));
>>          WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
>>                       !bo->pin_count);
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 4337b3a..c294aa9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -256,8 +256,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
>>          new_start = (u64)new_mem->start << PAGE_SHIFT;
>>
>>          switch (old_mem->mem_type) {
>> -       case TTM_PL_VRAM:
>>          case TTM_PL_TT:
>> +               r = amdgpu_ttm_bind(bo->ttm, old_mem);
>> +               if (r)
>> +                       return r;
>> +
>> +       case TTM_PL_VRAM:
>>                  old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
>>                  break;
>>          default:
>> @@ -265,8 +269,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
>>                  return -EINVAL;
>>          }
>>          switch (new_mem->mem_type) {
>> -       case TTM_PL_VRAM:
>>          case TTM_PL_TT:
>> +               r = amdgpu_ttm_bind(bo->ttm, new_mem);
>> +               if (r)
>> +                       return r;
>> +
>> +       case TTM_PL_VRAM:
>>                  new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
>>                  break;
>>          default:
>> @@ -639,7 +647,6 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
>>                                     struct ttm_mem_reg *bo_mem)
>>   {
>>          struct amdgpu_ttm_tt *gtt = (void*)ttm;
>> -       uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>>          int r;
>>
>>          if (gtt->userptr) {
>> @@ -660,6 +667,26 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
>>              bo_mem->mem_type == AMDGPU_PL_OA)
>>                  return -EINVAL;
>>
>> +       return 0;
>> +}
>> +
>> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
>> +{
>> +       struct amdgpu_ttm_tt *gtt = (void *)ttm;
>> +
>> +       return gtt && !list_empty(&gtt->list);
>> +}
>> +
>> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
>> +{
>> +       struct amdgpu_ttm_tt *gtt = (void *)ttm;
>> +       uint32_t flags;
>> +       int r;
>> +
>> +       if (!ttm || amdgpu_ttm_is_bound(ttm))
>> +               return 0;
>> +
>> +       flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>>          r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
>>                  ttm->pages, gtt->ttm.dma_address, flags);
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> index 72f6bfc..214bae9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> @@ -77,4 +77,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>                          struct fence **fence);
>>
>>   int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
>> +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
>> +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
>> +
>>   #endif
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>> index 5888e8a..fab7bb1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>> @@ -891,6 +891,10 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
>>                  return -EINVAL;
>>          }
>>
>> +       r = amdgpu_cs_sysvm_access_required(parser);
>> +       if (r)
>> +               return r;
>> +
>>          ctx.parser = parser;
>>          ctx.buf_sizes = buf_sizes;
>>          ctx.ib_idx = ib_idx;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>> index 9b71d6c..b0c6702 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
>> @@ -634,7 +634,11 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
>>          uint32_t allocated = 0;
>>          uint32_t tmp, handle = 0;
>>          uint32_t *size = &tmp;
>> -       int i, r = 0, idx = 0;
>> +       int i, r, idx = 0;
>> +
>> +       r = amdgpu_cs_sysvm_access_required(p);
>> +       if (r)
>> +               return r;
>>
>>          while (idx < ib->length_dw) {
>>                  uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 7660f82..a549abd 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1162,7 +1162,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>          }
>>
>>          flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
>> -       gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;
>> +       gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
>> +               adev == bo_va->bo->adev) ? flags : 0;
>>
>>          spin_lock(&vm->status_lock);
>>          if (!list_empty(&bo_va->vm_status))
>> --
>> 2.5.0
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: bind GTT on demand
       [not found]         ` <CAAxE2A4p=fnpwRsXDPhNW518uQqFoDzTxxhYg4_+yzJvhWOoBQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2016-09-18  8:21           ` Christian König
@ 2016-09-22 21:54           ` Andy Furniss
       [not found]             ` <57E45314.9080903-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  1 sibling, 1 reply; 16+ messages in thread
From: Andy Furniss @ 2016-09-22 21:54 UTC (permalink / raw)
  To: Marek Olšák, Christian König; +Cc: amd-gfx mailing list

Marek Olšák wrote:
> This breaks Tonga such that it hangs. Reproducible quickly with:
>
> R600_DEBUG=testdma glxgears
>
> It's a randomized test that runs forever. It should hang within 2 seconds.

So what is the status of this now?

R600_DEBUG=testdma glxgears isn't a test I've run on my r9285 tonga
until I, by luck, saw this.

On agddf 4.9-wip at post time it did hang/reboot after 2 seconds.

On every update since it's still hung though now it takes longer.

Todays update tested both on head and reset on to the drm-next tag
still hang.

Going back over older 4.9-wips it's one built on 26th August that is
the first not to hang.



_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: bind GTT on demand
       [not found]             ` <57E45314.9080903-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2016-09-23  7:45               ` Christian König
       [not found]                 ` <99035862-cf25-c2e1-4460-78622a539f32-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Christian König @ 2016-09-23  7:45 UTC (permalink / raw)
  To: Andy Furniss, Marek Olšák; +Cc: amd-gfx mailing list

Am 22.09.2016 um 23:54 schrieb Andy Furniss:
> Marek Olšák wrote:
>> This breaks Tonga such that it hangs. Reproducible quickly with:
>>
>> R600_DEBUG=testdma glxgears
>>
>> It's a randomized test that runs forever. It should hang within 2 
>> seconds.
>
> So what is the status of this now?

The status is that I'm hammering my head on the desk for two weeks now 
what the heck is going wrong here.

What I've found so far is that a VM update command doesn't have the 
result it should have, but I absolutely don't understand what happens here.

The good news is that it's 100% reproducible and I already found and 
fixed two bugs.

If you have any idea or more testing results what is going wrong here 
please let me know.

If we can't fix this before the 4.9 release we are just going to revert 
the patch causing this.

Regards,
Christian.

>
> R600_DEBUG=testdma glxgears isn't a test I've run on my r9285 tonga
> until I, by luck, saw this.
>
> On agddf 4.9-wip at post time it did hang/reboot after 2 seconds.
>
> On every update since it's still hung though now it takes longer.
>
> Todays update tested both on head and reset on to the drm-next tag
> still hang.
>
> Going back over older 4.9-wips it's one built on 26th August that is
> the first not to hang.
>
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: bind GTT on demand
       [not found]                 ` <99035862-cf25-c2e1-4460-78622a539f32-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2016-09-23 15:55                   ` Andy Furniss
       [not found]                     ` <57E55075.203-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Andy Furniss @ 2016-09-23 15:55 UTC (permalink / raw)
  To: Christian König, Marek Olšák; +Cc: amd-gfx mailing list

Christian König wrote:
> Am 22.09.2016 um 23:54 schrieb Andy Furniss:
>> Marek Olšák wrote:
>>> This breaks Tonga such that it hangs. Reproducible quickly with:
>>>
>>> R600_DEBUG=testdma glxgears
>>>
>>> It's a randomized test that runs forever. It should hang within 2
>>> seconds.
>>
>> So what is the status of this now?
>
> The status is that I'm hammering my head on the desk for two weeks now
> what the heck is going wrong here.

Oh, OK, I was just checking no one thought it was fixed really.

>
> What I've found so far is that a VM update command doesn't have the
> result it should have, but I absolutely don't understand what happens here.
>
> The good news is that it's 100% reproducible and I already found and
> fixed two bugs.
>
> If you have any idea or more testing results what is going wrong here
> please let me know.

I guess, you already did it anyway or it doesn't help, but

R600_DEBUG=testdma,check_vm glxgears

Does catch a vmfault and output to ddebug_dumps.
It initially saves me from locking, though I will lock on repeated runs.

>
> If we can't fix this before the 4.9 release we are just going to revert
> the patch causing this.
>
> Regards,
> Christian.
>
>>
>> R600_DEBUG=testdma glxgears isn't a test I've run on my r9285 tonga
>> until I, by luck, saw this.
>>
>> On agddf 4.9-wip at post time it did hang/reboot after 2 seconds.
>>
>> On every update since it's still hung though now it takes longer.
>>
>> Todays update tested both on head and reset on to the drm-next tag
>> still hang.
>>
>> Going back over older 4.9-wips it's one built on 26th August that is
>> the first not to hang.
>>
>>
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: bind GTT on demand
       [not found]                     ` <57E55075.203-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2016-09-23 17:45                       ` Shawn Starr
  2016-09-26 16:33                       ` Andy Furniss
  1 sibling, 0 replies; 16+ messages in thread
From: Shawn Starr @ 2016-09-23 17:45 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Andy Furniss, Christian König, Marek Olšák

On Friday, September 23, 2016 4:55:33 PM EDT Andy Furniss wrote:
> Christian König wrote:
> > Am 22.09.2016 um 23:54 schrieb Andy Furniss:
> >> Marek Olšák wrote:
> >>> This breaks Tonga such that it hangs. Reproducible quickly with:
> >>> 
> >>> R600_DEBUG=testdma glxgears
> >>> 
> >>> It's a randomized test that runs forever. It should hang within 2
> >>> seconds.
> >> 
> >> So what is the status of this now?
> > 
> > The status is that I'm hammering my head on the desk for two weeks now
> > what the heck is going wrong here.
> 
> Oh, OK, I was just checking no one thought it was fixed really.
> 
> > What I've found so far is that a VM update command doesn't have the
> > result it should have, but I absolutely don't understand what happens
> > here.
> > 
> > The good news is that it's 100% reproducible and I already found and
> > fixed two bugs.
> > 
> > If you have any idea or more testing results what is going wrong here
> > please let me know.
> 
> I guess, you already did it anyway or it doesn't help, but
> 
> R600_DEBUG=testdma,check_vm glxgears
> 
> Does catch a vmfault and output to ddebug_dumps.
> It initially saves me from locking, though I will lock on repeated runs.
> 
> > If we can't fix this before the 4.9 release we are just going to revert
> > the patch causing this.
> > 
> > Regards,
> > Christian.
> > 

This is directly related to my thread on [amdgpu][CIK] drm-next-4.9-wip - 
piglit test - max-texture-size - GPU deadlock

Using Linus master kernel w/o drm-next-4.9-wip:
I cannot wedge GPU, the amdgpu kernel driver correctly throws 
errors if I've run out of memory vs crashing GPU.

as CIK doesn't have SDMA enabled in Mesa, I cannot reproduce Andy's test case, 
using max-texture-size can however.

Thanks,
Shawn

> >> R600_DEBUG=testdma glxgears isn't a test I've run on my r9285 tonga
> >> until I, by luck, saw this.
> >> 
> >> On agddf 4.9-wip at post time it did hang/reboot after 2 seconds.
> >> 
> >> On every update since it's still hung though now it takes longer.
> >> 
> >> Todays update tested both on head and reset on to the drm-next tag
> >> still hang.
> >> 
> >> Going back over older 4.9-wips it's one built on 26th August that is
> >> the first not to hang.
> >> 
> >> 
> >> 
> >> _______________________________________________
> >> amd-gfx mailing list
> >> amd-gfx@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: bind GTT on demand
       [not found]                     ` <57E55075.203-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  2016-09-23 17:45                       ` Shawn Starr
@ 2016-09-26 16:33                       ` Andy Furniss
  1 sibling, 0 replies; 16+ messages in thread
From: Andy Furniss @ 2016-09-26 16:33 UTC (permalink / raw)
  To: Christian König, Marek Olšák; +Cc: amd-gfx mailing list

With current drm-next-4.9-wip plus  drm/amdgpu: fix addr handling in 
amdgpu_vm_bo_update_mapping

testdma is now working OK for my R9285 - just did 35k tests while 
simultaneously running valley/heaven, plus messing with 
powerplay/cpufreq and all is
good, no lock, no vm faults.


Andy Furniss wrote:
> Christian König wrote:
>> Am 22.09.2016 um 23:54 schrieb Andy Furniss:
>>> Marek Olšák wrote:
>>>> This breaks Tonga such that it hangs. Reproducible quickly with:
>>>>
>>>> R600_DEBUG=testdma glxgears
>>>>
>>>> It's a randomized test that runs forever. It should hang within 2
>>>> seconds.
>>>
>>> So what is the status of this now?
>>
>> The status is that I'm hammering my head on the desk for two weeks now
>> what the heck is going wrong here.
>
> Oh, OK, I was just checking no one thought it was fixed really.
>
>>
>> What I've found so far is that a VM update command doesn't have the
>> result it should have, but I absolutely don't understand what happens
>> here.
>>
>> The good news is that it's 100% reproducible and I already found and
>> fixed two bugs.
>>
>> If you have any idea or more testing results what is going wrong here
>> please let me know.
>
> I guess, you already did it anyway or it doesn't help, but
>
> R600_DEBUG=testdma,check_vm glxgears
>
> Does catch a vmfault and output to ddebug_dumps.
> It initially saves me from locking, though I will lock on repeated runs.
>
>>
>> If we can't fix this before the 4.9 release we are just going to revert
>> the patch causing this.
>>
>> Regards,
>> Christian.
>>
>>>
>>> R600_DEBUG=testdma glxgears isn't a test I've run on my r9285 tonga
>>> until I, by luck, saw this.
>>>
>>> On agddf 4.9-wip at post time it did hang/reboot after 2 seconds.
>>>
>>> On every update since it's still hung though now it takes longer.
>>>
>>> Todays update tested both on head and reset on to the drm-next tag
>>> still hang.
>>>
>>> Going back over older 4.9-wips it's one built on 26th August that is
>>> the first not to hang.
>>>
>>>
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>>
>>
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2016-09-26 16:33 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-06  9:41 [PATCH 1/2] drm/amdgpu: fix GTT offset handling Christian König
     [not found] ` <1473154866-2448-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2016-09-06  9:41   ` [PATCH 2/2] drm/amdgpu: bind GTT on demand Christian König
     [not found]     ` <1473154866-2448-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2016-09-06  9:53       ` zhoucm1
     [not found]         ` <57CE9220.8060403-5C7GfCeVMHo@public.gmane.org>
2016-09-06 10:48           ` Christian König
     [not found]             ` <e3ec968a-32b0-cfdd-36d3-6f07b0f5e14b-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2016-09-07  7:18               ` zhoucm1
     [not found]                 ` <57CFBF51.3000506-5C7GfCeVMHo@public.gmane.org>
2016-09-07  8:18                   ` Christian König
     [not found]                     ` <24352e53-0c1c-8b55-0b39-35b3e5d350ff-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2016-09-07 16:07                       ` Felix Kuehling
     [not found]                         ` <2a4f24fa-6873-7c7f-486f-ef4f29efa63a-5C7GfCeVMHo@public.gmane.org>
2016-09-07 16:26                           ` Christian König
2016-09-07 14:32       ` Deucher, Alexander
2016-09-17 21:14       ` Marek Olšák
     [not found]         ` <CAAxE2A4p=fnpwRsXDPhNW518uQqFoDzTxxhYg4_+yzJvhWOoBQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2016-09-18  8:21           ` Christian König
2016-09-22 21:54           ` Andy Furniss
     [not found]             ` <57E45314.9080903-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2016-09-23  7:45               ` Christian König
     [not found]                 ` <99035862-cf25-c2e1-4460-78622a539f32-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2016-09-23 15:55                   ` Andy Furniss
     [not found]                     ` <57E55075.203-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2016-09-23 17:45                       ` Shawn Starr
2016-09-26 16:33                       ` Andy Furniss

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.