* [PATCH 1/9] drm/amdgpu: fix amdgpu_vm_bo_map trace point
From: Christian König @ 2017-08-25  9:38 UTC
  To: amd-gfx@lists.freedesktop.org

From: Christian König <christian.koenig@amd.com>

The trace point call somehow got lost; add it back to amdgpu_vm_bo_map()
and amdgpu_vm_bo_replace_map().

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 7a0656c..c77689f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2110,6 +2110,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 
 	if (flags & AMDGPU_PTE_PRT)
 		amdgpu_vm_prt_get(adev);
+	trace_amdgpu_vm_bo_map(bo_va, mapping);
 
 	return 0;
 }
@@ -2175,6 +2176,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
 
 	if (flags & AMDGPU_PTE_PRT)
 		amdgpu_vm_prt_get(adev);
+	trace_amdgpu_vm_bo_map(bo_va, mapping);
 
 	return 0;
 }
-- 
2.7.4


* [PATCH 2/9] drm/amdgpu: fix and cleanup VM ready check
From: Christian König @ 2017-08-25  9:38 UTC
  To: amd-gfx@lists.freedesktop.org

From: Christian König <christian.koenig@amd.com>

Stop checking the mapped BO itself, because that one is
certainly not a page table.

Additionally, move the check into amdgpu_vm.c as amdgpu_vm_ready().

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 33 ++-------------------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 32 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  1 +
 3 files changed, 35 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 7171968..9b1b6bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -127,35 +127,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
 	return 0;
 }
 
-static int amdgpu_gem_vm_check(void *param, struct amdgpu_bo *bo)
-{
-	/* if anything is swapped out don't swap it in here,
-	   just abort and wait for the next CS */
-	if (!amdgpu_bo_gpu_accessible(bo))
-		return -ERESTARTSYS;
-
-	if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow))
-		return -ERESTARTSYS;
-
-	return 0;
-}
-
-static bool amdgpu_gem_vm_ready(struct amdgpu_device *adev,
-				struct amdgpu_vm *vm,
-				struct list_head *list)
-{
-	struct ttm_validate_buffer *entry;
-
-	list_for_each_entry(entry, list, head) {
-		struct amdgpu_bo *bo =
-			container_of(entry->bo, struct amdgpu_bo, tbo);
-		if (amdgpu_gem_vm_check(NULL, bo))
-			return false;
-	}
-
-	return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_vm_check, NULL);
-}
-
 void amdgpu_gem_object_close(struct drm_gem_object *obj,
 			     struct drm_file *file_priv)
 {
@@ -189,7 +160,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
 	if (bo_va && --bo_va->ref_count == 0) {
 		amdgpu_vm_bo_rmv(adev, bo_va);
 
-		if (amdgpu_gem_vm_ready(adev, vm, &list)) {
+		if (amdgpu_vm_ready(adev, vm)) {
 			struct dma_fence *fence = NULL;
 
 			r = amdgpu_vm_clear_freed(adev, vm, &fence);
@@ -513,7 +484,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 {
 	int r = -ERESTARTSYS;
 
-	if (!amdgpu_gem_vm_ready(adev, vm, list))
+	if (!amdgpu_vm_ready(adev, vm))
 		goto error;
 
 	r = amdgpu_vm_update_directories(adev, vm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c77689f..f621dba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -232,6 +232,38 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 }
 
 /**
+ * amdgpu_vm_check - helper for amdgpu_vm_ready
+ */
+static int amdgpu_vm_check(void *param, struct amdgpu_bo *bo)
+{
+	/* if anything is swapped out don't swap it in here,
+	   just abort and wait for the next CS */
+	if (!amdgpu_bo_gpu_accessible(bo))
+		return -ERESTARTSYS;
+
+	if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow))
+		return -ERESTARTSYS;
+
+	return 0;
+}
+
+/**
+ * amdgpu_vm_ready - check VM is ready for updates
+ *
+ * @adev: amdgpu device
+ * @vm: VM to check
+ *
+ * Check if all VM PDs/PTs are ready for updates
+ */
+bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+	if (amdgpu_vm_check(NULL, vm->root.bo))
+		return false;
+
+	return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_vm_check, NULL);
+}
+
+/**
  * amdgpu_vm_alloc_levels - allocate the PD/PT levels
  *
  * @adev: amdgpu_device pointer
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index ba6691b..9347d28 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -225,6 +225,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
 			 struct amdgpu_bo_list_entry *entry);
+bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			      int (*callback)(void *p, struct amdgpu_bo *bo),
 			      void *param);
-- 
2.7.4


* [PATCH 3/9] drm/amdgpu: cleanup GWS, GDS and OA allocation
From: Christian König @ 2017-08-25  9:38 UTC
  To: amd-gfx@lists.freedesktop.org

From: Christian König <christian.koenig@amd.com>

GWS, GDS and OA buffers are certainly not kernel allocations; set the
NO_CPU_ACCESS flag for them instead.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 9b1b6bd..ba01293 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -186,17 +186,17 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 {
 	struct amdgpu_device *adev = dev->dev_private;
 	union drm_amdgpu_gem_create *args = data;
+	uint64_t flags = args->in.domain_flags;
 	uint64_t size = args->in.bo_size;
 	struct drm_gem_object *gobj;
 	uint32_t handle;
-	bool kernel = false;
 	int r;
 
 	/* reject invalid gem flags */
-	if (args->in.domain_flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-				      AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
-				      AMDGPU_GEM_CREATE_CPU_GTT_USWC |
-				      AMDGPU_GEM_CREATE_VRAM_CLEARED))
+	if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+		      AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
+		      AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+		      AMDGPU_GEM_CREATE_VRAM_CLEARED))
 		return -EINVAL;
 
 	/* reject invalid gem domains */
@@ -211,7 +211,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 	/* create a gem object to contain this object in */
 	if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS |
 	    AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
-		kernel = true;
+		flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
 		if (args->in.domains == AMDGPU_GEM_DOMAIN_GDS)
 			size = size << AMDGPU_GDS_SHIFT;
 		else if (args->in.domains == AMDGPU_GEM_DOMAIN_GWS)
@@ -225,8 +225,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 
 	r = amdgpu_gem_object_create(adev, size, args->in.alignment,
 				     (u32)(0xffffffff & args->in.domains),
-				     args->in.domain_flags,
-				     kernel, &gobj);
+				     flags, false, &gobj);
 	if (r)
 		return r;
 
-- 
2.7.4


* [PATCH 4/9] drm/amdgpu: add bo_va cleared flag again
From: Christian König @ 2017-08-25  9:38 UTC
  To: amd-gfx@lists.freedesktop.org

From: Christian König <christian.koenig@amd.com>

We changed this to use an extra list a while back, but for the next
series I need a separate flag again.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 37 ++++++++++++++----------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     |  3 ---
 3 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index a288fa6..e613ba4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -55,6 +55,9 @@ struct amdgpu_bo_va {
 	/* mappings for this bo_va */
 	struct list_head		invalids;
 	struct list_head		valids;
+
+	/* If the mappings are cleared or filled */
+	bool				cleared;
 };
 
 struct amdgpu_bo {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f621dba..16148ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1787,10 +1787,13 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	else
 		flags = 0x0;
 
-	spin_lock(&vm->status_lock);
-	if (!list_empty(&bo_va->base.vm_status))
+	/* We access vm_status without the status lock here, but that is ok
+	 * because when we don't clear the BO is locked and so the status can't
+	 * change
+	 */
+	if ((!clear && !list_empty(&bo_va->base.vm_status)) ||
+	    bo_va->cleared != clear)
 		list_splice_init(&bo_va->valids, &bo_va->invalids);
-	spin_unlock(&vm->status_lock);
 
 	list_for_each_entry(mapping, &bo_va->invalids, list) {
 		r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
@@ -1800,25 +1803,22 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			return r;
 	}
 
-	if (trace_amdgpu_vm_bo_mapping_enabled()) {
-		list_for_each_entry(mapping, &bo_va->valids, list)
-			trace_amdgpu_vm_bo_mapping(mapping);
-
-		list_for_each_entry(mapping, &bo_va->invalids, list)
-			trace_amdgpu_vm_bo_mapping(mapping);
+	if (vm->use_cpu_for_update) {
+		/* Flush HDP */
+		mb();
+		amdgpu_gart_flush_gpu_tlb(adev, 0);
 	}
 
 	spin_lock(&vm->status_lock);
-	list_splice_init(&bo_va->invalids, &bo_va->valids);
 	list_del_init(&bo_va->base.vm_status);
-	if (clear)
-		list_add(&bo_va->base.vm_status, &vm->cleared);
 	spin_unlock(&vm->status_lock);
 
-	if (vm->use_cpu_for_update) {
-		/* Flush HDP */
-		mb();
-		amdgpu_gart_flush_gpu_tlb(adev, 0);
+	list_splice_init(&bo_va->invalids, &bo_va->valids);
+	bo_va->cleared = clear;
+
+	if (trace_amdgpu_vm_bo_mapping_enabled()) {
+		list_for_each_entry(mapping, &bo_va->valids, list)
+			trace_amdgpu_vm_bo_mapping(mapping);
 	}
 
 	return 0;
@@ -2419,9 +2419,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 
 	list_for_each_entry(bo_base, &bo->va, bo_list) {
 		spin_lock(&bo_base->vm->status_lock);
-		if (list_empty(&bo_base->vm_status))
-			list_add(&bo_base->vm_status,
-				 &bo_base->vm->moved);
+		list_move(&bo_base->vm_status, &bo_base->vm->moved);
 		spin_unlock(&bo_base->vm->status_lock);
 	}
 }
@@ -2508,7 +2506,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		vm->reserved_vmid[i] = NULL;
 	spin_lock_init(&vm->status_lock);
 	INIT_LIST_HEAD(&vm->moved);
-	INIT_LIST_HEAD(&vm->cleared);
 	INIT_LIST_HEAD(&vm->freed);
 
 	/* create scheduler entity for page table updates */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 9347d28..e705f0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -126,9 +126,6 @@ struct amdgpu_vm {
 	/* BOs moved, but not yet updated in the PT */
 	struct list_head	moved;
 
-	/* BOs cleared in the PT because of a move */
-	struct list_head	cleared;
-
 	/* BO mappings freed, but not yet updated in the PT */
 	struct list_head	freed;
 
-- 
2.7.4


* [PATCH 5/9] drm/amdgpu: rework moved handling in the VM
From: Christian König @ 2017-08-25  9:38 UTC
  To: amd-gfx@lists.freedesktop.org

From: Christian König <christian.koenig@amd.com>

Instead of relying on the vm_status state, use a separate flag to note
that the BO was moved.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 13 +++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  3 +++
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 16148ef..85189f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1787,13 +1787,13 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	else
 		flags = 0x0;
 
-	/* We access vm_status without the status lock here, but that is ok
-	 * because when we don't clear the BO is locked and so the status can't
-	 * change
-	 */
-	if ((!clear && !list_empty(&bo_va->base.vm_status)) ||
-	    bo_va->cleared != clear)
+	if (!clear && bo_va->base.moved) {
+		bo_va->base.moved = false;
+		list_splice_init(&bo_va->valids, &bo_va->invalids);
+
+	} else if (bo_va->cleared != clear) {
 		list_splice_init(&bo_va->valids, &bo_va->invalids);
+	}
 
 	list_for_each_entry(mapping, &bo_va->invalids, list) {
 		r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
@@ -2418,6 +2418,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 	struct amdgpu_vm_bo_base *bo_base;
 
 	list_for_each_entry(bo_base, &bo->va, bo_list) {
+		bo_base->moved = true;
 		spin_lock(&bo_base->vm->status_lock);
 		list_move(&bo_base->vm_status, &bo_base->vm->moved);
 		spin_unlock(&bo_base->vm->status_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index e705f0f..ff093d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -105,6 +105,9 @@ struct amdgpu_vm_bo_base {
 
 	/* protected by spinlock */
 	struct list_head		vm_status;
+
+	/* protected by the BO being reserved */
+	bool				moved;
 };
 
 struct amdgpu_vm_pt {
-- 
2.7.4


* [PATCH 6/9] drm/amdgpu: track evicted page tables v2
From: Christian König @ 2017-08-25  9:38 UTC
  To: amd-gfx@lists.freedesktop.org

From: Christian König <christian.koenig@amd.com>

Instead of validating all page tables when one was evicted,
track which ones actually need validation.

v2: simplify amdgpu_vm_ready as well

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com> (v1)
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     |   7 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c    |   8 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 227 +++++++++++++----------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     |  16 +-
 5 files changed, 119 insertions(+), 141 deletions(-)
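
The idea of the change, reduced to its essentials: instead of comparing a
device-global eviction counter and then walking every page table, each VM
keeps an "evicted" list that BOs are put on when they are evicted, the
validation step drains that list, and readiness becomes an empty-list check.
The following is a rough standalone sketch of that bookkeeping in plain C;
the types, names and main() are invented for the illustration and are not
the amdgpu code.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Toy stand-ins for the kernel structures; names are illustrative only. */
struct bo_base {
	struct bo_base *next;      /* link in the VM's "evicted" list */
	const char *name;
};

struct vm {
	struct bo_base *evicted;   /* BOs that still need validation */
};

/* Analogue of marking a BO evicted: queue it on the per-VM list. */
static void vm_bo_evicted(struct vm *vm, struct bo_base *bo)
{
	bo->next = vm->evicted;
	vm->evicted = bo;
}

/* Analogue of amdgpu_vm_ready(): ready iff nothing awaits validation. */
static bool vm_ready(const struct vm *vm)
{
	return vm->evicted == NULL;
}

/* Analogue of the validation step: drain the list instead of walking
 * all page table levels the way the old code did. */
static void vm_validate(struct vm *vm)
{
	while (vm->evicted) {
		struct bo_base *bo = vm->evicted;

		vm->evicted = bo->next;
		printf("validating %s\n", bo->name);
	}
}

int main(void)
{
	struct vm vm = { .evicted = NULL };
	struct bo_base pd = { .name = "page directory" };

	vm_bo_evicted(&vm, &pd);
	printf("ready before validate: %d\n", vm_ready(&vm));
	vm_validate(&vm);
	printf("ready after validate: %d\n", vm_ready(&vm));
	return 0;
}

The payoff is that an eviction elsewhere on the device no longer forces a
full walk of this VM's page tables; only the entries actually on the list
get revalidated.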

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 3f46b5a..f68ac56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -632,9 +632,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
 	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
 				     p->bytes_moved_vis);
-	fpriv->vm.last_eviction_counter =
-		atomic64_read(&p->adev->num_evictions);
-
 	if (p->bo_list) {
 		struct amdgpu_bo *gds = p->bo_list->gds_obj;
 		struct amdgpu_bo *gws = p->bo_list->gws_obj;
@@ -826,7 +823,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 			if (!bo)
 				continue;
 
-			amdgpu_vm_bo_invalidate(adev, bo);
+			amdgpu_vm_bo_invalidate(adev, bo, false);
 		}
 	}
 
@@ -851,7 +848,7 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 	}
 
 	if (p->job->vm) {
-		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.bo);
+		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
 
 		r = amdgpu_bo_vm_update_pte(p);
 		if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index ba01293..d028806 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -160,7 +160,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
 	if (bo_va && --bo_va->ref_count == 0) {
 		amdgpu_vm_bo_rmv(adev, bo_va);
 
-		if (amdgpu_vm_ready(adev, vm)) {
+		if (amdgpu_vm_ready(vm)) {
 			struct dma_fence *fence = NULL;
 
 			r = amdgpu_vm_clear_freed(adev, vm, &fence);
@@ -481,10 +481,10 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 				    struct list_head *list,
 				    uint32_t operation)
 {
-	int r = -ERESTARTSYS;
+	int r;
 
-	if (!amdgpu_vm_ready(adev, vm))
-		goto error;
+	if (!amdgpu_vm_ready(vm))
+		return;
 
 	r = amdgpu_vm_update_directories(adev, vm);
 	if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 9e495da..52d0109 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -929,7 +929,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 		return;
 
 	abo = container_of(bo, struct amdgpu_bo, tbo);
-	amdgpu_vm_bo_invalidate(adev, abo);
+	amdgpu_vm_bo_invalidate(adev, abo, evict);
 
 	amdgpu_bo_kunmap(abo);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 85189f1..592c3e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -140,7 +140,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
 			 struct amdgpu_bo_list_entry *entry)
 {
-	entry->robj = vm->root.bo;
+	entry->robj = vm->root.base.bo;
 	entry->priority = 0;
 	entry->tv.bo = &entry->robj->tbo;
 	entry->tv.shared = true;
@@ -149,61 +149,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 }
 
 /**
- * amdgpu_vm_validate_layer - validate a single page table level
- *
- * @parent: parent page table level
- * @validate: callback to do the validation
- * @param: parameter for the validation callback
- *
- * Validate the page table BOs on command submission if neccessary.
- */
-static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,
-				    int (*validate)(void *, struct amdgpu_bo *),
-				    void *param, bool use_cpu_for_update,
-				    struct ttm_bo_global *glob)
-{
-	unsigned i;
-	int r;
-
-	if (use_cpu_for_update) {
-		r = amdgpu_bo_kmap(parent->bo, NULL);
-		if (r)
-			return r;
-	}
-
-	if (!parent->entries)
-		return 0;
-
-	for (i = 0; i <= parent->last_entry_used; ++i) {
-		struct amdgpu_vm_pt *entry = &parent->entries[i];
-
-		if (!entry->bo)
-			continue;
-
-		r = validate(param, entry->bo);
-		if (r)
-			return r;
-
-		spin_lock(&glob->lru_lock);
-		ttm_bo_move_to_lru_tail(&entry->bo->tbo);
-		if (entry->bo->shadow)
-			ttm_bo_move_to_lru_tail(&entry->bo->shadow->tbo);
-		spin_unlock(&glob->lru_lock);
-
-		/*
-		 * Recurse into the sub directory. This is harmless because we
-		 * have only a maximum of 5 layers.
-		 */
-		r = amdgpu_vm_validate_level(entry, validate, param,
-					     use_cpu_for_update, glob);
-		if (r)
-			return r;
-	}
-
-	return r;
-}
-
-/**
  * amdgpu_vm_validate_pt_bos - validate the page table BOs
  *
  * @adev: amdgpu device pointer
@@ -217,32 +162,43 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			      int (*validate)(void *p, struct amdgpu_bo *bo),
 			      void *param)
 {
-	uint64_t num_evictions;
+	struct ttm_bo_global *glob = adev->mman.bdev.glob;
+	int r;
 
-	/* We only need to validate the page tables
-	 * if they aren't already valid.
-	 */
-	num_evictions = atomic64_read(&adev->num_evictions);
-	if (num_evictions == vm->last_eviction_counter)
-		return 0;
+	spin_lock(&vm->status_lock);
+	while (!list_empty(&vm->evicted)) {
+		struct amdgpu_vm_bo_base *bo_base;
+		struct amdgpu_bo *bo;
 
-	return amdgpu_vm_validate_level(&vm->root, validate, param,
-					vm->use_cpu_for_update,
-					adev->mman.bdev.glob);
-}
+		bo_base = list_first_entry(&vm->evicted,
+					   struct amdgpu_vm_bo_base,
+					   vm_status);
+		spin_unlock(&vm->status_lock);
 
-/**
- * amdgpu_vm_check - helper for amdgpu_vm_ready
- */
-static int amdgpu_vm_check(void *param, struct amdgpu_bo *bo)
-{
-	/* if anything is swapped out don't swap it in here,
-	   just abort and wait for the next CS */
-	if (!amdgpu_bo_gpu_accessible(bo))
-		return -ERESTARTSYS;
+		bo = bo_base->bo;
+		BUG_ON(!bo);
+		if (bo->parent) {
+			r = validate(param, bo);
+			if (r)
+				return r;
 
-	if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow))
-		return -ERESTARTSYS;
+			spin_lock(&glob->lru_lock);
+			ttm_bo_move_to_lru_tail(&bo->tbo);
+			if (bo->shadow)
+				ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
+			spin_unlock(&glob->lru_lock);
+		}
+
+		if (vm->use_cpu_for_update) {
+			r = amdgpu_bo_kmap(bo, NULL);
+			if (r)
+				return r;
+		}
+
+		spin_lock(&vm->status_lock);
+		list_del_init(&bo_base->vm_status);
+	}
+	spin_unlock(&vm->status_lock);
 
 	return 0;
 }
@@ -250,17 +206,19 @@ static int amdgpu_vm_check(void *param, struct amdgpu_bo *bo)
 /**
  * amdgpu_vm_ready - check VM is ready for updates
  *
- * @adev: amdgpu device
  * @vm: VM to check
  *
  * Check if all VM PDs/PTs are ready for updates
  */
-bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+bool amdgpu_vm_ready(struct amdgpu_vm *vm)
 {
-	if (amdgpu_vm_check(NULL, vm->root.bo))
-		return false;
+	bool ready;
+
+	spin_lock(&vm->status_lock);
+	ready = list_empty(&vm->evicted);
+	spin_unlock(&vm->status_lock);
 
-	return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_vm_check, NULL);
+	return ready;
 }
 
 /**
@@ -325,11 +283,11 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 
 	/* walk over the address space and allocate the page tables */
 	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
-		struct reservation_object *resv = vm->root.bo->tbo.resv;
+		struct reservation_object *resv = vm->root.base.bo->tbo.resv;
 		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
 		struct amdgpu_bo *pt;
 
-		if (!entry->bo) {
+		if (!entry->base.bo) {
 			r = amdgpu_bo_create(adev,
 					     amdgpu_vm_bo_size(adev, level),
 					     AMDGPU_GPU_PAGE_SIZE, true,
@@ -350,9 +308,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 			/* Keep a reference to the root directory to avoid
 			* freeing them up in the wrong order.
 			*/
-			pt->parent = amdgpu_bo_ref(vm->root.bo);
+			pt->parent = amdgpu_bo_ref(vm->root.base.bo);
 
-			entry->bo = pt;
+			entry->base.vm = vm;
+			entry->base.bo = pt;
+			list_add_tail(&entry->base.bo_list, &pt->va);
+			INIT_LIST_HEAD(&entry->base.vm_status);
 			entry->addr = 0;
 		}
 
@@ -1019,7 +980,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	int r;
 
 	amdgpu_sync_create(&sync);
-	amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.resv, owner);
+	amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner);
 	r = amdgpu_sync_wait(&sync, true);
 	amdgpu_sync_free(&sync);
 
@@ -1058,10 +1019,10 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
-	shadow = parent->bo->shadow;
+	shadow = parent->base.bo->shadow;
 
 	if (vm->use_cpu_for_update) {
-		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo);
+		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
 		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
 		if (unlikely(r))
 			return r;
@@ -1077,7 +1038,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 		/* assume the worst case */
 		ndw += parent->last_entry_used * 6;
 
-		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
+		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
 
 		if (shadow) {
 			shadow_addr = amdgpu_bo_gpu_offset(shadow);
@@ -1097,7 +1058,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 
 	/* walk over the address space and update the directory */
 	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
-		struct amdgpu_bo *bo = parent->entries[pt_idx].bo;
+		struct amdgpu_bo *bo = parent->entries[pt_idx].base.bo;
 		uint64_t pde, pt;
 
 		if (bo == NULL)
@@ -1140,7 +1101,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 	}
 
 	if (count) {
-		if (vm->root.bo->shadow)
+		if (vm->root.base.bo->shadow)
 			params.func(&params, last_shadow, last_pt,
 				    count, incr, AMDGPU_PTE_VALID);
 
@@ -1153,7 +1114,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 			amdgpu_job_free(job);
 		} else {
 			amdgpu_ring_pad_ib(ring, params.ib);
-			amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv,
+			amdgpu_sync_resv(adev, &job->sync,
+					 parent->base.bo->tbo.resv,
 					 AMDGPU_FENCE_OWNER_VM);
 			if (shadow)
 				amdgpu_sync_resv(adev, &job->sync,
@@ -1166,7 +1128,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 			if (r)
 				goto error_free;
 
-			amdgpu_bo_fence(parent->bo, fence, true);
+			amdgpu_bo_fence(parent->base.bo, fence, true);
 			dma_fence_put(vm->last_dir_update);
 			vm->last_dir_update = dma_fence_get(fence);
 			dma_fence_put(fence);
@@ -1179,7 +1141,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
 		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
 
-		if (!entry->bo)
+		if (!entry->base.bo)
 			continue;
 
 		r = amdgpu_vm_update_level(adev, vm, entry, level + 1);
@@ -1212,7 +1174,7 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
 	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
 		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
 
-		if (!entry->bo)
+		if (!entry->base.bo)
 			continue;
 
 		entry->addr = ~0ULL;
@@ -1267,7 +1229,7 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
 	*entry = &p->vm->root;
 	while ((*entry)->entries) {
 		idx = addr >> (p->adev->vm_manager.block_size * level--);
-		idx %= amdgpu_bo_size((*entry)->bo) / 8;
+		idx %= amdgpu_bo_size((*entry)->base.bo) / 8;
 		*parent = *entry;
 		*entry = &(*entry)->entries[idx];
 	}
@@ -1303,7 +1265,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
 	    p->src ||
 	    !(flags & AMDGPU_PTE_VALID)) {
 
-		dst = amdgpu_bo_gpu_offset(entry->bo);
+		dst = amdgpu_bo_gpu_offset(entry->base.bo);
 		dst = amdgpu_gart_get_vm_pde(p->adev, dst);
 		flags = AMDGPU_PTE_VALID;
 	} else {
@@ -1329,18 +1291,18 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
 		tmp = p->pages_addr;
 		p->pages_addr = NULL;
 
-		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo);
+		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
 		pde = pd_addr + (entry - parent->entries) * 8;
 		amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
 
 		p->pages_addr = tmp;
 	} else {
-		if (parent->bo->shadow) {
-			pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
+		if (parent->base.bo->shadow) {
+			pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow);
 			pde = pd_addr + (entry - parent->entries) * 8;
 			amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
 		}
-		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
+		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
 		pde = pd_addr + (entry - parent->entries) * 8;
 		amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
 	}
@@ -1391,7 +1353,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		if (entry->addr & AMDGPU_PDE_PTE)
 			continue;
 
-		pt = entry->bo;
+		pt = entry->base.bo;
 		if (use_cpu_update) {
 			pe_start = (unsigned long)amdgpu_bo_kptr(pt);
 		} else {
@@ -1611,12 +1573,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (r)
 		goto error_free;
 
-	r = amdgpu_sync_resv(adev, &job->sync, vm->root.bo->tbo.resv,
+	r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv,
 			     owner);
 	if (r)
 		goto error_free;
 
-	r = reservation_object_reserve_shared(vm->root.bo->tbo.resv);
+	r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
 	if (r)
 		goto error_free;
 
@@ -1631,7 +1593,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (r)
 		goto error_free;
 
-	amdgpu_bo_fence(vm->root.bo, f, true);
+	amdgpu_bo_fence(vm->root.base.bo, f, true);
 	dma_fence_put(*fence);
 	*fence = f;
 	return 0;
@@ -1926,7 +1888,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
  */
 static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
-	struct reservation_object *resv = vm->root.bo->tbo.resv;
+	struct reservation_object *resv = vm->root.base.bo->tbo.resv;
 	struct dma_fence *excl, **shared;
 	unsigned i, shared_count;
 	int r;
@@ -2413,12 +2375,25 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
  * Mark @bo as invalid.
  */
 void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
-			     struct amdgpu_bo *bo)
+			     struct amdgpu_bo *bo, bool evicted)
 {
 	struct amdgpu_vm_bo_base *bo_base;
 
 	list_for_each_entry(bo_base, &bo->va, bo_list) {
+		struct amdgpu_vm *vm = bo_base->vm;
+
 		bo_base->moved = true;
+		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
+			spin_lock(&bo_base->vm->status_lock);
+			list_move(&bo_base->vm_status, &vm->evicted);
+			spin_unlock(&bo_base->vm->status_lock);
+			continue;
+		}
+
+		/* Don't add page tables to the moved state */
+		if (bo->tbo.type == ttm_bo_type_kernel)
+			continue;
+
 		spin_lock(&bo_base->vm->status_lock);
 		list_move(&bo_base->vm_status, &bo_base->vm->moved);
 		spin_unlock(&bo_base->vm->status_lock);
@@ -2506,6 +2481,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
 		vm->reserved_vmid[i] = NULL;
 	spin_lock_init(&vm->status_lock);
+	INIT_LIST_HEAD(&vm->evicted);
 	INIT_LIST_HEAD(&vm->moved);
 	INIT_LIST_HEAD(&vm->freed);
 
@@ -2550,30 +2526,31 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
 			     AMDGPU_GEM_DOMAIN_VRAM,
 			     flags,
-			     NULL, NULL, init_pde_value, &vm->root.bo);
+			     NULL, NULL, init_pde_value, &vm->root.base.bo);
 	if (r)
 		goto error_free_sched_entity;
 
-	r = amdgpu_bo_reserve(vm->root.bo, false);
-	if (r)
-		goto error_free_root;
-
-	vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
+	vm->root.base.vm = vm;
+	list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
+	INIT_LIST_HEAD(&vm->root.base.vm_status);
 
 	if (vm->use_cpu_for_update) {
-		r = amdgpu_bo_kmap(vm->root.bo, NULL);
+		r = amdgpu_bo_reserve(vm->root.base.bo, false);
 		if (r)
 			goto error_free_root;
-	}
 
-	amdgpu_bo_unreserve(vm->root.bo);
+		r = amdgpu_bo_kmap(vm->root.base.bo, NULL);
+		if (r)
+			goto error_free_root;
+		amdgpu_bo_unreserve(vm->root.base.bo);
+	}
 
 	return 0;
 
 error_free_root:
-	amdgpu_bo_unref(&vm->root.bo->shadow);
-	amdgpu_bo_unref(&vm->root.bo);
-	vm->root.bo = NULL;
+	amdgpu_bo_unref(&vm->root.base.bo->shadow);
+	amdgpu_bo_unref(&vm->root.base.bo);
+	vm->root.base.bo = NULL;
 
 error_free_sched_entity:
 	amd_sched_entity_fini(&ring->sched, &vm->entity);
@@ -2592,9 +2569,11 @@ static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)
 {
 	unsigned i;
 
-	if (level->bo) {
-		amdgpu_bo_unref(&level->bo->shadow);
-		amdgpu_bo_unref(&level->bo);
+	if (level->base.bo) {
+		list_del(&level->base.bo_list);
+		list_del(&level->base.vm_status);
+		amdgpu_bo_unref(&level->base.bo->shadow);
+		amdgpu_bo_unref(&level->base.bo);
 	}
 
 	if (level->entries)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index ff093d4..4e465e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -111,12 +111,12 @@ struct amdgpu_vm_bo_base {
 };
 
 struct amdgpu_vm_pt {
-	struct amdgpu_bo	*bo;
-	uint64_t		addr;
+	struct amdgpu_vm_bo_base	base;
+	uint64_t			addr;
 
 	/* array of page tables, one for each directory entry */
-	struct amdgpu_vm_pt	*entries;
-	unsigned		last_entry_used;
+	struct amdgpu_vm_pt		*entries;
+	unsigned			last_entry_used;
 };
 
 struct amdgpu_vm {
@@ -126,6 +126,9 @@ struct amdgpu_vm {
 	/* protecting invalidated */
 	spinlock_t		status_lock;
 
+	/* BOs which need validation */
+	struct list_head	evicted;
+
 	/* BOs moved, but not yet updated in the PT */
 	struct list_head	moved;
 
@@ -135,7 +138,6 @@ struct amdgpu_vm {
 	/* contains the page directory */
 	struct amdgpu_vm_pt     root;
 	struct dma_fence	*last_dir_update;
-	uint64_t		last_eviction_counter;
 
 	/* protecting freed */
 	spinlock_t		freed_lock;
@@ -225,7 +227,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
 			 struct amdgpu_bo_list_entry *entry);
-bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+bool amdgpu_vm_ready(struct amdgpu_vm *vm);
 int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			      int (*callback)(void *p, struct amdgpu_bo *bo),
 			      void *param);
@@ -250,7 +252,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			struct amdgpu_bo_va *bo_va,
 			bool clear);
 void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
-			     struct amdgpu_bo *bo);
+			     struct amdgpu_bo *bo, bool evicted);
 struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
 				       struct amdgpu_bo *bo);
 struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
-- 
2.7.4


* [PATCH 7/9] drm/amdgpu: rework page directory filling v2
From: Christian König @ 2017-08-25  9:38 UTC
  To: amd-gfx@lists.freedesktop.org

From: Christian König <christian.koenig@amd.com>

Keep track of relocated PDs/PTs instead of walking and checking all PDs.

v2: better root PD handling

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com> (v1)
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 87 ++++++++++++++++++++++------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  3 ++
 2 files changed, 61 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 592c3e7..b02451f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -196,7 +196,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		}
 
 		spin_lock(&vm->status_lock);
-		list_del_init(&bo_base->vm_status);
+		list_move(&bo_base->vm_status, &vm->relocated);
 	}
 	spin_unlock(&vm->status_lock);
 
@@ -313,8 +313,10 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 			entry->base.vm = vm;
 			entry->base.bo = pt;
 			list_add_tail(&entry->base.bo_list, &pt->va);
-			INIT_LIST_HEAD(&entry->base.vm_status);
-			entry->addr = 0;
+			spin_lock(&vm->status_lock);
+			list_add(&entry->base.vm_status, &vm->relocated);
+			spin_unlock(&vm->status_lock);
+			entry->addr = ~0ULL;
 		}
 
 		if (level < adev->vm_manager.num_level) {
@@ -999,18 +1001,17 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  */
 static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 				  struct amdgpu_vm *vm,
-				  struct amdgpu_vm_pt *parent,
-				  unsigned level)
+				  struct amdgpu_vm_pt *parent)
 {
 	struct amdgpu_bo *shadow;
 	struct amdgpu_ring *ring = NULL;
 	uint64_t pd_addr, shadow_addr = 0;
-	uint32_t incr = amdgpu_vm_bo_size(adev, level + 1);
 	uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
 	unsigned count = 0, pt_idx, ndw = 0;
 	struct amdgpu_job *job;
 	struct amdgpu_pte_update_params params;
 	struct dma_fence *fence = NULL;
+	uint32_t incr;
 
 	int r;
 
@@ -1058,12 +1059,17 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 
 	/* walk over the address space and update the directory */
 	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
-		struct amdgpu_bo *bo = parent->entries[pt_idx].base.bo;
+		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
+		struct amdgpu_bo *bo = entry->base.bo;
 		uint64_t pde, pt;
 
 		if (bo == NULL)
 			continue;
 
+		spin_lock(&vm->status_lock);
+		list_del_init(&entry->base.vm_status);
+		spin_unlock(&vm->status_lock);
+
 		pt = amdgpu_bo_gpu_offset(bo);
 		pt = amdgpu_gart_get_vm_pde(adev, pt);
 		/* Don't update huge pages here */
@@ -1074,6 +1080,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 		parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID;
 
 		pde = pd_addr + pt_idx * 8;
+		incr = amdgpu_bo_size(bo);
 		if (((last_pde + 8 * count) != pde) ||
 		    ((last_pt + incr * count) != pt) ||
 		    (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
@@ -1134,20 +1141,6 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 			dma_fence_put(fence);
 		}
 	}
-	/*
-	 * Recurse into the subdirectories. This recursion is harmless because
-	 * we only have a maximum of 5 layers.
-	 */
-	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
-		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
-
-		if (!entry->base.bo)
-			continue;
-
-		r = amdgpu_vm_update_level(adev, vm, entry, level + 1);
-		if (r)
-			return r;
-	}
 
 	return 0;
 
@@ -1163,7 +1156,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
  *
  * Mark all PD level as invalid after an error.
  */
-static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
+static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
+				       struct amdgpu_vm_pt *parent)
 {
 	unsigned pt_idx;
 
@@ -1178,7 +1172,10 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
 			continue;
 
 		entry->addr = ~0ULL;
-		amdgpu_vm_invalidate_level(entry);
+		spin_lock(&vm->status_lock);
+		list_move(&entry->base.vm_status, &vm->relocated);
+		spin_unlock(&vm->status_lock);
+		amdgpu_vm_invalidate_level(vm, entry);
 	}
 }
 
@@ -1196,9 +1193,36 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 {
 	int r;
 
-	r = amdgpu_vm_update_level(adev, vm, &vm->root, 0);
-	if (r)
-		amdgpu_vm_invalidate_level(&vm->root);
+	spin_lock(&vm->status_lock);
+	while (!list_empty(&vm->relocated)) {
+		struct amdgpu_vm_bo_base *bo_base;
+		struct amdgpu_bo *bo;
+
+		bo_base = list_first_entry(&vm->relocated,
+					   struct amdgpu_vm_bo_base,
+					   vm_status);
+		spin_unlock(&vm->status_lock);
+
+		bo = bo_base->bo->parent;
+		if (bo) {
+			struct amdgpu_vm_bo_base *parent;
+			struct amdgpu_vm_pt *pt;
+
+			parent = list_first_entry(&bo->va,
+						  struct amdgpu_vm_bo_base,
+						  bo_list);
+			pt = container_of(parent, struct amdgpu_vm_pt, base);
+
+			r = amdgpu_vm_update_level(adev, vm, pt);
+			if (r) {
+				amdgpu_vm_invalidate_level(vm, &vm->root);
+				break;
+			}
+		}
+
+		spin_lock(&vm->status_lock);
+	}
+	spin_unlock(&vm->status_lock);
 
 	if (vm->use_cpu_for_update) {
 		/* Flush HDP */
@@ -1600,7 +1624,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 error_free:
 	amdgpu_job_free(job);
-	amdgpu_vm_invalidate_level(&vm->root);
+	amdgpu_vm_invalidate_level(vm, &vm->root);
 	return r;
 }
 
@@ -2390,9 +2414,13 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 			continue;
 		}
 
-		/* Don't add page tables to the moved state */
-		if (bo->tbo.type == ttm_bo_type_kernel)
+		if (bo->tbo.type == ttm_bo_type_kernel) {
+			spin_lock(&bo_base->vm->status_lock);
+			if (list_empty(&bo_base->vm_status))
+				list_add(&bo_base->vm_status, &vm->relocated);
+			spin_unlock(&bo_base->vm->status_lock);
 			continue;
+		}
 
 		spin_lock(&bo_base->vm->status_lock);
 		list_move(&bo_base->vm_status, &bo_base->vm->moved);
@@ -2482,6 +2510,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		vm->reserved_vmid[i] = NULL;
 	spin_lock_init(&vm->status_lock);
 	INIT_LIST_HEAD(&vm->evicted);
+	INIT_LIST_HEAD(&vm->relocated);
 	INIT_LIST_HEAD(&vm->moved);
 	INIT_LIST_HEAD(&vm->freed);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 4e465e8..c3753af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -129,6 +129,9 @@ struct amdgpu_vm {
 	/* BOs which need validation */
 	struct list_head	evicted;
 
+	/* PT BOs which were relocated and whose parent needs an update */
+	struct list_head	relocated;
+
 	/* BOs moved, but not yet updated in the PT */
 	struct list_head	moved;
 
-- 
2.7.4


* [PATCH 8/9] drm/amdgpu: add support for per VM BOs
From: Christian König @ 2017-08-25  9:38 UTC
  To: amd-gfx@lists.freedesktop.org

From: Christian König <christian.koenig@amd.com>

Per VM BOs are handled like VM PDs and PTs. They are always valid and don't
need to be specified in the BO lists.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 73 ++++++++++++++++++++++++----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  5 ++-
 3 files changed, 55 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index f68ac56..48e18cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -813,7 +813,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 
 	}
 
-	r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync);
+	r = amdgpu_vm_handle_moved(adev, vm, &p->job->sync);
 
 	if (amdgpu_vm_debug && p->bo_list) {
 		/* Invalidate all BOs to test for userspace bugs */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b02451f..5bc79c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -189,14 +189,18 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			spin_unlock(&glob->lru_lock);
 		}
 
-		if (vm->use_cpu_for_update) {
+		if (bo->tbo.type == ttm_bo_type_kernel &&
+		    vm->use_cpu_for_update) {
 			r = amdgpu_bo_kmap(bo, NULL);
 			if (r)
 				return r;
 		}
 
 		spin_lock(&vm->status_lock);
-		list_move(&bo_base->vm_status, &vm->relocated);
+		if (bo->tbo.type != ttm_bo_type_kernel)
+			list_move(&bo_base->vm_status, &vm->moved);
+		else
+			list_move(&bo_base->vm_status, &vm->relocated);
 	}
 	spin_unlock(&vm->status_lock);
 
@@ -1992,20 +1996,23 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 }
 
 /**
- * amdgpu_vm_clear_moved - clear moved BOs in the PT
+ * amdgpu_vm_handle_moved - handle moved BOs in the PT
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
+ * @sync: sync object to add fences to
  *
- * Make sure all moved BOs are cleared in the PT.
+ * Make sure all BOs which are moved are updated in the PTs.
  * Returns 0 for success.
  *
- * PTs have to be reserved and mutex must be locked!
+ * PTs have to be reserved!
  */
-int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-			    struct amdgpu_sync *sync)
+int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
+			   struct amdgpu_vm *vm,
+			   struct amdgpu_sync *sync)
 {
 	struct amdgpu_bo_va *bo_va = NULL;
+	bool clear;
 	int r = 0;
 
 	spin_lock(&vm->status_lock);
@@ -2014,7 +2021,10 @@ int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			struct amdgpu_bo_va, base.vm_status);
 		spin_unlock(&vm->status_lock);
 
-		r = amdgpu_vm_bo_update(adev, bo_va, true);
+		/* Per VM BOs never need to be cleared in the page tables */
+		clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv;
+
+		r = amdgpu_vm_bo_update(adev, bo_va, clear);
 		if (r)
 			return r;
 
@@ -2066,6 +2076,37 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
 	return bo_va;
 }
 
+
+/**
+ * amdgpu_vm_bo_insert_map - insert a new mapping
+ *
+ * @adev: amdgpu_device pointer
+ * @bo_va: bo_va to store the address
+ * @mapping: the mapping to insert
+ *
+ * Insert a new mapping into all structures.
+ */
+static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
+				    struct amdgpu_bo_va *bo_va,
+				    struct amdgpu_bo_va_mapping *mapping)
+{
+	struct amdgpu_vm *vm = bo_va->base.vm;
+	struct amdgpu_bo *bo = bo_va->base.bo;
+
+	list_add(&mapping->list, &bo_va->invalids);
+	amdgpu_vm_it_insert(mapping, &vm->va);
+
+	if (mapping->flags & AMDGPU_PTE_PRT)
+		amdgpu_vm_prt_get(adev);
+
+	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
+		spin_lock(&vm->status_lock);
+		list_move(&bo_va->base.vm_status, &vm->moved);
+		spin_unlock(&vm->status_lock);
+	}
+	trace_amdgpu_vm_bo_map(bo_va, mapping);
+}
+
 /**
  * amdgpu_vm_bo_map - map bo inside a vm
  *
@@ -2117,18 +2158,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 	if (!mapping)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&mapping->list);
 	mapping->start = saddr;
 	mapping->last = eaddr;
 	mapping->offset = offset;
 	mapping->flags = flags;
 
-	list_add(&mapping->list, &bo_va->invalids);
-	amdgpu_vm_it_insert(mapping, &vm->va);
-
-	if (flags & AMDGPU_PTE_PRT)
-		amdgpu_vm_prt_get(adev);
-	trace_amdgpu_vm_bo_map(bo_va, mapping);
+	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
 
 	return 0;
 }
@@ -2155,7 +2190,6 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
 {
 	struct amdgpu_bo_va_mapping *mapping;
 	struct amdgpu_bo *bo = bo_va->base.bo;
-	struct amdgpu_vm *vm = bo_va->base.vm;
 	uint64_t eaddr;
 	int r;
 
@@ -2189,12 +2223,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
 	mapping->offset = offset;
 	mapping->flags = flags;
 
-	list_add(&mapping->list, &bo_va->invalids);
-	amdgpu_vm_it_insert(mapping, &vm->va);
-
-	if (flags & AMDGPU_PTE_PRT)
-		amdgpu_vm_prt_get(adev);
-	trace_amdgpu_vm_bo_map(bo_va, mapping);
+	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index c3753af..90b7741 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -249,8 +249,9 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			  struct amdgpu_vm *vm,
 			  struct dma_fence **fence);
-int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-			  struct amdgpu_sync *sync);
+int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
+			   struct amdgpu_vm *vm,
+			   struct amdgpu_sync *sync);
 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			struct amdgpu_bo_va *bo_va,
 			bool clear);
-- 
2.7.4


* [PATCH 9/9] drm/amdgpu: WIP add IOCTL interface for per VM BOs
From: Christian König @ 2017-08-25  9:38 UTC
  To: amd-gfx@lists.freedesktop.org

From: Christian König <christian.koenig@amd.com>

Add the IOCTL interface so that applications can allocate per VM BOs.

Still WIP since not all corner cases are tested yet, but this reduces average
CS overhead for 10K BOs from 21ms down to 48us.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  7 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 59 ++++++++++++++++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  3 +-
 include/uapi/drm/amdgpu_drm.h             |  2 ++
 5 files changed, 51 insertions(+), 22 deletions(-)
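
For context on how userspace would reach the new path, here is a rough
sketch of a raw DRM_IOCTL_AMDGPU_GEM_CREATE call passing the per-VM flag
introduced by this series. It assumes kernel uapi headers with this WIP
patch applied; the AMDGPU_GEM_CREATE_LOCAL fallback define (and its bit
value) and the render node path are assumptions for the example, not part
of the patch.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include <drm/amdgpu_drm.h>

#ifndef AMDGPU_GEM_CREATE_LOCAL
/* Bit value assumed here; the real define comes from this WIP patch. */
#define AMDGPU_GEM_CREATE_LOCAL (1 << 6)
#endif

int main(void)
{
	union drm_amdgpu_gem_create args;
	int fd, ret;

	/* Render node path is an assumption; adjust for the target system. */
	fd = open("/dev/dri/renderD128", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&args, 0, sizeof(args));
	args.in.bo_size = 1 << 20;		/* 1 MiB */
	args.in.alignment = 4096;
	args.in.domains = AMDGPU_GEM_DOMAIN_VRAM;
	/* NO_CPU_ACCESS plus the new per-VM ("local") flag from this series */
	args.in.domain_flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
			       AMDGPU_GEM_CREATE_LOCAL;

	ret = ioctl(fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &args);
	if (ret)
		perror("DRM_IOCTL_AMDGPU_GEM_CREATE");
	else
		printf("per-VM BO handle: %u\n", args.out.handle);

	close(fd);
	return ret ? 1 : 0;
}

Such a BO shares the reservation object of the VM's root page directory,
which is why it never has to be listed in the per-CS BO list.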

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b1e817c..21cab36 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -457,9 +457,10 @@ struct amdgpu_sa_bo {
  */
 void amdgpu_gem_force_release(struct amdgpu_device *adev);
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
-				int alignment, u32 initial_domain,
-				u64 flags, bool kernel,
-				struct drm_gem_object **obj);
+			     int alignment, u32 initial_domain,
+			     u64 flags, bool kernel,
+			     struct reservation_object *resv,
+			     struct drm_gem_object **obj);
 
 int amdgpu_mode_dumb_create(struct drm_file *file_priv,
 			    struct drm_device *dev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 0e907ea..7256f83 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
 				       AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
 				       AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
 				       AMDGPU_GEM_CREATE_VRAM_CLEARED,
-				       true, &gobj);
+				       true, NULL, &gobj);
 	if (ret) {
 		pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
 		return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index d028806..b8e8d67 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)
 }
 
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
-				int alignment, u32 initial_domain,
-				u64 flags, bool kernel,
-				struct drm_gem_object **obj)
+			     int alignment, u32 initial_domain,
+			     u64 flags, bool kernel,
+			     struct reservation_object *resv,
+			     struct drm_gem_object **obj)
 {
-	struct amdgpu_bo *robj;
+	struct amdgpu_bo *bo;
 	int r;
 
 	*obj = NULL;
@@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 
 retry:
 	r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
-			     flags, NULL, NULL, 0, &robj);
+			     flags, NULL, resv, 0, &bo);
 	if (r) {
 		if (r != -ERESTARTSYS) {
 			if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
@@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 		}
 		return r;
 	}
-	*obj = &robj->gem_base;
+	*obj = &bo->gem_base;
 
 	return 0;
 }
@@ -136,13 +137,14 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
 	struct amdgpu_vm *vm = &fpriv->vm;
 
 	struct amdgpu_bo_list_entry vm_pd;
-	struct list_head list;
+	struct list_head list, duplicates;
 	struct ttm_validate_buffer tv;
 	struct ww_acquire_ctx ticket;
 	struct amdgpu_bo_va *bo_va;
 	int r;
 
 	INIT_LIST_HEAD(&list);
+	INIT_LIST_HEAD(&duplicates);
 
 	tv.bo = &bo->tbo;
 	tv.shared = true;
@@ -150,7 +152,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
 
 	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
 
-	r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
+	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
 	if (r) {
 		dev_err(adev->dev, "leaking bo va because "
 			"we fail to reserve bo (%d)\n", r);
@@ -185,9 +187,12 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	struct amdgpu_vm *vm = &fpriv->vm;
 	union drm_amdgpu_gem_create *args = data;
 	uint64_t flags = args->in.domain_flags;
 	uint64_t size = args->in.bo_size;
+	struct reservation_object *resv = NULL;
 	struct drm_gem_object *gobj;
 	uint32_t handle;
 	int r;
@@ -196,7 +201,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 	if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
 		      AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 		      AMDGPU_GEM_CREATE_CPU_GTT_USWC |
-		      AMDGPU_GEM_CREATE_VRAM_CLEARED))
+		      AMDGPU_GEM_CREATE_VRAM_CLEARED |
+		      AMDGPU_GEM_CREATE_LOCAL))
 		return -EINVAL;
 
 	/* reject invalid gem domains */
@@ -223,9 +229,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 	}
 	size = roundup(size, PAGE_SIZE);
 
+	if (flags & AMDGPU_GEM_CREATE_LOCAL) {
+		r = amdgpu_bo_reserve(vm->root.base.bo, false);
+		if (r)
+			return r;
+
+		resv = vm->root.base.bo->tbo.resv;
+	}
+
 	r = amdgpu_gem_object_create(adev, size, args->in.alignment,
 				     (u32)(0xffffffff & args->in.domains),
-				     flags, false, &gobj);
+				     flags, false, resv, &gobj);
+	if (flags & AMDGPU_GEM_CREATE_LOCAL) {
+		if (!r) {
+			struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
+
+			abo->parent = amdgpu_bo_ref(vm->root.base.bo);
+		}
+		amdgpu_bo_unreserve(vm->root.base.bo);
+	}
 	if (r)
 		return r;
 
@@ -267,9 +289,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 	}
 
 	/* create a gem object to contain this object in */
-	r = amdgpu_gem_object_create(adev, args->size, 0,
-				     AMDGPU_GEM_DOMAIN_CPU, 0,
-				     0, &gobj);
+	r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
+				     0, 0, NULL, &gobj);
 	if (r)
 		return r;
 
@@ -521,7 +542,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 	struct amdgpu_bo_list_entry vm_pd;
 	struct ttm_validate_buffer tv;
 	struct ww_acquire_ctx ticket;
-	struct list_head list;
+	struct list_head list, duplicates;
 	uint64_t va_flags;
 	int r = 0;
 
@@ -557,6 +578,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 	}
 
 	INIT_LIST_HEAD(&list);
+	INIT_LIST_HEAD(&duplicates);
 	if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
 	    !(args->flags & AMDGPU_VM_PAGE_PRT)) {
 		gobj = drm_gem_object_lookup(filp, args->handle);
@@ -573,7 +595,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
 
-	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
+	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
 	if (r)
 		goto error_unref;
 
@@ -639,6 +661,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *filp)
 {
+	struct amdgpu_device *adev = dev->dev_private;
 	struct drm_amdgpu_gem_op *args = data;
 	struct drm_gem_object *gobj;
 	struct amdgpu_bo *robj;
@@ -686,6 +709,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
 		if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
 			robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
 
+		if (robj->flags & AMDGPU_GEM_CREATE_LOCAL)
+			amdgpu_vm_bo_invalidate(adev, robj, true);
+
 		amdgpu_bo_unreserve(robj);
 		break;
 	default:
@@ -715,8 +741,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
 	r = amdgpu_gem_object_create(adev, args->size, 0,
 				     AMDGPU_GEM_DOMAIN_VRAM,
 				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
-				     ttm_bo_type_device,
-				     &gobj);
+				     false, NULL, &gobj);
 	if (r)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index 5b3f928..f407499 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
 {
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
 
-	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
+	    bo->flags & AMDGPU_GEM_CREATE_LOCAL)
 		return ERR_PTR(-EPERM);
 
 	return drm_gem_prime_export(dev, gobj, flags);
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index d0ee739..05241a6 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -89,6 +89,8 @@ extern "C" {
 #define AMDGPU_GEM_CREATE_SHADOW		(1 << 4)
 /* Flag that allocating the BO should use linear VRAM */
 #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS	(1 << 5)
+/* Flag that BO is local in the VM */
+#define AMDGPU_GEM_CREATE_LOCAL			(1 << 6)
 
 struct drm_amdgpu_gem_create_in  {
 	/** the requested memory size */
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [PATCH 9/9] drm/amdgpu: WIP add IOCTL interface for per VM BOs
       [not found]     ` <1503653899-1781-9-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-25 10:32       ` zhoucm1
       [not found]         ` <19c04fac-1fdd-1436-e85c-95dd4ac02b1b-5C7GfCeVMHo@public.gmane.org>
  2017-08-25 21:31       ` Felix Kuehling
  1 sibling, 1 reply; 30+ messages in thread
From: zhoucm1 @ 2017-08-25 10:32 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



On 2017-08-25 17:38, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Add the IOCTL interface so that applications can allocate per VM BOs.
>
> Still WIP since not all corner cases are tested yet, but this reduces average
> CS overhead for 10K BOs from 21ms down to 48us.
Wow, cheers, eventually you get per-VM BOs to share the same reservation
with the PD/PTs, which indeed saves a lot of BO list overhead.

Overall it looks good, I will take a detailed look at this tomorrow.

Regards,
David Zhou
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  7 ++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 59 ++++++++++++++++++++++---------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  3 +-
>   include/uapi/drm/amdgpu_drm.h             |  2 ++
>   5 files changed, 51 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index b1e817c..21cab36 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -457,9 +457,10 @@ struct amdgpu_sa_bo {
>    */
>   void amdgpu_gem_force_release(struct amdgpu_device *adev);
>   int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
> -				int alignment, u32 initial_domain,
> -				u64 flags, bool kernel,
> -				struct drm_gem_object **obj);
> +			     int alignment, u32 initial_domain,
> +			     u64 flags, bool kernel,
> +			     struct reservation_object *resv,
> +			     struct drm_gem_object **obj);
>   
>   int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>   			    struct drm_device *dev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
> index 0e907ea..7256f83 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
> @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
>   				       AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>   				       AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>   				       AMDGPU_GEM_CREATE_VRAM_CLEARED,
> -				       true, &gobj);
> +				       true, NULL, &gobj);
>   	if (ret) {
>   		pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
>   		return -ENOMEM;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index d028806..b8e8d67 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)
>   }
>   
>   int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
> -				int alignment, u32 initial_domain,
> -				u64 flags, bool kernel,
> -				struct drm_gem_object **obj)
> +			     int alignment, u32 initial_domain,
> +			     u64 flags, bool kernel,
> +			     struct reservation_object *resv,
> +			     struct drm_gem_object **obj)
>   {
> -	struct amdgpu_bo *robj;
> +	struct amdgpu_bo *bo;
>   	int r;
>   
>   	*obj = NULL;
> @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
>   
>   retry:
>   	r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
> -			     flags, NULL, NULL, 0, &robj);
> +			     flags, NULL, resv, 0, &bo);
>   	if (r) {
>   		if (r != -ERESTARTSYS) {
>   			if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
> @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
>   		}
>   		return r;
>   	}
> -	*obj = &robj->gem_base;
> +	*obj = &bo->gem_base;
>   
>   	return 0;
>   }
> @@ -136,13 +137,14 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>   	struct amdgpu_vm *vm = &fpriv->vm;
>   
>   	struct amdgpu_bo_list_entry vm_pd;
> -	struct list_head list;
> +	struct list_head list, duplicates;
>   	struct ttm_validate_buffer tv;
>   	struct ww_acquire_ctx ticket;
>   	struct amdgpu_bo_va *bo_va;
>   	int r;
>   
>   	INIT_LIST_HEAD(&list);
> +	INIT_LIST_HEAD(&duplicates);
>   
>   	tv.bo = &bo->tbo;
>   	tv.shared = true;
> @@ -150,7 +152,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>   
>   	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
>   
> -	r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
> +	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>   	if (r) {
>   		dev_err(adev->dev, "leaking bo va because "
>   			"we fail to reserve bo (%d)\n", r);
> @@ -185,9 +187,12 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>   			    struct drm_file *filp)
>   {
>   	struct amdgpu_device *adev = dev->dev_private;
> +	struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +	struct amdgpu_vm *vm = &fpriv->vm;
>   	union drm_amdgpu_gem_create *args = data;
>   	uint64_t flags = args->in.domain_flags;
>   	uint64_t size = args->in.bo_size;
> +	struct reservation_object *resv = NULL;
>   	struct drm_gem_object *gobj;
>   	uint32_t handle;
>   	int r;
> @@ -196,7 +201,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>   	if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>   		      AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>   		      AMDGPU_GEM_CREATE_CPU_GTT_USWC |
> -		      AMDGPU_GEM_CREATE_VRAM_CLEARED))
> +		      AMDGPU_GEM_CREATE_VRAM_CLEARED |
> +		      AMDGPU_GEM_CREATE_LOCAL))
>   		return -EINVAL;
>   
>   	/* reject invalid gem domains */
> @@ -223,9 +229,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>   	}
>   	size = roundup(size, PAGE_SIZE);
>   
> +	if (flags & AMDGPU_GEM_CREATE_LOCAL) {
> +		r = amdgpu_bo_reserve(vm->root.base.bo, false);
> +		if (r)
> +			return r;
> +
> +		resv = vm->root.base.bo->tbo.resv;
> +	}
> +
>   	r = amdgpu_gem_object_create(adev, size, args->in.alignment,
>   				     (u32)(0xffffffff & args->in.domains),
> -				     flags, false, &gobj);
> +				     flags, false, resv, &gobj);
> +	if (flags & AMDGPU_GEM_CREATE_LOCAL) {
> +		if (!r) {
> +			struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
> +
> +			abo->parent = amdgpu_bo_ref(vm->root.base.bo);
> +		}
> +		amdgpu_bo_unreserve(vm->root.base.bo);
> +	}
>   	if (r)
>   		return r;
>   
> @@ -267,9 +289,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
>   	}
>   
>   	/* create a gem object to contain this object in */
> -	r = amdgpu_gem_object_create(adev, args->size, 0,
> -				     AMDGPU_GEM_DOMAIN_CPU, 0,
> -				     0, &gobj);
> +	r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
> +				     0, 0, NULL, &gobj);
>   	if (r)
>   		return r;
>   
> @@ -521,7 +542,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>   	struct amdgpu_bo_list_entry vm_pd;
>   	struct ttm_validate_buffer tv;
>   	struct ww_acquire_ctx ticket;
> -	struct list_head list;
> +	struct list_head list, duplicates;
>   	uint64_t va_flags;
>   	int r = 0;
>   
> @@ -557,6 +578,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>   	}
>   
>   	INIT_LIST_HEAD(&list);
> +	INIT_LIST_HEAD(&duplicates);
>   	if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
>   	    !(args->flags & AMDGPU_VM_PAGE_PRT)) {
>   		gobj = drm_gem_object_lookup(filp, args->handle);
> @@ -573,7 +595,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>   
>   	amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
>   
> -	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
> +	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
>   	if (r)
>   		goto error_unref;
>   
> @@ -639,6 +661,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>   int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>   			struct drm_file *filp)
>   {
> +	struct amdgpu_device *adev = dev->dev_private;
>   	struct drm_amdgpu_gem_op *args = data;
>   	struct drm_gem_object *gobj;
>   	struct amdgpu_bo *robj;
> @@ -686,6 +709,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>   		if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
>   			robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
>   
> +		if (robj->flags & AMDGPU_GEM_CREATE_LOCAL)
> +			amdgpu_vm_bo_invalidate(adev, robj, true);
> +
>   		amdgpu_bo_unreserve(robj);
>   		break;
>   	default:
> @@ -715,8 +741,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>   	r = amdgpu_gem_object_create(adev, args->size, 0,
>   				     AMDGPU_GEM_DOMAIN_VRAM,
>   				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
> -				     ttm_bo_type_device,
> -				     &gobj);
> +				     false, NULL, &gobj);
>   	if (r)
>   		return -ENOMEM;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> index 5b3f928..f407499 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>   {
>   	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>   
> -	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
> +	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
> +	    bo->flags & AMDGPU_GEM_CREATE_LOCAL)
>   		return ERR_PTR(-EPERM);
>   
>   	return drm_gem_prime_export(dev, gobj, flags);
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index d0ee739..05241a6 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -89,6 +89,8 @@ extern "C" {
>   #define AMDGPU_GEM_CREATE_SHADOW		(1 << 4)
>   /* Flag that allocating the BO should use linear VRAM */
>   #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS	(1 << 5)
> +/* Flag that BO is local in the VM */
> +#define AMDGPU_GEM_CREATE_LOCAL			(1 << 6)
>   
>   struct drm_amdgpu_gem_create_in  {
>   	/** the requested memory size */

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 9/9] drm/amdgpu: WIP add IOCTL interface for per VM BOs
       [not found]         ` <19c04fac-1fdd-1436-e85c-95dd4ac02b1b-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-25 13:00           ` Christian König
       [not found]             ` <9304342a-def2-187e-4e9c-d872c58cdc17-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 30+ messages in thread
From: Christian König @ 2017-08-25 13:00 UTC (permalink / raw)
  To: zhoucm1, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Olsak, Marek

On 25.08.2017 at 12:32, zhoucm1 wrote:
>
>
> On 2017-08-25 17:38, Christian König wrote:
>> From: Christian König <christian.koenig@amd.com>
>>
>> Add the IOCTL interface so that applications can allocate per VM BOs.
>>
>> Still WIP since not all corner cases are tested yet, but this reduces 
>> average
>> CS overhead for 10K BOs from 21ms down to 48us.
> Wow, cheers, eventually you get per-VM BOs to share the same reservation
> with the PD/PTs, which indeed saves a lot of BO list overhead.

Don't cheer too loudly yet, that is a completely constructed test case.

So far I wasn't able to achieve any improvements with any real game on
this with Mesa.

BTW: Marek, can you take a look with some CPU-bound tests? I can provide
a kernel branch if necessary.

Regards,
Christian.

> Overall it looks good, I will take a detailed look at this tomorrow.
>
> Regards,
> David Zhou
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  7 ++--
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 59 
>> ++++++++++++++++++++++---------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  3 +-
>>   include/uapi/drm/amdgpu_drm.h             |  2 ++
>>   5 files changed, 51 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index b1e817c..21cab36 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -457,9 +457,10 @@ struct amdgpu_sa_bo {
>>    */
>>   void amdgpu_gem_force_release(struct amdgpu_device *adev);
>>   int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned 
>> long size,
>> -                int alignment, u32 initial_domain,
>> -                u64 flags, bool kernel,
>> -                struct drm_gem_object **obj);
>> +                 int alignment, u32 initial_domain,
>> +                 u64 flags, bool kernel,
>> +                 struct reservation_object *resv,
>> +                 struct drm_gem_object **obj);
>>     int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>>                   struct drm_device *dev,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>> index 0e907ea..7256f83 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>> @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct 
>> amdgpu_fbdev *rfbdev,
>>                          AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>                          AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>>                          AMDGPU_GEM_CREATE_VRAM_CLEARED,
>> -                       true, &gobj);
>> +                       true, NULL, &gobj);
>>       if (ret) {
>>           pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
>>           return -ENOMEM;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index d028806..b8e8d67 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object 
>> *gobj)
>>   }
>>     int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned 
>> long size,
>> -                int alignment, u32 initial_domain,
>> -                u64 flags, bool kernel,
>> -                struct drm_gem_object **obj)
>> +                 int alignment, u32 initial_domain,
>> +                 u64 flags, bool kernel,
>> +                 struct reservation_object *resv,
>> +                 struct drm_gem_object **obj)
>>   {
>> -    struct amdgpu_bo *robj;
>> +    struct amdgpu_bo *bo;
>>       int r;
>>         *obj = NULL;
>> @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device 
>> *adev, unsigned long size,
>>     retry:
>>       r = amdgpu_bo_create(adev, size, alignment, kernel, 
>> initial_domain,
>> -                 flags, NULL, NULL, 0, &robj);
>> +                 flags, NULL, resv, 0, &bo);
>>       if (r) {
>>           if (r != -ERESTARTSYS) {
>>               if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
>> @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device 
>> *adev, unsigned long size,
>>           }
>>           return r;
>>       }
>> -    *obj = &robj->gem_base;
>> +    *obj = &bo->gem_base;
>>         return 0;
>>   }
>> @@ -136,13 +137,14 @@ void amdgpu_gem_object_close(struct 
>> drm_gem_object *obj,
>>       struct amdgpu_vm *vm = &fpriv->vm;
>>         struct amdgpu_bo_list_entry vm_pd;
>> -    struct list_head list;
>> +    struct list_head list, duplicates;
>>       struct ttm_validate_buffer tv;
>>       struct ww_acquire_ctx ticket;
>>       struct amdgpu_bo_va *bo_va;
>>       int r;
>>         INIT_LIST_HEAD(&list);
>> +    INIT_LIST_HEAD(&duplicates);
>>         tv.bo = &bo->tbo;
>>       tv.shared = true;
>> @@ -150,7 +152,7 @@ void amdgpu_gem_object_close(struct 
>> drm_gem_object *obj,
>>         amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
>>   -    r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
>> +    r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>>       if (r) {
>>           dev_err(adev->dev, "leaking bo va because "
>>               "we fail to reserve bo (%d)\n", r);
>> @@ -185,9 +187,12 @@ int amdgpu_gem_create_ioctl(struct drm_device 
>> *dev, void *data,
>>                   struct drm_file *filp)
>>   {
>>       struct amdgpu_device *adev = dev->dev_private;
>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> +    struct amdgpu_vm *vm = &fpriv->vm;
>>       union drm_amdgpu_gem_create *args = data;
>>       uint64_t flags = args->in.domain_flags;
>>       uint64_t size = args->in.bo_size;
>> +    struct reservation_object *resv = NULL;
>>       struct drm_gem_object *gobj;
>>       uint32_t handle;
>>       int r;
>> @@ -196,7 +201,8 @@ int amdgpu_gem_create_ioctl(struct drm_device 
>> *dev, void *data,
>>       if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>                 AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>>                 AMDGPU_GEM_CREATE_CPU_GTT_USWC |
>> -              AMDGPU_GEM_CREATE_VRAM_CLEARED))
>> +              AMDGPU_GEM_CREATE_VRAM_CLEARED |
>> +              AMDGPU_GEM_CREATE_LOCAL))
>>           return -EINVAL;
>>         /* reject invalid gem domains */
>> @@ -223,9 +229,25 @@ int amdgpu_gem_create_ioctl(struct drm_device 
>> *dev, void *data,
>>       }
>>       size = roundup(size, PAGE_SIZE);
>>   +    if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>> +        r = amdgpu_bo_reserve(vm->root.base.bo, false);
>> +        if (r)
>> +            return r;
>> +
>> +        resv = vm->root.base.bo->tbo.resv;
>> +    }
>> +
>>       r = amdgpu_gem_object_create(adev, size, args->in.alignment,
>>                        (u32)(0xffffffff & args->in.domains),
>> -                     flags, false, &gobj);
>> +                     flags, false, resv, &gobj);
>> +    if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>> +        if (!r) {
>> +            struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
>> +
>> +            abo->parent = amdgpu_bo_ref(vm->root.base.bo);
>> +        }
>> +        amdgpu_bo_unreserve(vm->root.base.bo);
>> +    }
>>       if (r)
>>           return r;
>>   @@ -267,9 +289,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device 
>> *dev, void *data,
>>       }
>>         /* create a gem object to contain this object in */
>> -    r = amdgpu_gem_object_create(adev, args->size, 0,
>> -                     AMDGPU_GEM_DOMAIN_CPU, 0,
>> -                     0, &gobj);
>> +    r = amdgpu_gem_object_create(adev, args->size, 0, 
>> AMDGPU_GEM_DOMAIN_CPU,
>> +                     0, 0, NULL, &gobj);
>>       if (r)
>>           return r;
>>   @@ -521,7 +542,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, 
>> void *data,
>>       struct amdgpu_bo_list_entry vm_pd;
>>       struct ttm_validate_buffer tv;
>>       struct ww_acquire_ctx ticket;
>> -    struct list_head list;
>> +    struct list_head list, duplicates;
>>       uint64_t va_flags;
>>       int r = 0;
>>   @@ -557,6 +578,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, 
>> void *data,
>>       }
>>         INIT_LIST_HEAD(&list);
>> +    INIT_LIST_HEAD(&duplicates);
>>       if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
>>           !(args->flags & AMDGPU_VM_PAGE_PRT)) {
>>           gobj = drm_gem_object_lookup(filp, args->handle);
>> @@ -573,7 +595,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, 
>> void *data,
>>         amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
>>   -    r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
>> +    r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
>>       if (r)
>>           goto error_unref;
>>   @@ -639,6 +661,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, 
>> void *data,
>>   int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>>               struct drm_file *filp)
>>   {
>> +    struct amdgpu_device *adev = dev->dev_private;
>>       struct drm_amdgpu_gem_op *args = data;
>>       struct drm_gem_object *gobj;
>>       struct amdgpu_bo *robj;
>> @@ -686,6 +709,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, 
>> void *data,
>>           if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
>>               robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
>>   +        if (robj->flags & AMDGPU_GEM_CREATE_LOCAL)
>> +            amdgpu_vm_bo_invalidate(adev, robj, true);
>> +
>>           amdgpu_bo_unreserve(robj);
>>           break;
>>       default:
>> @@ -715,8 +741,7 @@ int amdgpu_mode_dumb_create(struct drm_file 
>> *file_priv,
>>       r = amdgpu_gem_object_create(adev, args->size, 0,
>>                        AMDGPU_GEM_DOMAIN_VRAM,
>>                        AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
>> -                     ttm_bo_type_device,
>> -                     &gobj);
>> +                     false, NULL, &gobj);
>>       if (r)
>>           return -ENOMEM;
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> index 5b3f928..f407499 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct 
>> drm_device *dev,
>>   {
>>       struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>>   -    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>> +        bo->flags & AMDGPU_GEM_CREATE_LOCAL)
>>           return ERR_PTR(-EPERM);
>>         return drm_gem_prime_export(dev, gobj, flags);
>> diff --git a/include/uapi/drm/amdgpu_drm.h 
>> b/include/uapi/drm/amdgpu_drm.h
>> index d0ee739..05241a6 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -89,6 +89,8 @@ extern "C" {
>>   #define AMDGPU_GEM_CREATE_SHADOW        (1 << 4)
>>   /* Flag that allocating the BO should use linear VRAM */
>>   #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS    (1 << 5)
>> +/* Flag that BO is local in the VM */
>> +#define AMDGPU_GEM_CREATE_LOCAL            (1 << 6)
>>     struct drm_amdgpu_gem_create_in  {
>>       /** the requested memory size */
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 9/9] drm/amdgpu: WIP add IOCTL interface for per VM BOs
       [not found]             ` <9304342a-def2-187e-4e9c-d872c58cdc17-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-25 16:22               ` Marek Olšák
       [not found]                 ` <CAAxE2A5dSR-PY+zZ3VeaT7iiCmj5jfty0hv7XZjz4HgOrApQHw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 30+ messages in thread
From: Marek Olšák @ 2017-08-25 16:22 UTC (permalink / raw)
  To: Christian König
  Cc: zhoucm1, Olsak, Marek, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Fri, Aug 25, 2017 at 3:00 PM, Christian König
<deathsimple@vodafone.de> wrote:
> On 25.08.2017 at 12:32, zhoucm1 wrote:
>>
>>
>>
>> On 2017-08-25 17:38, Christian König wrote:
>>>
>>> From: Christian König <christian.koenig@amd.com>
>>>
>>> Add the IOCTL interface so that applications can allocate per VM BOs.
>>>
>>> Still WIP since not all corner cases are tested yet, but this reduces
>>> average
>>> CS overhead for 10K BOs from 21ms down to 48us.
>>
>> Wow, cheers, eventually you get per-VM BOs to share the same reservation
>> with the PD/PTs, which indeed saves a lot of BO list overhead.
>
>
> Don't cheer too loudly yet, that is a completely constructed test case.
>
> So far I wasn't able to achieve any improvements with any real game on this
> with Mesa.
>
> BTW: Marek, can you take a look with some CPU-bound tests? I can provide a
> kernel branch if necessary.

Do you have a branch that works on Raven? This patch series doesn't,
and I didn't investigate why.

Marek

>
> Regards,
> Christian.
>
>
>> Overall it looks good, I will take a detailed look at this tomorrow.
>>
>> Regards,
>> David Zhou
>>>
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  7 ++--
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  2 +-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 59
>>> ++++++++++++++++++++++---------
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  3 +-
>>>   include/uapi/drm/amdgpu_drm.h             |  2 ++
>>>   5 files changed, 51 insertions(+), 22 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index b1e817c..21cab36 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -457,9 +457,10 @@ struct amdgpu_sa_bo {
>>>    */
>>>   void amdgpu_gem_force_release(struct amdgpu_device *adev);
>>>   int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long
>>> size,
>>> -                int alignment, u32 initial_domain,
>>> -                u64 flags, bool kernel,
>>> -                struct drm_gem_object **obj);
>>> +                 int alignment, u32 initial_domain,
>>> +                 u64 flags, bool kernel,
>>> +                 struct reservation_object *resv,
>>> +                 struct drm_gem_object **obj);
>>>     int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>>>                   struct drm_device *dev,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>> index 0e907ea..7256f83 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>> @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct
>>> amdgpu_fbdev *rfbdev,
>>>                          AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>>                          AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>>>                          AMDGPU_GEM_CREATE_VRAM_CLEARED,
>>> -                       true, &gobj);
>>> +                       true, NULL, &gobj);
>>>       if (ret) {
>>>           pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
>>>           return -ENOMEM;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> index d028806..b8e8d67 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object
>>> *gobj)
>>>   }
>>>     int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned
>>> long size,
>>> -                int alignment, u32 initial_domain,
>>> -                u64 flags, bool kernel,
>>> -                struct drm_gem_object **obj)
>>> +                 int alignment, u32 initial_domain,
>>> +                 u64 flags, bool kernel,
>>> +                 struct reservation_object *resv,
>>> +                 struct drm_gem_object **obj)
>>>   {
>>> -    struct amdgpu_bo *robj;
>>> +    struct amdgpu_bo *bo;
>>>       int r;
>>>         *obj = NULL;
>>> @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device
>>> *adev, unsigned long size,
>>>     retry:
>>>       r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
>>> -                 flags, NULL, NULL, 0, &robj);
>>> +                 flags, NULL, resv, 0, &bo);
>>>       if (r) {
>>>           if (r != -ERESTARTSYS) {
>>>               if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
>>> @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device
>>> *adev, unsigned long size,
>>>           }
>>>           return r;
>>>       }
>>> -    *obj = &robj->gem_base;
>>> +    *obj = &bo->gem_base;
>>>         return 0;
>>>   }
>>> @@ -136,13 +137,14 @@ void amdgpu_gem_object_close(struct drm_gem_object
>>> *obj,
>>>       struct amdgpu_vm *vm = &fpriv->vm;
>>>         struct amdgpu_bo_list_entry vm_pd;
>>> -    struct list_head list;
>>> +    struct list_head list, duplicates;
>>>       struct ttm_validate_buffer tv;
>>>       struct ww_acquire_ctx ticket;
>>>       struct amdgpu_bo_va *bo_va;
>>>       int r;
>>>         INIT_LIST_HEAD(&list);
>>> +    INIT_LIST_HEAD(&duplicates);
>>>         tv.bo = &bo->tbo;
>>>       tv.shared = true;
>>> @@ -150,7 +152,7 @@ void amdgpu_gem_object_close(struct drm_gem_object
>>> *obj,
>>>         amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
>>>   -    r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
>>> +    r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>>>       if (r) {
>>>           dev_err(adev->dev, "leaking bo va because "
>>>               "we fail to reserve bo (%d)\n", r);
>>> @@ -185,9 +187,12 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev,
>>> void *data,
>>>                   struct drm_file *filp)
>>>   {
>>>       struct amdgpu_device *adev = dev->dev_private;
>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>> +    struct amdgpu_vm *vm = &fpriv->vm;
>>>       union drm_amdgpu_gem_create *args = data;
>>>       uint64_t flags = args->in.domain_flags;
>>>       uint64_t size = args->in.bo_size;
>>> +    struct reservation_object *resv = NULL;
>>>       struct drm_gem_object *gobj;
>>>       uint32_t handle;
>>>       int r;
>>> @@ -196,7 +201,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev,
>>> void *data,
>>>       if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>>                 AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>>>                 AMDGPU_GEM_CREATE_CPU_GTT_USWC |
>>> -              AMDGPU_GEM_CREATE_VRAM_CLEARED))
>>> +              AMDGPU_GEM_CREATE_VRAM_CLEARED |
>>> +              AMDGPU_GEM_CREATE_LOCAL))
>>>           return -EINVAL;
>>>         /* reject invalid gem domains */
>>> @@ -223,9 +229,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev,
>>> void *data,
>>>       }
>>>       size = roundup(size, PAGE_SIZE);
>>>   +    if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>>> +        r = amdgpu_bo_reserve(vm->root.base.bo, false);
>>> +        if (r)
>>> +            return r;
>>> +
>>> +        resv = vm->root.base.bo->tbo.resv;
>>> +    }
>>> +
>>>       r = amdgpu_gem_object_create(adev, size, args->in.alignment,
>>>                        (u32)(0xffffffff & args->in.domains),
>>> -                     flags, false, &gobj);
>>> +                     flags, false, resv, &gobj);
>>> +    if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>>> +        if (!r) {
>>> +            struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
>>> +
>>> +            abo->parent = amdgpu_bo_ref(vm->root.base.bo);
>>> +        }
>>> +        amdgpu_bo_unreserve(vm->root.base.bo);
>>> +    }
>>>       if (r)
>>>           return r;
>>>   @@ -267,9 +289,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device
>>> *dev, void *data,
>>>       }
>>>         /* create a gem object to contain this object in */
>>> -    r = amdgpu_gem_object_create(adev, args->size, 0,
>>> -                     AMDGPU_GEM_DOMAIN_CPU, 0,
>>> -                     0, &gobj);
>>> +    r = amdgpu_gem_object_create(adev, args->size, 0,
>>> AMDGPU_GEM_DOMAIN_CPU,
>>> +                     0, 0, NULL, &gobj);
>>>       if (r)
>>>           return r;
>>>   @@ -521,7 +542,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>> void *data,
>>>       struct amdgpu_bo_list_entry vm_pd;
>>>       struct ttm_validate_buffer tv;
>>>       struct ww_acquire_ctx ticket;
>>> -    struct list_head list;
>>> +    struct list_head list, duplicates;
>>>       uint64_t va_flags;
>>>       int r = 0;
>>>   @@ -557,6 +578,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>> void *data,
>>>       }
>>>         INIT_LIST_HEAD(&list);
>>> +    INIT_LIST_HEAD(&duplicates);
>>>       if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
>>>           !(args->flags & AMDGPU_VM_PAGE_PRT)) {
>>>           gobj = drm_gem_object_lookup(filp, args->handle);
>>> @@ -573,7 +595,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void
>>> *data,
>>>         amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
>>>   -    r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
>>> +    r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
>>>       if (r)
>>>           goto error_unref;
>>>   @@ -639,6 +661,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>> void *data,
>>>   int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>>>               struct drm_file *filp)
>>>   {
>>> +    struct amdgpu_device *adev = dev->dev_private;
>>>       struct drm_amdgpu_gem_op *args = data;
>>>       struct drm_gem_object *gobj;
>>>       struct amdgpu_bo *robj;
>>> @@ -686,6 +709,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void
>>> *data,
>>>           if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
>>>               robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
>>>   +        if (robj->flags & AMDGPU_GEM_CREATE_LOCAL)
>>> +            amdgpu_vm_bo_invalidate(adev, robj, true);
>>> +
>>>           amdgpu_bo_unreserve(robj);
>>>           break;
>>>       default:
>>> @@ -715,8 +741,7 @@ int amdgpu_mode_dumb_create(struct drm_file
>>> *file_priv,
>>>       r = amdgpu_gem_object_create(adev, args->size, 0,
>>>                        AMDGPU_GEM_DOMAIN_VRAM,
>>>                        AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
>>> -                     ttm_bo_type_device,
>>> -                     &gobj);
>>> +                     false, NULL, &gobj);
>>>       if (r)
>>>           return -ENOMEM;
>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>> index 5b3f928..f407499 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct
>>> drm_device *dev,
>>>   {
>>>       struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>>>   -    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>>> +        bo->flags & AMDGPU_GEM_CREATE_LOCAL)
>>>           return ERR_PTR(-EPERM);
>>>         return drm_gem_prime_export(dev, gobj, flags);
>>> diff --git a/include/uapi/drm/amdgpu_drm.h
>>> b/include/uapi/drm/amdgpu_drm.h
>>> index d0ee739..05241a6 100644
>>> --- a/include/uapi/drm/amdgpu_drm.h
>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>> @@ -89,6 +89,8 @@ extern "C" {
>>>   #define AMDGPU_GEM_CREATE_SHADOW        (1 << 4)
>>>   /* Flag that allocating the BO should use linear VRAM */
>>>   #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS    (1 << 5)
>>> +/* Flag that BO is local in the VM */
>>> +#define AMDGPU_GEM_CREATE_LOCAL            (1 << 6)
>>>     struct drm_amdgpu_gem_create_in  {
>>>       /** the requested memory size */
>>
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 9/9] drm/amdgpu: WIP add IOCTL interface for per VM BOs
       [not found]                 ` <CAAxE2A5dSR-PY+zZ3VeaT7iiCmj5jfty0hv7XZjz4HgOrApQHw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-08-25 19:19                   ` Christian König
       [not found]                     ` <e7d5f9d9-ed3e-2654-9acd-c7339976006f-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 30+ messages in thread
From: Christian König @ 2017-08-25 19:19 UTC (permalink / raw)
  To: Marek Olšák
  Cc: zhoucm1, Olsak, Marek, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 25.08.2017 at 18:22, Marek Olšák wrote:
> On Fri, Aug 25, 2017 at 3:00 PM, Christian König
> <deathsimple@vodafone.de> wrote:
>> On 25.08.2017 at 12:32, zhoucm1 wrote:
>>>
>>>
>>> On 2017-08-25 17:38, Christian König wrote:
>>>> From: Christian König <christian.koenig@amd.com>
>>>>
>>>> Add the IOCTL interface so that applications can allocate per VM BOs.
>>>>
>>>> Still WIP since not all corner cases are tested yet, but this reduces
>>>> average
>>>> CS overhead for 10K BOs from 21ms down to 48us.
>>> Wow, cheers, eventually you get per-VM BOs to share the same reservation
>>> with the PD/PTs, which indeed saves a lot of BO list overhead.
>>
>> Don't cheer too loudly yet, that is a completely constructed test case.
>>
>> So far I wasn't able to achieve any improvements with any real game on this
>> with Mesa.
>>
>> BTW: Marek, can you take a look with some CPU-bound tests? I can provide a
>> kernel branch if necessary.
> Do you have a branch that works on Raven? This patch series doesn't,
> and I didn't investigate why.

I will come up with one on Monday.

Have a nice weekend, guys,
Christian.

>
> Marek
>
>> Regards,
>> Christian.
>>
>>
>>> Overall it looks good, I will take a detailed look at this tomorrow.
>>>
>>> Regards,
>>> David Zhou
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  7 ++--
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  2 +-
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 59
>>>> ++++++++++++++++++++++---------
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  3 +-
>>>>    include/uapi/drm/amdgpu_drm.h             |  2 ++
>>>>    5 files changed, 51 insertions(+), 22 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> index b1e817c..21cab36 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> @@ -457,9 +457,10 @@ struct amdgpu_sa_bo {
>>>>     */
>>>>    void amdgpu_gem_force_release(struct amdgpu_device *adev);
>>>>    int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long
>>>> size,
>>>> -                int alignment, u32 initial_domain,
>>>> -                u64 flags, bool kernel,
>>>> -                struct drm_gem_object **obj);
>>>> +                 int alignment, u32 initial_domain,
>>>> +                 u64 flags, bool kernel,
>>>> +                 struct reservation_object *resv,
>>>> +                 struct drm_gem_object **obj);
>>>>      int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>>>>                    struct drm_device *dev,
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>> index 0e907ea..7256f83 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>> @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct
>>>> amdgpu_fbdev *rfbdev,
>>>>                           AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>>>                           AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>>>>                           AMDGPU_GEM_CREATE_VRAM_CLEARED,
>>>> -                       true, &gobj);
>>>> +                       true, NULL, &gobj);
>>>>        if (ret) {
>>>>            pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
>>>>            return -ENOMEM;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>> index d028806..b8e8d67 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>> @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object
>>>> *gobj)
>>>>    }
>>>>      int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned
>>>> long size,
>>>> -                int alignment, u32 initial_domain,
>>>> -                u64 flags, bool kernel,
>>>> -                struct drm_gem_object **obj)
>>>> +                 int alignment, u32 initial_domain,
>>>> +                 u64 flags, bool kernel,
>>>> +                 struct reservation_object *resv,
>>>> +                 struct drm_gem_object **obj)
>>>>    {
>>>> -    struct amdgpu_bo *robj;
>>>> +    struct amdgpu_bo *bo;
>>>>        int r;
>>>>          *obj = NULL;
>>>> @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device
>>>> *adev, unsigned long size,
>>>>      retry:
>>>>        r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
>>>> -                 flags, NULL, NULL, 0, &robj);
>>>> +                 flags, NULL, resv, 0, &bo);
>>>>        if (r) {
>>>>            if (r != -ERESTARTSYS) {
>>>>                if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
>>>> @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device
>>>> *adev, unsigned long size,
>>>>            }
>>>>            return r;
>>>>        }
>>>> -    *obj = &robj->gem_base;
>>>> +    *obj = &bo->gem_base;
>>>>          return 0;
>>>>    }
>>>> @@ -136,13 +137,14 @@ void amdgpu_gem_object_close(struct drm_gem_object
>>>> *obj,
>>>>        struct amdgpu_vm *vm = &fpriv->vm;
>>>>          struct amdgpu_bo_list_entry vm_pd;
>>>> -    struct list_head list;
>>>> +    struct list_head list, duplicates;
>>>>        struct ttm_validate_buffer tv;
>>>>        struct ww_acquire_ctx ticket;
>>>>        struct amdgpu_bo_va *bo_va;
>>>>        int r;
>>>>          INIT_LIST_HEAD(&list);
>>>> +    INIT_LIST_HEAD(&duplicates);
>>>>          tv.bo = &bo->tbo;
>>>>        tv.shared = true;
>>>> @@ -150,7 +152,7 @@ void amdgpu_gem_object_close(struct drm_gem_object
>>>> *obj,
>>>>          amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
>>>>    -    r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
>>>> +    r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>>>>        if (r) {
>>>>            dev_err(adev->dev, "leaking bo va because "
>>>>                "we fail to reserve bo (%d)\n", r);
>>>> @@ -185,9 +187,12 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev,
>>>> void *data,
>>>>                    struct drm_file *filp)
>>>>    {
>>>>        struct amdgpu_device *adev = dev->dev_private;
>>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>> +    struct amdgpu_vm *vm = &fpriv->vm;
>>>>        union drm_amdgpu_gem_create *args = data;
>>>>        uint64_t flags = args->in.domain_flags;
>>>>        uint64_t size = args->in.bo_size;
>>>> +    struct reservation_object *resv = NULL;
>>>>        struct drm_gem_object *gobj;
>>>>        uint32_t handle;
>>>>        int r;
>>>> @@ -196,7 +201,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev,
>>>> void *data,
>>>>        if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>>>                  AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>>>>                  AMDGPU_GEM_CREATE_CPU_GTT_USWC |
>>>> -              AMDGPU_GEM_CREATE_VRAM_CLEARED))
>>>> +              AMDGPU_GEM_CREATE_VRAM_CLEARED |
>>>> +              AMDGPU_GEM_CREATE_LOCAL))
>>>>            return -EINVAL;
>>>>          /* reject invalid gem domains */
>>>> @@ -223,9 +229,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev,
>>>> void *data,
>>>>        }
>>>>        size = roundup(size, PAGE_SIZE);
>>>>    +    if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>>>> +        r = amdgpu_bo_reserve(vm->root.base.bo, false);
>>>> +        if (r)
>>>> +            return r;
>>>> +
>>>> +        resv = vm->root.base.bo->tbo.resv;
>>>> +    }
>>>> +
>>>>        r = amdgpu_gem_object_create(adev, size, args->in.alignment,
>>>>                         (u32)(0xffffffff & args->in.domains),
>>>> -                     flags, false, &gobj);
>>>> +                     flags, false, resv, &gobj);
>>>> +    if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>>>> +        if (!r) {
>>>> +            struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
>>>> +
>>>> +            abo->parent = amdgpu_bo_ref(vm->root.base.bo);
>>>> +        }
>>>> +        amdgpu_bo_unreserve(vm->root.base.bo);
>>>> +    }
>>>>        if (r)
>>>>            return r;
>>>>    @@ -267,9 +289,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device
>>>> *dev, void *data,
>>>>        }
>>>>          /* create a gem object to contain this object in */
>>>> -    r = amdgpu_gem_object_create(adev, args->size, 0,
>>>> -                     AMDGPU_GEM_DOMAIN_CPU, 0,
>>>> -                     0, &gobj);
>>>> +    r = amdgpu_gem_object_create(adev, args->size, 0,
>>>> AMDGPU_GEM_DOMAIN_CPU,
>>>> +                     0, 0, NULL, &gobj);
>>>>        if (r)
>>>>            return r;
>>>>    @@ -521,7 +542,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>>> void *data,
>>>>        struct amdgpu_bo_list_entry vm_pd;
>>>>        struct ttm_validate_buffer tv;
>>>>        struct ww_acquire_ctx ticket;
>>>> -    struct list_head list;
>>>> +    struct list_head list, duplicates;
>>>>        uint64_t va_flags;
>>>>        int r = 0;
>>>>    @@ -557,6 +578,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>>> void *data,
>>>>        }
>>>>          INIT_LIST_HEAD(&list);
>>>> +    INIT_LIST_HEAD(&duplicates);
>>>>        if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
>>>>            !(args->flags & AMDGPU_VM_PAGE_PRT)) {
>>>>            gobj = drm_gem_object_lookup(filp, args->handle);
>>>> @@ -573,7 +595,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void
>>>> *data,
>>>>          amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
>>>>    -    r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
>>>> +    r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
>>>>        if (r)
>>>>            goto error_unref;
>>>>    @@ -639,6 +661,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>>> void *data,
>>>>    int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>>>>                struct drm_file *filp)
>>>>    {
>>>> +    struct amdgpu_device *adev = dev->dev_private;
>>>>        struct drm_amdgpu_gem_op *args = data;
>>>>        struct drm_gem_object *gobj;
>>>>        struct amdgpu_bo *robj;
>>>> @@ -686,6 +709,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void
>>>> *data,
>>>>            if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
>>>>                robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
>>>>    +        if (robj->flags & AMDGPU_GEM_CREATE_LOCAL)
>>>> +            amdgpu_vm_bo_invalidate(adev, robj, true);
>>>> +
>>>>            amdgpu_bo_unreserve(robj);
>>>>            break;
>>>>        default:
>>>> @@ -715,8 +741,7 @@ int amdgpu_mode_dumb_create(struct drm_file
>>>> *file_priv,
>>>>        r = amdgpu_gem_object_create(adev, args->size, 0,
>>>>                         AMDGPU_GEM_DOMAIN_VRAM,
>>>>                         AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
>>>> -                     ttm_bo_type_device,
>>>> -                     &gobj);
>>>> +                     false, NULL, &gobj);
>>>>        if (r)
>>>>            return -ENOMEM;
>>>>    diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>> index 5b3f928..f407499 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct
>>>> drm_device *dev,
>>>>    {
>>>>        struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>>>>    -    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>>>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>>>> +        bo->flags & AMDGPU_GEM_CREATE_LOCAL)
>>>>            return ERR_PTR(-EPERM);
>>>>          return drm_gem_prime_export(dev, gobj, flags);
>>>> diff --git a/include/uapi/drm/amdgpu_drm.h
>>>> b/include/uapi/drm/amdgpu_drm.h
>>>> index d0ee739..05241a6 100644
>>>> --- a/include/uapi/drm/amdgpu_drm.h
>>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>>> @@ -89,6 +89,8 @@ extern "C" {
>>>>    #define AMDGPU_GEM_CREATE_SHADOW        (1 << 4)
>>>>    /* Flag that allocating the BO should use linear VRAM */
>>>>    #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS    (1 << 5)
>>>> +/* Flag that BO is local in the VM */
>>>> +#define AMDGPU_GEM_CREATE_LOCAL            (1 << 6)
>>>>      struct drm_amdgpu_gem_create_in  {
>>>>        /** the requested memory size */
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 9/9] drm/amdgpu: WIP add IOCTL interface for per VM BOs
       [not found]     ` <1503653899-1781-9-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-08-25 10:32       ` zhoucm1
@ 2017-08-25 21:31       ` Felix Kuehling
       [not found]         ` <3039e134-7ee0-792a-b8ad-f01f86bc1164-5C7GfCeVMHo@public.gmane.org>
  1 sibling, 1 reply; 30+ messages in thread
From: Felix Kuehling @ 2017-08-25 21:31 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

That's clever. I was scratching my head wondering where the BOs were
getting validated, just by sharing the VM reservation object, until I
carefully read your previous commit.

In general, I find the VM code extremely frustrating and confusing to
review. There are too many lists of different things, and it's really hard
to keep track of which list tracks what type of object in which situation.

For example, take struct amdgpu_vm:

	/* BOs who needs a validation */
	struct list_head	evicted;

	/* BOs moved, but not yet updated in the PT */
	struct list_head        moved;

	/* BO mappings freed, but not yet updated in the PT */
	struct list_head        freed;

Three lists of BOs (according to the comments). But evicted and moved
are lists of amdgpu_vm_bo_base, freed is a list of amdgpu_bo_va_mapping.
moved and freed are used for tracking BOs mapped in the VM. I think
moved may also track page table BOs, but I'm not sure. evicted is used
only for tracking page table BOs.

In patch #7 you add relocated to the mix. Now it gets really funny.
What's the difference between relocated and evicted and moved? It seems
PT BOs can be on any of these lists. I think evicted means the BO needs
to be validated. moved or relocated means it's been validated but its
mappings must be updated. For PT BOs and mapped BOs that means different
things, so it makes sense to have different lists for them. But I think
PT BOs can also end up on the moved list when amdgpu_vm_bo_invalidate is
called for a page table BO (through amdgpu_bo_move_notify). So I'm still
confused.

I think this could be clarified with more descriptive names for the
lists. If PT BOs and mapped BOs must be tracked separately, that should
be clear from the names.
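
Purely for illustration, a hypothetical split along those lines could look
like this (the names below are made up for this example and don't exist
anywhere in the code):

	/* page table BOs evicted and still waiting for validation */
	struct list_head	pt_evicted;

	/* page table BOs validated, but parent PDEs not yet updated */
	struct list_head	pt_relocated;

	/* mapped BOs moved, their PTEs not yet updated */
	struct list_head	va_moved;

	/* BO mappings freed, their PTEs not yet cleared */
	struct list_head	va_freed;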

</rant>

Regards,
  Felix


On 2017-08-25 05:38 AM, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Add the IOCTL interface so that applications can allocate per VM BOs.
>
> Still WIP since not all corner cases are tested yet, but this reduces average
> CS overhead for 10K BOs from 21ms down to 48us.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  7 ++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 59 ++++++++++++++++++++++---------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  3 +-
>  include/uapi/drm/amdgpu_drm.h             |  2 ++
>  5 files changed, 51 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index b1e817c..21cab36 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -457,9 +457,10 @@ struct amdgpu_sa_bo {
>   */
>  void amdgpu_gem_force_release(struct amdgpu_device *adev);
>  int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
> -				int alignment, u32 initial_domain,
> -				u64 flags, bool kernel,
> -				struct drm_gem_object **obj);
> +			     int alignment, u32 initial_domain,
> +			     u64 flags, bool kernel,
> +			     struct reservation_object *resv,
> +			     struct drm_gem_object **obj);
>  
>  int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>  			    struct drm_device *dev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
> index 0e907ea..7256f83 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
> @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
>  				       AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>  				       AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>  				       AMDGPU_GEM_CREATE_VRAM_CLEARED,
> -				       true, &gobj);
> +				       true, NULL, &gobj);
>  	if (ret) {
>  		pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
>  		return -ENOMEM;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index d028806..b8e8d67 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)
>  }
>  
>  int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
> -				int alignment, u32 initial_domain,
> -				u64 flags, bool kernel,
> -				struct drm_gem_object **obj)
> +			     int alignment, u32 initial_domain,
> +			     u64 flags, bool kernel,
> +			     struct reservation_object *resv,
> +			     struct drm_gem_object **obj)
>  {
> -	struct amdgpu_bo *robj;
> +	struct amdgpu_bo *bo;
>  	int r;
>  
>  	*obj = NULL;
> @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
>  
>  retry:
>  	r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
> -			     flags, NULL, NULL, 0, &robj);
> +			     flags, NULL, resv, 0, &bo);
>  	if (r) {
>  		if (r != -ERESTARTSYS) {
>  			if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
> @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
>  		}
>  		return r;
>  	}
> -	*obj = &robj->gem_base;
> +	*obj = &bo->gem_base;
>  
>  	return 0;
>  }
> @@ -136,13 +137,14 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>  	struct amdgpu_vm *vm = &fpriv->vm;
>  
>  	struct amdgpu_bo_list_entry vm_pd;
> -	struct list_head list;
> +	struct list_head list, duplicates;
>  	struct ttm_validate_buffer tv;
>  	struct ww_acquire_ctx ticket;
>  	struct amdgpu_bo_va *bo_va;
>  	int r;
>  
>  	INIT_LIST_HEAD(&list);
> +	INIT_LIST_HEAD(&duplicates);
>  
>  	tv.bo = &bo->tbo;
>  	tv.shared = true;
> @@ -150,7 +152,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>  
>  	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
>  
> -	r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
> +	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>  	if (r) {
>  		dev_err(adev->dev, "leaking bo va because "
>  			"we fail to reserve bo (%d)\n", r);
> @@ -185,9 +187,12 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>  			    struct drm_file *filp)
>  {
>  	struct amdgpu_device *adev = dev->dev_private;
> +	struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +	struct amdgpu_vm *vm = &fpriv->vm;
>  	union drm_amdgpu_gem_create *args = data;
>  	uint64_t flags = args->in.domain_flags;
>  	uint64_t size = args->in.bo_size;
> +	struct reservation_object *resv = NULL;
>  	struct drm_gem_object *gobj;
>  	uint32_t handle;
>  	int r;
> @@ -196,7 +201,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>  	if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>  		      AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>  		      AMDGPU_GEM_CREATE_CPU_GTT_USWC |
> -		      AMDGPU_GEM_CREATE_VRAM_CLEARED))
> +		      AMDGPU_GEM_CREATE_VRAM_CLEARED |
> +		      AMDGPU_GEM_CREATE_LOCAL))
>  		return -EINVAL;
>  
>  	/* reject invalid gem domains */
> @@ -223,9 +229,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>  	}
>  	size = roundup(size, PAGE_SIZE);
>  
> +	if (flags & AMDGPU_GEM_CREATE_LOCAL) {
> +		r = amdgpu_bo_reserve(vm->root.base.bo, false);
> +		if (r)
> +			return r;
> +
> +		resv = vm->root.base.bo->tbo.resv;
> +	}
> +
>  	r = amdgpu_gem_object_create(adev, size, args->in.alignment,
>  				     (u32)(0xffffffff & args->in.domains),
> -				     flags, false, &gobj);
> +				     flags, false, resv, &gobj);
> +	if (flags & AMDGPU_GEM_CREATE_LOCAL) {
> +		if (!r) {
> +			struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
> +
> +			abo->parent = amdgpu_bo_ref(vm->root.base.bo);
> +		}
> +		amdgpu_bo_unreserve(vm->root.base.bo);
> +	}
>  	if (r)
>  		return r;
>  
> @@ -267,9 +289,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
>  	}
>  
>  	/* create a gem object to contain this object in */
> -	r = amdgpu_gem_object_create(adev, args->size, 0,
> -				     AMDGPU_GEM_DOMAIN_CPU, 0,
> -				     0, &gobj);
> +	r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
> +				     0, 0, NULL, &gobj);
>  	if (r)
>  		return r;
>  
> @@ -521,7 +542,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>  	struct amdgpu_bo_list_entry vm_pd;
>  	struct ttm_validate_buffer tv;
>  	struct ww_acquire_ctx ticket;
> -	struct list_head list;
> +	struct list_head list, duplicates;
>  	uint64_t va_flags;
>  	int r = 0;
>  
> @@ -557,6 +578,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>  	}
>  
>  	INIT_LIST_HEAD(&list);
> +	INIT_LIST_HEAD(&duplicates);
>  	if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
>  	    !(args->flags & AMDGPU_VM_PAGE_PRT)) {
>  		gobj = drm_gem_object_lookup(filp, args->handle);
> @@ -573,7 +595,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>  
>  	amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
>  
> -	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
> +	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
>  	if (r)
>  		goto error_unref;
>  
> @@ -639,6 +661,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>  int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>  			struct drm_file *filp)
>  {
> +	struct amdgpu_device *adev = dev->dev_private;
>  	struct drm_amdgpu_gem_op *args = data;
>  	struct drm_gem_object *gobj;
>  	struct amdgpu_bo *robj;
> @@ -686,6 +709,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>  		if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
>  			robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
>  
> +		if (robj->flags & AMDGPU_GEM_CREATE_LOCAL)
> +			amdgpu_vm_bo_invalidate(adev, robj, true);
> +
>  		amdgpu_bo_unreserve(robj);
>  		break;
>  	default:
> @@ -715,8 +741,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>  	r = amdgpu_gem_object_create(adev, args->size, 0,
>  				     AMDGPU_GEM_DOMAIN_VRAM,
>  				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
> -				     ttm_bo_type_device,
> -				     &gobj);
> +				     false, NULL, &gobj);
>  	if (r)
>  		return -ENOMEM;
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> index 5b3f928..f407499 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>  {
>  	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>  
> -	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
> +	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
> +	    bo->flags & AMDGPU_GEM_CREATE_LOCAL)
>  		return ERR_PTR(-EPERM);
>  
>  	return drm_gem_prime_export(dev, gobj, flags);
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index d0ee739..05241a6 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -89,6 +89,8 @@ extern "C" {
>  #define AMDGPU_GEM_CREATE_SHADOW		(1 << 4)
>  /* Flag that allocating the BO should use linear VRAM */
>  #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS	(1 << 5)
> +/* Flag that BO is local in the VM */
> +#define AMDGPU_GEM_CREATE_LOCAL			(1 << 6)
>  
>  struct drm_amdgpu_gem_create_in  {
>  	/** the requested memory size */

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 9/9] drm/amdgpu: WIP add IOCTL interface for per VM BOs
       [not found]         ` <3039e134-7ee0-792a-b8ad-f01f86bc1164-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-26 13:20           ` Christian König
  0 siblings, 0 replies; 30+ messages in thread
From: Christian König @ 2017-08-26 13:20 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 25.08.2017 23:31, Felix Kuehling wrote:
> That's clever. I was scratching my head where the BOs were getting
> validated, just by sharing the VM reservation object. Until I carefully
> read your previous commit.

Yeah, I didn't have time to properly comment on your last mail.

> In general, I find the VM code extremely frustrating and confusing to
> review. Too many lists of different things, and it's really hard to keep
> track of which list tracks what type of object, in which situation.
>
> For example, take struct amdgpu_vm:
>
> 	/* BOs who needs a validation */
> 	struct list_head	evicted;
>
> 	/* BOs moved, but not yet updated in the PT */
> 	struct list_head        moved;
>
> 	/* BO mappings freed, but not yet updated in the PT */
> 	struct list_head        freed;
>
> Three lists of BOs (according to the comments). But evicted and moved
> are lists of amdgpu_vm_bo_base, freed is a list of amdgpu_bo_va_mapping.
> moved and freed are used for tracking BOs mapped in the VM. I think
> moved may also track page table BOs, but I'm not sure. evicted is used
> only for tracking page table BOs.
>
> In patch #7 you add relocated to the mix. Now it gets really funny.
> What's the difference between relocated and evicted and moved?

Essentially nothing. I actually tried to merge them, but then realized
that this would probably remove the ability to update only the
directories, and with it most likely break your KFD usage of that.

> It seems
> PT BOs can be on any of these lists. I think evicted means the BO needs
> to be validated. moved or relocated means it's been validated but its
> mappings must be updated. For PT BOs and mapped BOs that means different
> things, so it makes sense to have different lists for them. But I think
> PT BOs can also end up on the moved list when amdgpu_vm_bo_invalidate is
> called for a page table BO (through amdgpu_bo_move_notify). So I'm still
> confused.

amdgpu_vm_bo_invalidate() has logic to always put PDs and PTs on the 
relocated list and everything else on the moved list. So PDs/PTs should 
never end up on the moved list.
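
In other words the dispatch boils down to something like this simplified
sketch (the relocated list is the one added in patch #7, the is_page_table
parameter stands in for however the real function identifies PDs/PTs, and
locking is left out):

/* Simplified sketch of the behaviour described above: PDs/PTs always go
 * to the relocated list, everything else to the moved list. */
static void vm_bo_invalidate_sketch(struct amdgpu_vm *vm,
				    struct amdgpu_vm_bo_base *bo_base,
				    bool is_page_table)
{
	if (is_page_table)
		list_move(&bo_base->vm_status, &vm->relocated);
	else
		list_move(&bo_base->vm_status, &vm->moved);
}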

> I think this could be clarified with more descriptive names for the
> lists. If PT BOs and mapped BOs must be tracked separately that should
> be clear from the names.

That is a good idea, but in the long term I want to merge the relocated and
moved lists and then decide what to do while walking the list; that is
the next step, I think.
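
Roughly along these lines, as a sketch of the idea only (the merged dirty
list and all helpers below are hypothetical placeholders, not existing
functions):

/* Sketch: one merged "dirty" list, and the decision between directory and
 * mapping updates is made while walking it. All names are illustrative. */
static int vm_update_dirty_sketch(struct amdgpu_device *adev,
				  struct amdgpu_vm *vm)
{
	struct amdgpu_vm_bo_base *bo_base, *tmp;
	int r;

	list_for_each_entry_safe(bo_base, tmp, &vm->dirty, vm_status) {
		if (bo_is_page_table(bo_base->bo))	/* hypothetical helper */
			r = update_page_directories(adev, vm, bo_base);
		else
			r = update_bo_mappings(adev, vm, bo_base);
		if (r)
			return r;

		list_del_init(&bo_base->vm_status);
	}

	return 0;
}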

Regards,
Christian.

>
> </rant>
>
> Regards,
>    Felix
>
>
> On 2017-08-25 05:38 AM, Christian König wrote:
>> From: Christian König <christian.koenig@amd.com>
>>
>> Add the IOCTL interface so that applications can allocate per VM BOs.
>>
>> Still WIP since not all corner cases are tested yet, but this reduces average
>> CS overhead for 10K BOs from 21ms down to 48us.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  7 ++--
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 59 ++++++++++++++++++++++---------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  3 +-
>>   include/uapi/drm/amdgpu_drm.h             |  2 ++
>>   5 files changed, 51 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index b1e817c..21cab36 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -457,9 +457,10 @@ struct amdgpu_sa_bo {
>>    */
>>   void amdgpu_gem_force_release(struct amdgpu_device *adev);
>>   int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
>> -				int alignment, u32 initial_domain,
>> -				u64 flags, bool kernel,
>> -				struct drm_gem_object **obj);
>> +			     int alignment, u32 initial_domain,
>> +			     u64 flags, bool kernel,
>> +			     struct reservation_object *resv,
>> +			     struct drm_gem_object **obj);
>>   
>>   int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>>   			    struct drm_device *dev,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>> index 0e907ea..7256f83 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>> @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
>>   				       AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>   				       AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>>   				       AMDGPU_GEM_CREATE_VRAM_CLEARED,
>> -				       true, &gobj);
>> +				       true, NULL, &gobj);
>>   	if (ret) {
>>   		pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
>>   		return -ENOMEM;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index d028806..b8e8d67 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)
>>   }
>>   
>>   int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
>> -				int alignment, u32 initial_domain,
>> -				u64 flags, bool kernel,
>> -				struct drm_gem_object **obj)
>> +			     int alignment, u32 initial_domain,
>> +			     u64 flags, bool kernel,
>> +			     struct reservation_object *resv,
>> +			     struct drm_gem_object **obj)
>>   {
>> -	struct amdgpu_bo *robj;
>> +	struct amdgpu_bo *bo;
>>   	int r;
>>   
>>   	*obj = NULL;
>> @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
>>   
>>   retry:
>>   	r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
>> -			     flags, NULL, NULL, 0, &robj);
>> +			     flags, NULL, resv, 0, &bo);
>>   	if (r) {
>>   		if (r != -ERESTARTSYS) {
>>   			if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
>> @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
>>   		}
>>   		return r;
>>   	}
>> -	*obj = &robj->gem_base;
>> +	*obj = &bo->gem_base;
>>   
>>   	return 0;
>>   }
>> @@ -136,13 +137,14 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>>   	struct amdgpu_vm *vm = &fpriv->vm;
>>   
>>   	struct amdgpu_bo_list_entry vm_pd;
>> -	struct list_head list;
>> +	struct list_head list, duplicates;
>>   	struct ttm_validate_buffer tv;
>>   	struct ww_acquire_ctx ticket;
>>   	struct amdgpu_bo_va *bo_va;
>>   	int r;
>>   
>>   	INIT_LIST_HEAD(&list);
>> +	INIT_LIST_HEAD(&duplicates);
>>   
>>   	tv.bo = &bo->tbo;
>>   	tv.shared = true;
>> @@ -150,7 +152,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>>   
>>   	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
>>   
>> -	r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
>> +	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>>   	if (r) {
>>   		dev_err(adev->dev, "leaking bo va because "
>>   			"we fail to reserve bo (%d)\n", r);
>> @@ -185,9 +187,12 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>>   			    struct drm_file *filp)
>>   {
>>   	struct amdgpu_device *adev = dev->dev_private;
>> +	struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> +	struct amdgpu_vm *vm = &fpriv->vm;
>>   	union drm_amdgpu_gem_create *args = data;
>>   	uint64_t flags = args->in.domain_flags;
>>   	uint64_t size = args->in.bo_size;
>> +	struct reservation_object *resv = NULL;
>>   	struct drm_gem_object *gobj;
>>   	uint32_t handle;
>>   	int r;
>> @@ -196,7 +201,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>>   	if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>   		      AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>>   		      AMDGPU_GEM_CREATE_CPU_GTT_USWC |
>> -		      AMDGPU_GEM_CREATE_VRAM_CLEARED))
>> +		      AMDGPU_GEM_CREATE_VRAM_CLEARED |
>> +		      AMDGPU_GEM_CREATE_LOCAL))
>>   		return -EINVAL;
>>   
>>   	/* reject invalid gem domains */
>> @@ -223,9 +229,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>>   	}
>>   	size = roundup(size, PAGE_SIZE);
>>   
>> +	if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>> +		r = amdgpu_bo_reserve(vm->root.base.bo, false);
>> +		if (r)
>> +			return r;
>> +
>> +		resv = vm->root.base.bo->tbo.resv;
>> +	}
>> +
>>   	r = amdgpu_gem_object_create(adev, size, args->in.alignment,
>>   				     (u32)(0xffffffff & args->in.domains),
>> -				     flags, false, &gobj);
>> +				     flags, false, resv, &gobj);
>> +	if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>> +		if (!r) {
>> +			struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
>> +
>> +			abo->parent = amdgpu_bo_ref(vm->root.base.bo);
>> +		}
>> +		amdgpu_bo_unreserve(vm->root.base.bo);
>> +	}
>>   	if (r)
>>   		return r;
>>   
>> @@ -267,9 +289,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
>>   	}
>>   
>>   	/* create a gem object to contain this object in */
>> -	r = amdgpu_gem_object_create(adev, args->size, 0,
>> -				     AMDGPU_GEM_DOMAIN_CPU, 0,
>> -				     0, &gobj);
>> +	r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
>> +				     0, 0, NULL, &gobj);
>>   	if (r)
>>   		return r;
>>   
>> @@ -521,7 +542,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>   	struct amdgpu_bo_list_entry vm_pd;
>>   	struct ttm_validate_buffer tv;
>>   	struct ww_acquire_ctx ticket;
>> -	struct list_head list;
>> +	struct list_head list, duplicates;
>>   	uint64_t va_flags;
>>   	int r = 0;
>>   
>> @@ -557,6 +578,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>   	}
>>   
>>   	INIT_LIST_HEAD(&list);
>> +	INIT_LIST_HEAD(&duplicates);
>>   	if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
>>   	    !(args->flags & AMDGPU_VM_PAGE_PRT)) {
>>   		gobj = drm_gem_object_lookup(filp, args->handle);
>> @@ -573,7 +595,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>   
>>   	amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
>>   
>> -	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
>> +	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
>>   	if (r)
>>   		goto error_unref;
>>   
>> @@ -639,6 +661,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>   int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>>   			struct drm_file *filp)
>>   {
>> +	struct amdgpu_device *adev = dev->dev_private;
>>   	struct drm_amdgpu_gem_op *args = data;
>>   	struct drm_gem_object *gobj;
>>   	struct amdgpu_bo *robj;
>> @@ -686,6 +709,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>>   		if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
>>   			robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
>>   
>> +		if (robj->flags & AMDGPU_GEM_CREATE_LOCAL)
>> +			amdgpu_vm_bo_invalidate(adev, robj, true);
>> +
>>   		amdgpu_bo_unreserve(robj);
>>   		break;
>>   	default:
>> @@ -715,8 +741,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>>   	r = amdgpu_gem_object_create(adev, args->size, 0,
>>   				     AMDGPU_GEM_DOMAIN_VRAM,
>>   				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
>> -				     ttm_bo_type_device,
>> -				     &gobj);
>> +				     false, NULL, &gobj);
>>   	if (r)
>>   		return -ENOMEM;
>>   
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> index 5b3f928..f407499 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>>   {
>>   	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>>   
>> -	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>> +	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>> +	    bo->flags & AMDGPU_GEM_CREATE_LOCAL)
>>   		return ERR_PTR(-EPERM);
>>   
>>   	return drm_gem_prime_export(dev, gobj, flags);
>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>> index d0ee739..05241a6 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -89,6 +89,8 @@ extern "C" {
>>   #define AMDGPU_GEM_CREATE_SHADOW		(1 << 4)
>>   /* Flag that allocating the BO should use linear VRAM */
>>   #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS	(1 << 5)
>> +/* Flag that BO is local in the VM */
>> +#define AMDGPU_GEM_CREATE_LOCAL			(1 << 6)
>>   
>>   struct drm_amdgpu_gem_create_in  {
>>   	/** the requested memory size */


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 9/9] drm/amdgpu: WIP add IOCTL interface for per VM BOs
       [not found]                     ` <e7d5f9d9-ed3e-2654-9acd-c7339976006f-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-27 10:03                       ` Christian König
       [not found]                         ` <fba30bfa-aa7c-d342-b4b6-85058f5db5bf-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 30+ messages in thread
From: Christian König @ 2017-08-27 10:03 UTC (permalink / raw)
  To: Marek Olšák
  Cc: zhoucm1, Olsak, Marek, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

[-- Attachment #1: Type: text/plain, Size: 13402 bytes --]

On 25.08.2017 21:19, Christian König wrote:
> On 25.08.2017 18:22, Marek Olšák wrote:
>> On Fri, Aug 25, 2017 at 3:00 PM, Christian König
>> <deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> wrote:
>>> On 25.08.2017 12:32, zhoucm1 wrote:
>>>>
>>>>
>>>> On 2017-08-25 17:38, Christian König wrote:
>>>>> From: Christian König <christian.koenig-5C7GfCeVMHo@public.gmane.org>
>>>>>
>>>>> Add the IOCTL interface so that applications can allocate per VM BOs.
>>>>>
>>>>> Still WIP since not all corner cases are tested yet, but this reduces
>>>>> average
>>>>> CS overhead for 10K BOs from 21ms down to 48us.
>>>> Wow, cheers, eventually you get per-VM BOs onto the same reservation as
>>>> the PD/PTs, which indeed saves a lot of BO list overhead.
>>>
>>> Don't cheer too loud yet, that is a completely constructed test case.
>>>
>>> So far I wasn't able to achieve any improvements with any real game 
>>> on this
>>> with Mesa.
>>>
>>> BTW: Marek can you take a look with some CPU bound tests? I can 
>>> provide a
>>> kernel branch if necessary.
>> Do you have a branch that works on Raven? This patch series doesn't,
>> and I didn't investigate why.
>
> I will come up with one on Monday.

Branch vm_improvements on 
git://people.freedesktop.org/~deathsimple/linux together with the 
attached patch should work.

I have only tested on Tonga, but that's based on amd-staging-4.12 and so 
it should work on Raven as well. If not, I still have a bug somewhere which 
needs to be fixed.
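
For reference, allocating such a per-VM BO from userspace is just the
existing GEM create ioctl with the new flag set. A minimal, untested sketch
(field names as in the uapi patch above; error handling, libdrm wrappers and
the subsequent VA mapping are left out):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/amdgpu_drm.h>	/* assumes a uapi header carrying the new flag */

/* Sketch: request a 1 MiB VRAM BO that is local to this file's VM, i.e. it
 * shares the VM reservation object and never has to go into a BO list. */
static int alloc_local_vram_bo(int drm_fd, uint32_t *handle)
{
	union drm_amdgpu_gem_create args;

	memset(&args, 0, sizeof(args));
	args.in.bo_size = 1 << 20;
	args.in.alignment = 4096;
	args.in.domains = AMDGPU_GEM_DOMAIN_VRAM;
	args.in.domain_flags = AMDGPU_GEM_CREATE_LOCAL;	/* new flag from this series */

	if (ioctl(drm_fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &args))
		return -1;

	*handle = args.out.handle;
	return 0;
}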

Thanks,
Christian.
>
> Have a nice weekend guys,
> Christian.
>
>>
>> Marek
>>
>>> Regards,
>>> Christian.
>>>
>>>
>>>> overall looks good, I will take a detailed check for this tomorrow.
>>>>
>>>> Regards,
>>>> David Zhou
>>>>>
>>>>> Signed-off-by: Christian König <christian.koenig-5C7GfCeVMHo@public.gmane.org>
>>>>> ---
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  7 ++--
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  2 +-
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 59
>>>>> ++++++++++++++++++++++---------
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  3 +-
>>>>>    include/uapi/drm/amdgpu_drm.h             |  2 ++
>>>>>    5 files changed, 51 insertions(+), 22 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> index b1e817c..21cab36 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> @@ -457,9 +457,10 @@ struct amdgpu_sa_bo {
>>>>>     */
>>>>>    void amdgpu_gem_force_release(struct amdgpu_device *adev);
>>>>>    int amdgpu_gem_object_create(struct amdgpu_device *adev, 
>>>>> unsigned long
>>>>> size,
>>>>> -                int alignment, u32 initial_domain,
>>>>> -                u64 flags, bool kernel,
>>>>> -                struct drm_gem_object **obj);
>>>>> +                 int alignment, u32 initial_domain,
>>>>> +                 u64 flags, bool kernel,
>>>>> +                 struct reservation_object *resv,
>>>>> +                 struct drm_gem_object **obj);
>>>>>      int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>>>>>                    struct drm_device *dev,
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>>> index 0e907ea..7256f83 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>>> @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct
>>>>> amdgpu_fbdev *rfbdev,
>>>>> AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>>>> AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>>>>>                           AMDGPU_GEM_CREATE_VRAM_CLEARED,
>>>>> -                       true, &gobj);
>>>>> +                       true, NULL, &gobj);
>>>>>        if (ret) {
>>>>>            pr_err("failed to allocate framebuffer (%d)\n", 
>>>>> aligned_size);
>>>>>            return -ENOMEM;
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>> index d028806..b8e8d67 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>> @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object
>>>>> *gobj)
>>>>>    }
>>>>>      int amdgpu_gem_object_create(struct amdgpu_device *adev, 
>>>>> unsigned
>>>>> long size,
>>>>> -                int alignment, u32 initial_domain,
>>>>> -                u64 flags, bool kernel,
>>>>> -                struct drm_gem_object **obj)
>>>>> +                 int alignment, u32 initial_domain,
>>>>> +                 u64 flags, bool kernel,
>>>>> +                 struct reservation_object *resv,
>>>>> +                 struct drm_gem_object **obj)
>>>>>    {
>>>>> -    struct amdgpu_bo *robj;
>>>>> +    struct amdgpu_bo *bo;
>>>>>        int r;
>>>>>          *obj = NULL;
>>>>> @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device
>>>>> *adev, unsigned long size,
>>>>>      retry:
>>>>>        r = amdgpu_bo_create(adev, size, alignment, kernel, 
>>>>> initial_domain,
>>>>> -                 flags, NULL, NULL, 0, &robj);
>>>>> +                 flags, NULL, resv, 0, &bo);
>>>>>        if (r) {
>>>>>            if (r != -ERESTARTSYS) {
>>>>>                if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
>>>>> @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device
>>>>> *adev, unsigned long size,
>>>>>            }
>>>>>            return r;
>>>>>        }
>>>>> -    *obj = &robj->gem_base;
>>>>> +    *obj = &bo->gem_base;
>>>>>          return 0;
>>>>>    }
>>>>> @@ -136,13 +137,14 @@ void amdgpu_gem_object_close(struct 
>>>>> drm_gem_object
>>>>> *obj,
>>>>>        struct amdgpu_vm *vm = &fpriv->vm;
>>>>>          struct amdgpu_bo_list_entry vm_pd;
>>>>> -    struct list_head list;
>>>>> +    struct list_head list, duplicates;
>>>>>        struct ttm_validate_buffer tv;
>>>>>        struct ww_acquire_ctx ticket;
>>>>>        struct amdgpu_bo_va *bo_va;
>>>>>        int r;
>>>>>          INIT_LIST_HEAD(&list);
>>>>> +    INIT_LIST_HEAD(&duplicates);
>>>>>          tv.bo = &bo->tbo;
>>>>>        tv.shared = true;
>>>>> @@ -150,7 +152,7 @@ void amdgpu_gem_object_close(struct 
>>>>> drm_gem_object
>>>>> *obj,
>>>>>          amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
>>>>>    -    r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
>>>>> +    r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>>>>>        if (r) {
>>>>>            dev_err(adev->dev, "leaking bo va because "
>>>>>                "we fail to reserve bo (%d)\n", r);
>>>>> @@ -185,9 +187,12 @@ int amdgpu_gem_create_ioctl(struct drm_device 
>>>>> *dev,
>>>>> void *data,
>>>>>                    struct drm_file *filp)
>>>>>    {
>>>>>        struct amdgpu_device *adev = dev->dev_private;
>>>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>>> +    struct amdgpu_vm *vm = &fpriv->vm;
>>>>>        union drm_amdgpu_gem_create *args = data;
>>>>>        uint64_t flags = args->in.domain_flags;
>>>>>        uint64_t size = args->in.bo_size;
>>>>> +    struct reservation_object *resv = NULL;
>>>>>        struct drm_gem_object *gobj;
>>>>>        uint32_t handle;
>>>>>        int r;
>>>>> @@ -196,7 +201,8 @@ int amdgpu_gem_create_ioctl(struct drm_device 
>>>>> *dev,
>>>>> void *data,
>>>>>        if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>>>>                  AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>>>>>                  AMDGPU_GEM_CREATE_CPU_GTT_USWC |
>>>>> -              AMDGPU_GEM_CREATE_VRAM_CLEARED))
>>>>> +              AMDGPU_GEM_CREATE_VRAM_CLEARED |
>>>>> +              AMDGPU_GEM_CREATE_LOCAL))
>>>>>            return -EINVAL;
>>>>>          /* reject invalid gem domains */
>>>>> @@ -223,9 +229,25 @@ int amdgpu_gem_create_ioctl(struct drm_device 
>>>>> *dev,
>>>>> void *data,
>>>>>        }
>>>>>        size = roundup(size, PAGE_SIZE);
>>>>>    +    if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>>>>> +        r = amdgpu_bo_reserve(vm->root.base.bo, false);
>>>>> +        if (r)
>>>>> +            return r;
>>>>> +
>>>>> +        resv = vm->root.base.bo->tbo.resv;
>>>>> +    }
>>>>> +
>>>>>        r = amdgpu_gem_object_create(adev, size, args->in.alignment,
>>>>>                         (u32)(0xffffffff & args->in.domains),
>>>>> -                     flags, false, &gobj);
>>>>> +                     flags, false, resv, &gobj);
>>>>> +    if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>>>>> +        if (!r) {
>>>>> +            struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
>>>>> +
>>>>> +            abo->parent = amdgpu_bo_ref(vm->root.base.bo);
>>>>> +        }
>>>>> +        amdgpu_bo_unreserve(vm->root.base.bo);
>>>>> +    }
>>>>>        if (r)
>>>>>            return r;
>>>>>    @@ -267,9 +289,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device
>>>>> *dev, void *data,
>>>>>        }
>>>>>          /* create a gem object to contain this object in */
>>>>> -    r = amdgpu_gem_object_create(adev, args->size, 0,
>>>>> -                     AMDGPU_GEM_DOMAIN_CPU, 0,
>>>>> -                     0, &gobj);
>>>>> +    r = amdgpu_gem_object_create(adev, args->size, 0,
>>>>> AMDGPU_GEM_DOMAIN_CPU,
>>>>> +                     0, 0, NULL, &gobj);
>>>>>        if (r)
>>>>>            return r;
>>>>>    @@ -521,7 +542,7 @@ int amdgpu_gem_va_ioctl(struct drm_device 
>>>>> *dev,
>>>>> void *data,
>>>>>        struct amdgpu_bo_list_entry vm_pd;
>>>>>        struct ttm_validate_buffer tv;
>>>>>        struct ww_acquire_ctx ticket;
>>>>> -    struct list_head list;
>>>>> +    struct list_head list, duplicates;
>>>>>        uint64_t va_flags;
>>>>>        int r = 0;
>>>>>    @@ -557,6 +578,7 @@ int amdgpu_gem_va_ioctl(struct drm_device 
>>>>> *dev,
>>>>> void *data,
>>>>>        }
>>>>>          INIT_LIST_HEAD(&list);
>>>>> +    INIT_LIST_HEAD(&duplicates);
>>>>>        if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
>>>>>            !(args->flags & AMDGPU_VM_PAGE_PRT)) {
>>>>>            gobj = drm_gem_object_lookup(filp, args->handle);
>>>>> @@ -573,7 +595,7 @@ int amdgpu_gem_va_ioctl(struct drm_device 
>>>>> *dev, void
>>>>> *data,
>>>>>          amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
>>>>>    -    r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
>>>>> +    r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
>>>>>        if (r)
>>>>>            goto error_unref;
>>>>>    @@ -639,6 +661,7 @@ int amdgpu_gem_va_ioctl(struct drm_device 
>>>>> *dev,
>>>>> void *data,
>>>>>    int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>>>>>                struct drm_file *filp)
>>>>>    {
>>>>> +    struct amdgpu_device *adev = dev->dev_private;
>>>>>        struct drm_amdgpu_gem_op *args = data;
>>>>>        struct drm_gem_object *gobj;
>>>>>        struct amdgpu_bo *robj;
>>>>> @@ -686,6 +709,9 @@ int amdgpu_gem_op_ioctl(struct drm_device 
>>>>> *dev, void
>>>>> *data,
>>>>>            if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
>>>>>                robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
>>>>>    +        if (robj->flags & AMDGPU_GEM_CREATE_LOCAL)
>>>>> +            amdgpu_vm_bo_invalidate(adev, robj, true);
>>>>> +
>>>>>            amdgpu_bo_unreserve(robj);
>>>>>            break;
>>>>>        default:
>>>>> @@ -715,8 +741,7 @@ int amdgpu_mode_dumb_create(struct drm_file
>>>>> *file_priv,
>>>>>        r = amdgpu_gem_object_create(adev, args->size, 0,
>>>>>                         AMDGPU_GEM_DOMAIN_VRAM,
>>>>> AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
>>>>> -                     ttm_bo_type_device,
>>>>> -                     &gobj);
>>>>> +                     false, NULL, &gobj);
>>>>>        if (r)
>>>>>            return -ENOMEM;
>>>>>    diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>> index 5b3f928..f407499 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct
>>>>> drm_device *dev,
>>>>>    {
>>>>>        struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>>>>>    -    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>>>>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>>>>> +        bo->flags & AMDGPU_GEM_CREATE_LOCAL)
>>>>>            return ERR_PTR(-EPERM);
>>>>>          return drm_gem_prime_export(dev, gobj, flags);
>>>>> diff --git a/include/uapi/drm/amdgpu_drm.h
>>>>> b/include/uapi/drm/amdgpu_drm.h
>>>>> index d0ee739..05241a6 100644
>>>>> --- a/include/uapi/drm/amdgpu_drm.h
>>>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>>>> @@ -89,6 +89,8 @@ extern "C" {
>>>>>    #define AMDGPU_GEM_CREATE_SHADOW        (1 << 4)
>>>>>    /* Flag that allocating the BO should use linear VRAM */
>>>>>    #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS    (1 << 5)
>>>>> +/* Flag that BO is local in the VM */
>>>>> +#define AMDGPU_GEM_CREATE_LOCAL            (1 << 6)
>>>>>      struct drm_amdgpu_gem_create_in  {
>>>>>        /** the requested memory size */
>>>>
>
>


[-- Attachment #2: 0001-radeonsi-set-a-per-buffer-flag-that-disables-inter-p.patch --]
[-- Type: text/x-patch, Size: 11842 bytes --]

>From 6ef6fbf384808e11490ff77b3933aa27feb63f7b Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak-5C7GfCeVMHo@public.gmane.org>
Date: Tue, 18 Jul 2017 16:08:44 -0400
Subject: [PATCH] radeonsi: set a per-buffer flag that disables inter-process
 sharing (v3)

For lower overhead in the CS ioctl.
Winsys allocators are not used with interprocess-sharable resources.

v2: It shouldn't crash anymore, but the kernel will reject the new flag.
v3 (christian): Rename the flag, avoid sending those buffers in the BO list.
---
 src/gallium/drivers/radeon/r600_buffer_common.c |  7 +++++
 src/gallium/drivers/radeon/radeon_winsys.h      | 20 ++++++++++---
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c       | 37 ++++++++++++++++---------
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.h       |  2 ++
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c       | 25 +++++++++++++----
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c   | 27 ++++++++++--------
 6 files changed, 84 insertions(+), 34 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index dd1c209..2747ac4 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -167,6 +167,13 @@ void r600_init_resource_fields(struct r600_common_screen *rscreen,
 			 RADEON_FLAG_GTT_WC;
 	}
 
+	/* Only displayable single-sample textures can be shared between
+	 * processes. */
+	if (res->b.b.target == PIPE_BUFFER ||
+	    res->b.b.nr_samples >= 2 ||
+	    rtex->surface.micro_tile_mode != RADEON_MICRO_MODE_DISPLAY)
+		res->flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
+
 	/* If VRAM is just stolen system memory, allow both VRAM and
 	 * GTT, whichever has free space. If a buffer is evicted from
 	 * VRAM to GTT, it will stay there.
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index b00b144..f0a0a92 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -54,6 +54,7 @@ enum radeon_bo_flag { /* bitfield */
     RADEON_FLAG_NO_CPU_ACCESS = (1 << 1),
     RADEON_FLAG_NO_SUBALLOC =   (1 << 2),
     RADEON_FLAG_SPARSE =        (1 << 3),
+    RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4),
 };
 
 enum radeon_bo_usage { /* bitfield */
@@ -661,14 +662,19 @@ static inline unsigned radeon_flags_from_heap(enum radeon_heap heap)
 {
     switch (heap) {
     case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
-        return RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_CPU_ACCESS;
+        return RADEON_FLAG_GTT_WC |
+               RADEON_FLAG_NO_CPU_ACCESS |
+               RADEON_FLAG_NO_INTERPROCESS_SHARING;
+
     case RADEON_HEAP_VRAM:
     case RADEON_HEAP_VRAM_GTT:
     case RADEON_HEAP_GTT_WC:
-        return RADEON_FLAG_GTT_WC;
+        return RADEON_FLAG_GTT_WC |
+               RADEON_FLAG_NO_INTERPROCESS_SHARING;
+
     case RADEON_HEAP_GTT:
     default:
-        return 0;
+        return RADEON_FLAG_NO_INTERPROCESS_SHARING;
     }
 }
 
@@ -700,8 +706,14 @@ static inline int radeon_get_heap_index(enum radeon_bo_domain domain,
     /* NO_CPU_ACCESS implies VRAM only. */
     assert(!(flags & RADEON_FLAG_NO_CPU_ACCESS) || domain == RADEON_DOMAIN_VRAM);
 
+    /* Resources with interprocess sharing don't use any winsys allocators. */
+    if (!(flags & RADEON_FLAG_NO_INTERPROCESS_SHARING))
+        return -1;
+
     /* Unsupported flags: NO_SUBALLOC, SPARSE. */
-    if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_CPU_ACCESS))
+    if (flags & ~(RADEON_FLAG_GTT_WC |
+                  RADEON_FLAG_NO_CPU_ACCESS |
+                  RADEON_FLAG_NO_INTERPROCESS_SHARING))
         return -1;
 
     switch (domain) {
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 97bbe23..d76d441 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -38,6 +38,10 @@
 #include <stdio.h>
 #include <inttypes.h>
 
+#ifndef AMDGPU_GEM_CREATE_LOCAL
+#define AMDGPU_GEM_CREATE_LOCAL (1 << 6)
+#endif
+
 /* Set to 1 for verbose output showing committed sparse buffer ranges. */
 #define DEBUG_SPARSE_COMMITS 0
 
@@ -402,6 +406,8 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
       request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
    if (flags & RADEON_FLAG_GTT_WC)
       request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+   if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING)
+      request.flags |= AMDGPU_GEM_CREATE_LOCAL;
 
    r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
    if (r) {
@@ -435,6 +441,7 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
    bo->u.real.va_handle = va_handle;
    bo->initial_domain = initial_domain;
    bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
+   bo->is_local = !!(request.flags & AMDGPU_GEM_CREATE_LOCAL);
 
    if (initial_domain & RADEON_DOMAIN_VRAM)
       ws->allocated_vram += align64(size, ws->info.gart_page_size);
@@ -1134,7 +1141,7 @@ amdgpu_bo_create(struct radeon_winsys *rws,
 {
    struct amdgpu_winsys *ws = amdgpu_winsys(rws);
    struct amdgpu_winsys_bo *bo;
-   unsigned usage = 0, pb_cache_bucket;
+   unsigned usage = 0, pb_cache_bucket = 0;
 
    /* VRAM implies WC. This is not optional. */
    assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC);
@@ -1189,19 +1196,23 @@ no_slab:
    size = align64(size, ws->info.gart_page_size);
    alignment = align(alignment, ws->info.gart_page_size);
 
-   int heap = radeon_get_heap_index(domain, flags);
-   assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);
-   usage = 1 << heap; /* Only set one usage bit for each heap. */
+   bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
 
-   pb_cache_bucket = radeon_get_pb_cache_bucket_index(heap);
-   assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets));
+   if (use_reusable_pool) {
+       int heap = radeon_get_heap_index(domain, flags);
+       assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);
+       usage = 1 << heap; /* Only set one usage bit for each heap. */
 
-   /* Get a buffer from the cache. */
-   bo = (struct amdgpu_winsys_bo*)
-        pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage,
-                                pb_cache_bucket);
-   if (bo)
-      return &bo->base;
+       pb_cache_bucket = radeon_get_pb_cache_bucket_index(heap);
+       assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets));
+
+       /* Get a buffer from the cache. */
+       bo = (struct amdgpu_winsys_bo*)
+            pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage,
+                                    pb_cache_bucket);
+       if (bo)
+          return &bo->base;
+   }
 
    /* Create a new one. */
    bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
@@ -1216,7 +1227,7 @@ no_slab:
          return NULL;
    }
 
-   bo->u.real.use_reusable_pool = true;
+   bo->u.real.use_reusable_pool = use_reusable_pool;
    return &bo->base;
 }
 
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
index 1311344..10b095d 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
@@ -115,6 +115,8 @@ struct amdgpu_winsys_bo {
    unsigned num_fences;
    unsigned max_fences;
    struct pipe_fence_handle **fences;
+
+   bool is_local;
 };
 
 struct amdgpu_slab {
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 9cadfc4..7ec33c5 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -362,8 +362,9 @@ amdgpu_lookup_or_add_real_buffer(struct amdgpu_cs *acs, struct amdgpu_winsys_bo
 {
    struct amdgpu_cs_context *cs = acs->csc;
    unsigned hash;
-   int idx = amdgpu_lookup_buffer(cs, bo);
+   int idx;
 
+   idx = amdgpu_lookup_buffer(cs, bo);
    if (idx >= 0)
       return idx;
 
@@ -1123,6 +1124,8 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
       free(handles);
       mtx_unlock(&ws->global_bo_list_lock);
    } else {
+      unsigned num_handles;
+
       if (!amdgpu_add_sparse_backing_buffers(cs)) {
          r = -ENOMEM;
          goto bo_list_error;
@@ -1142,21 +1145,31 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
          }
       }
 
+      num_handles = 0;
       for (i = 0; i < cs->num_real_buffers; ++i) {
          struct amdgpu_cs_buffer *buffer = &cs->real_buffers[i];
 
+	 if (buffer->bo->is_local)
+            continue;
+
          assert(buffer->u.real.priority_usage != 0);
 
-         cs->handles[i] = buffer->bo->bo;
-         cs->flags[i] = (util_last_bit64(buffer->u.real.priority_usage) - 1) / 4;
+         cs->handles[num_handles] = buffer->bo->bo;
+         cs->flags[num_handles] = (util_last_bit64(buffer->u.real.priority_usage) - 1) / 4;
+	 ++num_handles;
       }
 
       if (acs->ring_type == RING_GFX)
          ws->gfx_bo_list_counter += cs->num_real_buffers;
 
-      r = amdgpu_bo_list_create(ws->dev, cs->num_real_buffers,
-                                cs->handles, cs->flags,
-                                &cs->request.resources);
+      if (num_handles) {
+         r = amdgpu_bo_list_create(ws->dev, num_handles,
+                                   cs->handles, cs->flags,
+                                   &cs->request.resources);
+      } else {
+         r = 0;
+	 cs->request.resources = 0;
+      }
    }
 bo_list_error:
 
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 8027a5f..15e9d38 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -914,7 +914,7 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
 {
     struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
     struct radeon_bo *bo;
-    unsigned usage = 0, pb_cache_bucket;
+    unsigned usage = 0, pb_cache_bucket = 0;
 
     assert(!(flags & RADEON_FLAG_SPARSE)); /* not supported */
 
@@ -969,17 +969,22 @@ no_slab:
     size = align(size, ws->info.gart_page_size);
     alignment = align(alignment, ws->info.gart_page_size);
 
-    int heap = radeon_get_heap_index(domain, flags);
-    assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);
-    usage = 1 << heap; /* Only set one usage bit for each heap. */
+    bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;
 
-    pb_cache_bucket = radeon_get_pb_cache_bucket_index(heap);
-    assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets));
+    /* Shared resources don't use cached heaps. */
+    if (use_reusable_pool) {
+        int heap = radeon_get_heap_index(domain, flags);
+        assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);
+        usage = 1 << heap; /* Only set one usage bit for each heap. */
 
-    bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment,
-                                           usage, pb_cache_bucket));
-    if (bo)
-        return &bo->base;
+        pb_cache_bucket = radeon_get_pb_cache_bucket_index(heap);
+        assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets));
+
+        bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment,
+                                               usage, pb_cache_bucket));
+        if (bo)
+            return &bo->base;
+    }
 
     bo = radeon_create_bo(ws, size, alignment, usage, domain, flags,
                           pb_cache_bucket);
@@ -994,7 +999,7 @@ no_slab:
             return NULL;
     }
 
-    bo->u.real.use_reusable_pool = true;
+    bo->u.real.use_reusable_pool = use_reusable_pool;
 
     mtx_lock(&ws->bo_handles_mutex);
     util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
-- 
2.7.4


[-- Attachment #3: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/9] drm/amdgpu: fix amdgpu_vm_bo_map trace point
       [not found] ` <1503653899-1781-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
                     ` (7 preceding siblings ...)
  2017-08-25  9:38   ` [PATCH 9/9] drm/amdgpu: WIP add IOCTL interface " Christian König
@ 2017-08-28  4:08   ` zhoucm1
  8 siblings, 0 replies; 30+ messages in thread
From: zhoucm1 @ 2017-08-28  4:08 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Reviewed-by: Chunming Zhou <david1.zhou@amd.com>


On 2017-08-25 17:38, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> That somehow got lost.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 ++
>   1 file changed, 2 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 7a0656c..c77689f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2110,6 +2110,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
>   
>   	if (flags & AMDGPU_PTE_PRT)
>   		amdgpu_vm_prt_get(adev);
> +	trace_amdgpu_vm_bo_map(bo_va, mapping);
>   
>   	return 0;
>   }
> @@ -2175,6 +2176,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
>   
>   	if (flags & AMDGPU_PTE_PRT)
>   		amdgpu_vm_prt_get(adev);
> +	trace_amdgpu_vm_bo_map(bo_va, mapping);
>   
>   	return 0;
>   }

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2/9] drm/amdgpu: fix and cleanup VM ready check
       [not found]     ` <1503653899-1781-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-28  4:09       ` zhoucm1
  0 siblings, 0 replies; 30+ messages in thread
From: zhoucm1 @ 2017-08-28  4:09 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



On 2017-08-25 17:38, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Stop checking the mapped BO itself, cause that one is
> certainly not a page table.
>
> Additional to that move the code into amdgpu_vm.c
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 33 ++-------------------------------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 32 ++++++++++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  1 +
>   3 files changed, 35 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 7171968..9b1b6bd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -127,35 +127,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
>   	return 0;
>   }
>   
> -static int amdgpu_gem_vm_check(void *param, struct amdgpu_bo *bo)
> -{
> -	/* if anything is swapped out don't swap it in here,
> -	   just abort and wait for the next CS */
> -	if (!amdgpu_bo_gpu_accessible(bo))
> -		return -ERESTARTSYS;
> -
> -	if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow))
> -		return -ERESTARTSYS;
> -
> -	return 0;
> -}
> -
> -static bool amdgpu_gem_vm_ready(struct amdgpu_device *adev,
> -				struct amdgpu_vm *vm,
> -				struct list_head *list)
> -{
> -	struct ttm_validate_buffer *entry;
> -
> -	list_for_each_entry(entry, list, head) {
> -		struct amdgpu_bo *bo =
> -			container_of(entry->bo, struct amdgpu_bo, tbo);
> -		if (amdgpu_gem_vm_check(NULL, bo))
> -			return false;
> -	}
> -
> -	return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_vm_check, NULL);
> -}
> -
>   void amdgpu_gem_object_close(struct drm_gem_object *obj,
>   			     struct drm_file *file_priv)
>   {
> @@ -189,7 +160,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>   	if (bo_va && --bo_va->ref_count == 0) {
>   		amdgpu_vm_bo_rmv(adev, bo_va);
>   
> -		if (amdgpu_gem_vm_ready(adev, vm, &list)) {
> +		if (amdgpu_vm_ready(adev, vm)) {
>   			struct dma_fence *fence = NULL;
>   
>   			r = amdgpu_vm_clear_freed(adev, vm, &fence);
> @@ -513,7 +484,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>   {
>   	int r = -ERESTARTSYS;
>   
> -	if (!amdgpu_gem_vm_ready(adev, vm, list))
> +	if (!amdgpu_vm_ready(adev, vm))
>   		goto error;
>   
>   	r = amdgpu_vm_update_directories(adev, vm);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index c77689f..f621dba 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -232,6 +232,38 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   }
>   
>   /**
> + * amdgpu_vm_check - helper for amdgpu_vm_ready
> + */
> +static int amdgpu_vm_check(void *param, struct amdgpu_bo *bo)
> +{
> +	/* if anything is swapped out don't swap it in here,
> +	   just abort and wait for the next CS */
> +	if (!amdgpu_bo_gpu_accessible(bo))
> +		return -ERESTARTSYS;
> +
> +	if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow))
> +		return -ERESTARTSYS;
> +
> +	return 0;
> +}
> +
> +/**
> + * amdgpu_vm_ready - check VM is ready for updates
> + *
> + * @adev: amdgpu device
> + * @vm: VM to check
> + *
> + * Check if all VM PDs/PTs are ready for updates
> + */
> +bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> +{
> +	if (amdgpu_vm_check(NULL, vm->root.bo))
> +		return false;
> +
> +	return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_vm_check, NULL);
> +}
> +
> +/**
>    * amdgpu_vm_alloc_levels - allocate the PD/PT levels
>    *
>    * @adev: amdgpu_device pointer
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index ba6691b..9347d28 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -225,6 +225,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
>   void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
>   			 struct list_head *validated,
>   			 struct amdgpu_bo_list_entry *entry);
> +bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm);
>   int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   			      int (*callback)(void *p, struct amdgpu_bo *bo),
>   			      void *param);

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 3/9] drm/amdgpu: cleanup GWS, GDS and OA allocation
       [not found]     ` <1503653899-1781-3-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-28  4:10       ` zhoucm1
  0 siblings, 0 replies; 30+ messages in thread
From: zhoucm1 @ 2017-08-28  4:10 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



On 2017-08-25 17:38, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Those are certainly not kernel allocations, instead set the NO_CPU_ACCESS flag.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 15 +++++++--------
>   1 file changed, 7 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 9b1b6bd..ba01293 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -186,17 +186,17 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>   {
>   	struct amdgpu_device *adev = dev->dev_private;
>   	union drm_amdgpu_gem_create *args = data;
> +	uint64_t flags = args->in.domain_flags;
>   	uint64_t size = args->in.bo_size;
>   	struct drm_gem_object *gobj;
>   	uint32_t handle;
> -	bool kernel = false;
>   	int r;
>   
>   	/* reject invalid gem flags */
> -	if (args->in.domain_flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
> -				      AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> -				      AMDGPU_GEM_CREATE_CPU_GTT_USWC |
> -				      AMDGPU_GEM_CREATE_VRAM_CLEARED))
> +	if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
> +		      AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> +		      AMDGPU_GEM_CREATE_CPU_GTT_USWC |
> +		      AMDGPU_GEM_CREATE_VRAM_CLEARED))
>   		return -EINVAL;
>   
>   	/* reject invalid gem domains */
> @@ -211,7 +211,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>   	/* create a gem object to contain this object in */
>   	if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS |
>   	    AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
> -		kernel = true;
> +		flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
>   		if (args->in.domains == AMDGPU_GEM_DOMAIN_GDS)
>   			size = size << AMDGPU_GDS_SHIFT;
>   		else if (args->in.domains == AMDGPU_GEM_DOMAIN_GWS)
> @@ -225,8 +225,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>   
>   	r = amdgpu_gem_object_create(adev, size, args->in.alignment,
>   				     (u32)(0xffffffff & args->in.domains),
> -				     args->in.domain_flags,
> -				     kernel, &gobj);
> +				     flags, false, &gobj);
>   	if (r)
>   		return r;
>   
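
As a side note, a rough userspace sketch of the allocation path this changes,
using the existing GEM create UAPI (the header path and error handling are
assumptions, so treat it as an illustration only):

	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <libdrm/amdgpu_drm.h>	/* assumed install path of the UAPI header */

	/* Allocate a GDS BO; with this patch the kernel sets
	 * AMDGPU_GEM_CREATE_NO_CPU_ACCESS itself instead of treating the
	 * allocation as a kernel BO. */
	static int alloc_gds_bo(int drm_fd, uint64_t size, uint32_t *handle)
	{
		union drm_amdgpu_gem_create args;

		memset(&args, 0, sizeof(args));
		args.in.bo_size = size;	/* shifted by AMDGPU_GDS_SHIFT in the kernel */
		args.in.domains = AMDGPU_GEM_DOMAIN_GDS;
		args.in.domain_flags = 0;

		if (ioctl(drm_fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &args))
			return -1;

		*handle = args.out.handle;
		return 0;
	}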

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 4/9] drm/amdgpu: add bo_va cleared flag again
       [not found]     ` <1503653899-1781-4-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-28  4:12       ` zhoucm1
       [not found]         ` <d076aae6-5620-dd91-f504-982abb2292dd-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 30+ messages in thread
From: zhoucm1 @ 2017-08-28  4:12 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

I'm not sure about this one; since you remove the status lock for the
vm_status list, I think we need to test this carefully.

Regards,

David Zhou


On 2017-08-25 17:38, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> We changed this to use an extra list a while back, but for the next
> series I need a separate flag again.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  3 +++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 37 ++++++++++++++----------------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     |  3 ---
>   3 files changed, 20 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index a288fa6..e613ba4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -55,6 +55,9 @@ struct amdgpu_bo_va {
>   	/* mappings for this bo_va */
>   	struct list_head		invalids;
>   	struct list_head		valids;
> +
> +	/* If the mappings are cleared or filled */
> +	bool				cleared;
>   };
>   
>   struct amdgpu_bo {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index f621dba..16148ef 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1787,10 +1787,13 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>   	else
>   		flags = 0x0;
>   
> -	spin_lock(&vm->status_lock);
> -	if (!list_empty(&bo_va->base.vm_status))
> +	/* We access vm_status without the status lock here, but that is ok
> +	 * because when we don't clear the BO is locked and so the status can't
> +	 * change
> +	 */
> +	if ((!clear && !list_empty(&bo_va->base.vm_status)) ||
> +	    bo_va->cleared != clear)
>   		list_splice_init(&bo_va->valids, &bo_va->invalids);
> -	spin_unlock(&vm->status_lock);
>   
>   	list_for_each_entry(mapping, &bo_va->invalids, list) {
>   		r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
> @@ -1800,25 +1803,22 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>   			return r;
>   	}
>   
> -	if (trace_amdgpu_vm_bo_mapping_enabled()) {
> -		list_for_each_entry(mapping, &bo_va->valids, list)
> -			trace_amdgpu_vm_bo_mapping(mapping);
> -
> -		list_for_each_entry(mapping, &bo_va->invalids, list)
> -			trace_amdgpu_vm_bo_mapping(mapping);
> +	if (vm->use_cpu_for_update) {
> +		/* Flush HDP */
> +		mb();
> +		amdgpu_gart_flush_gpu_tlb(adev, 0);
>   	}
>   
>   	spin_lock(&vm->status_lock);
> -	list_splice_init(&bo_va->invalids, &bo_va->valids);
>   	list_del_init(&bo_va->base.vm_status);
> -	if (clear)
> -		list_add(&bo_va->base.vm_status, &vm->cleared);
>   	spin_unlock(&vm->status_lock);
>   
> -	if (vm->use_cpu_for_update) {
> -		/* Flush HDP */
> -		mb();
> -		amdgpu_gart_flush_gpu_tlb(adev, 0);
> +	list_splice_init(&bo_va->invalids, &bo_va->valids);
> +	bo_va->cleared = clear;
> +
> +	if (trace_amdgpu_vm_bo_mapping_enabled()) {
> +		list_for_each_entry(mapping, &bo_va->valids, list)
> +			trace_amdgpu_vm_bo_mapping(mapping);
>   	}
>   
>   	return 0;
> @@ -2419,9 +2419,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>   
>   	list_for_each_entry(bo_base, &bo->va, bo_list) {
>   		spin_lock(&bo_base->vm->status_lock);
> -		if (list_empty(&bo_base->vm_status))
> -			list_add(&bo_base->vm_status,
> -				 &bo_base->vm->moved);
> +		list_move(&bo_base->vm_status, &bo_base->vm->moved);
>   		spin_unlock(&bo_base->vm->status_lock);
>   	}
>   }
> @@ -2508,7 +2506,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   		vm->reserved_vmid[i] = NULL;
>   	spin_lock_init(&vm->status_lock);
>   	INIT_LIST_HEAD(&vm->moved);
> -	INIT_LIST_HEAD(&vm->cleared);
>   	INIT_LIST_HEAD(&vm->freed);
>   
>   	/* create scheduler entity for page table updates */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 9347d28..e705f0f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -126,9 +126,6 @@ struct amdgpu_vm {
>   	/* BOs moved, but not yet updated in the PT */
>   	struct list_head	moved;
>   
> -	/* BOs cleared in the PT because of a move */
> -	struct list_head	cleared;
> -
>   	/* BO mappings freed, but not yet updated in the PT */
>   	struct list_head	freed;
>   
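
Regarding the lockless check questioned above, the locking rules the patch
appears to rely on can be summarised roughly like this (my reading, not
taken verbatim from the patch):

	/*
	 * bo_va->cleared
	 *   - only read and written in amdgpu_vm_bo_update(), which always
	 *     runs with the BO reserved, so no extra lock is needed
	 *
	 * bo_va->base.vm_status
	 *   - list insertion/removal happens under vm->status_lock, but only
	 *     from paths that also hold the BO reservation; the !clear path
	 *     in amdgpu_vm_bo_update() holds that reservation too, which is
	 *     why it can check list_empty() without taking status_lock
	 */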

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 9/9] drm/amdgpu: WIP add IOCTL interface for per VM BOs
       [not found]                         ` <fba30bfa-aa7c-d342-b4b6-85058f5db5bf-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-28  4:21                           ` zhoucm1
       [not found]                             ` <a897738d-ab23-8b43-9b9e-c64f7da5e065-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 30+ messages in thread
From: zhoucm1 @ 2017-08-28  4:21 UTC (permalink / raw)
  To: Christian König, Marek Olšák
  Cc: Olsak, Marek, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



On 2017-08-27 18:03, Christian König wrote:
> On 25.08.2017 at 21:19, Christian König wrote:
>> On 25.08.2017 at 18:22, Marek Olšák wrote:
>>> On Fri, Aug 25, 2017 at 3:00 PM, Christian König
>>> <deathsimple@vodafone.de> wrote:
>>>> On 25.08.2017 at 12:32, zhoucm1 wrote:
>>>>>
>>>>>
>>>>> On 2017-08-25 17:38, Christian König wrote:
>>>>>> From: Christian König <christian.koenig@amd.com>
>>>>>>
>>>>>> Add the IOCTL interface so that applications can allocate per VM 
>>>>>> BOs.
>>>>>>
>>>>>> Still WIP since not all corner cases are tested yet, but this 
>>>>>> reduces
>>>>>> average
>>>>>> CS overhead for 10K BOs from 21ms down to 48us.
>>>>> Wow, cheers, eventually you get per vm bo to same reservation with 
>>>>> PD/pts,
>>>>> indeed save a lot of bo list.
>>>>
>>>> Don't cheer too loud yet, that is a completely constructed test case.
>>>>
>>>> So far I wasn't able to achieve any improvements with any real game 
>>>> on this
>>>> with Mesa.
Thinking about it more: with too many BOs sharing one reservation, the
reservation lock could often be busy. If evictions or destroys also happen
frequently in the meantime, that could affect VM updates and CS submission
as well.

Anyway, this is a very good start at reducing CS overhead, especially since
we've seen it "reduces average CS overhead for 10K BOs from 21ms down to
48us".

Regards,
David Zhou
>>>>
>>>> BTW: Marek can you take a look with some CPU bound tests? I can 
>>>> provide a
>>>> kernel branch if necessary.
>>> Do you have a branch that works on Raven? This patch series doesn't,
>>> and I didn't investigate why.
>>
>> I will come up with one on Monday.
>
> Branch vm_improvements on 
> git://people.freedesktop.org/~deathsimple/linux together with the 
> attached patch should work.
>
> Only testing on Tonga, but that's based on amd-staging-4.12 and so 
> should work on Raven as well. If not I still have a bug somewhere 
> which needs to be fixed.
>
> Thanks,
> Christian.
>>
>> Have a nice weekend guys,
>> Christian.
>>
>>>
>>> Marek
>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>
>>>>> overall looks good, I will take a detailed check for this tomorrow.
>>>>>
>>>>> Regards,
>>>>> David Zhou
>>>>>>
>>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>>> ---
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  7 ++--
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  2 +-
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 59
>>>>>> ++++++++++++++++++++++---------
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  3 +-
>>>>>>    include/uapi/drm/amdgpu_drm.h             |  2 ++
>>>>>>    5 files changed, 51 insertions(+), 22 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>> index b1e817c..21cab36 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>> @@ -457,9 +457,10 @@ struct amdgpu_sa_bo {
>>>>>>     */
>>>>>>    void amdgpu_gem_force_release(struct amdgpu_device *adev);
>>>>>>    int amdgpu_gem_object_create(struct amdgpu_device *adev, 
>>>>>> unsigned long
>>>>>> size,
>>>>>> -                int alignment, u32 initial_domain,
>>>>>> -                u64 flags, bool kernel,
>>>>>> -                struct drm_gem_object **obj);
>>>>>> +                 int alignment, u32 initial_domain,
>>>>>> +                 u64 flags, bool kernel,
>>>>>> +                 struct reservation_object *resv,
>>>>>> +                 struct drm_gem_object **obj);
>>>>>>      int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>>>>>>                    struct drm_device *dev,
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>>>> index 0e907ea..7256f83 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>>>> @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct
>>>>>> amdgpu_fbdev *rfbdev,
>>>>>> AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>>>>> AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>>>>>> AMDGPU_GEM_CREATE_VRAM_CLEARED,
>>>>>> -                       true, &gobj);
>>>>>> +                       true, NULL, &gobj);
>>>>>>        if (ret) {
>>>>>>            pr_err("failed to allocate framebuffer (%d)\n", 
>>>>>> aligned_size);
>>>>>>            return -ENOMEM;
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>> index d028806..b8e8d67 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>> @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct 
>>>>>> drm_gem_object
>>>>>> *gobj)
>>>>>>    }
>>>>>>      int amdgpu_gem_object_create(struct amdgpu_device *adev, 
>>>>>> unsigned
>>>>>> long size,
>>>>>> -                int alignment, u32 initial_domain,
>>>>>> -                u64 flags, bool kernel,
>>>>>> -                struct drm_gem_object **obj)
>>>>>> +                 int alignment, u32 initial_domain,
>>>>>> +                 u64 flags, bool kernel,
>>>>>> +                 struct reservation_object *resv,
>>>>>> +                 struct drm_gem_object **obj)
>>>>>>    {
>>>>>> -    struct amdgpu_bo *robj;
>>>>>> +    struct amdgpu_bo *bo;
>>>>>>        int r;
>>>>>>          *obj = NULL;
>>>>>> @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device
>>>>>> *adev, unsigned long size,
>>>>>>      retry:
>>>>>>        r = amdgpu_bo_create(adev, size, alignment, kernel, 
>>>>>> initial_domain,
>>>>>> -                 flags, NULL, NULL, 0, &robj);
>>>>>> +                 flags, NULL, resv, 0, &bo);
>>>>>>        if (r) {
>>>>>>            if (r != -ERESTARTSYS) {
>>>>>>                if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
>>>>>> @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device
>>>>>> *adev, unsigned long size,
>>>>>>            }
>>>>>>            return r;
>>>>>>        }
>>>>>> -    *obj = &robj->gem_base;
>>>>>> +    *obj = &bo->gem_base;
>>>>>>          return 0;
>>>>>>    }
>>>>>> @@ -136,13 +137,14 @@ void amdgpu_gem_object_close(struct 
>>>>>> drm_gem_object
>>>>>> *obj,
>>>>>>        struct amdgpu_vm *vm = &fpriv->vm;
>>>>>>          struct amdgpu_bo_list_entry vm_pd;
>>>>>> -    struct list_head list;
>>>>>> +    struct list_head list, duplicates;
>>>>>>        struct ttm_validate_buffer tv;
>>>>>>        struct ww_acquire_ctx ticket;
>>>>>>        struct amdgpu_bo_va *bo_va;
>>>>>>        int r;
>>>>>>          INIT_LIST_HEAD(&list);
>>>>>> +    INIT_LIST_HEAD(&duplicates);
>>>>>>          tv.bo = &bo->tbo;
>>>>>>        tv.shared = true;
>>>>>> @@ -150,7 +152,7 @@ void amdgpu_gem_object_close(struct 
>>>>>> drm_gem_object
>>>>>> *obj,
>>>>>>          amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
>>>>>>    -    r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
>>>>>> +    r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>>>>>>        if (r) {
>>>>>>            dev_err(adev->dev, "leaking bo va because "
>>>>>>                "we fail to reserve bo (%d)\n", r);
>>>>>> @@ -185,9 +187,12 @@ int amdgpu_gem_create_ioctl(struct 
>>>>>> drm_device *dev,
>>>>>> void *data,
>>>>>>                    struct drm_file *filp)
>>>>>>    {
>>>>>>        struct amdgpu_device *adev = dev->dev_private;
>>>>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>>>> +    struct amdgpu_vm *vm = &fpriv->vm;
>>>>>>        union drm_amdgpu_gem_create *args = data;
>>>>>>        uint64_t flags = args->in.domain_flags;
>>>>>>        uint64_t size = args->in.bo_size;
>>>>>> +    struct reservation_object *resv = NULL;
>>>>>>        struct drm_gem_object *gobj;
>>>>>>        uint32_t handle;
>>>>>>        int r;
>>>>>> @@ -196,7 +201,8 @@ int amdgpu_gem_create_ioctl(struct drm_device 
>>>>>> *dev,
>>>>>> void *data,
>>>>>>        if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>>>>>                  AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>>>>>>                  AMDGPU_GEM_CREATE_CPU_GTT_USWC |
>>>>>> -              AMDGPU_GEM_CREATE_VRAM_CLEARED))
>>>>>> +              AMDGPU_GEM_CREATE_VRAM_CLEARED |
>>>>>> +              AMDGPU_GEM_CREATE_LOCAL))
>>>>>>            return -EINVAL;
>>>>>>          /* reject invalid gem domains */
>>>>>> @@ -223,9 +229,25 @@ int amdgpu_gem_create_ioctl(struct 
>>>>>> drm_device *dev,
>>>>>> void *data,
>>>>>>        }
>>>>>>        size = roundup(size, PAGE_SIZE);
>>>>>>    +    if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>>>>>> +        r = amdgpu_bo_reserve(vm->root.base.bo, false);
>>>>>> +        if (r)
>>>>>> +            return r;
>>>>>> +
>>>>>> +        resv = vm->root.base.bo->tbo.resv;
>>>>>> +    }
>>>>>> +
>>>>>>        r = amdgpu_gem_object_create(adev, size, args->in.alignment,
>>>>>>                         (u32)(0xffffffff & args->in.domains),
>>>>>> -                     flags, false, &gobj);
>>>>>> +                     flags, false, resv, &gobj);
>>>>>> +    if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>>>>>> +        if (!r) {
>>>>>> +            struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
>>>>>> +
>>>>>> +            abo->parent = amdgpu_bo_ref(vm->root.base.bo);
>>>>>> +        }
>>>>>> +        amdgpu_bo_unreserve(vm->root.base.bo);
>>>>>> +    }
>>>>>>        if (r)
>>>>>>            return r;
>>>>>>    @@ -267,9 +289,8 @@ int amdgpu_gem_userptr_ioctl(struct 
>>>>>> drm_device
>>>>>> *dev, void *data,
>>>>>>        }
>>>>>>          /* create a gem object to contain this object in */
>>>>>> -    r = amdgpu_gem_object_create(adev, args->size, 0,
>>>>>> -                     AMDGPU_GEM_DOMAIN_CPU, 0,
>>>>>> -                     0, &gobj);
>>>>>> +    r = amdgpu_gem_object_create(adev, args->size, 0,
>>>>>> AMDGPU_GEM_DOMAIN_CPU,
>>>>>> +                     0, 0, NULL, &gobj);
>>>>>>        if (r)
>>>>>>            return r;
>>>>>>    @@ -521,7 +542,7 @@ int amdgpu_gem_va_ioctl(struct drm_device 
>>>>>> *dev,
>>>>>> void *data,
>>>>>>        struct amdgpu_bo_list_entry vm_pd;
>>>>>>        struct ttm_validate_buffer tv;
>>>>>>        struct ww_acquire_ctx ticket;
>>>>>> -    struct list_head list;
>>>>>> +    struct list_head list, duplicates;
>>>>>>        uint64_t va_flags;
>>>>>>        int r = 0;
>>>>>>    @@ -557,6 +578,7 @@ int amdgpu_gem_va_ioctl(struct drm_device 
>>>>>> *dev,
>>>>>> void *data,
>>>>>>        }
>>>>>>          INIT_LIST_HEAD(&list);
>>>>>> +    INIT_LIST_HEAD(&duplicates);
>>>>>>        if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
>>>>>>            !(args->flags & AMDGPU_VM_PAGE_PRT)) {
>>>>>>            gobj = drm_gem_object_lookup(filp, args->handle);
>>>>>> @@ -573,7 +595,7 @@ int amdgpu_gem_va_ioctl(struct drm_device 
>>>>>> *dev, void
>>>>>> *data,
>>>>>>          amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
>>>>>>    -    r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
>>>>>> +    r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
>>>>>>        if (r)
>>>>>>            goto error_unref;
>>>>>>    @@ -639,6 +661,7 @@ int amdgpu_gem_va_ioctl(struct drm_device 
>>>>>> *dev,
>>>>>> void *data,
>>>>>>    int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>>>>>>                struct drm_file *filp)
>>>>>>    {
>>>>>> +    struct amdgpu_device *adev = dev->dev_private;
>>>>>>        struct drm_amdgpu_gem_op *args = data;
>>>>>>        struct drm_gem_object *gobj;
>>>>>>        struct amdgpu_bo *robj;
>>>>>> @@ -686,6 +709,9 @@ int amdgpu_gem_op_ioctl(struct drm_device 
>>>>>> *dev, void
>>>>>> *data,
>>>>>>            if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
>>>>>>                robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
>>>>>>    +        if (robj->flags & AMDGPU_GEM_CREATE_LOCAL)
>>>>>> +            amdgpu_vm_bo_invalidate(adev, robj, true);
>>>>>> +
>>>>>>            amdgpu_bo_unreserve(robj);
>>>>>>            break;
>>>>>>        default:
>>>>>> @@ -715,8 +741,7 @@ int amdgpu_mode_dumb_create(struct drm_file
>>>>>> *file_priv,
>>>>>>        r = amdgpu_gem_object_create(adev, args->size, 0,
>>>>>>                         AMDGPU_GEM_DOMAIN_VRAM,
>>>>>> AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
>>>>>> -                     ttm_bo_type_device,
>>>>>> -                     &gobj);
>>>>>> +                     false, NULL, &gobj);
>>>>>>        if (r)
>>>>>>            return -ENOMEM;
>>>>>>    diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>>> index 5b3f928..f407499 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>>> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct
>>>>>> drm_device *dev,
>>>>>>    {
>>>>>>        struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>>>>>>    -    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>>>>>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>>>>>> +        bo->flags & AMDGPU_GEM_CREATE_LOCAL)
>>>>>>            return ERR_PTR(-EPERM);
>>>>>>          return drm_gem_prime_export(dev, gobj, flags);
>>>>>> diff --git a/include/uapi/drm/amdgpu_drm.h
>>>>>> b/include/uapi/drm/amdgpu_drm.h
>>>>>> index d0ee739..05241a6 100644
>>>>>> --- a/include/uapi/drm/amdgpu_drm.h
>>>>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>>>>> @@ -89,6 +89,8 @@ extern "C" {
>>>>>>    #define AMDGPU_GEM_CREATE_SHADOW        (1 << 4)
>>>>>>    /* Flag that allocating the BO should use linear VRAM */
>>>>>>    #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS    (1 << 5)
>>>>>> +/* Flag that BO is local in the VM */
>>>>>> +#define AMDGPU_GEM_CREATE_LOCAL            (1 << 6)
>>>>>>      struct drm_amdgpu_gem_create_in  {
>>>>>>        /** the requested memory size */
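
To make the proposed interface a bit more concrete, a rough sketch of how an
application would request such a per-VM BO, continuing the GEM create example
from the patch 3 discussion above (the flag is WIP, so its name and value may
still change; illustration only):

	union drm_amdgpu_gem_create args;

	memset(&args, 0, sizeof(args));
	args.in.bo_size      = 1024 * 1024;
	args.in.alignment    = 4096;
	args.in.domains      = AMDGPU_GEM_DOMAIN_VRAM;
	/* per-VM BO: shares the root PD reservation and cannot be
	 * exported through PRIME */
	args.in.domain_flags = AMDGPU_GEM_CREATE_LOCAL;

	if (ioctl(drm_fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &args))
		return -1;
	/* args.out.handle behaves like a normal GEM handle otherwise */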

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 5/9] drm/amdgpu: rework moved handling in the VM
       [not found]     ` <1503653899-1781-5-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-28  6:51       ` zhoucm1
  0 siblings, 0 replies; 30+ messages in thread
From: zhoucm1 @ 2017-08-28  6:51 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

How do you keep the access to bo_base->moved safe? I guess moving it under
the status_lock would be better.


Regards,

David Zhou


On 2017-08-25 17:38, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Instead of using the vm_state use a separate flag to note
> that the BO was moved.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 13 +++++++------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  3 +++
>   2 files changed, 10 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 16148ef..85189f1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1787,13 +1787,13 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>   	else
>   		flags = 0x0;
>   
> -	/* We access vm_status without the status lock here, but that is ok
> -	 * because when we don't clear the BO is locked and so the status can't
> -	 * change
> -	 */
> -	if ((!clear && !list_empty(&bo_va->base.vm_status)) ||
> -	    bo_va->cleared != clear)
> +	if (!clear && bo_va->base.moved) {
> +		bo_va->base.moved = false;
> +		list_splice_init(&bo_va->valids, &bo_va->invalids);
> +
> +	} else if (bo_va->cleared != clear) {
>   		list_splice_init(&bo_va->valids, &bo_va->invalids);
> +	}
>   
>   	list_for_each_entry(mapping, &bo_va->invalids, list) {
>   		r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
> @@ -2418,6 +2418,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>   	struct amdgpu_vm_bo_base *bo_base;
>   
>   	list_for_each_entry(bo_base, &bo->va, bo_list) {
> +		bo_base->moved = true;
>   		spin_lock(&bo_base->vm->status_lock);
>   		list_move(&bo_base->vm_status, &bo_base->vm->moved);
>   		spin_unlock(&bo_base->vm->status_lock);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index e705f0f..ff093d4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -105,6 +105,9 @@ struct amdgpu_vm_bo_base {
>   
>   	/* protected by spinlock */
>   	struct list_head		vm_status;
> +
> +	/* protected by the BO being reserved */
> +	bool				moved;
>   };
>   
>   struct amdgpu_vm_pt {
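
On the question above about how access to bo_base->moved stays safe: the
patch annotates the flag as protected by the BO being reserved rather than by
status_lock. A sketch of the resulting rule as I read it (my summary, not
spelled out in the patch):

	/*
	 * bo_base->moved
	 *   - set in amdgpu_vm_bo_invalidate(), which is reached via TTM's
	 *     move_notify and therefore runs with the BO reserved
	 *   - tested and cleared in amdgpu_vm_bo_update(), which also runs
	 *     with the BO reserved
	 *   => the reservation object serialises the flag; status_lock only
	 *      covers the vm_status list membership
	 */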

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 6/9] drm/amdgpu: track evicted page tables v2
       [not found]     ` <1503653899-1781-6-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-28  6:58       ` zhoucm1
  0 siblings, 0 replies; 30+ messages in thread
From: zhoucm1 @ 2017-08-28  6:58 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



On 2017-08-25 17:38, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Instead of validating all page tables when one was evicted,
> track which one needs a validation.
>
> v2: simplify amdgpu_vm_ready as well
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> (v1)
Acked-by: Chunming Zhou <david1.zhou@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     |   7 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c    |   8 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |   2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 227 +++++++++++++----------------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     |  16 +-
>   5 files changed, 119 insertions(+), 141 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 3f46b5a..f68ac56 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -632,9 +632,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>   
>   	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
>   				     p->bytes_moved_vis);
> -	fpriv->vm.last_eviction_counter =
> -		atomic64_read(&p->adev->num_evictions);
> -
>   	if (p->bo_list) {
>   		struct amdgpu_bo *gds = p->bo_list->gds_obj;
>   		struct amdgpu_bo *gws = p->bo_list->gws_obj;
> @@ -826,7 +823,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
>   			if (!bo)
>   				continue;
>   
> -			amdgpu_vm_bo_invalidate(adev, bo);
> +			amdgpu_vm_bo_invalidate(adev, bo, false);
>   		}
>   	}
>   
> @@ -851,7 +848,7 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
>   	}
>   
>   	if (p->job->vm) {
> -		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.bo);
> +		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
>   
>   		r = amdgpu_bo_vm_update_pte(p);
>   		if (r)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index ba01293..d028806 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -160,7 +160,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>   	if (bo_va && --bo_va->ref_count == 0) {
>   		amdgpu_vm_bo_rmv(adev, bo_va);
>   
> -		if (amdgpu_vm_ready(adev, vm)) {
> +		if (amdgpu_vm_ready(vm)) {
>   			struct dma_fence *fence = NULL;
>   
>   			r = amdgpu_vm_clear_freed(adev, vm, &fence);
> @@ -481,10 +481,10 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>   				    struct list_head *list,
>   				    uint32_t operation)
>   {
> -	int r = -ERESTARTSYS;
> +	int r;
>   
> -	if (!amdgpu_vm_ready(adev, vm))
> -		goto error;
> +	if (!amdgpu_vm_ready(vm))
> +		return;
>   
>   	r = amdgpu_vm_update_directories(adev, vm);
>   	if (r)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 9e495da..52d0109 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -929,7 +929,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
>   		return;
>   
>   	abo = container_of(bo, struct amdgpu_bo, tbo);
> -	amdgpu_vm_bo_invalidate(adev, abo);
> +	amdgpu_vm_bo_invalidate(adev, abo, evict);
>   
>   	amdgpu_bo_kunmap(abo);
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 85189f1..592c3e7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -140,7 +140,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
>   			 struct list_head *validated,
>   			 struct amdgpu_bo_list_entry *entry)
>   {
> -	entry->robj = vm->root.bo;
> +	entry->robj = vm->root.base.bo;
>   	entry->priority = 0;
>   	entry->tv.bo = &entry->robj->tbo;
>   	entry->tv.shared = true;
> @@ -149,61 +149,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
>   }
>   
>   /**
> - * amdgpu_vm_validate_layer - validate a single page table level
> - *
> - * @parent: parent page table level
> - * @validate: callback to do the validation
> - * @param: parameter for the validation callback
> - *
> - * Validate the page table BOs on command submission if neccessary.
> - */
> -static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,
> -				    int (*validate)(void *, struct amdgpu_bo *),
> -				    void *param, bool use_cpu_for_update,
> -				    struct ttm_bo_global *glob)
> -{
> -	unsigned i;
> -	int r;
> -
> -	if (use_cpu_for_update) {
> -		r = amdgpu_bo_kmap(parent->bo, NULL);
> -		if (r)
> -			return r;
> -	}
> -
> -	if (!parent->entries)
> -		return 0;
> -
> -	for (i = 0; i <= parent->last_entry_used; ++i) {
> -		struct amdgpu_vm_pt *entry = &parent->entries[i];
> -
> -		if (!entry->bo)
> -			continue;
> -
> -		r = validate(param, entry->bo);
> -		if (r)
> -			return r;
> -
> -		spin_lock(&glob->lru_lock);
> -		ttm_bo_move_to_lru_tail(&entry->bo->tbo);
> -		if (entry->bo->shadow)
> -			ttm_bo_move_to_lru_tail(&entry->bo->shadow->tbo);
> -		spin_unlock(&glob->lru_lock);
> -
> -		/*
> -		 * Recurse into the sub directory. This is harmless because we
> -		 * have only a maximum of 5 layers.
> -		 */
> -		r = amdgpu_vm_validate_level(entry, validate, param,
> -					     use_cpu_for_update, glob);
> -		if (r)
> -			return r;
> -	}
> -
> -	return r;
> -}
> -
> -/**
>    * amdgpu_vm_validate_pt_bos - validate the page table BOs
>    *
>    * @adev: amdgpu device pointer
> @@ -217,32 +162,43 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   			      int (*validate)(void *p, struct amdgpu_bo *bo),
>   			      void *param)
>   {
> -	uint64_t num_evictions;
> +	struct ttm_bo_global *glob = adev->mman.bdev.glob;
> +	int r;
>   
> -	/* We only need to validate the page tables
> -	 * if they aren't already valid.
> -	 */
> -	num_evictions = atomic64_read(&adev->num_evictions);
> -	if (num_evictions == vm->last_eviction_counter)
> -		return 0;
> +	spin_lock(&vm->status_lock);
> +	while (!list_empty(&vm->evicted)) {
> +		struct amdgpu_vm_bo_base *bo_base;
> +		struct amdgpu_bo *bo;
>   
> -	return amdgpu_vm_validate_level(&vm->root, validate, param,
> -					vm->use_cpu_for_update,
> -					adev->mman.bdev.glob);
> -}
> +		bo_base = list_first_entry(&vm->evicted,
> +					   struct amdgpu_vm_bo_base,
> +					   vm_status);
> +		spin_unlock(&vm->status_lock);
>   
> -/**
> - * amdgpu_vm_check - helper for amdgpu_vm_ready
> - */
> -static int amdgpu_vm_check(void *param, struct amdgpu_bo *bo)
> -{
> -	/* if anything is swapped out don't swap it in here,
> -	   just abort and wait for the next CS */
> -	if (!amdgpu_bo_gpu_accessible(bo))
> -		return -ERESTARTSYS;
> +		bo = bo_base->bo;
> +		BUG_ON(!bo);
> +		if (bo->parent) {
> +			r = validate(param, bo);
> +			if (r)
> +				return r;
>   
> -	if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow))
> -		return -ERESTARTSYS;
> +			spin_lock(&glob->lru_lock);
> +			ttm_bo_move_to_lru_tail(&bo->tbo);
> +			if (bo->shadow)
> +				ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
> +			spin_unlock(&glob->lru_lock);
> +		}
> +
> +		if (vm->use_cpu_for_update) {
> +			r = amdgpu_bo_kmap(bo, NULL);
> +			if (r)
> +				return r;
> +		}
> +
> +		spin_lock(&vm->status_lock);
> +		list_del_init(&bo_base->vm_status);
> +	}
> +	spin_unlock(&vm->status_lock);
>   
>   	return 0;
>   }
> @@ -250,17 +206,19 @@ static int amdgpu_vm_check(void *param, struct amdgpu_bo *bo)
>   /**
>    * amdgpu_vm_ready - check VM is ready for updates
>    *
> - * @adev: amdgpu device
>    * @vm: VM to check
>    *
>    * Check if all VM PDs/PTs are ready for updates
>    */
> -bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> +bool amdgpu_vm_ready(struct amdgpu_vm *vm)
>   {
> -	if (amdgpu_vm_check(NULL, vm->root.bo))
> -		return false;
> +	bool ready;
> +
> +	spin_lock(&vm->status_lock);
> +	ready = list_empty(&vm->evicted);
> +	spin_unlock(&vm->status_lock);
>   
> -	return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_vm_check, NULL);
> +	return ready;
>   }
>   
>   /**
> @@ -325,11 +283,11 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>   
>   	/* walk over the address space and allocate the page tables */
>   	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
> -		struct reservation_object *resv = vm->root.bo->tbo.resv;
> +		struct reservation_object *resv = vm->root.base.bo->tbo.resv;
>   		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
>   		struct amdgpu_bo *pt;
>   
> -		if (!entry->bo) {
> +		if (!entry->base.bo) {
>   			r = amdgpu_bo_create(adev,
>   					     amdgpu_vm_bo_size(adev, level),
>   					     AMDGPU_GPU_PAGE_SIZE, true,
> @@ -350,9 +308,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>   			/* Keep a reference to the root directory to avoid
>   			* freeing them up in the wrong order.
>   			*/
> -			pt->parent = amdgpu_bo_ref(vm->root.bo);
> +			pt->parent = amdgpu_bo_ref(vm->root.base.bo);
>   
> -			entry->bo = pt;
> +			entry->base.vm = vm;
> +			entry->base.bo = pt;
> +			list_add_tail(&entry->base.bo_list, &pt->va);
> +			INIT_LIST_HEAD(&entry->base.vm_status);
>   			entry->addr = 0;
>   		}
>   
> @@ -1019,7 +980,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	int r;
>   
>   	amdgpu_sync_create(&sync);
> -	amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.resv, owner);
> +	amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner);
>   	r = amdgpu_sync_wait(&sync, true);
>   	amdgpu_sync_free(&sync);
>   
> @@ -1058,10 +1019,10 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   
>   	memset(&params, 0, sizeof(params));
>   	params.adev = adev;
> -	shadow = parent->bo->shadow;
> +	shadow = parent->base.bo->shadow;
>   
>   	if (vm->use_cpu_for_update) {
> -		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo);
> +		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
>   		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
>   		if (unlikely(r))
>   			return r;
> @@ -1077,7 +1038,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   		/* assume the worst case */
>   		ndw += parent->last_entry_used * 6;
>   
> -		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> +		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
>   
>   		if (shadow) {
>   			shadow_addr = amdgpu_bo_gpu_offset(shadow);
> @@ -1097,7 +1058,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   
>   	/* walk over the address space and update the directory */
>   	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
> -		struct amdgpu_bo *bo = parent->entries[pt_idx].bo;
> +		struct amdgpu_bo *bo = parent->entries[pt_idx].base.bo;
>   		uint64_t pde, pt;
>   
>   		if (bo == NULL)
> @@ -1140,7 +1101,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   	}
>   
>   	if (count) {
> -		if (vm->root.bo->shadow)
> +		if (vm->root.base.bo->shadow)
>   			params.func(&params, last_shadow, last_pt,
>   				    count, incr, AMDGPU_PTE_VALID);
>   
> @@ -1153,7 +1114,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   			amdgpu_job_free(job);
>   		} else {
>   			amdgpu_ring_pad_ib(ring, params.ib);
> -			amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv,
> +			amdgpu_sync_resv(adev, &job->sync,
> +					 parent->base.bo->tbo.resv,
>   					 AMDGPU_FENCE_OWNER_VM);
>   			if (shadow)
>   				amdgpu_sync_resv(adev, &job->sync,
> @@ -1166,7 +1128,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   			if (r)
>   				goto error_free;
>   
> -			amdgpu_bo_fence(parent->bo, fence, true);
> +			amdgpu_bo_fence(parent->base.bo, fence, true);
>   			dma_fence_put(vm->last_dir_update);
>   			vm->last_dir_update = dma_fence_get(fence);
>   			dma_fence_put(fence);
> @@ -1179,7 +1141,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
>   		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
>   
> -		if (!entry->bo)
> +		if (!entry->base.bo)
>   			continue;
>   
>   		r = amdgpu_vm_update_level(adev, vm, entry, level + 1);
> @@ -1212,7 +1174,7 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
>   	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
>   		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
>   
> -		if (!entry->bo)
> +		if (!entry->base.bo)
>   			continue;
>   
>   		entry->addr = ~0ULL;
> @@ -1267,7 +1229,7 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
>   	*entry = &p->vm->root;
>   	while ((*entry)->entries) {
>   		idx = addr >> (p->adev->vm_manager.block_size * level--);
> -		idx %= amdgpu_bo_size((*entry)->bo) / 8;
> +		idx %= amdgpu_bo_size((*entry)->base.bo) / 8;
>   		*parent = *entry;
>   		*entry = &(*entry)->entries[idx];
>   	}
> @@ -1303,7 +1265,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
>   	    p->src ||
>   	    !(flags & AMDGPU_PTE_VALID)) {
>   
> -		dst = amdgpu_bo_gpu_offset(entry->bo);
> +		dst = amdgpu_bo_gpu_offset(entry->base.bo);
>   		dst = amdgpu_gart_get_vm_pde(p->adev, dst);
>   		flags = AMDGPU_PTE_VALID;
>   	} else {
> @@ -1329,18 +1291,18 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
>   		tmp = p->pages_addr;
>   		p->pages_addr = NULL;
>   
> -		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo);
> +		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
>   		pde = pd_addr + (entry - parent->entries) * 8;
>   		amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
>   
>   		p->pages_addr = tmp;
>   	} else {
> -		if (parent->bo->shadow) {
> -			pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
> +		if (parent->base.bo->shadow) {
> +			pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow);
>   			pde = pd_addr + (entry - parent->entries) * 8;
>   			amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
>   		}
> -		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> +		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
>   		pde = pd_addr + (entry - parent->entries) * 8;
>   		amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
>   	}
> @@ -1391,7 +1353,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>   		if (entry->addr & AMDGPU_PDE_PTE)
>   			continue;
>   
> -		pt = entry->bo;
> +		pt = entry->base.bo;
>   		if (use_cpu_update) {
>   			pe_start = (unsigned long)amdgpu_bo_kptr(pt);
>   		} else {
> @@ -1611,12 +1573,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	if (r)
>   		goto error_free;
>   
> -	r = amdgpu_sync_resv(adev, &job->sync, vm->root.bo->tbo.resv,
> +	r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv,
>   			     owner);
>   	if (r)
>   		goto error_free;
>   
> -	r = reservation_object_reserve_shared(vm->root.bo->tbo.resv);
> +	r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
>   	if (r)
>   		goto error_free;
>   
> @@ -1631,7 +1593,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	if (r)
>   		goto error_free;
>   
> -	amdgpu_bo_fence(vm->root.bo, f, true);
> +	amdgpu_bo_fence(vm->root.base.bo, f, true);
>   	dma_fence_put(*fence);
>   	*fence = f;
>   	return 0;
> @@ -1926,7 +1888,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
>    */
>   static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>   {
> -	struct reservation_object *resv = vm->root.bo->tbo.resv;
> +	struct reservation_object *resv = vm->root.base.bo->tbo.resv;
>   	struct dma_fence *excl, **shared;
>   	unsigned i, shared_count;
>   	int r;
> @@ -2413,12 +2375,25 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
>    * Mark @bo as invalid.
>    */
>   void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
> -			     struct amdgpu_bo *bo)
> +			     struct amdgpu_bo *bo, bool evicted)
>   {
>   	struct amdgpu_vm_bo_base *bo_base;
>   
>   	list_for_each_entry(bo_base, &bo->va, bo_list) {
> +		struct amdgpu_vm *vm = bo_base->vm;
> +
>   		bo_base->moved = true;
> +		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
> +			spin_lock(&bo_base->vm->status_lock);
> +			list_move(&bo_base->vm_status, &vm->evicted);
> +			spin_unlock(&bo_base->vm->status_lock);
> +			continue;
> +		}
> +
> +		/* Don't add page tables to the moved state */
> +		if (bo->tbo.type == ttm_bo_type_kernel)
> +			continue;
> +
>   		spin_lock(&bo_base->vm->status_lock);
>   		list_move(&bo_base->vm_status, &bo_base->vm->moved);
>   		spin_unlock(&bo_base->vm->status_lock);
> @@ -2506,6 +2481,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
>   		vm->reserved_vmid[i] = NULL;
>   	spin_lock_init(&vm->status_lock);
> +	INIT_LIST_HEAD(&vm->evicted);
>   	INIT_LIST_HEAD(&vm->moved);
>   	INIT_LIST_HEAD(&vm->freed);
>   
> @@ -2550,30 +2526,31 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
>   			     AMDGPU_GEM_DOMAIN_VRAM,
>   			     flags,
> -			     NULL, NULL, init_pde_value, &vm->root.bo);
> +			     NULL, NULL, init_pde_value, &vm->root.base.bo);
>   	if (r)
>   		goto error_free_sched_entity;
>   
> -	r = amdgpu_bo_reserve(vm->root.bo, false);
> -	if (r)
> -		goto error_free_root;
> -
> -	vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
> +	vm->root.base.vm = vm;
> +	list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
> +	INIT_LIST_HEAD(&vm->root.base.vm_status);
>   
>   	if (vm->use_cpu_for_update) {
> -		r = amdgpu_bo_kmap(vm->root.bo, NULL);
> +		r = amdgpu_bo_reserve(vm->root.base.bo, false);
>   		if (r)
>   			goto error_free_root;
> -	}
>   
> -	amdgpu_bo_unreserve(vm->root.bo);
> +		r = amdgpu_bo_kmap(vm->root.base.bo, NULL);
> +		if (r)
> +			goto error_free_root;
> +		amdgpu_bo_unreserve(vm->root.base.bo);
> +	}
>   
>   	return 0;
>   
>   error_free_root:
> -	amdgpu_bo_unref(&vm->root.bo->shadow);
> -	amdgpu_bo_unref(&vm->root.bo);
> -	vm->root.bo = NULL;
> +	amdgpu_bo_unref(&vm->root.base.bo->shadow);
> +	amdgpu_bo_unref(&vm->root.base.bo);
> +	vm->root.base.bo = NULL;
>   
>   error_free_sched_entity:
>   	amd_sched_entity_fini(&ring->sched, &vm->entity);
> @@ -2592,9 +2569,11 @@ static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)
>   {
>   	unsigned i;
>   
> -	if (level->bo) {
> -		amdgpu_bo_unref(&level->bo->shadow);
> -		amdgpu_bo_unref(&level->bo);
> +	if (level->base.bo) {
> +		list_del(&level->base.bo_list);
> +		list_del(&level->base.vm_status);
> +		amdgpu_bo_unref(&level->base.bo->shadow);
> +		amdgpu_bo_unref(&level->base.bo);
>   	}
>   
>   	if (level->entries)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index ff093d4..4e465e8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -111,12 +111,12 @@ struct amdgpu_vm_bo_base {
>   };
>   
>   struct amdgpu_vm_pt {
> -	struct amdgpu_bo	*bo;
> -	uint64_t		addr;
> +	struct amdgpu_vm_bo_base	base;
> +	uint64_t			addr;
>   
>   	/* array of page tables, one for each directory entry */
> -	struct amdgpu_vm_pt	*entries;
> -	unsigned		last_entry_used;
> +	struct amdgpu_vm_pt		*entries;
> +	unsigned			last_entry_used;
>   };
>   
>   struct amdgpu_vm {
> @@ -126,6 +126,9 @@ struct amdgpu_vm {
>   	/* protecting invalidated */
>   	spinlock_t		status_lock;
>   
> +	/* BOs who needs a validation */
> +	struct list_head	evicted;
> +
>   	/* BOs moved, but not yet updated in the PT */
>   	struct list_head	moved;
>   
> @@ -135,7 +138,6 @@ struct amdgpu_vm {
>   	/* contains the page directory */
>   	struct amdgpu_vm_pt     root;
>   	struct dma_fence	*last_dir_update;
> -	uint64_t		last_eviction_counter;
>   
>   	/* protecting freed */
>   	spinlock_t		freed_lock;
> @@ -225,7 +227,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
>   void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
>   			 struct list_head *validated,
>   			 struct amdgpu_bo_list_entry *entry);
> -bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm);
> +bool amdgpu_vm_ready(struct amdgpu_vm *vm);
>   int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   			      int (*callback)(void *p, struct amdgpu_bo *bo),
>   			      void *param);
> @@ -250,7 +252,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>   			struct amdgpu_bo_va *bo_va,
>   			bool clear);
>   void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
> -			     struct amdgpu_bo *bo);
> +			     struct amdgpu_bo *bo, bool evicted);
>   struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
>   				       struct amdgpu_bo *bo);
>   struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
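
One detail of the new validate loop above that is easy to miss: the status
spinlock is dropped around the validate() callback because validation can
move buffers and therefore sleep. Stripped of the kmap and LRU handling, the
pattern is (a simplified restatement of the hunk above, not additional code):

	spin_lock(&vm->status_lock);
	while (!list_empty(&vm->evicted)) {
		struct amdgpu_vm_bo_base *bo_base =
			list_first_entry(&vm->evicted,
					 struct amdgpu_vm_bo_base, vm_status);

		spin_unlock(&vm->status_lock);	/* validate() may sleep */

		r = validate(param, bo_base->bo);
		if (r)
			return r;

		spin_lock(&vm->status_lock);
		list_del_init(&bo_base->vm_status);
	}
	spin_unlock(&vm->status_lock);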

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 7/9] drm/amdgpu: rework page directory filling v2
       [not found]     ` <1503653899-1781-7-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-28  7:06       ` zhoucm1
  0 siblings, 0 replies; 30+ messages in thread
From: zhoucm1 @ 2017-08-28  7:06 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



On 2017-08-25 17:38, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Keep track off relocated PDs/PTs instead of walking and checking all PDs.
>
> v2: better root PD handling
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> (v1)
By now many lists exist in our VM code
(valids/invalids/cleared/evicted/moved/relocated); a better description of
each is desired. :0

Acked-by: Chunming Zhou <david1.zhou@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 87 ++++++++++++++++++++++------------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  3 ++
>   2 files changed, 61 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 592c3e7..b02451f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -196,7 +196,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   		}
>   
>   		spin_lock(&vm->status_lock);
> -		list_del_init(&bo_base->vm_status);
> +		list_move(&bo_base->vm_status, &vm->relocated);
>   	}
>   	spin_unlock(&vm->status_lock);
>   
> @@ -313,8 +313,10 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>   			entry->base.vm = vm;
>   			entry->base.bo = pt;
>   			list_add_tail(&entry->base.bo_list, &pt->va);
> -			INIT_LIST_HEAD(&entry->base.vm_status);
> -			entry->addr = 0;
> +			spin_lock(&vm->status_lock);
> +			list_add(&entry->base.vm_status, &vm->relocated);
> +			spin_unlock(&vm->status_lock);
> +			entry->addr = ~0ULL;
>   		}
>   
>   		if (level < adev->vm_manager.num_level) {
> @@ -999,18 +1001,17 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>    */
>   static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   				  struct amdgpu_vm *vm,
> -				  struct amdgpu_vm_pt *parent,
> -				  unsigned level)
> +				  struct amdgpu_vm_pt *parent)
>   {
>   	struct amdgpu_bo *shadow;
>   	struct amdgpu_ring *ring = NULL;
>   	uint64_t pd_addr, shadow_addr = 0;
> -	uint32_t incr = amdgpu_vm_bo_size(adev, level + 1);
>   	uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
>   	unsigned count = 0, pt_idx, ndw = 0;
>   	struct amdgpu_job *job;
>   	struct amdgpu_pte_update_params params;
>   	struct dma_fence *fence = NULL;
> +	uint32_t incr;
>   
>   	int r;
>   
> @@ -1058,12 +1059,17 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   
>   	/* walk over the address space and update the directory */
>   	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
> -		struct amdgpu_bo *bo = parent->entries[pt_idx].base.bo;
> +		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
> +		struct amdgpu_bo *bo = entry->base.bo;
>   		uint64_t pde, pt;
>   
>   		if (bo == NULL)
>   			continue;
>   
> +		spin_lock(&vm->status_lock);
> +		list_del_init(&entry->base.vm_status);
> +		spin_unlock(&vm->status_lock);
> +
>   		pt = amdgpu_bo_gpu_offset(bo);
>   		pt = amdgpu_gart_get_vm_pde(adev, pt);
>   		/* Don't update huge pages here */
> @@ -1074,6 +1080,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   		parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID;
>   
>   		pde = pd_addr + pt_idx * 8;
> +		incr = amdgpu_bo_size(bo);
>   		if (((last_pde + 8 * count) != pde) ||
>   		    ((last_pt + incr * count) != pt) ||
>   		    (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
> @@ -1134,20 +1141,6 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   			dma_fence_put(fence);
>   		}
>   	}
> -	/*
> -	 * Recurse into the subdirectories. This recursion is harmless because
> -	 * we only have a maximum of 5 layers.
> -	 */
> -	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
> -		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
> -
> -		if (!entry->base.bo)
> -			continue;
> -
> -		r = amdgpu_vm_update_level(adev, vm, entry, level + 1);
> -		if (r)
> -			return r;
> -	}
>   
>   	return 0;
>   
> @@ -1163,7 +1156,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>    *
>    * Mark all PD level as invalid after an error.
>    */
> -static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
> +static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
> +				       struct amdgpu_vm_pt *parent)
>   {
>   	unsigned pt_idx;
>   
> @@ -1178,7 +1172,10 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
>   			continue;
>   
>   		entry->addr = ~0ULL;
> -		amdgpu_vm_invalidate_level(entry);
> +		spin_lock(&vm->status_lock);
> +		list_move(&entry->base.vm_status, &vm->relocated);
> +		spin_unlock(&vm->status_lock);
> +		amdgpu_vm_invalidate_level(vm, entry);
>   	}
>   }
>   
> @@ -1196,9 +1193,36 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>   {
>   	int r;
>   
> -	r = amdgpu_vm_update_level(adev, vm, &vm->root, 0);
> -	if (r)
> -		amdgpu_vm_invalidate_level(&vm->root);
> +	spin_lock(&vm->status_lock);
> +	while (!list_empty(&vm->relocated)) {
> +		struct amdgpu_vm_bo_base *bo_base;
> +		struct amdgpu_bo *bo;
> +
> +		bo_base = list_first_entry(&vm->relocated,
> +					   struct amdgpu_vm_bo_base,
> +					   vm_status);
> +		spin_unlock(&vm->status_lock);
> +
> +		bo = bo_base->bo->parent;
> +		if (bo) {
> +			struct amdgpu_vm_bo_base *parent;
> +			struct amdgpu_vm_pt *pt;
> +
> +			parent = list_first_entry(&bo->va,
> +						  struct amdgpu_vm_bo_base,
> +						  bo_list);
> +			pt = container_of(parent, struct amdgpu_vm_pt, base);
> +
> +			r = amdgpu_vm_update_level(adev, vm, pt);
> +			if (r) {
> +				amdgpu_vm_invalidate_level(vm, &vm->root);
> +				break;
> +			}
> +		}
> +
> +		spin_lock(&vm->status_lock);
> +	}
> +	spin_unlock(&vm->status_lock);
>   
>   	if (vm->use_cpu_for_update) {
>   		/* Flush HDP */
> @@ -1600,7 +1624,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   
>   error_free:
>   	amdgpu_job_free(job);
> -	amdgpu_vm_invalidate_level(&vm->root);
> +	amdgpu_vm_invalidate_level(vm, &vm->root);
>   	return r;
>   }
>   
> @@ -2390,9 +2414,13 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>   			continue;
>   		}
>   
> -		/* Don't add page tables to the moved state */
> -		if (bo->tbo.type == ttm_bo_type_kernel)
> +		if (bo->tbo.type == ttm_bo_type_kernel) {
> +			spin_lock(&bo_base->vm->status_lock);
> +			if (list_empty(&bo_base->vm_status))
> +				list_add(&bo_base->vm_status, &vm->relocated);
> +			spin_unlock(&bo_base->vm->status_lock);
>   			continue;
> +		}
>   
>   		spin_lock(&bo_base->vm->status_lock);
>   		list_move(&bo_base->vm_status, &bo_base->vm->moved);
> @@ -2482,6 +2510,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   		vm->reserved_vmid[i] = NULL;
>   	spin_lock_init(&vm->status_lock);
>   	INIT_LIST_HEAD(&vm->evicted);
> +	INIT_LIST_HEAD(&vm->relocated);
>   	INIT_LIST_HEAD(&vm->moved);
>   	INIT_LIST_HEAD(&vm->freed);
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 4e465e8..c3753af 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -129,6 +129,9 @@ struct amdgpu_vm {
>   	/* BOs who needs a validation */
>   	struct list_head	evicted;
>   
> +	/* PT BOs which relocated and their parent need an update */
> +	struct list_head	relocated;
> +
>   	/* BOs moved, but not yet updated in the PT */
>   	struct list_head	moved;
>   
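
Picking up the remark at the top of this reply about the growing number of
per-VM lists, a rough summary of what each list holds after this series (my
wording, not taken from the patches):

	struct amdgpu_vm {
		/* ... */
		/* evicted, relocated and moved are protected by status_lock,
		 * freed by its own freed_lock */
		struct list_head	evicted;	/* PD/PT BOs (and BOs sharing the root
							 * reservation) that were evicted and
							 * need validation before the next use */
		struct list_head	relocated;	/* PT BOs that moved, so the entry in
							 * their parent directory is stale */
		struct list_head	moved;		/* regular BOs that moved; their bo_va
							 * mappings need amdgpu_vm_bo_update() */
		struct list_head	freed;		/* mappings already unmapped, their
							 * PTEs still need to be cleared */
		/* ... */
	};

The per-bo_va valids/invalids lists and the cleared flag from patch 4 track
mapping state within a single bo_va instead.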

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 9/9] drm/amdgpu: WIP add IOCTL interface for per VM BOs
       [not found]                             ` <a897738d-ab23-8b43-9b9e-c64f7da5e065-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-28 11:55                               ` Christian König
       [not found]                                 ` <0006623b-f042-dda0-b6a2-425dc568ff03-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 30+ messages in thread
From: Christian König @ 2017-08-28 11:55 UTC (permalink / raw)
  To: zhoucm1, Marek Olšák
  Cc: Olsak, Marek, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 28.08.2017 at 06:21, zhoucm1 wrote:
>
>
> On 2017-08-27 18:03, Christian König wrote:
>> On 25.08.2017 at 21:19, Christian König wrote:
>>> On 25.08.2017 at 18:22, Marek Olšák wrote:
>>>> On Fri, Aug 25, 2017 at 3:00 PM, Christian König
>>>> <deathsimple@vodafone.de> wrote:
>>>>> On 25.08.2017 at 12:32, zhoucm1 wrote:
>>>>>>
>>>>>>
>>>>>> On 2017-08-25 17:38, Christian König wrote:
>>>>>>> From: Christian König <christian.koenig@amd.com>
>>>>>>>
>>>>>>> Add the IOCTL interface so that applications can allocate per VM 
>>>>>>> BOs.
>>>>>>>
>>>>>>> Still WIP since not all corner cases are tested yet, but this 
>>>>>>> reduces
>>>>>>> average
>>>>>>> CS overhead for 10K BOs from 21ms down to 48us.
>>>>>> Wow, cheers, eventually you get per vm bo to same reservation 
>>>>>> with PD/pts,
>>>>>> indeed save a lot of bo list.
>>>>>
>>>>> Don't cheer too loud yet, that is a completely constructed test case.
>>>>>
>>>>> So far I wasn't able to achieve any improvements with any real 
>>>>> game on this
>>>>> with Mesa.
> Thinking about it more: with too many BOs sharing one reservation, the
> reservation lock could often be busy. If evictions or destroys also happen
> frequently in the meantime, that could affect VM updates and CS submission
> as well.

That's exactly the reason why I've added code to the BO destroy path to 
avoid at least some of the problems. But yeah, that's only the tip of 
the iceberg of problems with that approach.

> Anyway, this is a very good start at reducing CS overhead, especially since
> we've seen it "reduces average CS overhead for 10K BOs from 21ms down to
> 48us".

Actually, it's not that good. See, this is a completely constructed test
case on a kernel with lockdep and KASAN enabled.

In reality we usually don't have that many BOs, and so far I wasn't able to
find much of an improvement in any real-world testing.

Regards,
Christian.
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 4/9] drm/amdgpu: add bo_va cleared flag again
       [not found]         ` <d076aae6-5620-dd91-f504-982abb2292dd-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-28 11:57           ` Christian König
  0 siblings, 0 replies; 30+ messages in thread
From: Christian König @ 2017-08-28 11:57 UTC (permalink / raw)
  To: zhoucm1, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Actually, that lockless access is removed again in the next patch.

I will reorder the patches so that it never even occurs.

Christian.

On 28.08.2017 at 06:12, zhoucm1 wrote:
> I'm not sure about this one: since you remove the status lock for the
> vm_status list, I think we need to test this carefully.
>
> Regards,
>
> David Zhou
>
>
> On 2017-08-25 17:38, Christian König wrote:
>> From: Christian König <christian.koenig@amd.com>
>>
>> We changed this to use an extra list a while back, but for the next
>> series I need a separate flag again.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  3 +++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 37 
>> ++++++++++++++----------------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     |  3 ---
>>   3 files changed, 20 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> index a288fa6..e613ba4 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> @@ -55,6 +55,9 @@ struct amdgpu_bo_va {
>>       /* mappings for this bo_va */
>>       struct list_head        invalids;
>>       struct list_head        valids;
>> +
>> +    /* If the mappings are cleared or filled */
>> +    bool                cleared;
>>   };
>>     struct amdgpu_bo {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index f621dba..16148ef 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1787,10 +1787,13 @@ int amdgpu_vm_bo_update(struct amdgpu_device 
>> *adev,
>>       else
>>           flags = 0x0;
>>   -    spin_lock(&vm->status_lock);
>> -    if (!list_empty(&bo_va->base.vm_status))
>> +    /* We access vm_status without the status lock here, but that is ok
>> +     * because when we don't clear the BO is locked and so the 
>> status can't
>> +     * change
>> +     */
>> +    if ((!clear && !list_empty(&bo_va->base.vm_status)) ||
>> +        bo_va->cleared != clear)
>>           list_splice_init(&bo_va->valids, &bo_va->invalids);
>> -    spin_unlock(&vm->status_lock);
>>         list_for_each_entry(mapping, &bo_va->invalids, list) {
>>           r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, 
>> vm,
>> @@ -1800,25 +1803,22 @@ int amdgpu_vm_bo_update(struct amdgpu_device 
>> *adev,
>>               return r;
>>       }
>>   -    if (trace_amdgpu_vm_bo_mapping_enabled()) {
>> -        list_for_each_entry(mapping, &bo_va->valids, list)
>> -            trace_amdgpu_vm_bo_mapping(mapping);
>> -
>> -        list_for_each_entry(mapping, &bo_va->invalids, list)
>> -            trace_amdgpu_vm_bo_mapping(mapping);
>> +    if (vm->use_cpu_for_update) {
>> +        /* Flush HDP */
>> +        mb();
>> +        amdgpu_gart_flush_gpu_tlb(adev, 0);
>>       }
>>         spin_lock(&vm->status_lock);
>> -    list_splice_init(&bo_va->invalids, &bo_va->valids);
>>       list_del_init(&bo_va->base.vm_status);
>> -    if (clear)
>> -        list_add(&bo_va->base.vm_status, &vm->cleared);
>>       spin_unlock(&vm->status_lock);
>>   -    if (vm->use_cpu_for_update) {
>> -        /* Flush HDP */
>> -        mb();
>> -        amdgpu_gart_flush_gpu_tlb(adev, 0);
>> +    list_splice_init(&bo_va->invalids, &bo_va->valids);
>> +    bo_va->cleared = clear;
>> +
>> +    if (trace_amdgpu_vm_bo_mapping_enabled()) {
>> +        list_for_each_entry(mapping, &bo_va->valids, list)
>> +            trace_amdgpu_vm_bo_mapping(mapping);
>>       }
>>         return 0;
>> @@ -2419,9 +2419,7 @@ void amdgpu_vm_bo_invalidate(struct 
>> amdgpu_device *adev,
>>         list_for_each_entry(bo_base, &bo->va, bo_list) {
>>           spin_lock(&bo_base->vm->status_lock);
>> -        if (list_empty(&bo_base->vm_status))
>> -            list_add(&bo_base->vm_status,
>> -                 &bo_base->vm->moved);
>> +        list_move(&bo_base->vm_status, &bo_base->vm->moved);
>>           spin_unlock(&bo_base->vm->status_lock);
>>       }
>>   }
>> @@ -2508,7 +2506,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, 
>> struct amdgpu_vm *vm,
>>           vm->reserved_vmid[i] = NULL;
>>       spin_lock_init(&vm->status_lock);
>>       INIT_LIST_HEAD(&vm->moved);
>> -    INIT_LIST_HEAD(&vm->cleared);
>>       INIT_LIST_HEAD(&vm->freed);
>>         /* create scheduler entity for page table updates */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 9347d28..e705f0f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -126,9 +126,6 @@ struct amdgpu_vm {
>>       /* BOs moved, but not yet updated in the PT */
>>       struct list_head    moved;
>>   -    /* BOs cleared in the PT because of a move */
>> -    struct list_head    cleared;
>> -
>>       /* BO mappings freed, but not yet updated in the PT */
>>       struct list_head    freed;
>
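
For reference, a short sketch of the invariant the lockless check above
relies on. This is reconstructed from the patch, not verbatim kernel code,
and the bo field path in the assertion is an assumption:

	/* when clear == false the caller has reserved the BO itself, so
	 * nobody can concurrently move bo_va->base between the vm_status
	 * lists; reading it without vm->status_lock is then safe */
	if (!clear)
		WARN_ON(!ww_mutex_is_locked(&bo_va->base.bo->tbo.resv->lock));

	if ((!clear && !list_empty(&bo_va->base.vm_status)) ||
	    bo_va->cleared != clear)
		list_splice_init(&bo_va->valids, &bo_va->invalids);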


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 9/9] drm/amdgpu: WIP add IOCTL interface for per VM BOs
       [not found]                                 ` <0006623b-f042-dda0-b6a2-425dc568ff03-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-28 12:59                                   ` Zhou, David(ChunMing)
       [not found]                                     ` <MWHPR1201MB0206D4E64E86D7AADA159A47B49E0-3iK1xFAIwjrUF/YbdlDdgWrFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
  0 siblings, 1 reply; 30+ messages in thread
From: Zhou, David(ChunMing) @ 2017-08-28 12:59 UTC (permalink / raw)
  To: Christian König
  Cc: Zhou, David(ChunMing), Olsak, Marek, Marek Olšák, amd-gfx



I will push our vulkan guys to test it, their bo list is very long.


Sent from my 坚果 Pro

Christian König <deathsimple@vodafone.de> wrote on 2017-08-28 at 7:55 PM:

On 28.08.2017 at 06:21, zhoucm1 wrote:
>
>
> On 2017-08-27 18:03, Christian König wrote:
>> On 25.08.2017 at 21:19, Christian König wrote:
>>> On 25.08.2017 at 18:22, Marek Olšák wrote:
>>>> On Fri, Aug 25, 2017 at 3:00 PM, Christian König
>>>> <deathsimple@vodafone.de> wrote:
>>>>> On 25.08.2017 at 12:32, zhoucm1 wrote:
>>>>>>
>>>>>>
>>>>>>> On 2017-08-25 17:38, Christian König wrote:
>>>>>>> From: Christian König <christian.koenig@amd.com>
>>>>>>>
>>>>>>> Add the IOCTL interface so that applications can allocate per VM
>>>>>>> BOs.
>>>>>>>
>>>>>>> Still WIP since not all corner cases are tested yet, but this
>>>>>>> reduces
>>>>>>> average
>>>>>>> CS overhead for 10K BOs from 21ms down to 48us.
>>>>>> Wow, cheers, eventually you get per vm bo to same reservation
>>>>>> with PD/pts,
>>>>>> indeed save a lot of bo list.
>>>>>
>>>>> Don't cheer to loud yet, that is a completely constructed test case.
>>>>>
>>>>> So far I wasn't able to archive any improvements with any real
>>>>> game on this
>>>>> with Mesa.
> With thinking more, too many BOs share one reservation, which could
> result in reservation lock often is busy, if eviction or destroy also
> happens often in the meaning time, then which could effect VM update
> and CS submission as well.

That's exactly the reason why I've added code to the BO destroy path to
avoid at least some of the problems. But yeah, that's only the tip of
the iceberg of problems with that approach.

> Anyway, this is very good start and try that we reduce CS overhead,
> especially we've seen "reduces average CS overhead for 10K BOs from
> 21ms down to 48us. ".

Actually, it's not that good. See this is a completely build up test
case on a kernel with lockdep and KASAN enabled.

In reality we usually don't have so many BOs and so far I wasn't able to
find much of an improvement in any real world testing.

Regards,
Christian.


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 9/9] drm/amdgpu: WIP add IOCTL interface for per VM BOs
       [not found]                                     ` <MWHPR1201MB0206D4E64E86D7AADA159A47B49E0-3iK1xFAIwjrUF/YbdlDdgWrFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
@ 2017-08-29 13:59                                       ` Christian König
       [not found]                                         ` <8e7b93cf-033b-ac3a-4c81-446db00186f5-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 30+ messages in thread
From: Christian König @ 2017-08-29 13:59 UTC (permalink / raw)
  To: Zhou, David(ChunMing); +Cc: Olsak, Marek, amd-gfx, Marek Olšák



Ok, found something that works: Xonotic at the lowest resolution and lowest
effects quality (i.e. totally CPU bound):

Without per process BOs:

Xonotic 0.8:
     pts/xonotic-1.4.0 [Resolution: 800 x 600 - Effects Quality: Low]
     Test 1 of 1
     Estimated Trial Run Count:    3
     Estimated Time To Completion: 3 Minutes
         Started Run 1 @ 21:13:50
         Started Run 2 @ 21:14:57
         Started Run 3 @ 21:16:03  [Std. Dev: 0.94%]

     Test Results:
         187.436577
         189.514724
         190.9605812

     Average: 189.30 Frames Per Second
     Minimum: 131
     Maximum: 355

With per process BOs:

Xonotic 0.8:
     pts/xonotic-1.4.0 [Resolution: 800 x 600 - Effects Quality: Low]
     Test 1 of 1
     Estimated Trial Run Count:    3
     Estimated Time To Completion: 3 Minutes
         Started Run 1 @ 21:20:05
         Started Run 2 @ 21:21:07
         Started Run 3 @ 21:22:10  [Std. Dev: 1.49%]

     Test Results:
         203.0471676
         199.6622532
         197.0954183

     Average: 199.93 Frames Per Second
     Minimum: 132
     Maximum: 349

Well, that looks like some improvement: roughly 5.6% on the average FPS.

Regards,
Christian.

On 28.08.2017 at 14:59, Zhou, David(ChunMing) wrote:
> I will push our vulkan guys to test it, their bo list is very long.
>
> Sent from my 坚果 Pro
>
> Christian König <deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> wrote on 2017-08-28 at 7:55 PM:
>
> On 28.08.2017 at 06:21, zhoucm1 wrote:
> >
> >
> > On 2017-08-27 18:03, Christian König wrote:
> >> On 25.08.2017 at 21:19, Christian König wrote:
> >>> On 25.08.2017 at 18:22, Marek Olšák wrote:
> >>>> On Fri, Aug 25, 2017 at 3:00 PM, Christian König
> >>>> <deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> wrote:
> >>>>> On 25.08.2017 at 12:32, zhoucm1 wrote:
> >>>>>>
> >>>>>>
> >>>>>>> On 2017-08-25 17:38, Christian König wrote:
> >>>>>>> From: Christian König <christian.koenig-5C7GfCeVMHo@public.gmane.org>
> >>>>>>>
> >>>>>>> Add the IOCTL interface so that applications can allocate per VM
> >>>>>>> BOs.
> >>>>>>>
> >>>>>>> Still WIP since not all corner cases are tested yet, but this
> >>>>>>> reduces
> >>>>>>> average
> >>>>>>> CS overhead for 10K BOs from 21ms down to 48us.
> >>>>>> Wow, cheers, eventually you get per vm bo to same reservation
> >>>>>> with PD/pts,
> >>>>>> indeed save a lot of bo list.
> >>>>>
> >>>>> Don't cheer to loud yet, that is a completely constructed test case.
> >>>>>
> >>>>> So far I wasn't able to archive any improvements with any real
> >>>>> game on this
> >>>>> with Mesa.
> > With thinking more, too many BOs share one reservation, which could
> > result in reservation lock often is busy, if eviction or destroy also
> > happens often in the meaning time, then which could effect VM update
> > and CS submission as well.
>
> That's exactly the reason why I've added code to the BO destroy path to
> avoid at least some of the problems. But yeah, that's only the tip of
> the iceberg of problems with that approach.
>
> > Anyway, this is very good start and try that we reduce CS overhead,
> > especially we've seen "reduces average CS overhead for 10K BOs from
> > 21ms down to 48us. ".
>
> Actually, it's not that good. See this is a completely build up test
> case on a kernel with lockdep and KASAN enabled.
>
> In reality we usually don't have so many BOs and so far I wasn't able to
> find much of an improvement in any real world testing.
>
> Regards,
> Christian.
>
>




^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 9/9] drm/amdgpu: WIP add IOCTL interface for per VM BOs
       [not found]                                         ` <8e7b93cf-033b-ac3a-4c81-446db00186f5-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-30  0:27                                           ` Marek Olšák
       [not found]                                             ` <CAAxE2A7bS9e34U-t==udoPMn-YYuP3auY4Ca+dpaboL+ob5J+g-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 30+ messages in thread
From: Marek Olšák @ 2017-08-30  0:27 UTC (permalink / raw)
  To: Christian König; +Cc: Zhou, David(ChunMing), Olsak, Marek, amd-gfx

It might be interesting to try glmark2.

Marek

On Tue, Aug 29, 2017 at 3:59 PM, Christian König
<deathsimple@vodafone.de> wrote:
> Ok, found something that works. Xonotic in lowest resolution, lowest effects
> quality (e.g. totally CPU bound):
>
> Without per process BOs:
>
> Xonotic 0.8:
>     pts/xonotic-1.4.0 [Resolution: 800 x 600 - Effects Quality: Low]
>     Test 1 of 1
>     Estimated Trial Run Count:    3
>     Estimated Time To Completion: 3 Minutes
>         Started Run 1 @ 21:13:50
>         Started Run 2 @ 21:14:57
>         Started Run 3 @ 21:16:03  [Std. Dev: 0.94%]
>
>     Test Results:
>         187.436577
>         189.514724
>         190.9605812
>
>     Average: 189.30 Frames Per Second
>     Minimum: 131
>     Maximum: 355
>
> With per process BOs:
>
> Xonotic 0.8:
>     pts/xonotic-1.4.0 [Resolution: 800 x 600 - Effects Quality: Low]
>     Test 1 of 1
>     Estimated Trial Run Count:    3
>     Estimated Time To Completion: 3 Minutes
>         Started Run 1 @ 21:20:05
>         Started Run 2 @ 21:21:07
>         Started Run 3 @ 21:22:10  [Std. Dev: 1.49%]
>
>     Test Results:
>         203.0471676
>         199.6622532
>         197.0954183
>
>     Average: 199.93 Frames Per Second
>     Minimum: 132
>     Maximum: 349
>
> Well that looks like some improvement.
>
> Regards,
> Christian.
>
>
> On 28.08.2017 at 14:59, Zhou, David(ChunMing) wrote:
>
> I will push our vulkan guys to test it, their bo list is very long.
>
> Sent from my 坚果 Pro
>
> Christian König <deathsimple@vodafone.de> wrote on 2017-08-28 at 7:55 PM:
>
> On 28.08.2017 at 06:21, zhoucm1 wrote:
>>
>>
>> On 2017-08-27 18:03, Christian König wrote:
>>> On 25.08.2017 at 21:19, Christian König wrote:
>>>> On 25.08.2017 at 18:22, Marek Olšák wrote:
>>>>> On Fri, Aug 25, 2017 at 3:00 PM, Christian König
>>>>> <deathsimple@vodafone.de> wrote:
>>>>>> On 25.08.2017 at 12:32, zhoucm1 wrote:
>>>>>>>
>>>>>>>
>>>>>>> On 2017-08-25 17:38, Christian König wrote:
>>>>>>>> From: Christian König <christian.koenig@amd.com>
>>>>>>>>
>>>>>>>> Add the IOCTL interface so that applications can allocate per VM
>>>>>>>> BOs.
>>>>>>>>
>>>>>>>> Still WIP since not all corner cases are tested yet, but this
>>>>>>>> reduces
>>>>>>>> average
>>>>>>>> CS overhead for 10K BOs from 21ms down to 48us.
>>>>>>> Wow, cheers, eventually you get per vm bo to same reservation
>>>>>>> with PD/pts,
>>>>>>> indeed save a lot of bo list.
>>>>>>
>>>>>> Don't cheer to loud yet, that is a completely constructed test case.
>>>>>>
>>>>>> So far I wasn't able to archive any improvements with any real
>>>>>> game on this
>>>>>> with Mesa.
>> With thinking more, too many BOs share one reservation, which could
>> result in reservation lock often is busy, if eviction or destroy also
>> happens often in the meaning time, then which could effect VM update
>> and CS submission as well.
>
> That's exactly the reason why I've added code to the BO destroy path to
> avoid at least some of the problems. But yeah, that's only the tip of
> the iceberg of problems with that approach.
>
>> Anyway, this is very good start and try that we reduce CS overhead,
>> especially we've seen "reduces average CS overhead for 10K BOs from
>> 21ms down to 48us. ".
>
> Actually, it's not that good. See this is a completely build up test
> case on a kernel with lockdep and KASAN enabled.
>
> In reality we usually don't have so many BOs and so far I wasn't able to
> find much of an improvement in any real world testing.
>
> Regards,
> Christian.
>
>
>
>

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 9/9] drm/amdgpu: WIP add IOCTL interface for per VM BOs
       [not found]                                             ` <CAAxE2A7bS9e34U-t==udoPMn-YYuP3auY4Ca+dpaboL+ob5J+g-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-08-30 14:58                                               ` Christian König
  0 siblings, 0 replies; 30+ messages in thread
From: Christian König @ 2017-08-30 14:58 UTC (permalink / raw)
  To: Marek Olšák; +Cc: Zhou, David(ChunMing), Olsak, Marek, amd-gfx

That was a good hint. glmark2 sees a really nice 5% improvement with 
this change.

Christian.

On 30.08.2017 at 02:27, Marek Olšák wrote:
> It might be interesting to try glmark2.
>
> Marek
>
> On Tue, Aug 29, 2017 at 3:59 PM, Christian König
> <deathsimple@vodafone.de> wrote:
>> Ok, found something that works. Xonotic in lowest resolution, lowest effects
>> quality (e.g. totally CPU bound):
>>
>> Without per process BOs:
>>
>> Xonotic 0.8:
>>      pts/xonotic-1.4.0 [Resolution: 800 x 600 - Effects Quality: Low]
>>      Test 1 of 1
>>      Estimated Trial Run Count:    3
>>      Estimated Time To Completion: 3 Minutes
>>          Started Run 1 @ 21:13:50
>>          Started Run 2 @ 21:14:57
>>          Started Run 3 @ 21:16:03  [Std. Dev: 0.94%]
>>
>>      Test Results:
>>          187.436577
>>          189.514724
>>          190.9605812
>>
>>      Average: 189.30 Frames Per Second
>>      Minimum: 131
>>      Maximum: 355
>>
>> With per process BOs:
>>
>> Xonotic 0.8:
>>      pts/xonotic-1.4.0 [Resolution: 800 x 600 - Effects Quality: Low]
>>      Test 1 of 1
>>      Estimated Trial Run Count:    3
>>      Estimated Time To Completion: 3 Minutes
>>          Started Run 1 @ 21:20:05
>>          Started Run 2 @ 21:21:07
>>          Started Run 3 @ 21:22:10  [Std. Dev: 1.49%]
>>
>>      Test Results:
>>          203.0471676
>>          199.6622532
>>          197.0954183
>>
>>      Average: 199.93 Frames Per Second
>>      Minimum: 132
>>      Maximum: 349
>>
>> Well that looks like some improvement.
>>
>> Regards,
>> Christian.
>>
>>
>> On 28.08.2017 at 14:59, Zhou, David(ChunMing) wrote:
>>
>> I will push our vulkan guys to test it, their bo list is very long.
>>
>> Sent from my 坚果 Pro
>>
>> Christian König <deathsimple@vodafone.de> wrote on 2017-08-28 at 7:55 PM:
>>
>> On 28.08.2017 at 06:21, zhoucm1 wrote:
>>>
>>> On 2017-08-27 18:03, Christian König wrote:
>>>> On 25.08.2017 at 21:19, Christian König wrote:
>>>>> On 25.08.2017 at 18:22, Marek Olšák wrote:
>>>>>> On Fri, Aug 25, 2017 at 3:00 PM, Christian König
>>>>>> <deathsimple@vodafone.de> wrote:
>>>>>>> On 25.08.2017 at 12:32, zhoucm1 wrote:
>>>>>>>>
>>>>>>>> On 2017-08-25 17:38, Christian König wrote:
>>>>>>>>> From: Christian König <christian.koenig@amd.com>
>>>>>>>>>
>>>>>>>>> Add the IOCTL interface so that applications can allocate per VM
>>>>>>>>> BOs.
>>>>>>>>>
>>>>>>>>> Still WIP since not all corner cases are tested yet, but this
>>>>>>>>> reduces
>>>>>>>>> average
>>>>>>>>> CS overhead for 10K BOs from 21ms down to 48us.
>>>>>>>> Wow, cheers, eventually you get per vm bo to same reservation
>>>>>>>> with PD/pts,
>>>>>>>> indeed save a lot of bo list.
>>>>>>> Don't cheer to loud yet, that is a completely constructed test case.
>>>>>>>
>>>>>>> So far I wasn't able to archive any improvements with any real
>>>>>>> game on this
>>>>>>> with Mesa.
>>> With thinking more, too many BOs share one reservation, which could
>>> result in reservation lock often is busy, if eviction or destroy also
>>> happens often in the meaning time, then which could effect VM update
>>> and CS submission as well.
>> That's exactly the reason why I've added code to the BO destroy path to
>> avoid at least some of the problems. But yeah, that's only the tip of
>> the iceberg of problems with that approach.
>>
>>> Anyway, this is very good start and try that we reduce CS overhead,
>>> especially we've seen "reduces average CS overhead for 10K BOs from
>>> 21ms down to 48us. ".
>> Actually, it's not that good. See this is a completely build up test
>> case on a kernel with lockdep and KASAN enabled.
>>
>> In reality we usually don't have so many BOs and so far I wasn't able to
>> find much of an improvement in any real world testing.
>>
>> Regards,
>> Christian.
>>
>>
>>
>>


^ permalink raw reply	[flat|nested] 30+ messages in thread

end of thread, other threads:[~2017-08-30 14:58 UTC | newest]

Thread overview: 30+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-25  9:38 [PATCH 1/9] drm/amdgpu: fix amdgpu_vm_bo_map trace point Christian König
     [not found] ` <1503653899-1781-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-25  9:38   ` [PATCH 2/9] drm/amdgpu: fix and cleanup VM ready check Christian König
     [not found]     ` <1503653899-1781-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-28  4:09       ` zhoucm1
2017-08-25  9:38   ` [PATCH 3/9] drm/amdgpu: cleanup GWS, GDS and OA allocation Christian König
     [not found]     ` <1503653899-1781-3-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-28  4:10       ` zhoucm1
2017-08-25  9:38   ` [PATCH 4/9] drm/amdgpu: add bo_va cleared flag again Christian König
     [not found]     ` <1503653899-1781-4-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-28  4:12       ` zhoucm1
     [not found]         ` <d076aae6-5620-dd91-f504-982abb2292dd-5C7GfCeVMHo@public.gmane.org>
2017-08-28 11:57           ` Christian König
2017-08-25  9:38   ` [PATCH 5/9] drm/amdgpu: rework moved handling in the VM Christian König
     [not found]     ` <1503653899-1781-5-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-28  6:51       ` zhoucm1
2017-08-25  9:38   ` [PATCH 6/9] drm/amdgpu: track evicted page tables v2 Christian König
     [not found]     ` <1503653899-1781-6-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-28  6:58       ` zhoucm1
2017-08-25  9:38   ` [PATCH 7/9] drm/amdgpu: rework page directory filling v2 Christian König
     [not found]     ` <1503653899-1781-7-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-28  7:06       ` zhoucm1
2017-08-25  9:38   ` [PATCH 8/9] drm/amdgpu: add support for per VM BOs Christian König
2017-08-25  9:38   ` [PATCH 9/9] drm/amdgpu: WIP add IOCTL interface " Christian König
     [not found]     ` <1503653899-1781-9-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-25 10:32       ` zhoucm1
     [not found]         ` <19c04fac-1fdd-1436-e85c-95dd4ac02b1b-5C7GfCeVMHo@public.gmane.org>
2017-08-25 13:00           ` Christian König
     [not found]             ` <9304342a-def2-187e-4e9c-d872c58cdc17-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-25 16:22               ` Marek Olšák
     [not found]                 ` <CAAxE2A5dSR-PY+zZ3VeaT7iiCmj5jfty0hv7XZjz4HgOrApQHw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-08-25 19:19                   ` Christian König
     [not found]                     ` <e7d5f9d9-ed3e-2654-9acd-c7339976006f-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-27 10:03                       ` Christian König
     [not found]                         ` <fba30bfa-aa7c-d342-b4b6-85058f5db5bf-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-28  4:21                           ` zhoucm1
     [not found]                             ` <a897738d-ab23-8b43-9b9e-c64f7da5e065-5C7GfCeVMHo@public.gmane.org>
2017-08-28 11:55                               ` Christian König
     [not found]                                 ` <0006623b-f042-dda0-b6a2-425dc568ff03-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-28 12:59                                   ` Zhou, David(ChunMing)
     [not found]                                     ` <MWHPR1201MB0206D4E64E86D7AADA159A47B49E0-3iK1xFAIwjrUF/YbdlDdgWrFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
2017-08-29 13:59                                       ` Christian König
     [not found]                                         ` <8e7b93cf-033b-ac3a-4c81-446db00186f5-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-30  0:27                                           ` Marek Olšák
     [not found]                                             ` <CAAxE2A7bS9e34U-t==udoPMn-YYuP3auY4Ca+dpaboL+ob5J+g-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-08-30 14:58                                               ` Christian König
2017-08-25 21:31       ` Felix Kuehling
     [not found]         ` <3039e134-7ee0-792a-b8ad-f01f86bc1164-5C7GfCeVMHo@public.gmane.org>
2017-08-26 13:20           ` Christian König
2017-08-28  4:08   ` [PATCH 1/9] drm/amdgpu: fix amdgpu_vm_bo_map trace point zhoucm1
