* [PATCH 1/8] drm/amdgpu: stop joining PDEs
@ 2017-12-08 16:41 Christian König
       [not found] ` <20171208164107.1567-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 21+ messages in thread
From: Christian König @ 2017-12-08 16:41 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

That rarely hits any more anyway.
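
For reference, the coalescing condition being removed only merged directory
writes when both the PDE slots and the page tables they point to happened to
be contiguous. A minimal sketch of that check, simplified from the diff below
(not the driver code itself):

#include <stdbool.h>
#include <stdint.h>

/* The old fast path only joined the next PDE write into the pending run when
 * all three conditions held; with current allocation patterns they rarely do.
 */
static bool can_join(uint64_t last_pde, uint64_t last_pt, unsigned count,
                     uint64_t pde, uint64_t pt, uint64_t incr,
                     unsigned max_update)
{
        return (last_pde + 8 * count) == pde &&
               (last_pt + incr * count) == pt &&
               count < max_update;
}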

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 41 ++++++----------------------------
 1 file changed, 7 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 3ecdbdfb04dd..d15b6edf7cce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1076,8 +1076,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 	struct amdgpu_bo *shadow;
 	struct amdgpu_ring *ring = NULL;
 	uint64_t pd_addr, shadow_addr = 0;
-	uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
-	unsigned count = 0, pt_idx, ndw = 0;
+	unsigned pt_idx, ndw = 0;
 	struct amdgpu_job *job;
 	struct amdgpu_pte_update_params params;
 	struct dma_fence *fence = NULL;
@@ -1149,41 +1148,15 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 
 		parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID;
 
-		pde = pd_addr + pt_idx * 8;
 		incr = amdgpu_bo_size(bo);
-		if (((last_pde + 8 * count) != pde) ||
-		    ((last_pt + incr * count) != pt) ||
-		    (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
-
-			if (count) {
-				if (shadow)
-					params.func(&params,
-						    last_shadow,
-						    last_pt, count,
-						    incr,
-						    AMDGPU_PTE_VALID);
-
-				params.func(&params, last_pde,
-					    last_pt, count, incr,
-					    AMDGPU_PTE_VALID);
-			}
-
-			count = 1;
-			last_pde = pde;
-			last_shadow = shadow_addr + pt_idx * 8;
-			last_pt = pt;
-		} else {
-			++count;
+		if (shadow) {
+			pde = shadow_addr + pt_idx * 8;
+			params.func(&params, pde, pt, 1, incr,
+				    AMDGPU_PTE_VALID);
 		}
-	}
 
-	if (count) {
-		if (vm->root.base.bo->shadow)
-			params.func(&params, last_shadow, last_pt,
-				    count, incr, AMDGPU_PTE_VALID);
-
-		params.func(&params, last_pde, last_pt,
-			    count, incr, AMDGPU_PTE_VALID);
+		pde = pd_addr + pt_idx * 8;
+		params.func(&params, pde, pt, 1, incr, AMDGPU_PTE_VALID);
 	}
 
 	if (!vm->use_cpu_for_update) {
-- 
2.11.0

* [PATCH 2/8] drm/amdgpu: update one PDE at a time
       [not found] ` <20171208164107.1567-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2017-12-08 16:41   ` Christian König
       [not found]     ` <20171208164107.1567-2-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2017-12-08 16:41   ` [PATCH 3/8] drm/amdgpu: avoid the modulo in amdgpu_vm_get_entry Christian König
                     ` (6 subsequent siblings)
  7 siblings, 1 reply; 21+ messages in thread
From: Christian König @ 2017-12-08 16:41 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Horribly inefficient, but avoids problems when the root PD size becomes
too big.
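
The main win is that the indirect buffer no longer has to be sized for the
worst case of a whole directory. A rough sketch of the sizing change, with
the numbers taken from the diff below (the helpers are only illustrative):

/* Before: the IB grew with the number of entries in the directory, doubled
 * when a shadow copy exists.  After: a fixed 64 dwords is enough for the two
 * PDE writes (normal + shadow) plus padding, per update.
 */
static unsigned ib_ndw_old(unsigned last_entry_used, int has_shadow)
{
        unsigned ndw = 64 + last_entry_used * 6;

        return has_shadow ? ndw * 2 : ndw;
}

static unsigned ib_ndw_new(void)
{
        return 64;
}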

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 81 +++++++++++++++-------------------
 1 file changed, 36 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d15b6edf7cce..796375484f6f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1069,17 +1069,20 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  * Makes sure all entries in @parent are up to date.
  * Returns 0 for success, error for failure.
  */
-static int amdgpu_vm_update_level(struct amdgpu_device *adev,
-				  struct amdgpu_vm *vm,
-				  struct amdgpu_vm_pt *parent)
+static int amdgpu_vm_update_pde(struct amdgpu_device *adev,
+				struct amdgpu_vm *vm,
+				struct amdgpu_vm_pt *parent,
+				struct amdgpu_vm_pt *entry)
 {
+	struct amdgpu_pte_update_params params;
+	struct amdgpu_bo *bo = entry->base.bo;
 	struct amdgpu_bo *shadow;
 	struct amdgpu_ring *ring = NULL;
 	uint64_t pd_addr, shadow_addr = 0;
-	unsigned pt_idx, ndw = 0;
 	struct amdgpu_job *job;
-	struct amdgpu_pte_update_params params;
 	struct dma_fence *fence = NULL;
+	unsigned ndw = 0;
+	uint64_t pde, pt;
 	uint32_t incr;
 
 	int r;
@@ -1102,20 +1105,14 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 		ring = container_of(vm->entity.sched, struct amdgpu_ring,
 				    sched);
 
-		/* padding, etc. */
+		/* should be sufficient for two commands plus padding, etc. */
 		ndw = 64;
 
-		/* assume the worst case */
-		ndw += parent->last_entry_used * 6;
-
 		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
-
-		if (shadow) {
+		if (shadow)
 			shadow_addr = amdgpu_bo_gpu_offset(shadow);
-			ndw *= 2;
-		} else {
+		else
 			shadow_addr = 0;
-		}
 
 		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
 		if (r)
@@ -1125,40 +1122,32 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 		params.func = amdgpu_vm_do_set_ptes;
 	}
 
+	spin_lock(&vm->status_lock);
+	list_del_init(&entry->base.vm_status);
+	spin_unlock(&vm->status_lock);
 
-	/* walk over the address space and update the directory */
-	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
-		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
-		struct amdgpu_bo *bo = entry->base.bo;
-		uint64_t pde, pt;
-
-		if (bo == NULL)
-			continue;
-
-		spin_lock(&vm->status_lock);
-		list_del_init(&entry->base.vm_status);
-		spin_unlock(&vm->status_lock);
-
-		pt = amdgpu_bo_gpu_offset(bo);
-		pt = amdgpu_gart_get_vm_pde(adev, pt);
-		/* Don't update huge pages here */
-		if ((parent->entries[pt_idx].addr & AMDGPU_PDE_PTE) ||
-		    parent->entries[pt_idx].addr == (pt | AMDGPU_PTE_VALID))
-			continue;
-
-		parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID;
+	pt = amdgpu_bo_gpu_offset(bo);
+	pt = amdgpu_gart_get_vm_pde(adev, pt);
+	/* Don't update huge pages here */
+	if (entry->addr & AMDGPU_PDE_PTE ||
+	    entry->addr == (pt | AMDGPU_PTE_VALID)) {
+		if (!vm->use_cpu_for_update)
+			amdgpu_job_free(job);
+		return 0;
+	}
 
-		incr = amdgpu_bo_size(bo);
-		if (shadow) {
-			pde = shadow_addr + pt_idx * 8;
-			params.func(&params, pde, pt, 1, incr,
-				    AMDGPU_PTE_VALID);
-		}
+	entry->addr = pt | AMDGPU_PTE_VALID;
 
-		pde = pd_addr + pt_idx * 8;
-		params.func(&params, pde, pt, 1, incr, AMDGPU_PTE_VALID);
+	incr = amdgpu_bo_size(bo);
+	if (shadow) {
+		pde = shadow_addr + (entry - parent->entries) * 8;
+		params.func(&params, pde, pt, 1, incr,
+			    AMDGPU_PTE_VALID);
 	}
 
+	pde = pd_addr + (entry - parent->entries) * 8;
+	params.func(&params, pde, pt, 1, incr, AMDGPU_PTE_VALID);
+
 	if (!vm->use_cpu_for_update) {
 		if (params.ib->length_dw == 0) {
 			amdgpu_job_free(job);
@@ -1249,14 +1238,16 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 		bo = bo_base->bo->parent;
 		if (bo) {
 			struct amdgpu_vm_bo_base *parent;
-			struct amdgpu_vm_pt *pt;
+			struct amdgpu_vm_pt *pt, *entry;
 
 			parent = list_first_entry(&bo->va,
 						  struct amdgpu_vm_bo_base,
 						  bo_list);
 			pt = container_of(parent, struct amdgpu_vm_pt, base);
+			entry = container_of(bo_base, struct amdgpu_vm_pt,
+					     base);
 
-			r = amdgpu_vm_update_level(adev, vm, pt);
+			r = amdgpu_vm_update_pde(adev, vm, pt, entry);
 			if (r) {
 				amdgpu_vm_invalidate_level(vm, &vm->root);
 				return r;
-- 
2.11.0

* [PATCH 3/8] drm/amdgpu: avoid the modulo in amdgpu_vm_get_entry
       [not found] ` <20171208164107.1567-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2017-12-08 16:41   ` [PATCH 2/8] drm/amdgpu: update one PDE at a time Christian König
@ 2017-12-08 16:41   ` Christian König
       [not found]     ` <20171208164107.1567-3-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2017-12-08 16:41   ` [PATCH 4/8] drm/amdgpu: remove last_entry_used from the VM code Christian König
                     ` (5 subsequent siblings)
  7 siblings, 1 reply; 21+ messages in thread
From: Christian König @ 2017-12-08 16:41 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

We can do this with a simple mask as well.
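
Both variants rely on the table sizes being powers of two: reducing the index
modulo the number of entries is the same as masking the already consumed high
bits off the address before the next shift. A small stand-alone check of that
equivalence (toy two-level walk with addresses counted in pages, not the
driver code):

#include <assert.h>
#include <stdint.h>

int main(void)
{
        /* two levels of 512 entries each, so 9 index bits per level */
        static const unsigned shifts[2] = { 9, 0 };
        uint64_t addr = 0x12345;        /* page number inside the VM range */
        uint64_t masked = addr;
        int level;

        for (level = 0; level < 2; level++) {
                /* old: keep the full address, reduce the index modulo 512 */
                uint64_t idx_mod = (addr >> shifts[level]) % 512;

                /* new: shift, then strip the consumed bits for the next level */
                uint64_t idx_mask = masked >> shifts[level];

                masked &= (1ULL << shifts[level]) - 1;
                assert(idx_mod == idx_mask);
        }
        return 0;
}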

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 796375484f6f..400a00fababd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1288,11 +1288,11 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
 	*parent = NULL;
 	*entry = &p->vm->root;
 	while ((*entry)->entries) {
-		unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, level++);
+		unsigned shift = amdgpu_vm_level_shift(p->adev, level++);
 
-		idx %= amdgpu_bo_size((*entry)->base.bo) / 8;
 		*parent = *entry;
-		*entry = &(*entry)->entries[idx];
+		*entry = &(*entry)->entries[addr >> shift];
+		addr &= (1ULL << shift) - 1;
 	}
 
 	if (level != p->adev->vm_manager.num_level)
-- 
2.11.0

* [PATCH 4/8] drm/amdgpu: remove last_entry_used from the VM code
       [not found] ` <20171208164107.1567-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2017-12-08 16:41   ` [PATCH 2/8] drm/amdgpu: update one PDE at a time Christian König
  2017-12-08 16:41   ` [PATCH 3/8] drm/amdgpu: avoid the modulo in amdgpu_vm_get_entry Christian König
@ 2017-12-08 16:41   ` Christian König
       [not found]     ` <20171208164107.1567-4-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2017-12-08 16:41   ` [PATCH 5/8] drm/amdgpu: remove keeping the addr of the VM PDs Christian König
                     ` (4 subsequent siblings)
  7 siblings, 1 reply; 21+ messages in thread
From: Christian König @ 2017-12-08 16:41 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Not needed any more.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 52 +++++++++++++++++++---------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  1 -
 2 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 400a00fababd..ae5451bf5873 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -329,9 +329,6 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 	    to >= amdgpu_vm_num_entries(adev, level))
 		return -EINVAL;
 
-	if (to > parent->last_entry_used)
-		parent->last_entry_used = to;
-
 	++level;
 	saddr = saddr & ((1 << shift) - 1);
 	eaddr = eaddr & ((1 << shift) - 1);
@@ -1187,16 +1184,19 @@ static int amdgpu_vm_update_pde(struct amdgpu_device *adev,
  *
  * Mark all PD level as invalid after an error.
  */
-static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
-				       struct amdgpu_vm_pt *parent)
+static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
+				       struct amdgpu_vm *vm,
+				       struct amdgpu_vm_pt *parent,
+				       unsigned level)
 {
-	unsigned pt_idx;
+	unsigned pt_idx, num_entries;
 
 	/*
 	 * Recurse into the subdirectories. This recursion is harmless because
 	 * we only have a maximum of 5 layers.
 	 */
-	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
+	num_entries = amdgpu_vm_num_entries(adev, level);
+	for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) {
 		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
 
 		if (!entry->base.bo)
@@ -1207,7 +1207,7 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
 		if (list_empty(&entry->base.vm_status))
 			list_add(&entry->base.vm_status, &vm->relocated);
 		spin_unlock(&vm->status_lock);
-		amdgpu_vm_invalidate_level(vm, entry);
+		amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
 	}
 }
 
@@ -1249,7 +1249,8 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 
 			r = amdgpu_vm_update_pde(adev, vm, pt, entry);
 			if (r) {
-				amdgpu_vm_invalidate_level(vm, &vm->root);
+				amdgpu_vm_invalidate_level(adev, vm,
+							   &vm->root, 0);
 				return r;
 			}
 			spin_lock(&vm->status_lock);
@@ -1652,7 +1653,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 error_free:
 	amdgpu_job_free(job);
-	amdgpu_vm_invalidate_level(vm, &vm->root);
+	amdgpu_vm_invalidate_level(adev, vm, &vm->root, 0);
 	return r;
 }
 
@@ -2716,26 +2717,31 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 /**
  * amdgpu_vm_free_levels - free PD/PT levels
  *
- * @level: PD/PT starting level to free
+ * @adev: amdgpu device structure
+ * @parent: PD/PT starting level to free
+ * @level: level of parent structure
  *
  * Free the page directory or page table level and all sub levels.
  */
-static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)
+static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
+				  struct amdgpu_vm_pt *parent,
+				  unsigned level)
 {
-	unsigned i;
+	unsigned i, num_entries = amdgpu_vm_num_entries(adev, level);
 
-	if (level->base.bo) {
-		list_del(&level->base.bo_list);
-		list_del(&level->base.vm_status);
-		amdgpu_bo_unref(&level->base.bo->shadow);
-		amdgpu_bo_unref(&level->base.bo);
+	if (parent->base.bo) {
+		list_del(&parent->base.bo_list);
+		list_del(&parent->base.vm_status);
+		amdgpu_bo_unref(&parent->base.bo->shadow);
+		amdgpu_bo_unref(&parent->base.bo);
 	}
 
-	if (level->entries)
-		for (i = 0; i <= level->last_entry_used; i++)
-			amdgpu_vm_free_levels(&level->entries[i]);
+	if (parent->entries)
+		for (i = 0; i < num_entries; i++)
+			amdgpu_vm_free_levels(adev, &parent->entries[i],
+					      level + 1);
 
-	kvfree(level->entries);
+	kvfree(parent->entries);
 }
 
 /**
@@ -2793,7 +2799,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	if (r) {
 		dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
 	} else {
-		amdgpu_vm_free_levels(&vm->root);
+		amdgpu_vm_free_levels(adev, &vm->root, 0);
 		amdgpu_bo_unreserve(root);
 	}
 	amdgpu_bo_unref(&root);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 43ea131dd411..7a308a1ea048 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -141,7 +141,6 @@ struct amdgpu_vm_pt {
 
 	/* array of page tables, one for each directory entry */
 	struct amdgpu_vm_pt		*entries;
-	unsigned			last_entry_used;
 };
 
 #define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr))
-- 
2.11.0

* [PATCH 5/8] drm/amdgpu: remove keeping the addr of the VM PDs
       [not found] ` <20171208164107.1567-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (2 preceding siblings ...)
  2017-12-08 16:41   ` [PATCH 4/8] drm/amdgpu: remove last_entry_used from the VM code Christian König
@ 2017-12-08 16:41   ` Christian König
       [not found]     ` <20171208164107.1567-5-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2017-12-08 16:41   ` [PATCH 6/8] drm/amdgpu: batch PDE updates again Christian König
                     ` (3 subsequent siblings)
  7 siblings, 1 reply; 21+ messages in thread
From: Christian König @ 2017-12-08 16:41 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

No more double housekeeping: drop the cached PDE address and keep only a
huge page flag.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 13 ++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +-
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ae5451bf5873..abb3d4fb49f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -383,7 +383,6 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 			spin_lock(&vm->status_lock);
 			list_add(&entry->base.vm_status, &vm->relocated);
 			spin_unlock(&vm->status_lock);
-			entry->addr = 0;
 		}
 
 		if (level < adev->vm_manager.num_level) {
@@ -1126,15 +1125,12 @@ static int amdgpu_vm_update_pde(struct amdgpu_device *adev,
 	pt = amdgpu_bo_gpu_offset(bo);
 	pt = amdgpu_gart_get_vm_pde(adev, pt);
 	/* Don't update huge pages here */
-	if (entry->addr & AMDGPU_PDE_PTE ||
-	    entry->addr == (pt | AMDGPU_PTE_VALID)) {
+	if (entry->huge) {
 		if (!vm->use_cpu_for_update)
 			amdgpu_job_free(job);
 		return 0;
 	}
 
-	entry->addr = pt | AMDGPU_PTE_VALID;
-
 	incr = amdgpu_bo_size(bo);
 	if (shadow) {
 		pde = shadow_addr + (entry - parent->entries) * 8;
@@ -1202,7 +1198,6 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
 		if (!entry->base.bo)
 			continue;
 
-		entry->addr = ~0ULL;
 		spin_lock(&vm->status_lock);
 		if (list_empty(&entry->base.vm_status))
 			list_add(&entry->base.vm_status, &vm->relocated);
@@ -1335,10 +1330,10 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
 		flags |= AMDGPU_PDE_PTE;
 	}
 
-	if (entry->addr == (dst | flags))
+	if (!entry->huge && !(flags & AMDGPU_PDE_PTE))
 		return;
 
-	entry->addr = (dst | flags);
+	entry->huge = !!(flags & AMDGPU_PDE_PTE);
 
 	if (use_cpu_update) {
 		/* In case a huge page is replaced with a system
@@ -1412,7 +1407,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		amdgpu_vm_handle_huge_pages(params, entry, parent,
 					    nptes, dst, flags);
 		/* We don't need to update PTEs for huge pages */
-		if (entry->addr & AMDGPU_PDE_PTE)
+		if (entry->huge)
 			continue;
 
 		pt = entry->base.bo;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 7a308a1ea048..228f63e9ac5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -137,7 +137,7 @@ struct amdgpu_vm_bo_base {
 
 struct amdgpu_vm_pt {
 	struct amdgpu_vm_bo_base	base;
-	uint64_t			addr;
+	bool				huge;
 
 	/* array of page tables, one for each directory entry */
 	struct amdgpu_vm_pt		*entries;
-- 
2.11.0

* [PATCH 6/8] drm/amdgpu: batch PDE updates again
       [not found] ` <20171208164107.1567-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (3 preceding siblings ...)
  2017-12-08 16:41   ` [PATCH 5/8] drm/amdgpu: remove keeping the addr of the VM PDs Christian König
@ 2017-12-08 16:41   ` Christian König
       [not found]     ` <20171208164107.1567-6-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2017-12-08 16:41   ` [PATCH 7/8] drm/amdgpu: allow get_vm_pde to change flags as well Christian König
                     ` (2 subsequent siblings)
  7 siblings, 1 reply; 21+ messages in thread
From: Christian König @ 2017-12-08 16:41 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Now, instead of one submission for each PDE, batch them together over
all PDs that need an update.
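
Roughly, all pending PDE writes are now funneled into a single indirect
buffer, which is only submitted once it fills up or the relocated list runs
dry. A toy model of that loop (the per-PDE cost below is invented; the buffer
size and the 32-dword headroom are taken from the diff):

#include <stdio.h>

#define IB_NDW   (512 * 8)      /* dwords reserved per job, as in the diff */
#define PDE_NDW  10             /* made-up cost of one PDE write */

int main(void)
{
        int pending = 2000;     /* PDEs whose parents need an update */
        int total = pending, submissions = 0;

        while (pending > 0) {
                int used = 0;

                while (pending > 0) {
                        used += PDE_NDW;        /* emit one PDE write */
                        pending--;
                        if (IB_NDW - used < 32) /* keep headroom, then flush */
                                break;
                }
                submissions++;                  /* one job submit per batch */
        }
        printf("%d PDE updates in %d submissions\n", total, submissions);
        return 0;
}

Before this patch every PDE update was its own job; with the batching above
the same amount of work goes out in a handful of submissions.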

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 206 +++++++++++++++------------------
 1 file changed, 94 insertions(+), 112 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index abb3d4fb49f4..6a35c58100f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1056,121 +1056,46 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 }
 
 /*
- * amdgpu_vm_update_level - update a single level in the hierarchy
+ * amdgpu_vm_update_pde - update a single level in the hierarchy
  *
- * @adev: amdgpu_device pointer
+ * @param: parameters for the update
  * @vm: requested vm
  * @parent: parent directory
+ * @entry: entry to update
  *
- * Makes sure all entries in @parent are up to date.
- * Returns 0 for success, error for failure.
+ * Makes sure the requested entry in parent is up to date.
  */
-static int amdgpu_vm_update_pde(struct amdgpu_device *adev,
-				struct amdgpu_vm *vm,
-				struct amdgpu_vm_pt *parent,
-				struct amdgpu_vm_pt *entry)
+static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
+				 struct amdgpu_vm *vm,
+				 struct amdgpu_vm_pt *parent,
+				 struct amdgpu_vm_pt *entry)
 {
-	struct amdgpu_pte_update_params params;
-	struct amdgpu_bo *bo = entry->base.bo;
-	struct amdgpu_bo *shadow;
-	struct amdgpu_ring *ring = NULL;
+	struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL;
 	uint64_t pd_addr, shadow_addr = 0;
-	struct amdgpu_job *job;
-	struct dma_fence *fence = NULL;
-	unsigned ndw = 0;
 	uint64_t pde, pt;
-	uint32_t incr;
 
-	int r;
-
-	if (!parent->entries)
-		return 0;
-
-	memset(&params, 0, sizeof(params));
-	params.adev = adev;
-	shadow = parent->base.bo->shadow;
+	/* Don't update huge pages here */
+	if (entry->huge)
+		return;
 
 	if (vm->use_cpu_for_update) {
 		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
-		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
-		if (unlikely(r))
-			return r;
-
-		params.func = amdgpu_vm_cpu_set_ptes;
 	} else {
-		ring = container_of(vm->entity.sched, struct amdgpu_ring,
-				    sched);
-
-		/* should be sufficient for two commands plus padding, etc. */
-		ndw = 64;
-
 		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
+		shadow = parent->base.bo->shadow;
 		if (shadow)
 			shadow_addr = amdgpu_bo_gpu_offset(shadow);
-		else
-			shadow_addr = 0;
-
-		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
-		if (r)
-			return r;
-
-		params.ib = &job->ibs[0];
-		params.func = amdgpu_vm_do_set_ptes;
 	}
 
-	spin_lock(&vm->status_lock);
-	list_del_init(&entry->base.vm_status);
-	spin_unlock(&vm->status_lock);
-
 	pt = amdgpu_bo_gpu_offset(bo);
-	pt = amdgpu_gart_get_vm_pde(adev, pt);
-	/* Don't update huge pages here */
-	if (entry->huge) {
-		if (!vm->use_cpu_for_update)
-			amdgpu_job_free(job);
-		return 0;
-	}
-
-	incr = amdgpu_bo_size(bo);
+	pt = amdgpu_gart_get_vm_pde(params->adev, pt);
 	if (shadow) {
 		pde = shadow_addr + (entry - parent->entries) * 8;
-		params.func(&params, pde, pt, 1, incr,
-			    AMDGPU_PTE_VALID);
+		params->func(params, pde, pt, 1, 0, AMDGPU_PTE_VALID);
 	}
 
 	pde = pd_addr + (entry - parent->entries) * 8;
-	params.func(&params, pde, pt, 1, incr, AMDGPU_PTE_VALID);
-
-	if (!vm->use_cpu_for_update) {
-		if (params.ib->length_dw == 0) {
-			amdgpu_job_free(job);
-		} else {
-			amdgpu_ring_pad_ib(ring, params.ib);
-			amdgpu_sync_resv(adev, &job->sync,
-					 parent->base.bo->tbo.resv,
-					 AMDGPU_FENCE_OWNER_VM, false);
-			if (shadow)
-				amdgpu_sync_resv(adev, &job->sync,
-						 shadow->tbo.resv,
-						 AMDGPU_FENCE_OWNER_VM, false);
-
-			WARN_ON(params.ib->length_dw > ndw);
-			r = amdgpu_job_submit(job, ring, &vm->entity,
-					AMDGPU_FENCE_OWNER_VM, &fence);
-			if (r)
-				goto error_free;
-
-			amdgpu_bo_fence(parent->base.bo, fence, true);
-			dma_fence_put(vm->last_update);
-			vm->last_update = fence;
-		}
-	}
-
-	return 0;
-
-error_free:
-	amdgpu_job_free(job);
-	return r;
+	params->func(params, pde, pt, 1, 0, AMDGPU_PTE_VALID);
 }
 
 /*
@@ -1218,41 +1143,63 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 				 struct amdgpu_vm *vm)
 {
+	struct amdgpu_pte_update_params params;
+	struct amdgpu_job *job;
+	unsigned ndw = 0;
 	int r = 0;
 
+	if (list_empty(&vm->relocated))
+		return 0;
+
+restart:
+	memset(&params, 0, sizeof(params));
+	params.adev = adev;
+
+	if (vm->use_cpu_for_update) {
+		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
+		if (unlikely(r))
+			return r;
+
+		params.func = amdgpu_vm_cpu_set_ptes;
+	} else {
+		ndw = 512 * 8;
+		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
+		if (r)
+			return r;
+
+		params.ib = &job->ibs[0];
+		params.func = amdgpu_vm_do_set_ptes;
+	}
+
 	spin_lock(&vm->status_lock);
 	while (!list_empty(&vm->relocated)) {
-		struct amdgpu_vm_bo_base *bo_base;
+		struct amdgpu_vm_bo_base *bo_base, *parent;
+		struct amdgpu_vm_pt *pt, *entry;
 		struct amdgpu_bo *bo;
 
 		bo_base = list_first_entry(&vm->relocated,
 					   struct amdgpu_vm_bo_base,
 					   vm_status);
+		list_del_init(&bo_base->vm_status);
 		spin_unlock(&vm->status_lock);
 
 		bo = bo_base->bo->parent;
-		if (bo) {
-			struct amdgpu_vm_bo_base *parent;
-			struct amdgpu_vm_pt *pt, *entry;
-
-			parent = list_first_entry(&bo->va,
-						  struct amdgpu_vm_bo_base,
-						  bo_list);
-			pt = container_of(parent, struct amdgpu_vm_pt, base);
-			entry = container_of(bo_base, struct amdgpu_vm_pt,
-					     base);
-
-			r = amdgpu_vm_update_pde(adev, vm, pt, entry);
-			if (r) {
-				amdgpu_vm_invalidate_level(adev, vm,
-							   &vm->root, 0);
-				return r;
-			}
+		if (!bo) {
 			spin_lock(&vm->status_lock);
-		} else {
-			spin_lock(&vm->status_lock);
-			list_del_init(&bo_base->vm_status);
+			continue;
 		}
+
+		parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
+					  bo_list);
+		pt = container_of(parent, struct amdgpu_vm_pt, base);
+		entry = container_of(bo_base, struct amdgpu_vm_pt, base);
+
+		amdgpu_vm_update_pde(&params, vm, pt, entry);
+
+		spin_lock(&vm->status_lock);
+		if (!vm->use_cpu_for_update &&
+		    (ndw - params.ib->length_dw) < 32)
+			break;
 	}
 	spin_unlock(&vm->status_lock);
 
@@ -1260,8 +1207,43 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 		/* Flush HDP */
 		mb();
 		amdgpu_gart_flush_gpu_tlb(adev, 0);
+	} else if (params.ib->length_dw == 0) {
+		amdgpu_job_free(job);
+	} else {
+		struct amdgpu_bo *root = vm->root.base.bo;
+		struct amdgpu_ring *ring;
+		struct dma_fence *fence;
+
+		ring = container_of(vm->entity.sched, struct amdgpu_ring,
+				    sched);
+
+		amdgpu_ring_pad_ib(ring, params.ib);
+		amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
+				 AMDGPU_FENCE_OWNER_VM, false);
+		if (root->shadow)
+			amdgpu_sync_resv(adev, &job->sync,
+					 root->shadow->tbo.resv,
+					 AMDGPU_FENCE_OWNER_VM, false);
+
+		WARN_ON(params.ib->length_dw > ndw);
+		r = amdgpu_job_submit(job, ring, &vm->entity,
+				      AMDGPU_FENCE_OWNER_VM, &fence);
+		if (r)
+			goto error;
+
+		amdgpu_bo_fence(root, fence, true);
+		dma_fence_put(vm->last_update);
+		vm->last_update = fence;
 	}
 
+	if (!list_empty(&vm->relocated))
+		goto restart;
+
+	return 0;
+
+error:
+	amdgpu_vm_invalidate_level(adev, vm, &vm->root, 0);
+	amdgpu_job_free(job);
 	return r;
 }
 
-- 
2.11.0

* [PATCH 7/8] drm/amdgpu: allow get_vm_pde to change flags as well
       [not found] ` <20171208164107.1567-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (4 preceding siblings ...)
  2017-12-08 16:41   ` [PATCH 6/8] drm/amdgpu: batch PDE updates again Christian König
@ 2017-12-08 16:41   ` Christian König
  2017-12-08 16:41   ` [PATCH 8/8] drm/amdgpu: implement 2+1 PD support for Raven Christian König
  2017-12-11  5:15   ` [PATCH 1/8] drm/amdgpu: stop joining PDEs Chunming Zhou
  7 siblings, 0 replies; 21+ messages in thread
From: Christian König @ 2017-12-08 16:41 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Also provide the level for which we need a PDE.
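
The hook changes from returning a translated address to rewriting both the
address and the flags in place. A stand-alone illustration of the new call
pattern (the bit values and the offset below are placeholders, not the real
gmc_v9_0 behaviour, which is in the diff):

#include <stdint.h>
#include <stdio.h>

#define TOY_PTE_VALID  (1ULL << 0)
#define TOY_PDE_PTE    (1ULL << 54)

/* toy version of the new hook shape: level plus in/out addr and flags */
static void toy_get_vm_pde(int level, uint64_t *addr, uint64_t *flags)
{
        if (!(*flags & TOY_PDE_PTE))
                *addr += 0x100000ULL;   /* e.g. apply a vram base offset */
        if (level == 0)
                *flags |= 1ULL << 59;   /* room for per-level extra bits */
}

int main(void)
{
        uint64_t pd_addr = 0x200000, flags = TOY_PTE_VALID;

        /* callers now seed the flags and let the hook adjust both values;
         * the ring emit_vm_flush paths pass level = -1 as in the diff.
         */
        toy_get_vm_pde(-1, &pd_addr, &flags);
        pd_addr |= flags;
        printf("pd_addr with flags: 0x%llx\n", (unsigned long long)pd_addr);
        return 0;
}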

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 20 +++++++++++++-------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  |  5 +++--
 drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c  |  6 +++---
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c  |  6 +++---
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c  |  6 +++---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 10 ++++++----
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  5 +++--
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c  | 12 +++++++-----
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c  |  5 +++--
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c  | 12 +++++++-----
 11 files changed, 54 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 7db9a7fa15a8..e5e0fbd43273 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -346,7 +346,8 @@ struct amdgpu_gart_funcs {
 	uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
 				     uint32_t flags);
 	/* get the pde for a given mc addr */
-	u64 (*get_vm_pde)(struct amdgpu_device *adev, u64 addr);
+	void (*get_vm_pde)(struct amdgpu_device *adev, int level,
+			   u64 *dst, u64 *flags);
 	uint32_t (*get_invalidate_req)(unsigned int vm_id);
 };
 
@@ -1783,7 +1784,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
 #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
 #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
-#define amdgpu_gart_get_vm_pde(adev, addr) (adev)->gart.gart_funcs->get_vm_pde((adev), (addr))
+#define amdgpu_gart_get_vm_pde(adev, level, dst, flags) (adev)->gart.gart_funcs->get_vm_pde((adev), (level), (dst), (flags))
 #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
 #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6a35c58100f7..61e4359a22ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1070,9 +1070,10 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
 				 struct amdgpu_vm_pt *parent,
 				 struct amdgpu_vm_pt *entry)
 {
-	struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL;
+	struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL, *pbo;
 	uint64_t pd_addr, shadow_addr = 0;
-	uint64_t pde, pt;
+	uint64_t pde, pt, flags;
+	unsigned level;
 
 	/* Don't update huge pages here */
 	if (entry->huge)
@@ -1087,15 +1088,19 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
 			shadow_addr = amdgpu_bo_gpu_offset(shadow);
 	}
 
+	for (level = 0, pbo = parent->base.bo->parent; pbo; ++level)
+		pbo = pbo->parent;
+
 	pt = amdgpu_bo_gpu_offset(bo);
-	pt = amdgpu_gart_get_vm_pde(params->adev, pt);
+	flags = AMDGPU_PTE_VALID;
+	amdgpu_gart_get_vm_pde(params->adev, level, &pt, &flags);
 	if (shadow) {
 		pde = shadow_addr + (entry - parent->entries) * 8;
-		params->func(params, pde, pt, 1, 0, AMDGPU_PTE_VALID);
+		params->func(params, pde, pt, 1, 0, flags);
 	}
 
 	pde = pd_addr + (entry - parent->entries) * 8;
-	params->func(params, pde, pt, 1, 0, AMDGPU_PTE_VALID);
+	params->func(params, pde, pt, 1, 0, flags);
 }
 
 /*
@@ -1305,7 +1310,6 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
 	    !(flags & AMDGPU_PTE_VALID)) {
 
 		dst = amdgpu_bo_gpu_offset(entry->base.bo);
-		dst = amdgpu_gart_get_vm_pde(p->adev, dst);
 		flags = AMDGPU_PTE_VALID;
 	} else {
 		/* Set the huge page flag to stop scanning at this PDE */
@@ -1314,9 +1318,11 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
 
 	if (!entry->huge && !(flags & AMDGPU_PDE_PTE))
 		return;
-
 	entry->huge = !!(flags & AMDGPU_PDE_PTE);
 
+	amdgpu_gart_get_vm_pde(p->adev, p->adev->vm_manager.num_level - 1,
+			       &dst, &flags);
+
 	if (use_cpu_update) {
 		/* In case a huge page is replaced with a system
 		 * memory mapping, p->pages_addr != NULL and
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 6c5289ae67be..571e15acd5ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3750,10 +3750,11 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
+	uint64_t flags = AMDGPU_PTE_VALID;
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
-	pd_addr |= AMDGPU_PTE_VALID;
+	amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
+	pd_addr |= flags;
 
 	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
 				   hub->ctx0_ptb_addr_lo32 + (2 * vm_id),
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 468281f10e8d..279f43a2b1a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -395,10 +395,10 @@ static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev,
 	return pte_flag;
 }
 
-static uint64_t gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
+static void gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, int level,
+				uint64_t *addr, uint64_t *flags)
 {
-	BUG_ON(addr & 0xFFFFFF0000000FFFULL);
-	return addr;
+	BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
 }
 
 static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 68a85051f4b7..9c28e18741ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -480,10 +480,10 @@ static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev,
 	return pte_flag;
 }
 
-static uint64_t gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
+static void gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, int level,
+				uint64_t *addr, uint64_t *flags)
 {
-	BUG_ON(addr & 0xFFFFFF0000000FFFULL);
-	return addr;
+	BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 46ec97e70e5c..efed20ac4a01 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -677,10 +677,10 @@ static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev,
 	return pte_flag;
 }
 
-static uint64_t gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
+static void gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, int level,
+				uint64_t *addr, uint64_t *flags)
 {
-	BUG_ON(addr & 0xFFFFFF0000000FFFULL);
-	return addr;
+	BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index cc972153d401..0fe2a4e782ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -474,11 +474,13 @@ static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev,
 	return pte_flag;
 }
 
-static u64 gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, u64 addr)
+static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
+				uint64_t *addr, uint64_t *flags)
 {
-	addr = adev->vm_manager.vram_base_offset + addr - adev->mc.vram_start;
-	BUG_ON(addr & 0xFFFF00000000003FULL);
-	return addr;
+	if (!(*flags & AMDGPU_PDE_PTE))
+		*addr = adev->vm_manager.vram_base_offset + *addr -
+			adev->mc.vram_start;
+	BUG_ON(*addr & 0xFFFF00000000003FULL);
 }
 
 static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 4c55f21e37a8..775464c78558 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1156,10 +1156,11 @@ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
+	uint64_t flags = AMDGPU_PTE_VALID;
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
-	pd_addr |= AMDGPU_PTE_VALID;
+	amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
+	pd_addr |= flags;
 
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
 			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 660fa41dc877..320a4c6e412f 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -1283,11 +1283,12 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
-	uint32_t data0, data1, mask;
+	uint64_t flags = AMDGPU_PTE_VALID;
 	unsigned eng = ring->vm_inv_eng;
+	uint32_t data0, data1, mask;
 
-	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
-	pd_addr |= AMDGPU_PTE_VALID;
+	amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
+	pd_addr |= flags;
 
 	data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
 	data1 = upper_32_bits(pd_addr);
@@ -1324,10 +1325,11 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
+	uint64_t flags = AMDGPU_PTE_VALID;
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
-	pd_addr |= AMDGPU_PTE_VALID;
+	amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
+	pd_addr |= flags;
 
 	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
 	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index f2f713650074..e37ac6d4196e 100755
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -970,10 +970,11 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
+	uint64_t flags = AMDGPU_PTE_VALID;
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
-	pd_addr |= AMDGPU_PTE_VALID;
+	amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
+	pd_addr |= flags;
 
 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
 	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index e4673f792545..e6587237f6cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -879,11 +879,12 @@ static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
-	uint32_t data0, data1, mask;
+	uint64_t flags = AMDGPU_PTE_VALID;
 	unsigned eng = ring->vm_inv_eng;
+	uint32_t data0, data1, mask;
 
-	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
-	pd_addr |= AMDGPU_PTE_VALID;
+	amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
+	pd_addr |= flags;
 
 	data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
 	data1 = upper_32_bits(pd_addr);
@@ -1011,10 +1012,11 @@ static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
+	uint64_t flags = AMDGPU_PTE_VALID;
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
-	pd_addr |= AMDGPU_PTE_VALID;
+	amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
+	pd_addr |= flags;
 
 	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
 	amdgpu_ring_write(ring,
-- 
2.11.0

* [PATCH 8/8] drm/amdgpu: implement 2+1 PD support for Raven
       [not found] ` <20171208164107.1567-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (5 preceding siblings ...)
  2017-12-08 16:41   ` [PATCH 7/8] drm/amdgpu: allow get_vm_pde to change flags as well Christian König
@ 2017-12-08 16:41   ` Christian König
       [not found]     ` <20171208164107.1567-8-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2017-12-11  5:15   ` [PATCH 1/8] drm/amdgpu: stop joining PDEs Chunming Zhou
  7 siblings, 1 reply; 21+ messages in thread
From: Christian König @ 2017-12-08 16:41 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Instead of falling back to a 2-level layout with a very limited address
space, use 2+1 PD support and get 128TB + 512GB of virtual address space.
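
For reference, a back-of-the-envelope breakdown of where 128TB + 512GB comes
from, assuming 4KB pages and 9 bits of index per level as used elsewhere in
the series:

  page                              : 2^12        = 4KB
  page table block (512 PTEs)       : 2^(12+9)    = 2MB
  PD0 of 512 translate-further
  entries, one page table each      : 2^(12+9+9)  = 1GB per root PDE
  root PD                           : (128TB + 512GB) / 1GB = 131584 PDEs

So the root directory grows to roughly a megabyte of PDEs (131584 * 8 bytes),
which is what the earlier patches in this series prepare the update path for.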

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h   |  3 ++
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 42 ++++++++++++++++++---------
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 26 ++++++++++++++---
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  | 49 ++++++++++++++++++++------------
 5 files changed, 86 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e5e0fbd43273..9517c0f76d27 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -541,6 +541,7 @@ struct amdgpu_mc {
 	u64					private_aperture_end;
 	/* protects concurrent invalidation */
 	spinlock_t		invalidate_lock;
+	bool			translate_further;
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 228f63e9ac5e..79134f0c26d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -69,6 +69,9 @@ struct amdgpu_bo_list_entry;
 /* PDE is handled as PTE for VEGA10 */
 #define AMDGPU_PDE_PTE		(1ULL << 54)
 
+/* PTE is handled as PDE for VEGA10 */
+#define AMDGPU_PTE_TRANSLATE_FURTHER	(1ULL << 56)
+
 /* VEGA10 only */
 #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
 #define AMDGPU_PTE_MTYPE_MASK	AMDGPU_PTE_MTYPE(3ULL)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index f1effadfbaa6..a56f77259130 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -144,8 +144,15 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
 	WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
 
 	tmp = mmVM_L2_CNTL3_DEFAULT;
-	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
-	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+	if (adev->mc.translate_further) {
+		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
+		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+				    L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+	} else {
+		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
+		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+				    L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+	}
 	WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp);
 
 	tmp = mmVM_L2_CNTL4_DEFAULT;
@@ -183,31 +190,40 @@ static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
 
 static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
 {
-	int i;
+	unsigned num_level, block_size;
 	uint32_t tmp;
+	int i;
+
+	num_level = adev->vm_manager.num_level;
+	block_size = adev->vm_manager.block_size;
+	if (adev->mc.translate_further)
+		num_level -= 1;
+	else
+		block_size -= 9;
 
 	for (i = 0; i <= 14; i++) {
 		tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
-				    adev->vm_manager.num_level);
+				    num_level);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+				    1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				PAGE_TABLE_BLOCK_SIZE,
-				adev->vm_manager.block_size - 9);
+				    PAGE_TABLE_BLOCK_SIZE,
+				    block_size);
 		/* Send no-retry XNACK on fault to suppress VM fault storm. */
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
 				    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 0fe2a4e782ff..d6a19514c92b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -481,6 +481,21 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
 		*addr = adev->vm_manager.vram_base_offset + *addr -
 			adev->mc.vram_start;
 	BUG_ON(*addr & 0xFFFF00000000003FULL);
+
+	if (!adev->mc.translate_further)
+		return;
+
+	if (level == 0) {
+		/* Set the block size */
+		if (!(*flags & AMDGPU_PDE_PTE))
+			*flags |= 9ULL << 59;
+
+	} else if (level == 1) {
+		if (*flags & AMDGPU_PDE_PTE)
+			*flags &= ~AMDGPU_PDE_PTE;
+		else
+			*flags |= AMDGPU_PTE_TRANSLATE_FURTHER;
+	}
 }
 
 static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
@@ -771,11 +786,14 @@ static int gmc_v9_0_sw_init(void *handle)
 	switch (adev->asic_type) {
 	case CHIP_RAVEN:
 		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
-		if (adev->rev_id == 0x0 || adev->rev_id == 0x1)
+		if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
 			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
-		else
-			/* vm_size is 64GB for legacy 2-level page support */
-			amdgpu_vm_adjust_size(adev, 64, 9, 1, 48);
+		} else {
+			/* vm_size is 128TB + 512GB for legacy 3-level page support */
+			amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
+			adev->mc.translate_further =
+				adev->vm_manager.num_level > 1;
+		}
 		break;
 	case CHIP_VEGA10:
 		/* XXX Don't know how to get VRAM type yet. */
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index bd160d8700e0..a88f43b097dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -156,10 +156,15 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
 	WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
 
-	tmp = mmVM_L2_CNTL3_DEFAULT;
-	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
-	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
-	WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp);
+	if (adev->mc.translate_further) {
+		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
+		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+				    L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+	} else {
+		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
+		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
+				    L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+	}
 
 	tmp = mmVM_L2_CNTL4_DEFAULT;
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
@@ -197,32 +202,40 @@ static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
 
 static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
 {
-	int i;
+	unsigned num_level, block_size;
 	uint32_t tmp;
+	int i;
+
+	num_level = adev->vm_manager.num_level;
+	block_size = adev->vm_manager.block_size;
+	if (adev->mc.translate_further)
+		num_level -= 1;
+	else
+		block_size -= 9;
 
 	for (i = 0; i <= 14; i++) {
 		tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i);
+		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+				    num_level);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				ENABLE_CONTEXT, 1);
-		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				PAGE_TABLE_DEPTH, adev->vm_manager.num_level);
-		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+				    1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+				    EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
-				PAGE_TABLE_BLOCK_SIZE,
-				adev->vm_manager.block_size - 9);
+				    PAGE_TABLE_BLOCK_SIZE,
+				    block_size);
 		/* Send no-retry XNACK on fault to suppress VM fault storm. */
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
 				    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
-- 
2.11.0

* Re: [PATCH 1/8] drm/amdgpu: stop joining PDEs
       [not found] ` <20171208164107.1567-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (6 preceding siblings ...)
  2017-12-08 16:41   ` [PATCH 8/8] drm/amdgpu: implement 2+1 PD support for Raven Christian König
@ 2017-12-11  5:15   ` Chunming Zhou
  7 siblings, 0 replies; 21+ messages in thread
From: Chunming Zhou @ 2017-12-11  5:15 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



On 2017-12-09 00:41, Christian König wrote:
> That rarely hits any more anyway.
Agree, there is little chance that two PDBs are contiguous.

Reviewed-by: Chunming Zhou <david1.zhou@amd.com>

>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 41 ++++++----------------------------
>   1 file changed, 7 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 3ecdbdfb04dd..d15b6edf7cce 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1076,8 +1076,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   	struct amdgpu_bo *shadow;
>   	struct amdgpu_ring *ring = NULL;
>   	uint64_t pd_addr, shadow_addr = 0;
> -	uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
> -	unsigned count = 0, pt_idx, ndw = 0;
> +	unsigned pt_idx, ndw = 0;
>   	struct amdgpu_job *job;
>   	struct amdgpu_pte_update_params params;
>   	struct dma_fence *fence = NULL;
> @@ -1149,41 +1148,15 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   
>   		parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID;
>   
> -		pde = pd_addr + pt_idx * 8;
>   		incr = amdgpu_bo_size(bo);
> -		if (((last_pde + 8 * count) != pde) ||
> -		    ((last_pt + incr * count) != pt) ||
> -		    (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
> -
> -			if (count) {
> -				if (shadow)
> -					params.func(&params,
> -						    last_shadow,
> -						    last_pt, count,
> -						    incr,
> -						    AMDGPU_PTE_VALID);
> -
> -				params.func(&params, last_pde,
> -					    last_pt, count, incr,
> -					    AMDGPU_PTE_VALID);
> -			}
> -
> -			count = 1;
> -			last_pde = pde;
> -			last_shadow = shadow_addr + pt_idx * 8;
> -			last_pt = pt;
> -		} else {
> -			++count;
> +		if (shadow) {
> +			pde = shadow_addr + pt_idx * 8;
> +			params.func(&params, pde, pt, 1, incr,
> +				    AMDGPU_PTE_VALID);
>   		}
> -	}
>   
> -	if (count) {
> -		if (vm->root.base.bo->shadow)
> -			params.func(&params, last_shadow, last_pt,
> -				    count, incr, AMDGPU_PTE_VALID);
> -
> -		params.func(&params, last_pde, last_pt,
> -			    count, incr, AMDGPU_PTE_VALID);
> +		pde = pd_addr + pt_idx * 8;
> +		params.func(&params, pde, pt, 1, incr, AMDGPU_PTE_VALID);
>   	}
>   
>   	if (!vm->use_cpu_for_update) {

* Re: [PATCH 2/8] drm/amdgpu: update one PDE at a time
       [not found]     ` <20171208164107.1567-2-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2017-12-11  5:37       ` Chunming Zhou
  0 siblings, 0 replies; 21+ messages in thread
From: Chunming Zhou @ 2017-12-11  5:37 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



On 2017-12-09 00:41, Christian König wrote:
> Horribly inefficient, but avoids problems when the root PD size becomes
> too big.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 81 +++++++++++++++-------------------
>   1 file changed, 36 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index d15b6edf7cce..796375484f6f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1069,17 +1069,20 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>    * Makes sure all entries in @parent are up to date.
>    * Returns 0 for success, error for failure.
>    */
> -static int amdgpu_vm_update_level(struct amdgpu_device *adev,
> -				  struct amdgpu_vm *vm,
> -				  struct amdgpu_vm_pt *parent)
> +static int amdgpu_vm_update_pde(struct amdgpu_device *adev,
> +				struct amdgpu_vm *vm,
> +				struct amdgpu_vm_pt *parent,
> +				struct amdgpu_vm_pt *entry)
>   {
> +	struct amdgpu_pte_update_params params;
> +	struct amdgpu_bo *bo = entry->base.bo;
>   	struct amdgpu_bo *shadow;
>   	struct amdgpu_ring *ring = NULL;
>   	uint64_t pd_addr, shadow_addr = 0;
> -	unsigned pt_idx, ndw = 0;
>   	struct amdgpu_job *job;
> -	struct amdgpu_pte_update_params params;
>   	struct dma_fence *fence = NULL;
> +	unsigned ndw = 0;
> +	uint64_t pde, pt;
>   	uint32_t incr;
>   
>   	int r;
> @@ -1102,20 +1105,14 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   		ring = container_of(vm->entity.sched, struct amdgpu_ring,
>   				    sched);
>   
> -		/* padding, etc. */
> +		/* should be sufficient for two commands plus padding, etc. */
>   		ndw = 64;
>   
> -		/* assume the worst case */
> -		ndw += parent->last_entry_used * 6;
> -
>   		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
> -
> -		if (shadow) {
> +		if (shadow)
>   			shadow_addr = amdgpu_bo_gpu_offset(shadow);
> -			ndw *= 2;
> -		} else {
> +		else
>   			shadow_addr = 0;
> -		}
>   
>   		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
>   		if (r)
> @@ -1125,40 +1122,32 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   		params.func = amdgpu_vm_do_set_ptes;
>   	}
>   
> +	spin_lock(&vm->status_lock);
> +	list_del_init(&entry->base.vm_status);
> +	spin_unlock(&vm->status_lock);
>   
> -	/* walk over the address space and update the directory */
> -	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
> -		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
> -		struct amdgpu_bo *bo = entry->base.bo;
> -		uint64_t pde, pt;
> -
> -		if (bo == NULL)
> -			continue;
> -
> -		spin_lock(&vm->status_lock);
> -		list_del_init(&entry->base.vm_status);
> -		spin_unlock(&vm->status_lock);
> -
> -		pt = amdgpu_bo_gpu_offset(bo);
> -		pt = amdgpu_gart_get_vm_pde(adev, pt);
> -		/* Don't update huge pages here */
> -		if ((parent->entries[pt_idx].addr & AMDGPU_PDE_PTE) ||
> -		    parent->entries[pt_idx].addr == (pt | AMDGPU_PTE_VALID))
> -			continue;
> -
> -		parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID;
> +	pt = amdgpu_bo_gpu_offset(bo);
> +	pt = amdgpu_gart_get_vm_pde(adev, pt);
> +	/* Don't update huge pages here */
> +	if (entry->addr & AMDGPU_PDE_PTE ||
> +	    entry->addr == (pt | AMDGPU_PTE_VALID)) {
> +		if (!vm->use_cpu_for_update)
> +			amdgpu_job_free(job);
> +		return 0;
> +	}
>   
> -		incr = amdgpu_bo_size(bo);
> -		if (shadow) {
> -			pde = shadow_addr + pt_idx * 8;
> -			params.func(&params, pde, pt, 1, incr,
> -				    AMDGPU_PTE_VALID);
> -		}
> +	entry->addr = pt | AMDGPU_PTE_VALID;
>   
> -		pde = pd_addr + pt_idx * 8;
> -		params.func(&params, pde, pt, 1, incr, AMDGPU_PTE_VALID);
> +	incr = amdgpu_bo_size(bo);
Nit pick: when count == 1, incr is meaningless; we can just pass 0.
Otherwise the patch is Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
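
For illustration only, a sketch of the suggested call (not the posted patch),
dropping the increment for the single-entry update:

	params.func(&params, pde, pt, 1, 0, AMDGPU_PTE_VALID);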


> +	if (shadow) {
> +		pde = shadow_addr + (entry - parent->entries) * 8;
> +		params.func(&params, pde, pt, 1, incr,
> +			    AMDGPU_PTE_VALID);

>   	}
>   
> +	pde = pd_addr + (entry - parent->entries) * 8;
> +	params.func(&params, pde, pt, 1, incr, AMDGPU_PTE_VALID);
> +
>   	if (!vm->use_cpu_for_update) {
>   		if (params.ib->length_dw == 0) {
>   			amdgpu_job_free(job);
> @@ -1249,14 +1238,16 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>   		bo = bo_base->bo->parent;
>   		if (bo) {
>   			struct amdgpu_vm_bo_base *parent;
> -			struct amdgpu_vm_pt *pt;
> +			struct amdgpu_vm_pt *pt, *entry;
>   
>   			parent = list_first_entry(&bo->va,
>   						  struct amdgpu_vm_bo_base,
>   						  bo_list);
>   			pt = container_of(parent, struct amdgpu_vm_pt, base);
> +			entry = container_of(bo_base, struct amdgpu_vm_pt,
> +					     base);
>   
> -			r = amdgpu_vm_update_level(adev, vm, pt);
> +			r = amdgpu_vm_update_pde(adev, vm, pt, entry);
>   			if (r) {
>   				amdgpu_vm_invalidate_level(vm, &vm->root);
>   				return r;

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 3/8] drm/amdgpu: avoid the modulo in amdgpu_vm_get_entry
       [not found]     ` <20171208164107.1567-3-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2017-12-11  5:40       ` Chunming Zhou
  0 siblings, 0 replies; 21+ messages in thread
From: Chunming Zhou @ 2017-12-11  5:40 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Reviewed-by: Chunming Zhou <david1.zhou@amd.com>


On 2017-12-09 00:41, Christian König wrote:
> We can do this with a simple mask as well.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 +++---
>   1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 796375484f6f..400a00fababd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1288,11 +1288,11 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
>   	*parent = NULL;
>   	*entry = &p->vm->root;
>   	while ((*entry)->entries) {
> -		unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, level++);
> +		unsigned shift = amdgpu_vm_level_shift(p->adev, level++);
>   
> -		idx %= amdgpu_bo_size((*entry)->base.bo) / 8;
>   		*parent = *entry;
> -		*entry = &(*entry)->entries[idx];
> +		*entry = &(*entry)->entries[addr >> shift];
> +		addr &= (1ULL << shift) - 1;
>   	}
>   
>   	if (level != p->adev->vm_manager.num_level)

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 4/8] drm/amdgpu: remove last_entry_used from the VM code
       [not found]     ` <20171208164107.1567-4-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2017-12-11  5:52       ` Chunming Zhou
       [not found]         ` <a671871b-5bad-3d48-b76b-5db4d49c9624-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 21+ messages in thread
From: Chunming Zhou @ 2017-12-11  5:52 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



On 2017-12-09 00:41, Christian König wrote:
> Not needed any more.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 52 +++++++++++++++++++---------------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  1 -
>   2 files changed, 29 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 400a00fababd..ae5451bf5873 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -329,9 +329,6 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>   	    to >= amdgpu_vm_num_entries(adev, level))
>   		return -EINVAL;
>   
> -	if (to > parent->last_entry_used)
> -		parent->last_entry_used = to;
> -
>   	++level;
>   	saddr = saddr & ((1 << shift) - 1);
>   	eaddr = eaddr & ((1 << shift) - 1);
> @@ -1187,16 +1184,19 @@ static int amdgpu_vm_update_pde(struct amdgpu_device *adev,
>    *
>    * Mark all PD level as invalid after an error.
>    */
> -static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
> -				       struct amdgpu_vm_pt *parent)
> +static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
> +				       struct amdgpu_vm *vm,
> +				       struct amdgpu_vm_pt *parent,
> +				       unsigned level)
Can we move level into struct amdgpu_vm_pt?
Otherwise, it looks OK to me.
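
For illustration only, a minimal sketch of that suggestion (the field name and
placement are assumptions, not part of the posted series):

struct amdgpu_vm_pt {
	struct amdgpu_vm_bo_base	base;
	uint64_t			addr;

	/* hypothetical: remember the PD/PT level here so it does not have
	 * to be passed down through amdgpu_vm_invalidate_level() and
	 * amdgpu_vm_free_levels() */
	unsigned			level;

	/* array of page tables, one for each directory entry */
	struct amdgpu_vm_pt		*entries;
};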

Regards,
David Zhou
>   {
> -	unsigned pt_idx;
> +	unsigned pt_idx, num_entries;
>   
>   	/*
>   	 * Recurse into the subdirectories. This recursion is harmless because
>   	 * we only have a maximum of 5 layers.
>   	 */
> -	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
> +	num_entries = amdgpu_vm_num_entries(adev, level);
> +	for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) {
>   		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
>   
>   		if (!entry->base.bo)
> @@ -1207,7 +1207,7 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
>   		if (list_empty(&entry->base.vm_status))
>   			list_add(&entry->base.vm_status, &vm->relocated);
>   		spin_unlock(&vm->status_lock);
> -		amdgpu_vm_invalidate_level(vm, entry);
> +		amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
>   	}
>   }
>   
> @@ -1249,7 +1249,8 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>   
>   			r = amdgpu_vm_update_pde(adev, vm, pt, entry);
>   			if (r) {
> -				amdgpu_vm_invalidate_level(vm, &vm->root);
> +				amdgpu_vm_invalidate_level(adev, vm,
> +							   &vm->root, 0);
>   				return r;
>   			}
>   			spin_lock(&vm->status_lock);
> @@ -1652,7 +1653,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   
>   error_free:
>   	amdgpu_job_free(job);
> -	amdgpu_vm_invalidate_level(vm, &vm->root);
> +	amdgpu_vm_invalidate_level(adev, vm, &vm->root, 0);
>   	return r;
>   }
>   
> @@ -2716,26 +2717,31 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   /**
>    * amdgpu_vm_free_levels - free PD/PT levels
>    *
> - * @level: PD/PT starting level to free
> + * @adev: amdgpu device structure
> + * @parent: PD/PT starting level to free
> + * @level: level of parent structure
>    *
>    * Free the page directory or page table level and all sub levels.
>    */
> -static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)
> +static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
> +				  struct amdgpu_vm_pt *parent,
> +				  unsigned level)
>   {
> -	unsigned i;
> +	unsigned i, num_entries = amdgpu_vm_num_entries(adev, level);
>   
> -	if (level->base.bo) {
> -		list_del(&level->base.bo_list);
> -		list_del(&level->base.vm_status);
> -		amdgpu_bo_unref(&level->base.bo->shadow);
> -		amdgpu_bo_unref(&level->base.bo);
> +	if (parent->base.bo) {
> +		list_del(&parent->base.bo_list);
> +		list_del(&parent->base.vm_status);
> +		amdgpu_bo_unref(&parent->base.bo->shadow);
> +		amdgpu_bo_unref(&parent->base.bo);
>   	}
>   
> -	if (level->entries)
> -		for (i = 0; i <= level->last_entry_used; i++)
> -			amdgpu_vm_free_levels(&level->entries[i]);
> +	if (parent->entries)
> +		for (i = 0; i < num_entries; i++)
> +			amdgpu_vm_free_levels(adev, &parent->entries[i],
> +					      level + 1);
>   
> -	kvfree(level->entries);
> +	kvfree(parent->entries);
>   }
>   
>   /**
> @@ -2793,7 +2799,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>   	if (r) {
>   		dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
>   	} else {
> -		amdgpu_vm_free_levels(&vm->root);
> +		amdgpu_vm_free_levels(adev, &vm->root, 0);
>   		amdgpu_bo_unreserve(root);
>   	}
>   	amdgpu_bo_unref(&root);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 43ea131dd411..7a308a1ea048 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -141,7 +141,6 @@ struct amdgpu_vm_pt {
>   
>   	/* array of page tables, one for each directory entry */
>   	struct amdgpu_vm_pt		*entries;
> -	unsigned			last_entry_used;
>   };
>   
>   #define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr))

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 5/8] drm/amdgpu: remove keeping the addr of the VM PDs
       [not found]     ` <20171208164107.1567-5-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2017-12-11  5:59       ` Chunming Zhou
  0 siblings, 0 replies; 21+ messages in thread
From: Chunming Zhou @ 2017-12-11  5:59 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Reviewed-by: Chunming Zhou <david1.zhou@amd.com>


On 2017-12-09 00:41, Christian König wrote:
> No more double housekeeping.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 13 ++++---------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +-
>   2 files changed, 5 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index ae5451bf5873..abb3d4fb49f4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -383,7 +383,6 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>   			spin_lock(&vm->status_lock);
>   			list_add(&entry->base.vm_status, &vm->relocated);
>   			spin_unlock(&vm->status_lock);
> -			entry->addr = 0;
>   		}
>   
>   		if (level < adev->vm_manager.num_level) {
> @@ -1126,15 +1125,12 @@ static int amdgpu_vm_update_pde(struct amdgpu_device *adev,
>   	pt = amdgpu_bo_gpu_offset(bo);
>   	pt = amdgpu_gart_get_vm_pde(adev, pt);
>   	/* Don't update huge pages here */
> -	if (entry->addr & AMDGPU_PDE_PTE ||
> -	    entry->addr == (pt | AMDGPU_PTE_VALID)) {
> +	if (entry->huge) {
>   		if (!vm->use_cpu_for_update)
>   			amdgpu_job_free(job);
>   		return 0;
>   	}
>   
> -	entry->addr = pt | AMDGPU_PTE_VALID;
> -
>   	incr = amdgpu_bo_size(bo);
>   	if (shadow) {
>   		pde = shadow_addr + (entry - parent->entries) * 8;
> @@ -1202,7 +1198,6 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
>   		if (!entry->base.bo)
>   			continue;
>   
> -		entry->addr = ~0ULL;
>   		spin_lock(&vm->status_lock);
>   		if (list_empty(&entry->base.vm_status))
>   			list_add(&entry->base.vm_status, &vm->relocated);
> @@ -1335,10 +1330,10 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
>   		flags |= AMDGPU_PDE_PTE;
>   	}
>   
> -	if (entry->addr == (dst | flags))
> +	if (!entry->huge && !(flags & AMDGPU_PDE_PTE))
>   		return;
>   
> -	entry->addr = (dst | flags);
> +	entry->huge = !!(flags & AMDGPU_PDE_PTE);
>   
>   	if (use_cpu_update) {
>   		/* In case a huge page is replaced with a system
> @@ -1412,7 +1407,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>   		amdgpu_vm_handle_huge_pages(params, entry, parent,
>   					    nptes, dst, flags);
>   		/* We don't need to update PTEs for huge pages */
> -		if (entry->addr & AMDGPU_PDE_PTE)
> +		if (entry->huge)
>   			continue;
>   
>   		pt = entry->base.bo;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 7a308a1ea048..228f63e9ac5e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -137,7 +137,7 @@ struct amdgpu_vm_bo_base {
>   
>   struct amdgpu_vm_pt {
>   	struct amdgpu_vm_bo_base	base;
> -	uint64_t			addr;
> +	bool				huge;
>   
>   	/* array of page tables, one for each directory entry */
>   	struct amdgpu_vm_pt		*entries;

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 6/8] drm/amdgpu: batch PDE updates again
       [not found]     ` <20171208164107.1567-6-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2017-12-11  6:13       ` Chunming Zhou
  0 siblings, 0 replies; 21+ messages in thread
From: Chunming Zhou @ 2017-12-11  6:13 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Reviewed-by: Chunming Zhou <david1.zhou@amd.com>


On 2017-12-09 00:41, Christian König wrote:
> Now, instead of one submission for each PDE, batch them together over all
> PDs which need an update.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 206 +++++++++++++++------------------
>   1 file changed, 94 insertions(+), 112 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index abb3d4fb49f4..6a35c58100f7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1056,121 +1056,46 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   }
>   
>   /*
> - * amdgpu_vm_update_level - update a single level in the hierarchy
> + * amdgpu_vm_update_pde - update a single level in the hierarchy
>    *
> - * @adev: amdgpu_device pointer
> + * @param: parameters for the update
>    * @vm: requested vm
>    * @parent: parent directory
> + * @entry: entry to update
>    *
> - * Makes sure all entries in @parent are up to date.
> - * Returns 0 for success, error for failure.
> + * Makes sure the requested entry in parent is up to date.
>    */
> -static int amdgpu_vm_update_pde(struct amdgpu_device *adev,
> -				struct amdgpu_vm *vm,
> -				struct amdgpu_vm_pt *parent,
> -				struct amdgpu_vm_pt *entry)
> +static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
> +				 struct amdgpu_vm *vm,
> +				 struct amdgpu_vm_pt *parent,
> +				 struct amdgpu_vm_pt *entry)
>   {
> -	struct amdgpu_pte_update_params params;
> -	struct amdgpu_bo *bo = entry->base.bo;
> -	struct amdgpu_bo *shadow;
> -	struct amdgpu_ring *ring = NULL;
> +	struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL;
>   	uint64_t pd_addr, shadow_addr = 0;
> -	struct amdgpu_job *job;
> -	struct dma_fence *fence = NULL;
> -	unsigned ndw = 0;
>   	uint64_t pde, pt;
> -	uint32_t incr;
>   
> -	int r;
> -
> -	if (!parent->entries)
> -		return 0;
> -
> -	memset(&params, 0, sizeof(params));
> -	params.adev = adev;
> -	shadow = parent->base.bo->shadow;
> +	/* Don't update huge pages here */
> +	if (entry->huge)
> +		return;
>   
>   	if (vm->use_cpu_for_update) {
>   		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
> -		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
> -		if (unlikely(r))
> -			return r;
> -
> -		params.func = amdgpu_vm_cpu_set_ptes;
>   	} else {
> -		ring = container_of(vm->entity.sched, struct amdgpu_ring,
> -				    sched);
> -
> -		/* should be sufficient for two commands plus padding, etc. */
> -		ndw = 64;
> -
>   		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
> +		shadow = parent->base.bo->shadow;
>   		if (shadow)
>   			shadow_addr = amdgpu_bo_gpu_offset(shadow);
> -		else
> -			shadow_addr = 0;
> -
> -		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
> -		if (r)
> -			return r;
> -
> -		params.ib = &job->ibs[0];
> -		params.func = amdgpu_vm_do_set_ptes;
>   	}
>   
> -	spin_lock(&vm->status_lock);
> -	list_del_init(&entry->base.vm_status);
> -	spin_unlock(&vm->status_lock);
> -
>   	pt = amdgpu_bo_gpu_offset(bo);
> -	pt = amdgpu_gart_get_vm_pde(adev, pt);
> -	/* Don't update huge pages here */
> -	if (entry->huge) {
> -		if (!vm->use_cpu_for_update)
> -			amdgpu_job_free(job);
> -		return 0;
> -	}
> -
> -	incr = amdgpu_bo_size(bo);
> +	pt = amdgpu_gart_get_vm_pde(params->adev, pt);
>   	if (shadow) {
>   		pde = shadow_addr + (entry - parent->entries) * 8;
> -		params.func(&params, pde, pt, 1, incr,
> -			    AMDGPU_PTE_VALID);
> +		params->func(params, pde, pt, 1, 0, AMDGPU_PTE_VALID);
>   	}
>   
>   	pde = pd_addr + (entry - parent->entries) * 8;
> -	params.func(&params, pde, pt, 1, incr, AMDGPU_PTE_VALID);
> -
> -	if (!vm->use_cpu_for_update) {
> -		if (params.ib->length_dw == 0) {
> -			amdgpu_job_free(job);
> -		} else {
> -			amdgpu_ring_pad_ib(ring, params.ib);
> -			amdgpu_sync_resv(adev, &job->sync,
> -					 parent->base.bo->tbo.resv,
> -					 AMDGPU_FENCE_OWNER_VM, false);
> -			if (shadow)
> -				amdgpu_sync_resv(adev, &job->sync,
> -						 shadow->tbo.resv,
> -						 AMDGPU_FENCE_OWNER_VM, false);
> -
> -			WARN_ON(params.ib->length_dw > ndw);
> -			r = amdgpu_job_submit(job, ring, &vm->entity,
> -					AMDGPU_FENCE_OWNER_VM, &fence);
> -			if (r)
> -				goto error_free;
> -
> -			amdgpu_bo_fence(parent->base.bo, fence, true);
> -			dma_fence_put(vm->last_update);
> -			vm->last_update = fence;
> -		}
> -	}
> -
> -	return 0;
> -
> -error_free:
> -	amdgpu_job_free(job);
> -	return r;
> +	params->func(params, pde, pt, 1, 0, AMDGPU_PTE_VALID);
>   }
>   
>   /*
> @@ -1218,41 +1143,63 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
>   int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>   				 struct amdgpu_vm *vm)
>   {
> +	struct amdgpu_pte_update_params params;
> +	struct amdgpu_job *job;
> +	unsigned ndw = 0;
>   	int r = 0;
>   
> +	if (list_empty(&vm->relocated))
> +		return 0;
> +
> +restart:
> +	memset(&params, 0, sizeof(params));
> +	params.adev = adev;
> +
> +	if (vm->use_cpu_for_update) {
> +		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
> +		if (unlikely(r))
> +			return r;
> +
> +		params.func = amdgpu_vm_cpu_set_ptes;
> +	} else {
> +		ndw = 512 * 8;
> +		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
> +		if (r)
> +			return r;
> +
> +		params.ib = &job->ibs[0];
> +		params.func = amdgpu_vm_do_set_ptes;
> +	}
> +
>   	spin_lock(&vm->status_lock);
>   	while (!list_empty(&vm->relocated)) {
> -		struct amdgpu_vm_bo_base *bo_base;
> +		struct amdgpu_vm_bo_base *bo_base, *parent;
> +		struct amdgpu_vm_pt *pt, *entry;
>   		struct amdgpu_bo *bo;
>   
>   		bo_base = list_first_entry(&vm->relocated,
>   					   struct amdgpu_vm_bo_base,
>   					   vm_status);
> +		list_del_init(&bo_base->vm_status);
>   		spin_unlock(&vm->status_lock);
>   
>   		bo = bo_base->bo->parent;
> -		if (bo) {
> -			struct amdgpu_vm_bo_base *parent;
> -			struct amdgpu_vm_pt *pt, *entry;
> -
> -			parent = list_first_entry(&bo->va,
> -						  struct amdgpu_vm_bo_base,
> -						  bo_list);
> -			pt = container_of(parent, struct amdgpu_vm_pt, base);
> -			entry = container_of(bo_base, struct amdgpu_vm_pt,
> -					     base);
> -
> -			r = amdgpu_vm_update_pde(adev, vm, pt, entry);
> -			if (r) {
> -				amdgpu_vm_invalidate_level(adev, vm,
> -							   &vm->root, 0);
> -				return r;
> -			}
> +		if (!bo) {
>   			spin_lock(&vm->status_lock);
> -		} else {
> -			spin_lock(&vm->status_lock);
> -			list_del_init(&bo_base->vm_status);
> +			continue;
>   		}
> +
> +		parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
> +					  bo_list);
> +		pt = container_of(parent, struct amdgpu_vm_pt, base);
> +		entry = container_of(bo_base, struct amdgpu_vm_pt, base);
> +
> +		amdgpu_vm_update_pde(&params, vm, pt, entry);
> +
> +		spin_lock(&vm->status_lock);
> +		if (!vm->use_cpu_for_update &&
> +		    (ndw - params.ib->length_dw) < 32)
> +			break;
>   	}
>   	spin_unlock(&vm->status_lock);
>   
> @@ -1260,8 +1207,43 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>   		/* Flush HDP */
>   		mb();
>   		amdgpu_gart_flush_gpu_tlb(adev, 0);
> +	} else if (params.ib->length_dw == 0) {
> +		amdgpu_job_free(job);
> +	} else {
> +		struct amdgpu_bo *root = vm->root.base.bo;
> +		struct amdgpu_ring *ring;
> +		struct dma_fence *fence;
> +
> +		ring = container_of(vm->entity.sched, struct amdgpu_ring,
> +				    sched);
> +
> +		amdgpu_ring_pad_ib(ring, params.ib);
> +		amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
> +				 AMDGPU_FENCE_OWNER_VM, false);
> +		if (root->shadow)
> +			amdgpu_sync_resv(adev, &job->sync,
> +					 root->shadow->tbo.resv,
> +					 AMDGPU_FENCE_OWNER_VM, false);
> +
> +		WARN_ON(params.ib->length_dw > ndw);
> +		r = amdgpu_job_submit(job, ring, &vm->entity,
> +				      AMDGPU_FENCE_OWNER_VM, &fence);
> +		if (r)
> +			goto error;
> +
> +		amdgpu_bo_fence(root, fence, true);
> +		dma_fence_put(vm->last_update);
> +		vm->last_update = fence;
>   	}
>   
> +	if (!list_empty(&vm->relocated))
> +		goto restart;
> +
> +	return 0;
> +
> +error:
> +	amdgpu_vm_invalidate_level(adev, vm, &vm->root, 0);
> +	amdgpu_job_free(job);
>   	return r;
>   }
>   

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 8/8] drm/amdgpu: implement 2+1 PD support for Raven
       [not found]     ` <20171208164107.1567-8-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2017-12-11  6:49       ` Chunming Zhou
       [not found]         ` <7c8d12b8-7d2e-f5be-b8e7-295245c29a92-5C7GfCeVMHo@public.gmane.org>
  2017-12-12  7:58       ` Chunming Zhou
  1 sibling, 1 reply; 21+ messages in thread
From: Chunming Zhou @ 2017-12-11  6:49 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

[-- Attachment #1: Type: text/plain, Size: 10334 bytes --]

It is a coincidence; I am also trying to implement this, but it is still
under debug.

Yours doesn't seem ready yet, right?
We should handle the 64KB native page as 16 * 4KB sub-PTBs for the TF case,
which is the only option verified by the HW.
For the TF case, the number of entries and the shift of the PTB are a bit
different from normal; we should take the native page size into account.
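
To make that arithmetic concrete, a hypothetical sketch (the helper names and
the shift value are assumptions for illustration, not the code under debug):

/* With translate further, the HW-verified layout is one 64KB native page
 * backed by 16 * 4KB sub-PTB entries. */
#define EXAMPLE_NATIVE_PAGE_SHIFT	4	/* log2(64KB / 4KB) = log2(16) */

/* Leaf shift: a normal PTB indexes 4KB pages directly (shift 0), while a
 * TF PTB entry covers one 64KB native page. */
static unsigned example_ptb_shift(bool translate_further)
{
	return translate_further ? EXAMPLE_NATIVE_PAGE_SHIFT : 0;
}

/* Entry count: one PTE then stands for 16 pages, so the PTB holds
 * correspondingly fewer entries than the normal per-PTB PTE count. */
static unsigned example_ptb_entries(unsigned pte_count, bool translate_further)
{
	return translate_further ?
		pte_count >> EXAMPLE_NATIVE_PAGE_SHIFT : pte_count;
}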

Regards,
David Zhou

On 2017-12-09 00:41, Christian König wrote:
> Instead of falling back to 2 levels and a very limited address space, use
> 2+1 PD support and 128TB + 512GB of virtual address space.
>
> Signed-off-by: Christian König <christian.koenig-5C7GfCeVMHo@public.gmane.org>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h   |  3 ++
>   drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 42 ++++++++++++++++++---------
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 26 ++++++++++++++---
>   drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  | 49 ++++++++++++++++++++------------
>   5 files changed, 86 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e5e0fbd43273..9517c0f76d27 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -541,6 +541,7 @@ struct amdgpu_mc {
>   	u64					private_aperture_end;
>   	/* protects concurrent invalidation */
>   	spinlock_t		invalidate_lock;
> +	bool			translate_further;
>   };
>   
>   /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 228f63e9ac5e..79134f0c26d9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -69,6 +69,9 @@ struct amdgpu_bo_list_entry;
>   /* PDE is handled as PTE for VEGA10 */
>   #define AMDGPU_PDE_PTE		(1ULL << 54)
>   
> +/* PTE is handled as PDE for VEGA10 */
> +#define AMDGPU_PTE_TRANSLATE_FURTHER	(1ULL << 56)
> +
>   /* VEGA10 only */
>   #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
>   #define AMDGPU_PTE_MTYPE_MASK	AMDGPU_PTE_MTYPE(3ULL)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
> index f1effadfbaa6..a56f77259130 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
> @@ -144,8 +144,15 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
>   	WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
>   
>   	tmp = mmVM_L2_CNTL3_DEFAULT;
> -	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
> -	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
> +	if (adev->mc.translate_further) {
> +		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
> +		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
> +				    L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
> +	} else {
> +		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
> +		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
> +				    L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
> +	}
>   	WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp);
>   
>   	tmp = mmVM_L2_CNTL4_DEFAULT;
> @@ -183,31 +190,40 @@ static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
>   
>   static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
>   {
> -	int i;
> +	unsigned num_level, block_size;
>   	uint32_t tmp;
> +	int i;
> +
> +	num_level = adev->vm_manager.num_level;
> +	block_size = adev->vm_manager.block_size;
> +	if (adev->mc.translate_further)
> +		num_level -= 1;
> +	else
> +		block_size -= 9;
>   
>   	for (i = 0; i <= 14; i++) {
>   		tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
> -				    adev->vm_manager.num_level);
> +				    num_level);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
> +				    1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				PAGE_TABLE_BLOCK_SIZE,
> -				adev->vm_manager.block_size - 9);
> +				    PAGE_TABLE_BLOCK_SIZE,
> +				    block_size);
>   		/* Send no-retry XNACK on fault to suppress VM fault storm. */
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>   				    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 0fe2a4e782ff..d6a19514c92b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -481,6 +481,21 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
>   		*addr = adev->vm_manager.vram_base_offset + *addr -
>   			adev->mc.vram_start;
>   	BUG_ON(*addr & 0xFFFF00000000003FULL);
> +
> +	if (!adev->mc.translate_further)
> +		return;
> +
> +	if (level == 0) {
> +		/* Set the block size */
> +		if (!(*flags & AMDGPU_PDE_PTE))
> +			*flags |= 9ULL << 59;
> +
> +	} else if (level == 1) {
> +		if (*flags & AMDGPU_PDE_PTE)
> +			*flags &= ~AMDGPU_PDE_PTE;
> +		else
> +			*flags |= AMDGPU_PTE_TRANSLATE_FURTHER;
> +	}
>   }
>   
>   static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
> @@ -771,11 +786,14 @@ static int gmc_v9_0_sw_init(void *handle)
>   	switch (adev->asic_type) {
>   	case CHIP_RAVEN:
>   		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
> -		if (adev->rev_id == 0x0 || adev->rev_id == 0x1)
> +		if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
>   			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
> -		else
> -			/* vm_size is 64GB for legacy 2-level page support */
> -			amdgpu_vm_adjust_size(adev, 64, 9, 1, 48);
> +		} else {
> +			/* vm_size is 128TB + 512GB for legacy 3-level page support */
> +			amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
> +			adev->mc.translate_further =
> +				adev->vm_manager.num_level > 1;
> +		}
>   		break;
>   	case CHIP_VEGA10:
>   		/* XXX Don't know how to get VRAM type yet. */
> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> index bd160d8700e0..a88f43b097dc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> @@ -156,10 +156,15 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
>   	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
>   	WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
>   
> -	tmp = mmVM_L2_CNTL3_DEFAULT;
> -	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
> -	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
> -	WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp);
> +	if (adev->mc.translate_further) {
> +		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
> +		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
> +				    L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
> +	} else {
> +		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
> +		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
> +				    L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
> +	}
>   
>   	tmp = mmVM_L2_CNTL4_DEFAULT;
>   	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
> @@ -197,32 +202,40 @@ static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
>   
>   static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
>   {
> -	int i;
> +	unsigned num_level, block_size;
>   	uint32_t tmp;
> +	int i;
> +
> +	num_level = adev->vm_manager.num_level;
> +	block_size = adev->vm_manager.block_size;
> +	if (adev->mc.translate_further)
> +		num_level -= 1;
> +	else
> +		block_size -= 9;
>   
>   	for (i = 0; i <= 14; i++) {
>   		tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i);
> +		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
> +		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
> +				    num_level);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				ENABLE_CONTEXT, 1);
> -		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				PAGE_TABLE_DEPTH, adev->vm_manager.num_level);
> -		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
> +				    1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				PAGE_TABLE_BLOCK_SIZE,
> -				adev->vm_manager.block_size - 9);
> +				    PAGE_TABLE_BLOCK_SIZE,
> +				    block_size);
>   		/* Send no-retry XNACK on fault to suppress VM fault storm. */
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>   				    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);


[-- Attachment #2: 0008-debug-1.patch --]
[-- Type: text/x-patch, Size: 3885 bytes --]

>From cd993ed074e81e987342a5918fb86d3af8cc46d1 Mon Sep 17 00:00:00 2001
From: Chunming Zhou <david1.zhou-5C7GfCeVMHo@public.gmane.org>
Date: Fri, 8 Dec 2017 18:48:04 +0800
Subject: [PATCH 8/8] debug 1

Change-Id: I99994f6ecc5cce1f0d35029fe45a5cbaa5b80dd4
Signed-off-by: Chunming Zhou <david1.zhou-5C7GfCeVMHo@public.gmane.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 24 ++++++++++++++++--------
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index ec4070787996..823c01b0a5dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -101,7 +101,7 @@ int amdgpu_deep_color = 0;
 int amdgpu_vm_size = -1;
 int amdgpu_vm_fragment_size = -1;
 int amdgpu_vm_block_size = -1;
-int amdgpu_vm_translate_further = 0;
+int amdgpu_vm_translate_further = 1;
 int amdgpu_vm_fault_stop = 0;
 int amdgpu_vm_debug = 0;
 int amdgpu_vram_page_split = 512;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index e5e1252dfc47..08bc76cf986c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -330,12 +330,15 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 {
 	unsigned native_page = amdgpu_vm_get_native_page(adev);
 	unsigned shift = amdgpu_vm_level_shift(adev, level, native_page,
-					       sub_ptb);
+					       false);
 	unsigned pt_idx, from, to;
 	int r;
 	u64 flags;
 	uint64_t init_value = 0;
 
+	printk("%s************np:%u, saddr:0x%llx, eaddr:0x%llx, level:%u, shift:%u sub_ptb:%u, num_entry:%u\n",
+	       __func__,  native_page, saddr, eaddr, level, shift, sub_ptb,
+	       amdgpu_vm_num_entries(adev, level, native_page, false));
 	BUG_ON(level > adev->vm_manager.num_level);
 
 	if (adev->vm_manager.translate_further && level == 1)
@@ -345,7 +348,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 	if (!parent->entries) {
 		unsigned num_entries = amdgpu_vm_num_entries(adev, level,
 							     native_page,
-							     sub_ptb);
+							     false);
 
 		parent->entries = kvmalloc_array(num_entries,
 						   sizeof(struct amdgpu_vm_pt),
@@ -358,9 +361,9 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 	from = saddr >> shift;
 	to = eaddr >> shift;
 	if (from >= amdgpu_vm_num_entries(adev, level, native_page,
-					  sub_ptb) ||
+					  false) ||
 	    to >= amdgpu_vm_num_entries(adev, level, native_page,
-					sub_ptb))
+					false))
 		return -EINVAL;
 
 	if (to > parent->last_entry_used)
@@ -393,6 +396,10 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
 		struct amdgpu_bo *pt;
 
+		printk("%s********level:%u, sub_ptb:%u, pt_idx:%u, \
+		       entry->base.bo:%p num_entry:%u\n", __func__,
+		       level, sub_ptb, pt_idx, entry->base.bo,  amdgpu_vm_num_entries(adev, level, native_page,
+							     sub_ptb));
 		if (!entry->base.bo) {
 			r = amdgpu_bo_create(adev,
 					     amdgpu_vm_bo_size(adev, level,
@@ -1378,13 +1385,14 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
 	*entry = &p->vm->root;
 	while ((*entry)->entries) {
 		unsigned np = amdgpu_vm_get_native_page(p->adev);
-		unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, level--,
-							     np, (*entry)->tf);
+		unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, level,
+							     np, false);
 
-		idx %= amdgpu_vm_num_entries(p->adev, level);
+		idx %= amdgpu_vm_num_entries(p->adev, level, np, (*entry)->tf);
+printk("%s*******addr:0x%llx, level:%u, tf:%u, idx:%u", __func__, addr, level, (*entry)->tf, idx);
 		*parent = *entry;
 		*entry = &(*entry)->entries[idx];
-		if (level)
+		if (level > 0)
 			level--;
 	}
 
-- 
2.14.1


[-- Attachment #3: 0007-drm-amdgpu-addr-TF-setting-in-PTE.patch --]
[-- Type: text/x-patch, Size: 1606 bytes --]

>From cefda75a99480d5d6ebf248ccd8cd27e90a83e92 Mon Sep 17 00:00:00 2001
From: Chunming Zhou <david1.zhou-5C7GfCeVMHo@public.gmane.org>
Date: Thu, 7 Dec 2017 18:12:05 +0800
Subject: [PATCH 7/8] drm/amdgpu: addr TF setting in PTE

Change-Id: I8aafeb0b1f51fec66e951fba979a8e02e8bc8c25
Signed-off-by: Chunming Zhou <david1.zhou-5C7GfCeVMHo@public.gmane.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a325a3360894..e5e1252dfc47 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1199,6 +1199,9 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 			parent->entries[pt_idx].addr |=
 				AMDGPU_PDE_FRAGMENT(parent->entries[pt_idx].native_page);
 		}
+		if (parent->entries[pt_idx].tf) {
+			parent->entries[pt_idx].addr |= AMDGPU_PTE_TF;
+		}
 		pde = pd_addr + pt_idx * 8;
 		incr = amdgpu_bo_size(bo);
 		if (((last_pde + 8 * count) != pde) ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 93dc36bf6125..bae3504a1d43 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -70,6 +70,8 @@ struct amdgpu_bo_list_entry;
 #define AMDGPU_PDE_PTE		(1ULL << 54)
 
 /* VEGA10 only */
+/* translate further */
+#define AMDGPU_PTE_TF		(1ULL << 56)
 #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
 #define AMDGPU_PTE_MTYPE_MASK	AMDGPU_PTE_MTYPE(3ULL)
 
-- 
2.14.1


[-- Attachment #4: 0006-drm-amdgpu-add-VMPT-translate-further-support.patch --]
[-- Type: text/x-patch, Size: 8031 bytes --]

>From 7878f9126c3c507ca5d73e6fdeaf5288dacf778b Mon Sep 17 00:00:00 2001
From: Chunming Zhou <david1.zhou-5C7GfCeVMHo@public.gmane.org>
Date: Thu, 7 Dec 2017 17:26:44 +0800
Subject: [PATCH 6/8] drm/amdgpu: add VMPT translate further support

Change-Id: If70c93b635c710e57d33f38151e86b2655c9651d
Signed-off-by: Chunming Zhou <david1.zhou-5C7GfCeVMHo@public.gmane.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 77 ++++++++++++++++++++++++----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  5 +++
 2 files changed, 60 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index e668f2921fda..a325a3360894 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -146,14 +146,17 @@ struct amdgpu_prt_cb {
  * Returns the number of bits the pfn needs to be right shifted for a level.
  */
 static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
-				      unsigned level)
+				      unsigned level, unsigned native_page,
+				      bool tf)
 {
-	if (level != 0)
+	if (tf)
+		return 0;
+	else if (level != 0)
 		return 9 * (level - 1) +
 			adev->vm_manager.block_size;
 	else
 		/* For the page tables on the leaves */
-		return 0;
+		return native_page;
 }
 
 /**
@@ -170,12 +173,17 @@ static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
  * level0 --- PTB
  */
 static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
-				      unsigned level, unsigned native_page)
+				      unsigned level, unsigned native_page,
+				      bool sub_ptb)
 {
 	unsigned shift = amdgpu_vm_level_shift(adev,
-					       adev->vm_manager.num_level);
+					       adev->vm_manager.num_level,
+					       native_page, sub_ptb);
 
-	if (level == adev->vm_manager.num_level)
+	if (sub_ptb)
+		/*for sub ptb */
+		return 1 << native_page;
+	else if (level == adev->vm_manager.num_level)
 		/* For the root directory */
 		return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift;
 	else if (level != 0)
@@ -194,10 +202,11 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
  * Calculate the size of the BO for a page directory or page table in bytes.
  */
 static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level,
-				  unsigned native_page)
+				  unsigned native_page, bool sub_ptb)
 {
 	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level,
-							   native_page) * 8);
+							   native_page, sub_ptb)
+				     * 8);
 }
 
 /**
@@ -299,6 +308,10 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
 	return ready;
 }
 
+static unsigned amdgpu_vm_get_native_page(struct amdgpu_device *adev)
+{
+	return adev->vm_manager.translate_further ? AMDGPU_VM_BIGK : 0;
+}
 /**
  * amdgpu_vm_alloc_levels - allocate the PD/PT levels
  *
@@ -313,9 +326,11 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 				  struct amdgpu_vm *vm,
 				  struct amdgpu_vm_pt *parent,
 				  uint64_t saddr, uint64_t eaddr,
-				  unsigned level)
+				  unsigned level, bool sub_ptb)
 {
-	unsigned shift = amdgpu_vm_level_shift(adev, level);
+	unsigned native_page = amdgpu_vm_get_native_page(adev);
+	unsigned shift = amdgpu_vm_level_shift(adev, level, native_page,
+					       sub_ptb);
 	unsigned pt_idx, from, to;
 	int r;
 	u64 flags;
@@ -323,9 +338,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 
 	BUG_ON(level > adev->vm_manager.num_level);
 
+	if (adev->vm_manager.translate_further && level == 1)
+		/* hw only verified 64KB---16 4KB for TF */
+		parent->native_page = AMDGPU_VM_BIGK;
+
 	if (!parent->entries) {
 		unsigned num_entries = amdgpu_vm_num_entries(adev, level,
-							     parent->native_page);
+							     native_page,
+							     sub_ptb);
 
 		parent->entries = kvmalloc_array(num_entries,
 						   sizeof(struct amdgpu_vm_pt),
@@ -337,14 +357,17 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 
 	from = saddr >> shift;
 	to = eaddr >> shift;
-	if (from >= amdgpu_vm_num_entries(adev, level, parent->native_page) ||
-	    to >= amdgpu_vm_num_entries(adev, level, parent->native_page))
+	if (from >= amdgpu_vm_num_entries(adev, level, native_page,
+					  sub_ptb) ||
+	    to >= amdgpu_vm_num_entries(adev, level, native_page,
+					sub_ptb))
 		return -EINVAL;
 
 	if (to > parent->last_entry_used)
 		parent->last_entry_used = to;
 
-	level--;
+	if (level > 0)
+		level--;
 	saddr = saddr & ((1 << shift) - 1);
 	eaddr = eaddr & ((1 << shift) - 1);
 
@@ -373,7 +396,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 		if (!entry->base.bo) {
 			r = amdgpu_bo_create(adev,
 					     amdgpu_vm_bo_size(adev, level,
-							       parent->native_page),
+							       native_page,
+							       sub_ptb),
 					     AMDGPU_GPU_PAGE_SIZE, true,
 					     AMDGPU_GEM_DOMAIN_VRAM,
 					     flags,
@@ -403,12 +427,18 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 			entry->addr = 0;
 		}
 
-		if (level > 0) {
+		if (level == 0 && adev->vm_manager.translate_further &&
+		    !sub_ptb)
+			entry->tf = true;
+		else
+			entry->tf = false;
+
+		if (level > 0 || entry->tf) {
 			uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
 			uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
 				((1 << shift) - 1);
 			r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
-						   sub_eaddr, level);
+						   sub_eaddr, level, entry->tf);
 			if (r)
 				return r;
 		}
@@ -450,7 +480,7 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 	eaddr /= AMDGPU_GPU_PAGE_SIZE;
 
 	return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
-				      adev->vm_manager.num_level);
+				      adev->vm_manager.num_level, false);
 }
 
 /**
@@ -1141,7 +1171,6 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 		params.func = amdgpu_vm_do_set_ptes;
 	}
 
-
 	/* walk over the address space and update the directory */
 	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
 		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
@@ -1345,7 +1374,9 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
 	*parent = NULL;
 	*entry = &p->vm->root;
 	while ((*entry)->entries) {
-		unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, level);
+		unsigned np = amdgpu_vm_get_native_page(p->adev);
+		unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, level--,
+							     np, (*entry)->tf);
 
 		idx %= amdgpu_vm_num_entries(p->adev, level);
 		*parent = *entry;
@@ -1356,6 +1387,8 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
 
 	if (level != 0)
 		*entry = NULL;
+	if (p->adev->vm_manager.translate_further && !(*parent)->tf)
+		*entry = NULL;
 }
 
 /**
@@ -2728,8 +2761,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 				AMDGPU_GEM_CREATE_SHADOW);
 
-	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0, 0), align, true,
-			     AMDGPU_GEM_DOMAIN_VRAM,
+	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0, 0, false), align,
+			     true, AMDGPU_GEM_DOMAIN_VRAM,
 			     flags,
 			     NULL, NULL, init_pde_value, &vm->root.base.bo);
 	if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 34f929bb6b8c..93dc36bf6125 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -122,6 +122,9 @@ struct amdgpu_bo_list_entry;
 #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
 #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)
 
+/* 64KB native page */
+#define AMDGPU_VM_BIGK 4
+
 /* base structure for tracking BO usage in a VM */
 struct amdgpu_vm_bo_base {
 	/* constant after initialization */
@@ -144,6 +147,8 @@ struct amdgpu_vm_pt {
 
 	/* log2(number of pages) set by PDB0, indicates one PTE presents how many pages */
 	uint64_t			native_page;
+	/* translate further */
+	bool				tf;
 
 	/* array of page tables, one for each directory entry */
 	struct amdgpu_vm_pt		*entries;
-- 
2.14.1


[-- Attachment #5: 0005-drm-amdgpu-add-kernel-parameter-for-VM-translate-fur.patch --]
[-- Type: text/x-patch, Size: 3674 bytes --]

>From cee04e66cf7ac349c999884886bf2c703423c539 Mon Sep 17 00:00:00 2001
From: Chunming Zhou <david1.zhou-5C7GfCeVMHo@public.gmane.org>
Date: Thu, 7 Dec 2017 15:58:20 +0800
Subject: [PATCH 5/8] drm/amdgpu: add kernel parameter for VM translate further

Change-Id: Ie7036f808d9c9e1ea8d994d255945555620194fd
Signed-off-by: Chunming Zhou <david1.zhou-5C7GfCeVMHo@public.gmane.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 9 ++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  | 1 +
 4 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c56a986a4be2..27b9ebf7f1de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -99,6 +99,7 @@ extern int amdgpu_bapm;
 extern int amdgpu_deep_color;
 extern int amdgpu_vm_size;
 extern int amdgpu_vm_block_size;
+extern int amdgpu_vm_translate_further;
 extern int amdgpu_vm_fragment_size;
 extern int amdgpu_vm_fault_stop;
 extern int amdgpu_vm_debug;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 31383e004947..ec4070787996 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -101,6 +101,7 @@ int amdgpu_deep_color = 0;
 int amdgpu_vm_size = -1;
 int amdgpu_vm_fragment_size = -1;
 int amdgpu_vm_block_size = -1;
+int amdgpu_vm_translate_further = 0;
 int amdgpu_vm_fault_stop = 0;
 int amdgpu_vm_debug = 0;
 int amdgpu_vram_page_split = 512;
@@ -198,6 +199,9 @@ module_param_named(vm_fragment_size, amdgpu_vm_fragment_size, int, 0444);
 MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default depending on vm_size)");
 module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444);
 
+MODULE_PARM_DESC(vm_translate_further, "VM page table translate further (0 = disable (default), 1 = enable)");
+module_param_named(vm_translate_further, amdgpu_vm_translate_further, int, 0444);
+
 MODULE_PARM_DESC(vm_fault_stop, "Stop on VM fault (0 = never (default), 1 = print first, 2 = always)");
 module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 004a797abb30..e668f2921fda 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2644,8 +2644,15 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
 	else
 		adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
 
-	DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
+	if (amdgpu_vm_translate_further) {
+		if (adev->asic_type < CHIP_VEGA10)
+			DRM_ERROR("Don't support VMPT translate further feature!");
+		else
+			adev->vm_manager.translate_further = true;
+	}
+	DRM_INFO("vm size is %u GB, %u levels, TF:%d, block size is %u-bit, fragment size is %u-bit\n",
 		 vm_size, adev->vm_manager.num_level + 1,
+		 adev->vm_manager.translate_further,
 		 adev->vm_manager.block_size,
 		 adev->vm_manager.fragment_size);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index f130c1f3680e..34f929bb6b8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -242,6 +242,7 @@ struct amdgpu_vm_manager {
 	uint32_t				num_level;
 	uint32_t				block_size;
 	uint32_t				fragment_size;
+	bool					translate_further;
 	/* vram base address for page table entry  */
 	u64					vram_base_offset;
 	/* vm pte handling */
-- 
2.14.1


[-- Attachment #6: 0004-drm-amdgpu-set-native-page-in-PDE0.patch --]
[-- Type: text/x-patch, Size: 1818 bytes --]

>From 08425d15be0802697d7dd1e2bafa832ebda312db Mon Sep 17 00:00:00 2001
From: Chunming Zhou <david1.zhou-5C7GfCeVMHo@public.gmane.org>
Date: Thu, 7 Dec 2017 15:22:26 +0800
Subject: [PATCH 4/8] drm/amdgpu: set native page in PDE0

Change-Id: If1e12bb721e89cf9c2aacd8d51c93ed8b3dde4b4
Signed-off-by: Chunming Zhou <david1.zhou-5C7GfCeVMHo@public.gmane.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 +++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 +++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9ed825f64259..004a797abb30 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1163,7 +1163,13 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 			continue;
 
 		parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID;
-
+		if (parent->entries[pt_idx].native_page &&
+		    parent->entries[pt_idx].native_page <= 32) {
+			parent->entries[pt_idx].addr &=
+				~AMDGPU_PDE_FRAGMENT_MASK;
+			parent->entries[pt_idx].addr |=
+				AMDGPU_PDE_FRAGMENT(parent->entries[pt_idx].native_page);
+		}
 		pde = pd_addr + pt_idx * 8;
 		incr = amdgpu_bo_size(bo);
 		if (((last_pde + 8 * count) != pde) ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 0fd96d6b5d67..f130c1f3680e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -73,6 +73,9 @@ struct amdgpu_bo_list_entry;
 #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
 #define AMDGPU_PTE_MTYPE_MASK	AMDGPU_PTE_MTYPE(3ULL)
 
+#define AMDGPU_PDE_FRAGMENT(a)	((uint64_t)a << 59)
+#define AMDGPU_PDE_FRAGMENT_MASK AMDGPU_PDE_FRAGMENT(32ULL)
+
 /* For Raven */
 #define AMDGPU_MTYPE_CC 2
 
-- 
2.14.1


[-- Attachment #7: 0003-drm-amdgpu-take-native-page-into-count-PTE-entries.patch --]
[-- Type: text/x-patch, Size: 4169 bytes --]

>From 6114cb0c23538c7ef32da029b6ac56463692c107 Mon Sep 17 00:00:00 2001
From: Chunming Zhou <david1.zhou-5C7GfCeVMHo@public.gmane.org>
Date: Thu, 7 Dec 2017 14:23:00 +0800
Subject: [PATCH 3/8] drm/amdgpu: take native page into count PTE entries

The BLOCK FRAGMENT bits [63:59] of PDE0 present the native page size pointed to by its PTEs.

Change-Id: I7af9e111e0df122ed3b38a36e1c50d312f3a6d2f
Signed-off-by: Chunming Zhou <david1.zhou-5C7GfCeVMHo@public.gmane.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22 +++++++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  3 +++
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index affe64e42cef..9ed825f64259 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -170,7 +170,7 @@ static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
  * level0 --- PTB
  */
 static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
-				      unsigned level)
+				      unsigned level, unsigned native_page)
 {
 	unsigned shift = amdgpu_vm_level_shift(adev,
 					       adev->vm_manager.num_level);
@@ -183,7 +183,7 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
 		return 512;
 	else
 		/* For the page tables on the leaves(PTB) */
-		return AMDGPU_VM_PTE_COUNT(adev);
+		return AMDGPU_VM_PTE_COUNT(adev) >> native_page;
 }
 
 /**
@@ -193,9 +193,11 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
  *
  * Calculate the size of the BO for a page directory or page table in bytes.
  */
-static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
+static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level,
+				  unsigned native_page)
 {
-	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
+	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level,
+							   native_page) * 8);
 }
 
 /**
@@ -322,7 +324,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 	BUG_ON(level > adev->vm_manager.num_level);
 
 	if (!parent->entries) {
-		unsigned num_entries = amdgpu_vm_num_entries(adev, level);
+		unsigned num_entries = amdgpu_vm_num_entries(adev, level,
+							     parent->native_page);
 
 		parent->entries = kvmalloc_array(num_entries,
 						   sizeof(struct amdgpu_vm_pt),
@@ -334,8 +337,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 
 	from = saddr >> shift;
 	to = eaddr >> shift;
-	if (from >= amdgpu_vm_num_entries(adev, level) ||
-	    to >= amdgpu_vm_num_entries(adev, level))
+	if (from >= amdgpu_vm_num_entries(adev, level, parent->native_page) ||
+	    to >= amdgpu_vm_num_entries(adev, level, parent->native_page))
 		return -EINVAL;
 
 	if (to > parent->last_entry_used)
@@ -369,7 +372,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 
 		if (!entry->base.bo) {
 			r = amdgpu_bo_create(adev,
-					     amdgpu_vm_bo_size(adev, level),
+					     amdgpu_vm_bo_size(adev, level,
+							       parent->native_page),
 					     AMDGPU_GPU_PAGE_SIZE, true,
 					     AMDGPU_GEM_DOMAIN_VRAM,
 					     flags,
@@ -2711,7 +2715,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 				AMDGPU_GEM_CREATE_SHADOW);
 
-	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
+	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0, 0), align, true,
 			     AMDGPU_GEM_DOMAIN_VRAM,
 			     flags,
 			     NULL, NULL, init_pde_value, &vm->root.base.bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 43ea131dd411..0fd96d6b5d67 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -139,6 +139,9 @@ struct amdgpu_vm_pt {
 	struct amdgpu_vm_bo_base	base;
 	uint64_t			addr;
 
+	/* log2(number of pages) set by PDB0, indicates one PTE presents how many pages */
+	uint64_t			native_page;
+
 	/* array of page tables, one for each directory entry */
 	struct amdgpu_vm_pt		*entries;
 	unsigned			last_entry_used;
-- 
2.14.1


[-- Attachment #8: 0002-drm-amdgpu-fix-pte-index-calculation.patch --]
[-- Type: text/x-patch, Size: 1211 bytes --]

>From 99c061b224128804fd6c2c1850e54716afa75c73 Mon Sep 17 00:00:00 2001
From: Chunming Zhou <david1.zhou-5C7GfCeVMHo@public.gmane.org>
Date: Fri, 8 Dec 2017 18:51:34 +0800
Subject: [PATCH 2/8] drm/amdgpu: fix pte index calculation

Change-Id: I40ecf31ad4b51022a2c0c076ae45188b6e9d63de
Signed-off-by: Chunming Zhou <david1.zhou-5C7GfCeVMHo@public.gmane.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 8904ccf78fc9..affe64e42cef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1335,11 +1335,13 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
 	*parent = NULL;
 	*entry = &p->vm->root;
 	while ((*entry)->entries) {
-		unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, level--);
+		unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, level);
 
-		idx %= amdgpu_bo_size((*entry)->base.bo) / 8;
+		idx %= amdgpu_vm_num_entries(p->adev, level);
 		*parent = *entry;
 		*entry = &(*entry)->entries[idx];
+		if (level)
+			level--;
 	}
 
 	if (level != 0)
-- 
2.14.1


[-- Attachment #9: 0001-drm-amdgpu-reverse-PDBs-order.patch --]
[-- Type: text/x-patch, Size: 4378 bytes --]

>From bb3170b6f1324389f38222e36428e923fed431b0 Mon Sep 17 00:00:00 2001
From: Chunming Zhou <david1.zhou-5C7GfCeVMHo@public.gmane.org>
Date: Thu, 7 Dec 2017 13:02:23 +0800
Subject: [PATCH 1/8] drm/amdgpu: reverse PDBs order

The hierarchy of the page tables is as below, which aligns with the hw names.
PDB2->PDB1->PDB0->PTB, accordingly:
level3 --- PDB2
level2 --- PDB1
level1 --- PDB0
level0 --- PTB
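
To make the new numbering concrete, here is a small sketch (illustration
only, assuming the usual block_size of 9) of the shifts that
amdgpu_vm_level_shift() returns after this change:

	/* shift = 9 * (level - 1) + block_size for the PDBs, 0 for the PTB;
	 * shifts are in units of 4KB GPU pages, so with block_size = 9:
	 *   level 3 (PDB2): shift 27 -> one entry spans 2^27 * 4KB = 512GB
	 *   level 2 (PDB1): shift 18 -> one entry spans 2^18 * 4KB =   1GB
	 *   level 1 (PDB0): shift  9 -> one entry spans 2^9  * 4KB =   2MB
	 *   level 0 (PTB):  shift  0 -> one entry spans a single 4KB page
	 */
	static unsigned example_level_shift(unsigned level, unsigned block_size)
	{
		return level ? 9 * (level - 1) + block_size : 0;
	}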

Change-Id: I2d748e5e96cffe18294c104c4b192d910b2f8e6b
Signed-off-by: Chunming Zhou <david1.zhou-5C7GfCeVMHo@public.gmane.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 37 ++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 3ecdbdfb04dd..8904ccf78fc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -148,8 +148,8 @@ struct amdgpu_prt_cb {
 static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
 				      unsigned level)
 {
-	if (level != adev->vm_manager.num_level)
-		return 9 * (adev->vm_manager.num_level - level - 1) +
+	if (level != 0)
+		return 9 * (level - 1) +
 			adev->vm_manager.block_size;
 	else
 		/* For the page tables on the leaves */
@@ -162,20 +162,27 @@ static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
  * @adev: amdgpu_device pointer
  *
  * Calculate the number of entries in a page directory or page table.
+ * The hiberachy of page table is as below, which aligns hw names.
+ * PDB2->PDB1->PDB0->PTB, accordingly:
+ * level3 --- PDB2
+ * level2 --- PDB1
+ * level1 --- PDB0
+ * level0 --- PTB
  */
 static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
 				      unsigned level)
 {
-	unsigned shift = amdgpu_vm_level_shift(adev, 0);
+	unsigned shift = amdgpu_vm_level_shift(adev,
+					       adev->vm_manager.num_level);
 
-	if (level == 0)
+	if (level == adev->vm_manager.num_level)
 		/* For the root directory */
 		return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift;
-	else if (level != adev->vm_manager.num_level)
+	else if (level != 0)
 		/* Everything in between */
 		return 512;
 	else
-		/* For the page tables on the leaves */
+		/* For the page tables on the leaves(PTB) */
 		return AMDGPU_VM_PTE_COUNT(adev);
 }
 
@@ -312,6 +319,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 	u64 flags;
 	uint64_t init_value = 0;
 
+	BUG_ON(level > adev->vm_manager.num_level);
+
 	if (!parent->entries) {
 		unsigned num_entries = amdgpu_vm_num_entries(adev, level);
 
@@ -332,7 +341,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 	if (to > parent->last_entry_used)
 		parent->last_entry_used = to;
 
-	++level;
+	level--;
 	saddr = saddr & ((1 << shift) - 1);
 	eaddr = eaddr & ((1 << shift) - 1);
 
@@ -346,7 +355,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 
 	if (vm->pte_support_ats) {
 		init_value = AMDGPU_PTE_DEFAULT_ATC;
-		if (level != adev->vm_manager.num_level - 1)
+		/* != PDB0 */
+		if (level != 1)
 			init_value |= AMDGPU_PDE_PTE;
 
 	}
@@ -389,7 +399,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 			entry->addr = 0;
 		}
 
-		if (level < adev->vm_manager.num_level) {
+		if (level > 0) {
 			uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
 			uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
 				((1 << shift) - 1);
@@ -435,7 +445,8 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 	saddr /= AMDGPU_GPU_PAGE_SIZE;
 	eaddr /= AMDGPU_GPU_PAGE_SIZE;
 
-	return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, 0);
+	return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
+				      adev->vm_manager.num_level);
 }
 
 /**
@@ -1319,19 +1330,19 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
 			 struct amdgpu_vm_pt **entry,
 			 struct amdgpu_vm_pt **parent)
 {
-	unsigned level = 0;
+	unsigned level = p->adev->vm_manager.num_level;
 
 	*parent = NULL;
 	*entry = &p->vm->root;
 	while ((*entry)->entries) {
-		unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, level++);
+		unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, level--);
 
 		idx %= amdgpu_bo_size((*entry)->base.bo) / 8;
 		*parent = *entry;
 		*entry = &(*entry)->entries[idx];
 	}
 
-	if (level != p->adev->vm_manager.num_level)
+	if (level != 0)
 		*entry = NULL;
 }
 
-- 
2.14.1


[-- Attachment #10: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH 8/8] drm/amdgpu: implement 2+1 PD support for Raven
       [not found]         ` <7c8d12b8-7d2e-f5be-b8e7-295245c29a92-5C7GfCeVMHo@public.gmane.org>
@ 2017-12-11 10:14           ` Christian König
  0 siblings, 0 replies; 21+ messages in thread
From: Christian König @ 2017-12-11 10:14 UTC (permalink / raw)
  To: Chunming Zhou, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

> For yours, it seems not ready yet, right?
It is completely functional and tested.

> We should handle the 64KB native page as 16 * 4KB sub-PTBs for the TF
> case, which is the only option verified by HW.
No, we have been using only 2MB pages since Vega10 and so far that works
perfectly fine.

We are seriously not using 64KB pages any more, so please drop that
approach completely.

Regards,
Christian.

On 11.12.2017 at 07:49, Chunming Zhou wrote:
> It is a coincidence; I am also trying to implement this, but it is still
> under debug.
>
> For yours, it seems not ready yet, right?
> We should handle the 64KB native page as 16 * 4KB sub-PTBs for the TF
> case, which is the only option verified by HW.
> For the TF case, the number of entries and the shift of the PTB differ a
> bit from normal, so we should take the native page size into account.
>
> Regards,
> David Zhou
>
> On 2017-12-09 00:41, Christian König wrote:
>> Instead of falling back to 2 level and very limited address space use
>> 2+1 PD support and 128TB + 512GB of virtual address space.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h   |  3 ++
>>   drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 42 
>> ++++++++++++++++++---------
>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 26 ++++++++++++++---
>>   drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  | 49 
>> ++++++++++++++++++++------------
>>   5 files changed, 86 insertions(+), 35 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index e5e0fbd43273..9517c0f76d27 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -541,6 +541,7 @@ struct amdgpu_mc {
>>       u64                    private_aperture_end;
>>       /* protects concurrent invalidation */
>>       spinlock_t        invalidate_lock;
>> +    bool            translate_further;
>>   };
>>     /*
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 228f63e9ac5e..79134f0c26d9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -69,6 +69,9 @@ struct amdgpu_bo_list_entry;
>>   /* PDE is handled as PTE for VEGA10 */
>>   #define AMDGPU_PDE_PTE        (1ULL << 54)
>>   +/* PTE is handled as PDE for VEGA10 */
>> +#define AMDGPU_PTE_TRANSLATE_FURTHER    (1ULL << 56)
>> +
>>   /* VEGA10 only */
>>   #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
>>   #define AMDGPU_PTE_MTYPE_MASK    AMDGPU_PTE_MTYPE(3ULL)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>> index f1effadfbaa6..a56f77259130 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>> @@ -144,8 +144,15 @@ static void gfxhub_v1_0_init_cache_regs(struct 
>> amdgpu_device *adev)
>>       WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
>>         tmp = mmVM_L2_CNTL3_DEFAULT;
>> -    tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
>> -    tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, 
>> L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
>> +    if (adev->mc.translate_further) {
>> +        tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
>> +        tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
>> +                    L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
>> +    } else {
>> +        tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
>> +        tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
>> +                    L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
>> +    }
>>       WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp);
>>         tmp = mmVM_L2_CNTL4_DEFAULT;
>> @@ -183,31 +190,40 @@ static void 
>> gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
>>     static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device 
>> *adev)
>>   {
>> -    int i;
>> +    unsigned num_level, block_size;
>>       uint32_t tmp;
>> +    int i;
>> +
>> +    num_level = adev->vm_manager.num_level;
>> +    block_size = adev->vm_manager.block_size;
>> +    if (adev->mc.translate_further)
>> +        num_level -= 1;
>> +    else
>> +        block_size -= 9;
>>         for (i = 0; i <= 14; i++) {
>>           tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
>> -                    adev->vm_manager.num_level);
>> +                    num_level);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
>> +                    1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                PAGE_TABLE_BLOCK_SIZE,
>> -                adev->vm_manager.block_size - 9);
>> +                    PAGE_TABLE_BLOCK_SIZE,
>> +                    block_size);
>>           /* Send no-retry XNACK on fault to suppress VM fault storm. */
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>                       RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index 0fe2a4e782ff..d6a19514c92b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -481,6 +481,21 @@ static void gmc_v9_0_get_vm_pde(struct 
>> amdgpu_device *adev, int level,
>>           *addr = adev->vm_manager.vram_base_offset + *addr -
>>               adev->mc.vram_start;
>>       BUG_ON(*addr & 0xFFFF00000000003FULL);
>> +
>> +    if (!adev->mc.translate_further)
>> +        return;
>> +
>> +    if (level == 0) {
>> +        /* Set the block size */
>> +        if (!(*flags & AMDGPU_PDE_PTE))
>> +            *flags |= 9ULL << 59;
>> +
>> +    } else if (level == 1) {
>> +        if (*flags & AMDGPU_PDE_PTE)
>> +            *flags &= ~AMDGPU_PDE_PTE;
>> +        else
>> +            *flags |= AMDGPU_PTE_TRANSLATE_FURTHER;
>> +    }
>>   }
>>     static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
>> @@ -771,11 +786,14 @@ static int gmc_v9_0_sw_init(void *handle)
>>       switch (adev->asic_type) {
>>       case CHIP_RAVEN:
>>           adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
>> -        if (adev->rev_id == 0x0 || adev->rev_id == 0x1)
>> +        if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
>>               amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
>> -        else
>> -            /* vm_size is 64GB for legacy 2-level page support */
>> -            amdgpu_vm_adjust_size(adev, 64, 9, 1, 48);
>> +        } else {
>> +            /* vm_size is 128TB + 512GB for legacy 3-level page 
>> support */
>> +            amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
>> +            adev->mc.translate_further =
>> +                adev->vm_manager.num_level > 1;
>> +        }
>>           break;
>>       case CHIP_VEGA10:
>>           /* XXX Don't know how to get VRAM type yet. */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>> index bd160d8700e0..a88f43b097dc 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>> @@ -156,10 +156,15 @@ static void mmhub_v1_0_init_cache_regs(struct 
>> amdgpu_device *adev)
>>       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
>>       WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
>>   -    tmp = mmVM_L2_CNTL3_DEFAULT;
>> -    tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
>> -    tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, 
>> L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
>> -    WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp);
>> +    if (adev->mc.translate_further) {
>> +        tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
>> +        tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
>> +                    L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
>> +    } else {
>> +        tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
>> +        tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
>> +                    L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
>> +    }
>>         tmp = mmVM_L2_CNTL4_DEFAULT;
>>       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, 
>> VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
>> @@ -197,32 +202,40 @@ static void 
>> mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
>>     static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
>>   {
>> -    int i;
>> +    unsigned num_level, block_size;
>>       uint32_t tmp;
>> +    int i;
>> +
>> +    num_level = adev->vm_manager.num_level;
>> +    block_size = adev->vm_manager.block_size;
>> +    if (adev->mc.translate_further)
>> +        num_level -= 1;
>> +    else
>> +        block_size -= 9;
>>         for (i = 0; i <= 14; i++) {
>>           tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i);
>> +        tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
>> +        tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
>> +                    num_level);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                ENABLE_CONTEXT, 1);
>> -        tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                PAGE_TABLE_DEPTH, adev->vm_manager.num_level);
>> -        tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
>> +                    1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                PAGE_TABLE_BLOCK_SIZE,
>> -                adev->vm_manager.block_size - 9);
>> +                    PAGE_TABLE_BLOCK_SIZE,
>> +                    block_size);
>>           /* Send no-retry XNACK on fault to suppress VM fault storm. */
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>                       RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 4/8] drm/amdgpu: remove last_entry_used from the VM code
       [not found]         ` <a671871b-5bad-3d48-b76b-5db4d49c9624-5C7GfCeVMHo@public.gmane.org>
@ 2017-12-11 12:08           ` Christian König
       [not found]             ` <8d22ff81-e27f-abe5-c3f3-3fbdf80334be-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 21+ messages in thread
From: Christian König @ 2017-12-11 12:08 UTC (permalink / raw)
  To: Chunming Zhou, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 11.12.2017 at 06:52, Chunming Zhou wrote:
>
>
> On 2017-12-09 00:41, Christian König wrote:
>> Not needed any more.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 52 
>> +++++++++++++++++++---------------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  1 -
>>   2 files changed, 29 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 400a00fababd..ae5451bf5873 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -329,9 +329,6 @@ static int amdgpu_vm_alloc_levels(struct 
>> amdgpu_device *adev,
>>           to >= amdgpu_vm_num_entries(adev, level))
>>           return -EINVAL;
>>   -    if (to > parent->last_entry_used)
>> -        parent->last_entry_used = to;
>> -
>>       ++level;
>>       saddr = saddr & ((1 << shift) - 1);
>>       eaddr = eaddr & ((1 << shift) - 1);
>> @@ -1187,16 +1184,19 @@ static int amdgpu_vm_update_pde(struct 
>> amdgpu_device *adev,
>>    *
>>    * Mark all PD level as invalid after an error.
>>    */
>> -static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
>> -                       struct amdgpu_vm_pt *parent)
>> +static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
>> +                       struct amdgpu_vm *vm,
>> +                       struct amdgpu_vm_pt *parent,
>> +                       unsigned level)
> can we move level to struct amdgpu_vm_pt?

I considered this as well, but then abandoned the approach and moved to
passing it as a parameter again.

The general problem is that amdgpu_vm_pt is already *WAY* too big; we use
60 bytes to manage 4K in the worst case.

I'm working on getting this down to something sane again, but adding the
level here just to save passing it as a parameter during destruction would
make it worse.
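
For reference, a rough back-of-the-envelope of the structure in question
(the byte counts are estimates for a 64-bit build, not measured):

	struct amdgpu_vm_pt {
		struct amdgpu_vm_bo_base base;    /* bo, vm, two list_heads: ~48 bytes */
		uint64_t                 addr;    /* 8 bytes */
		struct amdgpu_vm_pt     *entries; /* 8 bytes */
	};	/* roughly 64 bytes per entry, i.e. per 4K page at the leaf level */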

Christian.

> otherwise, it looks ok to me.
>
> Regards,
> David Zhou
>>   {
>> -    unsigned pt_idx;
>> +    unsigned pt_idx, num_entries;
>>         /*
>>        * Recurse into the subdirectories. This recursion is harmless 
>> because
>>        * we only have a maximum of 5 layers.
>>        */
>> -    for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
>> +    num_entries = amdgpu_vm_num_entries(adev, level);
>> +    for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) {
>>           struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
>>             if (!entry->base.bo)
>> @@ -1207,7 +1207,7 @@ static void amdgpu_vm_invalidate_level(struct 
>> amdgpu_vm *vm,
>>           if (list_empty(&entry->base.vm_status))
>>               list_add(&entry->base.vm_status, &vm->relocated);
>>           spin_unlock(&vm->status_lock);
>> -        amdgpu_vm_invalidate_level(vm, entry);
>> +        amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
>>       }
>>   }
>>   @@ -1249,7 +1249,8 @@ int amdgpu_vm_update_directories(struct 
>> amdgpu_device *adev,
>>                 r = amdgpu_vm_update_pde(adev, vm, pt, entry);
>>               if (r) {
>> -                amdgpu_vm_invalidate_level(vm, &vm->root);
>> +                amdgpu_vm_invalidate_level(adev, vm,
>> +                               &vm->root, 0);
>>                   return r;
>>               }
>>               spin_lock(&vm->status_lock);
>> @@ -1652,7 +1653,7 @@ static int amdgpu_vm_bo_update_mapping(struct 
>> amdgpu_device *adev,
>>     error_free:
>>       amdgpu_job_free(job);
>> -    amdgpu_vm_invalidate_level(vm, &vm->root);
>> +    amdgpu_vm_invalidate_level(adev, vm, &vm->root, 0);
>>       return r;
>>   }
>>   @@ -2716,26 +2717,31 @@ int amdgpu_vm_init(struct amdgpu_device 
>> *adev, struct amdgpu_vm *vm,
>>   /**
>>    * amdgpu_vm_free_levels - free PD/PT levels
>>    *
>> - * @level: PD/PT starting level to free
>> + * @adev: amdgpu device structure
>> + * @parent: PD/PT starting level to free
>> + * @level: level of parent structure
>>    *
>>    * Free the page directory or page table level and all sub levels.
>>    */
>> -static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)
>> +static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
>> +                  struct amdgpu_vm_pt *parent,
>> +                  unsigned level)
>>   {
>> -    unsigned i;
>> +    unsigned i, num_entries = amdgpu_vm_num_entries(adev, level);
>>   -    if (level->base.bo) {
>> -        list_del(&level->base.bo_list);
>> -        list_del(&level->base.vm_status);
>> -        amdgpu_bo_unref(&level->base.bo->shadow);
>> -        amdgpu_bo_unref(&level->base.bo);
>> +    if (parent->base.bo) {
>> +        list_del(&parent->base.bo_list);
>> +        list_del(&parent->base.vm_status);
>> +        amdgpu_bo_unref(&parent->base.bo->shadow);
>> +        amdgpu_bo_unref(&parent->base.bo);
>>       }
>>   -    if (level->entries)
>> -        for (i = 0; i <= level->last_entry_used; i++)
>> -            amdgpu_vm_free_levels(&level->entries[i]);
>> +    if (parent->entries)
>> +        for (i = 0; i < num_entries; i++)
>> +            amdgpu_vm_free_levels(adev, &parent->entries[i],
>> +                          level + 1);
>>   -    kvfree(level->entries);
>> +    kvfree(parent->entries);
>>   }
>>     /**
>> @@ -2793,7 +2799,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, 
>> struct amdgpu_vm *vm)
>>       if (r) {
>>           dev_err(adev->dev, "Leaking page tables because BO 
>> reservation failed\n");
>>       } else {
>> -        amdgpu_vm_free_levels(&vm->root);
>> +        amdgpu_vm_free_levels(adev, &vm->root, 0);
>>           amdgpu_bo_unreserve(root);
>>       }
>>       amdgpu_bo_unref(&root);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 43ea131dd411..7a308a1ea048 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -141,7 +141,6 @@ struct amdgpu_vm_pt {
>>         /* array of page tables, one for each directory entry */
>>       struct amdgpu_vm_pt        *entries;
>> -    unsigned            last_entry_used;
>>   };
>>     #define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr))
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 8/8] drm/amdgpu: implement 2+1 PD support for Raven
       [not found]     ` <20171208164107.1567-8-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2017-12-11  6:49       ` Chunming Zhou
@ 2017-12-12  7:58       ` Chunming Zhou
       [not found]         ` <ebea9c30-b89e-898e-92a4-c8a2317b8b86-5C7GfCeVMHo@public.gmane.org>
  1 sibling, 1 reply; 21+ messages in thread
From: Chunming Zhou @ 2017-12-12  7:58 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



On 2017-12-09 00:41, Christian König wrote:
> Instead of falling back to 2 level and very limited address space use
> 2+1 PD support and 128TB + 512GB of virtual address space.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h   |  3 ++
>   drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 42 ++++++++++++++++++---------
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 26 ++++++++++++++---
>   drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  | 49 ++++++++++++++++++++------------
>   5 files changed, 86 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e5e0fbd43273..9517c0f76d27 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -541,6 +541,7 @@ struct amdgpu_mc {
>   	u64					private_aperture_end;
>   	/* protects concurrent invalidation */
>   	spinlock_t		invalidate_lock;
> +	bool			translate_further;
>   };
>   
>   /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 228f63e9ac5e..79134f0c26d9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -69,6 +69,9 @@ struct amdgpu_bo_list_entry;
>   /* PDE is handled as PTE for VEGA10 */
>   #define AMDGPU_PDE_PTE		(1ULL << 54)
>   
> +/* PTE is handled as PDE for VEGA10 */
> +#define AMDGPU_PTE_TRANSLATE_FURTHER	(1ULL << 56)
> +
>   /* VEGA10 only */
>   #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
>   #define AMDGPU_PTE_MTYPE_MASK	AMDGPU_PTE_MTYPE(3ULL)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
> index f1effadfbaa6..a56f77259130 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
> @@ -144,8 +144,15 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
>   	WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
>   
>   	tmp = mmVM_L2_CNTL3_DEFAULT;
> -	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
> -	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
> +	if (adev->mc.translate_further) {
> +		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
> +		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
> +				    L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
> +	} else {
> +		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
> +		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
> +				    L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
> +	}
>   	WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp);
>   
>   	tmp = mmVM_L2_CNTL4_DEFAULT;
> @@ -183,31 +190,40 @@ static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
>   
>   static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
>   {
> -	int i;
> +	unsigned num_level, block_size;
>   	uint32_t tmp;
> +	int i;
> +
> +	num_level = adev->vm_manager.num_level;
> +	block_size = adev->vm_manager.block_size;
> +	if (adev->mc.translate_further)
> +		num_level -= 1;
> +	else
> +		block_size -= 9;
>   
>   	for (i = 0; i <= 14; i++) {
>   		tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
> -				    adev->vm_manager.num_level);
> +				    num_level);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
> +				    1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				PAGE_TABLE_BLOCK_SIZE,
> -				adev->vm_manager.block_size - 9);
> +				    PAGE_TABLE_BLOCK_SIZE,
> +				    block_size);
>   		/* Send no-retry XNACK on fault to suppress VM fault storm. */
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>   				    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 0fe2a4e782ff..d6a19514c92b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -481,6 +481,21 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
>   		*addr = adev->vm_manager.vram_base_offset + *addr -
>   			adev->mc.vram_start;
>   	BUG_ON(*addr & 0xFFFF00000000003FULL);
> +
> +	if (!adev->mc.translate_further)
> +		return;
> +
> +	if (level == 0) {
it's better to check if (level == num_level - 1 - 1)
Or, as you posted in 'reverse PDBs order', we could use an enum here and
below for the level checks.
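
A sketch of the kind of enum meant here (the names are hypothetical,
following the reversed numbering from that patch):

	enum example_vm_level {
		EXAMPLE_VM_PTB  = 0,	/* leaf page table         */
		EXAMPLE_VM_PDB0 = 1,	/* directory over the PTBs */
		EXAMPLE_VM_PDB1 = 2,
		EXAMPLE_VM_PDB2 = 3,
	};
	/* the raw numbers in the level checks here and below could then be
	 * replaced by these names */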

> +		/* Set the block size */
> +		if (!(*flags & AMDGPU_PDE_PTE))
> +			*flags |= 9ULL << 59;
here the native page size is 9, so why is
VM_L2_CNTL3.L2_CACHE_BIGK_FRAGMENT_SIZE set to 6?

> +
> +	} else if (level == 1) {
it's better to check if (level == num_level - 1)

BTW: when we enable the TF bit, the shift and num_entries of the PTB differ
depending on the native page size. I didn't see that handled in your
patches, but since you select 2^9 * 2MB as the block size, that avoids many
troublesome cases.
I tested it with 2+1 and it works, but when I changed it a bit for 4+1, it
failed.
Anyway, it works for the RV2 case.
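
Spelling out the arithmetic behind "2^9 * 2MB as the block size"
(illustration only, not driver code):

	/* one block = 2^9 PTEs, each mapping a 2MB native page */
	#define EX_NATIVE_PAGE		(2ULL << 20)	/* 2MB per PTE */
	#define EX_PTES_PER_BLOCK	(1ULL << 9)	/* 512 entries */
	#define EX_BLOCK_SPAN		(EX_PTES_PER_BLOCK * EX_NATIVE_PAGE)
						/* = 1GB covered per PDE0 */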

Regards,
David Zhou

> +		if (*flags & AMDGPU_PDE_PTE)
> +			*flags &= ~AMDGPU_PDE_PTE;
> +		else
> +			*flags |= AMDGPU_PTE_TRANSLATE_FURTHER;
> +	}
>   }
>   
>   static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
> @@ -771,11 +786,14 @@ static int gmc_v9_0_sw_init(void *handle)
>   	switch (adev->asic_type) {
>   	case CHIP_RAVEN:
>   		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
> -		if (adev->rev_id == 0x0 || adev->rev_id == 0x1)
> +		if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
>   			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
> -		else
> -			/* vm_size is 64GB for legacy 2-level page support */
> -			amdgpu_vm_adjust_size(adev, 64, 9, 1, 48);
> +		} else {
> +			/* vm_size is 128TB + 512GB for legacy 3-level page support */
> +			amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
> +			adev->mc.translate_further =
> +				adev->vm_manager.num_level > 1;
> +		}
>   		break;
>   	case CHIP_VEGA10:
>   		/* XXX Don't know how to get VRAM type yet. */
> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> index bd160d8700e0..a88f43b097dc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> @@ -156,10 +156,15 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
>   	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
>   	WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
>   
> -	tmp = mmVM_L2_CNTL3_DEFAULT;
> -	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
> -	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
> -	WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp);
> +	if (adev->mc.translate_further) {
> +		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
> +		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
> +				    L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
> +	} else {
> +		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
> +		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
> +				    L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
> +	}
>   
>   	tmp = mmVM_L2_CNTL4_DEFAULT;
>   	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
> @@ -197,32 +202,40 @@ static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
>   
>   static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
>   {
> -	int i;
> +	unsigned num_level, block_size;
>   	uint32_t tmp;
> +	int i;
> +
> +	num_level = adev->vm_manager.num_level;
> +	block_size = adev->vm_manager.block_size;
> +	if (adev->mc.translate_further)
> +		num_level -= 1;
> +	else
> +		block_size -= 9;
>   
>   	for (i = 0; i <= 14; i++) {
>   		tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i);
> +		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
> +		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
> +				    num_level);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				ENABLE_CONTEXT, 1);
> -		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				PAGE_TABLE_DEPTH, adev->vm_manager.num_level);
> -		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
> +				    1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> +				    EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> -				PAGE_TABLE_BLOCK_SIZE,
> -				adev->vm_manager.block_size - 9);
> +				    PAGE_TABLE_BLOCK_SIZE,
> +				    block_size);
>   		/* Send no-retry XNACK on fault to suppress VM fault storm. */
>   		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>   				    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 4/8] drm/amdgpu: remove last_entry_used from the VM code
       [not found]             ` <8d22ff81-e27f-abe5-c3f3-3fbdf80334be-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-12-12  9:23               ` Christian König
       [not found]                 ` <aa19a12f-3d8b-74eb-75ae-e9c3cddf96b2-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 21+ messages in thread
From: Christian König @ 2017-12-12  9:23 UTC (permalink / raw)
  To: Chunming Zhou, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 11.12.2017 at 13:08, Christian König wrote:
> On 11.12.2017 at 06:52, Chunming Zhou wrote:
>>
>>
>> On 2017-12-09 00:41, Christian König wrote:
>>> Not needed any more.
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 52 
>>> +++++++++++++++++++---------------
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  1 -
>>>   2 files changed, 29 insertions(+), 24 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 400a00fababd..ae5451bf5873 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -329,9 +329,6 @@ static int amdgpu_vm_alloc_levels(struct 
>>> amdgpu_device *adev,
>>>           to >= amdgpu_vm_num_entries(adev, level))
>>>           return -EINVAL;
>>>   -    if (to > parent->last_entry_used)
>>> -        parent->last_entry_used = to;
>>> -
>>>       ++level;
>>>       saddr = saddr & ((1 << shift) - 1);
>>>       eaddr = eaddr & ((1 << shift) - 1);
>>> @@ -1187,16 +1184,19 @@ static int amdgpu_vm_update_pde(struct 
>>> amdgpu_device *adev,
>>>    *
>>>    * Mark all PD level as invalid after an error.
>>>    */
>>> -static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
>>> -                       struct amdgpu_vm_pt *parent)
>>> +static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
>>> +                       struct amdgpu_vm *vm,
>>> +                       struct amdgpu_vm_pt *parent,
>>> +                       unsigned level)
>> can we move level to struct amdgpu_vm_pt?
>
> I considered this as well, but then abandoned the approach and moved to
> passing it as a parameter again.
>
> The general problem is that amdgpu_vm_pt is already *WAY* too big; we
> use 60 bytes to manage 4K in the worst case.
>
> I'm working on getting this down to something sane again, but adding the
> level here just to save passing it as a parameter during destruction
> would make it worse.

Ping? Any more objections to this patch or can I commit it?

I wanted to commit everything up to patch #7, then add your work to
reverse the levels, and then put patch #8 on top.

Christian.

>
> Christian.
>
>> otherwise, it looks ok to me.
>>
>> Regards,
>> David Zhou
>>>   {
>>> -    unsigned pt_idx;
>>> +    unsigned pt_idx, num_entries;
>>>         /*
>>>        * Recurse into the subdirectories. This recursion is harmless 
>>> because
>>>        * we only have a maximum of 5 layers.
>>>        */
>>> -    for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
>>> +    num_entries = amdgpu_vm_num_entries(adev, level);
>>> +    for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) {
>>>           struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
>>>             if (!entry->base.bo)
>>> @@ -1207,7 +1207,7 @@ static void amdgpu_vm_invalidate_level(struct 
>>> amdgpu_vm *vm,
>>>           if (list_empty(&entry->base.vm_status))
>>>               list_add(&entry->base.vm_status, &vm->relocated);
>>>           spin_unlock(&vm->status_lock);
>>> -        amdgpu_vm_invalidate_level(vm, entry);
>>> +        amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
>>>       }
>>>   }
>>>   @@ -1249,7 +1249,8 @@ int amdgpu_vm_update_directories(struct 
>>> amdgpu_device *adev,
>>>                 r = amdgpu_vm_update_pde(adev, vm, pt, entry);
>>>               if (r) {
>>> -                amdgpu_vm_invalidate_level(vm, &vm->root);
>>> +                amdgpu_vm_invalidate_level(adev, vm,
>>> +                               &vm->root, 0);
>>>                   return r;
>>>               }
>>>               spin_lock(&vm->status_lock);
>>> @@ -1652,7 +1653,7 @@ static int amdgpu_vm_bo_update_mapping(struct 
>>> amdgpu_device *adev,
>>>     error_free:
>>>       amdgpu_job_free(job);
>>> -    amdgpu_vm_invalidate_level(vm, &vm->root);
>>> +    amdgpu_vm_invalidate_level(adev, vm, &vm->root, 0);
>>>       return r;
>>>   }
>>>   @@ -2716,26 +2717,31 @@ int amdgpu_vm_init(struct amdgpu_device 
>>> *adev, struct amdgpu_vm *vm,
>>>   /**
>>>    * amdgpu_vm_free_levels - free PD/PT levels
>>>    *
>>> - * @level: PD/PT starting level to free
>>> + * @adev: amdgpu device structure
>>> + * @parent: PD/PT starting level to free
>>> + * @level: level of parent structure
>>>    *
>>>    * Free the page directory or page table level and all sub levels.
>>>    */
>>> -static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)
>>> +static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
>>> +                  struct amdgpu_vm_pt *parent,
>>> +                  unsigned level)
>>>   {
>>> -    unsigned i;
>>> +    unsigned i, num_entries = amdgpu_vm_num_entries(adev, level);
>>>   -    if (level->base.bo) {
>>> -        list_del(&level->base.bo_list);
>>> -        list_del(&level->base.vm_status);
>>> -        amdgpu_bo_unref(&level->base.bo->shadow);
>>> -        amdgpu_bo_unref(&level->base.bo);
>>> +    if (parent->base.bo) {
>>> +        list_del(&parent->base.bo_list);
>>> +        list_del(&parent->base.vm_status);
>>> +        amdgpu_bo_unref(&parent->base.bo->shadow);
>>> +        amdgpu_bo_unref(&parent->base.bo);
>>>       }
>>>   -    if (level->entries)
>>> -        for (i = 0; i <= level->last_entry_used; i++)
>>> -            amdgpu_vm_free_levels(&level->entries[i]);
>>> +    if (parent->entries)
>>> +        for (i = 0; i < num_entries; i++)
>>> +            amdgpu_vm_free_levels(adev, &parent->entries[i],
>>> +                          level + 1);
>>>   -    kvfree(level->entries);
>>> +    kvfree(parent->entries);
>>>   }
>>>     /**
>>> @@ -2793,7 +2799,7 @@ void amdgpu_vm_fini(struct amdgpu_device 
>>> *adev, struct amdgpu_vm *vm)
>>>       if (r) {
>>>           dev_err(adev->dev, "Leaking page tables because BO 
>>> reservation failed\n");
>>>       } else {
>>> -        amdgpu_vm_free_levels(&vm->root);
>>> +        amdgpu_vm_free_levels(adev, &vm->root, 0);
>>>           amdgpu_bo_unreserve(root);
>>>       }
>>>       amdgpu_bo_unref(&root);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index 43ea131dd411..7a308a1ea048 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -141,7 +141,6 @@ struct amdgpu_vm_pt {
>>>         /* array of page tables, one for each directory entry */
>>>       struct amdgpu_vm_pt        *entries;
>>> -    unsigned            last_entry_used;
>>>   };
>>>     #define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | 
>>> (addr))
>>
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 8/8] drm/amdgpu: implement 2+1 PD support for Raven
       [not found]         ` <ebea9c30-b89e-898e-92a4-c8a2317b8b86-5C7GfCeVMHo@public.gmane.org>
@ 2017-12-12  9:28           ` Christian König
  0 siblings, 0 replies; 21+ messages in thread
From: Christian König @ 2017-12-12  9:28 UTC (permalink / raw)
  To: Chunming Zhou, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 12.12.2017 at 08:58, Chunming Zhou wrote:
>
>
> On 2017-12-09 00:41, Christian König wrote:
>> Instead of falling back to 2 level and very limited address space use
>> 2+1 PD support and 128TB + 512GB of virtual address space.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h   |  3 ++
>>   drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 42 
>> ++++++++++++++++++---------
>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 26 ++++++++++++++---
>>   drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  | 49 
>> ++++++++++++++++++++------------
>>   5 files changed, 86 insertions(+), 35 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index e5e0fbd43273..9517c0f76d27 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -541,6 +541,7 @@ struct amdgpu_mc {
>>       u64                    private_aperture_end;
>>       /* protects concurrent invalidation */
>>       spinlock_t        invalidate_lock;
>> +    bool            translate_further;
>>   };
>>     /*
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 228f63e9ac5e..79134f0c26d9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -69,6 +69,9 @@ struct amdgpu_bo_list_entry;
>>   /* PDE is handled as PTE for VEGA10 */
>>   #define AMDGPU_PDE_PTE        (1ULL << 54)
>>   +/* PTE is handled as PDE for VEGA10 */
>> +#define AMDGPU_PTE_TRANSLATE_FURTHER    (1ULL << 56)
>> +
>>   /* VEGA10 only */
>>   #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
>>   #define AMDGPU_PTE_MTYPE_MASK    AMDGPU_PTE_MTYPE(3ULL)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>> index f1effadfbaa6..a56f77259130 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>> @@ -144,8 +144,15 @@ static void gfxhub_v1_0_init_cache_regs(struct 
>> amdgpu_device *adev)
>>       WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
>>         tmp = mmVM_L2_CNTL3_DEFAULT;
>> -    tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
>> -    tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, 
>> L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
>> +    if (adev->mc.translate_further) {
>> +        tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
>> +        tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
>> +                    L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
>> +    } else {
>> +        tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
>> +        tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
>> +                    L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
>> +    }
>>       WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp);
>>         tmp = mmVM_L2_CNTL4_DEFAULT;
>> @@ -183,31 +190,40 @@ static void 
>> gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
>>     static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device 
>> *adev)
>>   {
>> -    int i;
>> +    unsigned num_level, block_size;
>>       uint32_t tmp;
>> +    int i;
>> +
>> +    num_level = adev->vm_manager.num_level;
>> +    block_size = adev->vm_manager.block_size;
>> +    if (adev->mc.translate_further)
>> +        num_level -= 1;
>> +    else
>> +        block_size -= 9;
>>         for (i = 0; i <= 14; i++) {
>>           tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
>> -                    adev->vm_manager.num_level);
>> +                    num_level);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
>> +                    1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                PAGE_TABLE_BLOCK_SIZE,
>> -                adev->vm_manager.block_size - 9);
>> +                    PAGE_TABLE_BLOCK_SIZE,
>> +                    block_size);
>>           /* Send no-retry XNACK on fault to suppress VM fault storm. */
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>                       RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index 0fe2a4e782ff..d6a19514c92b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -481,6 +481,21 @@ static void gmc_v9_0_get_vm_pde(struct 
>> amdgpu_device *adev, int level,
>>           *addr = adev->vm_manager.vram_base_offset + *addr -
>>               adev->mc.vram_start;
>>       BUG_ON(*addr & 0xFFFF00000000003FULL);
>> +
>> +    if (!adev->mc.translate_further)
>> +        return;
>> +
>> +    if (level == 0) {
> it's better to check if (level == num_level - 1 - 1)
> Or, as you posted in 'reverse PDBs order', we could use an enum here
> and below for the level checks.

That is a very good argument for your change, yes.

>
>> +        /* Set the block size */
>> +        if (!(*flags & AMDGPU_PDE_PTE))
>> +            *flags |= 9ULL << 59;
> here the native page size is 9, so why is
> VM_L2_CNTL3.L2_CACHE_BIGK_FRAGMENT_SIZE set to 6?

Take a look below. I change L2_CACHE_BIGK_FRAGMENT_SIZE to 9 when this 
is active.

But it reminds me that I wanted to add a define for this to amdgpu_vm.h.
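
Just to illustrate the idea, one possible shape for such a define (the name
is hypothetical):

	/* hypothetical name: a define for the PDE0 block-fragment-size field
	 * instead of the open-coded 9ULL << 59 */
	#define EXAMPLE_PDE_BFS(a)	((uint64_t)(a) << 59)

	/* usage in gmc_v9_0_get_vm_pde() would then read:
	 *	*flags |= EXAMPLE_PDE_BFS(9);	(2MB native page)
	 */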

>
>> +
>> +    } else if (level == 1) {
> it's better to check if (level == num_level - 1)
>
> BTW: when we enable the TF bit, the shift and num_entries of the PTB
> differ depending on the native page size. I didn't see that handled in
> your patches, but since you select 2^9 * 2MB as the block size, that
> avoids many troublesome cases.
> I tested it with 2+1 and it works, but when I changed it a bit for 4+1,
> it failed.
> Anyway, it works for the RV2 case.

Correct, yes, that is only implemented for the 2+1 case. With the cleanups
you suggested, 3+1 should work as well, but I don't see much use for it.

Any native page size other than 2MB also doesn't make too much sense,
because we want to reduce VRAM usage on APUs as much as possible and the
huge pages we get from the OS for system memory are also 2MB in size.

Regards,
Christian.

>
> Regards,
> David Zhou
>
>> +        if (*flags & AMDGPU_PDE_PTE)
>> +            *flags &= ~AMDGPU_PDE_PTE;
>> +        else
>> +            *flags |= AMDGPU_PTE_TRANSLATE_FURTHER;
>> +    }
>>   }
>>     static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
>> @@ -771,11 +786,14 @@ static int gmc_v9_0_sw_init(void *handle)
>>       switch (adev->asic_type) {
>>       case CHIP_RAVEN:
>>           adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
>> -        if (adev->rev_id == 0x0 || adev->rev_id == 0x1)
>> +        if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
>>               amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
>> -        else
>> -            /* vm_size is 64GB for legacy 2-level page support */
>> -            amdgpu_vm_adjust_size(adev, 64, 9, 1, 48);
>> +        } else {
>> +            /* vm_size is 128TB + 512GB for legacy 3-level page 
>> support */
>> +            amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
>> +            adev->mc.translate_further =
>> +                adev->vm_manager.num_level > 1;
>> +        }
>>           break;
>>       case CHIP_VEGA10:
>>           /* XXX Don't know how to get VRAM type yet. */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>> index bd160d8700e0..a88f43b097dc 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>> @@ -156,10 +156,15 @@ static void mmhub_v1_0_init_cache_regs(struct 
>> amdgpu_device *adev)
>>       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
>>       WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
>>   -    tmp = mmVM_L2_CNTL3_DEFAULT;
>> -    tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
>> -    tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, 
>> L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
>> -    WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp);
>> +    if (adev->mc.translate_further) {
>> +        tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
>> +        tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
>> +                    L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
>> +    } else {
>> +        tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
>> +        tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
>> +                    L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
>> +    }
>>         tmp = mmVM_L2_CNTL4_DEFAULT;
>>       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, 
>> VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
>> @@ -197,32 +202,40 @@ static void 
>> mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
>>     static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
>>   {
>> -    int i;
>> +    unsigned num_level, block_size;
>>       uint32_t tmp;
>> +    int i;
>> +
>> +    num_level = adev->vm_manager.num_level;
>> +    block_size = adev->vm_manager.block_size;
>> +    if (adev->mc.translate_further)
>> +        num_level -= 1;
>> +    else
>> +        block_size -= 9;
>>         for (i = 0; i <= 14; i++) {
>>           tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i);
>> +        tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
>> +        tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
>> +                    num_level);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                ENABLE_CONTEXT, 1);
>> -        tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                PAGE_TABLE_DEPTH, adev->vm_manager.num_level);
>> -        tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
>> +                    1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> +                    EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> -                PAGE_TABLE_BLOCK_SIZE,
>> -                adev->vm_manager.block_size - 9);
>> +                    PAGE_TABLE_BLOCK_SIZE,
>> +                    block_size);
>>           /* Send no-retry XNACK on fault to suppress VM fault storm. */
>>           tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>                       RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 4/8] drm/amdgpu: remove last_entry_used from the VM code
       [not found]                 ` <aa19a12f-3d8b-74eb-75ae-e9c3cddf96b2-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-12-12  9:32                   ` Chunming Zhou
  0 siblings, 0 replies; 21+ messages in thread
From: Chunming Zhou @ 2017-12-12  9:32 UTC (permalink / raw)
  To: christian.koenig-5C7GfCeVMHo, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



On 2017-12-12 17:23, Christian König wrote:
> On 2017-12-11 13:08, Christian König wrote:
>> On 2017-12-11 06:52, Chunming Zhou wrote:
>>>
>>>
>>> On 2017-12-09 00:41, Christian König wrote:
>>>> Not needed any more.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 52 +++++++++++++++++++---------------
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  1 -
>>>>   2 files changed, 29 insertions(+), 24 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index 400a00fababd..ae5451bf5873 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -329,9 +329,6 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>>>>           to >= amdgpu_vm_num_entries(adev, level))
>>>>           return -EINVAL;
>>>>   -    if (to > parent->last_entry_used)
>>>> -        parent->last_entry_used = to;
>>>> -
>>>>       ++level;
>>>>       saddr = saddr & ((1 << shift) - 1);
>>>>       eaddr = eaddr & ((1 << shift) - 1);
>>>> @@ -1187,16 +1184,19 @@ static int amdgpu_vm_update_pde(struct amdgpu_device *adev,
>>>>    *
>>>>    * Mark all PD level as invalid after an error.
>>>>    */
>>>> -static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
>>>> -                       struct amdgpu_vm_pt *parent)
>>>> +static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
>>>> +                       struct amdgpu_vm *vm,
>>>> +                       struct amdgpu_vm_pt *parent,
>>>> +                       unsigned level)
>>> can we move level to struct amdgpu_vm_pt?
>>
>> I considered this as well, but then abandoned the approach and moved 
>> to using it as parameter again.
>>
>> The general problem is that amdgpu_vm_pt is already *WAY* too big; we 
>> use 60 bytes to manage 4K in the worst case.
>>
>> Working on getting this down to something sane again, but adding the 
>> level here just to save passing it as parameter during the 
>> destruction would make it worse.
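
As a purely illustrative aside (toy names only, not the real amdgpu_vm_pt or
amdgpu_vm_free_levels): the trade-off described above is that the recursion
already knows the level for free, while storing it would add bytes to every
per-page-table entry. A minimal sketch of passing the level down instead of
keeping it per node:

#include <stdlib.h>

struct toy_vm_pt {
	struct toy_vm_pt *entries;	/* one per directory entry, or NULL */
	/* deliberately no "unsigned level" member: that would grow every
	 * bookkeeping entry just to help the destructor */
};

/* entries per directory at a given level; fixed toy values here */
static unsigned toy_num_entries(unsigned level)
{
	return level ? 8 : 4;
}

static void toy_free_levels(struct toy_vm_pt *parent, unsigned level)
{
	unsigned i, num = toy_num_entries(level);

	if (parent->entries) {
		for (i = 0; i < num; i++)
			toy_free_levels(&parent->entries[i], level + 1);
		free(parent->entries);
	}
}

int main(void)
{
	struct toy_vm_pt root = { .entries = NULL };

	toy_free_levels(&root, 0);	/* the caller supplies the level */
	return 0;
}
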
>
> Ping? Any more objections to this patch or can I commit it?
>
> Wanted to commit those up till patch #7, 
Feel free to add my RB on this one and patch #7.

Regards,
David Zhou
> then add your work to reverse the level and then put patch #8 on top.
>
> Christian.
>
>>
>> Christian.
>>
>>> otherwise, it looks ok to me.
>>>
>>> Regards,
>>> David Zhou
>>>>   {
>>>> -    unsigned pt_idx;
>>>> +    unsigned pt_idx, num_entries;
>>>>         /*
>>>>        * Recurse into the subdirectories. This recursion is harmless because
>>>>        * we only have a maximum of 5 layers.
>>>>        */
>>>> -    for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
>>>> +    num_entries = amdgpu_vm_num_entries(adev, level);
>>>> +    for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) {
>>>>           struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
>>>>             if (!entry->base.bo)
>>>> @@ -1207,7 +1207,7 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
>>>>           if (list_empty(&entry->base.vm_status))
>>>>               list_add(&entry->base.vm_status, &vm->relocated);
>>>>           spin_unlock(&vm->status_lock);
>>>> -        amdgpu_vm_invalidate_level(vm, entry);
>>>> +        amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
>>>>       }
>>>>   }
>>>>   @@ -1249,7 +1249,8 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>>>>                 r = amdgpu_vm_update_pde(adev, vm, pt, entry);
>>>>               if (r) {
>>>> -                amdgpu_vm_invalidate_level(vm, &vm->root);
>>>> +                amdgpu_vm_invalidate_level(adev, vm,
>>>> +                               &vm->root, 0);
>>>>                   return r;
>>>>               }
>>>>               spin_lock(&vm->status_lock);
>>>> @@ -1652,7 +1653,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>>>     error_free:
>>>>       amdgpu_job_free(job);
>>>> -    amdgpu_vm_invalidate_level(vm, &vm->root);
>>>> +    amdgpu_vm_invalidate_level(adev, vm, &vm->root, 0);
>>>>       return r;
>>>>   }
>>>>   @@ -2716,26 +2717,31 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>   /**
>>>>    * amdgpu_vm_free_levels - free PD/PT levels
>>>>    *
>>>> - * @level: PD/PT starting level to free
>>>> + * @adev: amdgpu device structure
>>>> + * @parent: PD/PT starting level to free
>>>> + * @level: level of parent structure
>>>>    *
>>>>    * Free the page directory or page table level and all sub levels.
>>>>    */
>>>> -static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)
>>>> +static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
>>>> +                  struct amdgpu_vm_pt *parent,
>>>> +                  unsigned level)
>>>>   {
>>>> -    unsigned i;
>>>> +    unsigned i, num_entries = amdgpu_vm_num_entries(adev, level);
>>>>   -    if (level->base.bo) {
>>>> -        list_del(&level->base.bo_list);
>>>> -        list_del(&level->base.vm_status);
>>>> -        amdgpu_bo_unref(&level->base.bo->shadow);
>>>> -        amdgpu_bo_unref(&level->base.bo);
>>>> +    if (parent->base.bo) {
>>>> +        list_del(&parent->base.bo_list);
>>>> +        list_del(&parent->base.vm_status);
>>>> +        amdgpu_bo_unref(&parent->base.bo->shadow);
>>>> +        amdgpu_bo_unref(&parent->base.bo);
>>>>       }
>>>>   -    if (level->entries)
>>>> -        for (i = 0; i <= level->last_entry_used; i++)
>>>> -            amdgpu_vm_free_levels(&level->entries[i]);
>>>> +    if (parent->entries)
>>>> +        for (i = 0; i < num_entries; i++)
>>>> +            amdgpu_vm_free_levels(adev, &parent->entries[i],
>>>> +                          level + 1);
>>>>   -    kvfree(level->entries);
>>>> +    kvfree(parent->entries);
>>>>   }
>>>>     /**
>>>> @@ -2793,7 +2799,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>>>>       if (r) {
>>>>           dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
>>>>       } else {
>>>> -        amdgpu_vm_free_levels(&vm->root);
>>>> +        amdgpu_vm_free_levels(adev, &vm->root, 0);
>>>>           amdgpu_bo_unreserve(root);
>>>>       }
>>>>       amdgpu_bo_unref(&root);
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> index 43ea131dd411..7a308a1ea048 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> @@ -141,7 +141,6 @@ struct amdgpu_vm_pt {
>>>>         /* array of page tables, one for each directory entry */
>>>>       struct amdgpu_vm_pt        *entries;
>>>> -    unsigned            last_entry_used;
>>>>   };
>>>>     #define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr))
>>>
>>
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2017-12-12  9:32 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-12-08 16:41 [PATCH 1/8] drm/amdgpu: stop joining PDEs Christian König
     [not found] ` <20171208164107.1567-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2017-12-08 16:41   ` [PATCH 2/8] drm/amdgpu: update one PDE at a time Christian König
     [not found]     ` <20171208164107.1567-2-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2017-12-11  5:37       ` Chunming Zhou
2017-12-08 16:41   ` [PATCH 3/8] drm/amdgpu: avoid the modulo in amdgpu_vm_get_entry Christian König
     [not found]     ` <20171208164107.1567-3-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2017-12-11  5:40       ` Chunming Zhou
2017-12-08 16:41   ` [PATCH 4/8] drm/amdgpu: remove last_entry_used from the VM code Christian König
     [not found]     ` <20171208164107.1567-4-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2017-12-11  5:52       ` Chunming Zhou
     [not found]         ` <a671871b-5bad-3d48-b76b-5db4d49c9624-5C7GfCeVMHo@public.gmane.org>
2017-12-11 12:08           ` Christian König
     [not found]             ` <8d22ff81-e27f-abe5-c3f3-3fbdf80334be-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-12-12  9:23               ` Christian König
     [not found]                 ` <aa19a12f-3d8b-74eb-75ae-e9c3cddf96b2-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-12-12  9:32                   ` Chunming Zhou
2017-12-08 16:41   ` [PATCH 5/8] drm/amdgpu: remove keeping the addr of the VM PDs Christian König
     [not found]     ` <20171208164107.1567-5-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2017-12-11  5:59       ` Chunming Zhou
2017-12-08 16:41   ` [PATCH 6/8] drm/amdgpu: batch PDE updates again Christian König
     [not found]     ` <20171208164107.1567-6-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2017-12-11  6:13       ` Chunming Zhou
2017-12-08 16:41   ` [PATCH 7/8] drm/amdgpu: allow get_vm_pde to change flags as well Christian König
2017-12-08 16:41   ` [PATCH 8/8] drm/amdgpu: implement 2+1 PD support for Raven Christian König
     [not found]     ` <20171208164107.1567-8-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2017-12-11  6:49       ` Chunming Zhou
     [not found]         ` <7c8d12b8-7d2e-f5be-b8e7-295245c29a92-5C7GfCeVMHo@public.gmane.org>
2017-12-11 10:14           ` Christian König
2017-12-12  7:58       ` Chunming Zhou
     [not found]         ` <ebea9c30-b89e-898e-92a4-c8a2317b8b86-5C7GfCeVMHo@public.gmane.org>
2017-12-12  9:28           ` Christian König
2017-12-11  5:15   ` [PATCH 1/8] drm/amdgpu: stop joining PDEs Chunming Zhou
