All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/7] drm/amdgpu: cleanup adjust_mc_addr handling v2
@ 2017-05-17  9:22 Christian König
       [not found] ` <1495012972-20698-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Christian König @ 2017-05-17  9:22 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

Rename adjust_mc_addr to get_vm_pde and check the address bits in one place.

v2: handle vcn as well, keep setting the valid bit manually,
    add a BUG_ON() for GMC v6, v7 and v8 as well.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 26 +++++++++-----------------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  |  6 ++----
 drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c  |  7 +++++++
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c  |  9 ++++++++-
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c  |  9 ++++++++-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 10 ++++++----
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  6 ++----
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c  | 12 ++++--------
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c  |  6 ++----
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c  |  5 ++---
 11 files changed, 53 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ae7e775..4acbca7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -309,8 +309,8 @@ struct amdgpu_gart_funcs {
 	/* set pte flags based per asic */
 	uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
 				     uint32_t flags);
-	/* adjust mc addr in fb for APU case */
-	u64 (*adjust_mc_addr)(struct amdgpu_device *adev, u64 addr);
+	/* get the pde for a given mc addr */
+	u64 (*get_vm_pde)(struct amdgpu_device *adev, u64 addr);
 	uint32_t (*get_invalidate_req)(unsigned int vm_id);
 };
 
@@ -1820,6 +1820,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
 #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
 #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
+#define amdgpu_gart_get_vm_pde(adev, addr) (adev)->gart.gart_funcs->get_vm_pde((adev), (addr))
 #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
 #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 88420dc..344f943 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -682,16 +682,6 @@ static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring)
 	return false;
 }
 
-static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr)
-{
-	u64 addr = mc_addr;
-
-	if (adev->gart.gart_funcs->adjust_mc_addr)
-		addr = adev->gart.gart_funcs->adjust_mc_addr(adev, addr);
-
-	return addr;
-}
-
 bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
 				  struct amdgpu_job *job)
 {
@@ -1034,18 +1024,18 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 		    (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
 
 			if (count) {
-				uint64_t pt_addr =
-					amdgpu_vm_adjust_mc_addr(adev, last_pt);
+				uint64_t entry;
 
+				entry = amdgpu_gart_get_vm_pde(adev, last_pt);
 				if (shadow)
 					amdgpu_vm_do_set_ptes(&params,
 							      last_shadow,
-							      pt_addr, count,
+							      entry, count,
 							      incr,
 							      AMDGPU_PTE_VALID);
 
 				amdgpu_vm_do_set_ptes(&params, last_pde,
-						      pt_addr, count, incr,
+						      entry, count, incr,
 						      AMDGPU_PTE_VALID);
 			}
 
@@ -1059,13 +1049,15 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 	}
 
 	if (count) {
-		uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt);
+		uint64_t entry;
+
+		entry = amdgpu_gart_get_vm_pde(adev, last_pt);
 
 		if (vm->root.bo->shadow)
-			amdgpu_vm_do_set_ptes(&params, last_shadow, pt_addr,
+			amdgpu_vm_do_set_ptes(&params, last_shadow, entry,
 					      count, incr, AMDGPU_PTE_VALID);
 
-		amdgpu_vm_do_set_ptes(&params, last_pde, pt_addr,
+		amdgpu_vm_do_set_ptes(&params, last_pde, entry,
 				      count, incr, AMDGPU_PTE_VALID);
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 6dc75d2..e08a232 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3759,10 +3759,8 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev, pd_addr);
-	pd_addr = pd_addr | 0x1; /* valid bit */
-	/* now only use physical base address of PDE and valid */
-	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
+	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
+	pd_addr |= AMDGPU_PTE_VALID;
 
 	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
 				   hub->ctx0_ptb_addr_lo32 + (2 * vm_id),
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 1e6263a..d5f3d873 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -395,6 +395,12 @@ static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev,
 	return pte_flag;
 }
 
+static uint64_t gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
+{
+	BUG_ON(addr & 0xFFFFF0000000FFFULL);
+	return addr;
+}
+
 static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev,
 					      bool value)
 {
@@ -1127,6 +1133,7 @@ static const struct amdgpu_gart_funcs gmc_v6_0_gart_funcs = {
 	.flush_gpu_tlb = gmc_v6_0_gart_flush_gpu_tlb,
 	.set_pte_pde = gmc_v6_0_gart_set_pte_pde,
 	.set_prt = gmc_v6_0_set_prt,
+	.get_vm_pde = gmc_v6_0_get_vm_pde,
 	.get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 967505b..4f140e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -472,6 +472,12 @@ static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev,
 	return pte_flag;
 }
 
+static uint64_t gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
+{
+	BUG_ON(addr & 0xFFFFF0000000FFFULL);
+	return addr;
+}
+
 /**
  * gmc_v8_0_set_fault_enable_default - update VM fault handling
  *
@@ -1293,7 +1299,8 @@ static const struct amdgpu_gart_funcs gmc_v7_0_gart_funcs = {
 	.flush_gpu_tlb = gmc_v7_0_gart_flush_gpu_tlb,
 	.set_pte_pde = gmc_v7_0_gart_set_pte_pde,
 	.set_prt = gmc_v7_0_set_prt,
-	.get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags
+	.get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags,
+	.get_vm_pde = gmc_v7_0_get_vm_pde
 };
 
 static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 3b5ea0f..f05c034 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -656,6 +656,12 @@ static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev,
 	return pte_flag;
 }
 
+static uint64_t gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
+{
+	BUG_ON(addr & 0xFFF0000000000FFFULL);
+	return addr;
+}
+
 /**
  * gmc_v8_0_set_fault_enable_default - update VM fault handling
  *
@@ -1612,7 +1618,8 @@ static const struct amdgpu_gart_funcs gmc_v8_0_gart_funcs = {
 	.flush_gpu_tlb = gmc_v8_0_gart_flush_gpu_tlb,
 	.set_pte_pde = gmc_v8_0_gart_set_pte_pde,
 	.set_prt = gmc_v8_0_set_prt,
-	.get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags
+	.get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags,
+	.get_vm_pde = gmc_v8_0_get_vm_pde
 };
 
 static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 19e1027..047b1a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -358,17 +358,19 @@ static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev,
 	return pte_flag;
 }
 
-static u64 gmc_v9_0_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr)
+static u64 gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, u64 addr)
 {
-	return adev->vm_manager.vram_base_offset + mc_addr - adev->mc.vram_start;
+	addr = adev->vm_manager.vram_base_offset + addr - adev->mc.vram_start;
+	BUG_ON(addr & 0xFFFF00000000003FULL);
+	return addr;
 }
 
 static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
 	.flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb,
 	.set_pte_pde = gmc_v9_0_gart_set_pte_pde,
-	.get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
-	.adjust_mc_addr = gmc_v9_0_adjust_mc_addr,
 	.get_invalidate_req = gmc_v9_0_get_invalidate_req,
+	.get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
+	.get_vm_pde = gmc_v9_0_get_vm_pde
 };
 
 static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 91cf7e6..fb6f4b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1143,10 +1143,8 @@ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev, pd_addr);
-	pd_addr = pd_addr | 0x1; /* valid bit */
-	/* now only use physical base address of PDE and valid */
-	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
+	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
+	pd_addr |= AMDGPU_PTE_VALID;
 
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
 			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 22f42f3..1997ec8 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -1316,10 +1316,8 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	uint32_t data0, data1, mask;
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev, pd_addr);
-	pd_addr = pd_addr | 0x1; /* valid bit */
-	/* now only use physical base address of PDE and valid */
-	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
+	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
+	pd_addr |= AMDGPU_PTE_VALID;
 
 	data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
 	data1 = upper_32_bits(pd_addr);
@@ -1358,10 +1356,8 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev, pd_addr);
-	pd_addr = pd_addr | 0x1; /* valid bit */
-	/* now only use physical base address of PDE and valid */
-	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
+	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
+	pd_addr |= AMDGPU_PTE_VALID;
 
 	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
 	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 07b2ac7..8dbd0b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -926,10 +926,8 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev, pd_addr);
-	pd_addr = pd_addr | 0x1; /* valid bit */
-	/* now only use physical base address of PDE and valid */
-	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
+	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
+	pd_addr |= AMDGPU_PTE_VALID;
 
 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
 	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index ad1862f..b2d995b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -882,9 +882,8 @@ static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	uint32_t data0, data1, mask;
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = pd_addr | 0x1; /* valid bit */
-	/* now only use physical base address of PDE and valid */
-	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
+	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
+	pd_addr |= AMDGPU_PTE_VALID;
 
 	data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
 	data1 = upper_32_bits(pd_addr);
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/7] drm/amdgpu: add some extra VM error handling
       [not found] ` <1495012972-20698-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-05-17  9:22   ` Christian König
  2017-05-17  9:22   ` [PATCH 3/7] drm/amdgpu: fix another fundamental VM bug Christian König
                     ` (5 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Christian König @ 2017-05-17  9:22 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

If updating the PDs fails we now invalidate all entries to try again later.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 344f943..b877f9f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1105,6 +1105,32 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 }
 
 /*
+ * amdgpu_vm_invalidate_level - mark all PD levels as invalid
+ *
+ * @parent: parent PD
+ *
+ * Mark all PD level as invalid after an error.
+ */
+static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
+{
+	unsigned pt_idx;
+
+	/*
+	 * Recurse into the subdirectories. This recursion is harmless because
+	 * we only have a maximum of 5 layers.
+	 */
+	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
+		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
+
+		if (!entry->bo)
+			continue;
+
+		entry->addr = ~0ULL;
+		amdgpu_vm_invalidate_level(entry);
+	}
+}
+
+/*
  * amdgpu_vm_update_directories - make sure that all directories are valid
  *
  * @adev: amdgpu_device pointer
@@ -1116,7 +1142,13 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 				 struct amdgpu_vm *vm)
 {
-	return amdgpu_vm_update_level(adev, vm, &vm->root, 0);
+	int r;
+
+	r = amdgpu_vm_update_level(adev, vm, &vm->root, 0);
+	if (r)
+		amdgpu_vm_invalidate_level(&vm->root);
+
+	return r;
 }
 
 /**
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 3/7] drm/amdgpu: fix another fundamental VM bug
       [not found] ` <1495012972-20698-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-05-17  9:22   ` [PATCH 2/7] drm/amdgpu: add some extra VM error handling Christian König
@ 2017-05-17  9:22   ` Christian König
  2017-05-17  9:22   ` [PATCH 4/7] drm/amdgpu: Return EINVAL if no PT BO Christian König
                     ` (4 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Christian König @ 2017-05-17  9:22 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

I always wondered why this code uses the MC address. Now it came to me that
this is actually a bug and only works by coincident because we used to have
VRAM mapped at zero.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index db1542c..967786f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -884,7 +884,7 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 	}
 
 	if (p->job->vm) {
-		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.bo);
+		p->job->vm_pd_addr = vm->root.bo->tbo.mem.start << PAGE_SHIFT;
 
 		r = amdgpu_bo_vm_update_pte(p);
 		if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b877f9f3..7256fcc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1012,7 +1012,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 				return r;
 		}
 
-		pt = amdgpu_bo_gpu_offset(bo);
+		pt = bo->tbo.mem.start << PAGE_SHIFT;
 		if (parent->entries[pt_idx].addr == pt)
 			continue;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 047b1a7..63cb573 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -360,7 +360,7 @@ static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev,
 
 static u64 gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, u64 addr)
 {
-	addr = adev->vm_manager.vram_base_offset + addr - adev->mc.vram_start;
+	addr = adev->vm_manager.vram_base_offset + addr;
 	BUG_ON(addr & 0xFFFF00000000003FULL);
 	return addr;
 }
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 4/7] drm/amdgpu: Return EINVAL if no PT BO
       [not found] ` <1495012972-20698-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-05-17  9:22   ` [PATCH 2/7] drm/amdgpu: add some extra VM error handling Christian König
  2017-05-17  9:22   ` [PATCH 3/7] drm/amdgpu: fix another fundamental VM bug Christian König
@ 2017-05-17  9:22   ` Christian König
  2017-05-17  9:22   ` [PATCH 5/7] drm/amdgpu: cache the complete pde Christian König
                     ` (3 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Christian König @ 2017-05-17  9:22 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>

This change is also useful for the upcoming changes where page tables
can be updated by CPU.

Change-Id: I07510ed60c94cf1944ee96bb4b16c40ec88ea17c
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 48 +++++++++++++++++++++-------------
 1 file changed, 30 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 7256fcc..8676a75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1188,8 +1188,9 @@ static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
  * @flags: mapping flags
  *
  * Update the page tables in the range @start - @end.
+ * Returns 0 for success, -EINVAL for failure.
  */
-static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
+static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 				  uint64_t start, uint64_t end,
 				  uint64_t dst, uint64_t flags)
 {
@@ -1207,12 +1208,12 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 	pt = amdgpu_vm_get_pt(params, addr);
 	if (!pt) {
 		pr_err("PT not found, aborting update_ptes\n");
-		return;
+		return -EINVAL;
 	}
 
 	if (params->shadow) {
 		if (!pt->shadow)
-			return;
+			return 0;
 		pt = pt->shadow;
 	}
 	if ((addr & ~mask) == (end & ~mask))
@@ -1234,12 +1235,12 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		pt = amdgpu_vm_get_pt(params, addr);
 		if (!pt) {
 			pr_err("PT not found, aborting update_ptes\n");
-			return;
+			return -EINVAL;
 		}
 
 		if (params->shadow) {
 			if (!pt->shadow)
-				return;
+				return 0;
 			pt = pt->shadow;
 		}
 
@@ -1274,6 +1275,8 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 
 	params->func(params, cur_pe_start, cur_dst, cur_nptes,
 		     AMDGPU_GPU_PAGE_SIZE, flags);
+
+	return 0;
 }
 
 /*
@@ -1285,11 +1288,14 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
  * @end: last PTE to handle
  * @dst: addr those PTEs should point to
  * @flags: hw mapping flags
+ * Returns 0 for success, -EINVAL for failure.
  */
-static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
+static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 				uint64_t start, uint64_t end,
 				uint64_t dst, uint64_t flags)
 {
+	int r;
+
 	/**
 	 * The MC L1 TLB supports variable sized pages, based on a fragment
 	 * field in the PTE. When this field is set to a non-zero value, page
@@ -1318,28 +1324,30 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 
 	/* system pages are non continuously */
 	if (params->src || !(flags & AMDGPU_PTE_VALID) ||
-	    (frag_start >= frag_end)) {
-
-		amdgpu_vm_update_ptes(params, start, end, dst, flags);
-		return;
-	}
+	    (frag_start >= frag_end))
+		return amdgpu_vm_update_ptes(params, start, end, dst, flags);
 
 	/* handle the 4K area at the beginning */
 	if (start != frag_start) {
-		amdgpu_vm_update_ptes(params, start, frag_start,
-				      dst, flags);
+		r = amdgpu_vm_update_ptes(params, start, frag_start,
+					  dst, flags);
+		if (r)
+			return r;
 		dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
 	}
 
 	/* handle the area in the middle */
-	amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
-			      flags | frag_flags);
+	r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
+				  flags | frag_flags);
+	if (r)
+		return r;
 
 	/* handle the 4K area at the end */
 	if (frag_end != end) {
 		dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
-		amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
+		r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
 	}
+	return r;
 }
 
 /**
@@ -1460,9 +1468,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		goto error_free;
 
 	params.shadow = true;
-	amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
+	r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
+	if (r)
+		goto error_free;
 	params.shadow = false;
-	amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
+	r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
+	if (r)
+		goto error_free;
 
 	amdgpu_ring_pad_ib(ring, params.ib);
 	WARN_ON(params.ib->length_dw > ndw);
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 5/7] drm/amdgpu: cache the complete pde
       [not found] ` <1495012972-20698-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
                     ` (2 preceding siblings ...)
  2017-05-17  9:22   ` [PATCH 4/7] drm/amdgpu: Return EINVAL if no PT BO Christian König
@ 2017-05-17  9:22   ` Christian König
  2017-05-17  9:22   ` [PATCH 6/7] drm/amdgpu: stop joining VM PTE updates Christian König
                     ` (2 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Christian König @ 2017-05-17  9:22 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

Makes it easier to update the PDE with huge pages.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 8676a75..6a926f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1013,6 +1013,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 		}
 
 		pt = bo->tbo.mem.start << PAGE_SHIFT;
+		pt = amdgpu_gart_get_vm_pde(adev, pt);
 		if (parent->entries[pt_idx].addr == pt)
 			continue;
 
@@ -1024,18 +1025,15 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 		    (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
 
 			if (count) {
-				uint64_t entry;
-
-				entry = amdgpu_gart_get_vm_pde(adev, last_pt);
 				if (shadow)
 					amdgpu_vm_do_set_ptes(&params,
 							      last_shadow,
-							      entry, count,
+							      last_pt, count,
 							      incr,
 							      AMDGPU_PTE_VALID);
 
 				amdgpu_vm_do_set_ptes(&params, last_pde,
-						      entry, count, incr,
+						      last_pt, count, incr,
 						      AMDGPU_PTE_VALID);
 			}
 
@@ -1049,15 +1047,11 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 	}
 
 	if (count) {
-		uint64_t entry;
-
-		entry = amdgpu_gart_get_vm_pde(adev, last_pt);
-
 		if (vm->root.bo->shadow)
-			amdgpu_vm_do_set_ptes(&params, last_shadow, entry,
+			amdgpu_vm_do_set_ptes(&params, last_shadow, last_pt,
 					      count, incr, AMDGPU_PTE_VALID);
 
-		amdgpu_vm_do_set_ptes(&params, last_pde, entry,
+		amdgpu_vm_do_set_ptes(&params, last_pde, last_pt,
 				      count, incr, AMDGPU_PTE_VALID);
 	}
 
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 6/7] drm/amdgpu: stop joining VM PTE updates
       [not found] ` <1495012972-20698-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
                     ` (3 preceding siblings ...)
  2017-05-17  9:22   ` [PATCH 5/7] drm/amdgpu: cache the complete pde Christian König
@ 2017-05-17  9:22   ` Christian König
       [not found]     ` <1495012972-20698-6-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-05-17  9:22   ` [PATCH 7/7] drm/amdgpu: enable huge page handling in the VM Christian König
  2017-05-18  5:33   ` [PATCH 1/7] drm/amdgpu: cleanup adjust_mc_addr handling v2 Zhang, Jerry (Junwei)
  6 siblings, 1 reply; 15+ messages in thread
From: Christian König @ 2017-05-17  9:22 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

This isn't beneficial any more since VRAM allocations are now split
so that they fits into a single page table.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 61 ++++------------------------------
 1 file changed, 7 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6a926f4..860a669 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1191,41 +1191,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 	struct amdgpu_device *adev = params->adev;
 	const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
 
-	uint64_t cur_pe_start, cur_nptes, cur_dst;
-	uint64_t addr; /* next GPU address to be updated */
+	uint64_t addr, pe_start;
 	struct amdgpu_bo *pt;
-	unsigned nptes; /* next number of ptes to be updated */
-	uint64_t next_pe_start;
-
-	/* initialize the variables */
-	addr = start;
-	pt = amdgpu_vm_get_pt(params, addr);
-	if (!pt) {
-		pr_err("PT not found, aborting update_ptes\n");
-		return -EINVAL;
-	}
-
-	if (params->shadow) {
-		if (!pt->shadow)
-			return 0;
-		pt = pt->shadow;
-	}
-	if ((addr & ~mask) == (end & ~mask))
-		nptes = end - addr;
-	else
-		nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
-
-	cur_pe_start = amdgpu_bo_gpu_offset(pt);
-	cur_pe_start += (addr & mask) * 8;
-	cur_nptes = nptes;
-	cur_dst = dst;
-
-	/* for next ptb*/
-	addr += nptes;
-	dst += nptes * AMDGPU_GPU_PAGE_SIZE;
+	unsigned nptes;
 
 	/* walk over the address space and update the page tables */
-	while (addr < end) {
+	for (addr = start; addr < end; addr += nptes) {
 		pt = amdgpu_vm_get_pt(params, addr);
 		if (!pt) {
 			pr_err("PT not found, aborting update_ptes\n");
@@ -1243,33 +1214,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		else
 			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
 
-		next_pe_start = amdgpu_bo_gpu_offset(pt);
-		next_pe_start += (addr & mask) * 8;
-
-		if ((cur_pe_start + 8 * cur_nptes) == next_pe_start &&
-		    ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) {
-			/* The next ptb is consecutive to current ptb.
-			 * Don't call the update function now.
-			 * Will update two ptbs together in future.
-			*/
-			cur_nptes += nptes;
-		} else {
-			params->func(params, cur_pe_start, cur_dst, cur_nptes,
-				     AMDGPU_GPU_PAGE_SIZE, flags);
+		pe_start = amdgpu_bo_gpu_offset(pt);
+		pe_start += (addr & mask) * 8;
 
-			cur_pe_start = next_pe_start;
-			cur_nptes = nptes;
-			cur_dst = dst;
-		}
+		params->func(params, pe_start, dst, nptes,
+			     AMDGPU_GPU_PAGE_SIZE, flags);
 
-		/* for next ptb*/
-		addr += nptes;
 		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 	}
 
-	params->func(params, cur_pe_start, cur_dst, cur_nptes,
-		     AMDGPU_GPU_PAGE_SIZE, flags);
-
 	return 0;
 }
 
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 7/7] drm/amdgpu: enable huge page handling in the VM
       [not found] ` <1495012972-20698-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
                     ` (4 preceding siblings ...)
  2017-05-17  9:22   ` [PATCH 6/7] drm/amdgpu: stop joining VM PTE updates Christian König
@ 2017-05-17  9:22   ` Christian König
       [not found]     ` <1495012972-20698-7-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-05-18  5:33   ` [PATCH 1/7] drm/amdgpu: cleanup adjust_mc_addr handling v2 Zhang, Jerry (Junwei)
  6 siblings, 1 reply; 15+ messages in thread
From: Christian König @ 2017-05-17  9:22 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

The hardware can use huge pages to map 2MB of address space with only one PDE.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 96 +++++++++++++++++++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  4 ++
 2 files changed, 76 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 860a669..8be1d7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -325,6 +325,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 
 			entry->bo = pt;
 			entry->addr = 0;
+			entry->huge_page = false;
 		}
 
 		if (level < adev->vm_manager.num_level) {
@@ -1014,7 +1015,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 
 		pt = bo->tbo.mem.start << PAGE_SHIFT;
 		pt = amdgpu_gart_get_vm_pde(adev, pt);
-		if (parent->entries[pt_idx].addr == pt)
+		if (parent->entries[pt_idx].addr == pt ||
+		    parent->entries[pt_idx].huge_page)
 			continue;
 
 		parent->entries[pt_idx].addr = pt;
@@ -1146,29 +1148,70 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 }
 
 /**
- * amdgpu_vm_find_pt - find the page table for an address
+ * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
  *
  * @p: see amdgpu_pte_update_params definition
  * @addr: virtual address in question
+ * @nptes: number of PTEs updated with this operation
+ * @dst: destination address where the PTEs should point to
+ * @flags: access flags fro the PTEs
+ * @bo: resulting page tables BO
  *
- * Find the page table BO for a virtual address, return NULL when none found.
+ * Check if we can update the PD with a huge page. Also finds the page table
+ * BO for a virtual address, returns -ENOENT when nothing found.
  */
-static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
-					  uint64_t addr)
+static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
+				       uint64_t addr, unsigned nptes,
+				       uint64_t dst, uint64_t flags,
+				       struct amdgpu_bo **bo)
 {
-	struct amdgpu_vm_pt *entry = &p->vm->root;
-	unsigned idx, level = p->adev->vm_manager.num_level;
+	unsigned pt_idx, level = p->adev->vm_manager.num_level;
+	struct amdgpu_vm_pt *entry = &p->vm->root, *parent;
+	uint64_t pd_addr, pde, pt;
 
-	while (entry->entries) {
-		idx = addr >> (p->adev->vm_manager.block_size * level--);
-		idx %= amdgpu_bo_size(entry->bo) / 8;
-		entry = &entry->entries[idx];
-	}
+	do {
+		pt_idx = addr >> (p->adev->vm_manager.block_size * level--);
+		pt_idx %= amdgpu_bo_size(entry->bo) / 8;
+		parent = entry;
+		entry = &entry->entries[pt_idx];
+	} while (entry->entries);
 
 	if (level)
-		return NULL;
+		return -ENOENT;
+
+	*bo = entry->bo;
+
+	/* In the case of a mixed PT the PDE must point to it*/
+	if (p->adev->asic_type < CHIP_VEGA10 ||
+	    nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
+	    p->func != amdgpu_vm_do_set_ptes ||
+	    !(flags & AMDGPU_PTE_VALID)) {
+
+		pt = (*bo)->tbo.mem.start << PAGE_SHIFT;
+		pt = amdgpu_gart_get_vm_pde(p->adev, pt);
+		flags = AMDGPU_PTE_VALID;
+	} else {
+		pt = amdgpu_gart_get_vm_pde(p->adev, dst);
+		flags |= AMDGPU_PDE_PTE;
+	}
 
-	return entry->bo;
+	if (entry->addr == pt &&
+	    entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
+		return 0;
+
+	entry->addr = pt;
+	entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
+
+	if (parent->bo->shadow) {
+		pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
+		pde = pd_addr + pt_idx * 8;
+		amdgpu_vm_do_set_ptes(p, pde, pt, 1, 0, flags);
+	}
+
+	pd_addr = amdgpu_bo_gpu_offset(parent->bo);
+	pde = pd_addr + pt_idx * 8;
+	amdgpu_vm_do_set_ptes(p, pde, pt, 1, 0, flags);
+	return 0;
 }
 
 /**
@@ -1194,14 +1237,20 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 	uint64_t addr, pe_start;
 	struct amdgpu_bo *pt;
 	unsigned nptes;
+	int r;
 
 	/* walk over the address space and update the page tables */
 	for (addr = start; addr < end; addr += nptes) {
-		pt = amdgpu_vm_get_pt(params, addr);
-		if (!pt) {
-			pr_err("PT not found, aborting update_ptes\n");
-			return -EINVAL;
-		}
+
+		if ((addr & ~mask) == (end & ~mask))
+			nptes = end - addr;
+		else
+			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
+
+		r = amdgpu_vm_handle_huge_pages(params, addr, nptes,
+						dst, flags, &pt);
+		if (r)
+			return r;
 
 		if (params->shadow) {
 			if (!pt->shadow)
@@ -1209,11 +1258,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 			pt = pt->shadow;
 		}
 
-		if ((addr & ~mask) == (end & ~mask))
-			nptes = end - addr;
-		else
-			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
-
 		pe_start = amdgpu_bo_gpu_offset(pt);
 		pe_start += (addr & mask) * 8;
 
@@ -1353,6 +1397,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	/* padding, etc. */
 	ndw = 64;
 
+	/* one PDE write for each huge page */
+	ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7;
+
 	if (src) {
 		/* only copy commands needed */
 		ndw += ncmds * 7;
@@ -1437,6 +1484,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 error_free:
 	amdgpu_job_free(job);
+	amdgpu_vm_invalidate_level(&vm->root);
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index afe9073..1c5e0ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -68,6 +68,9 @@ struct amdgpu_bo_list_entry;
 /* TILED for VEGA10, reserved for older ASICs  */
 #define AMDGPU_PTE_PRT		(1ULL << 51)
 
+/* PDE is handled as PTE for VEGA10 */
+#define AMDGPU_PDE_PTE		(1ULL << 54)
+
 /* VEGA10 only */
 #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
 #define AMDGPU_PTE_MTYPE_MASK	AMDGPU_PTE_MTYPE(3ULL)
@@ -90,6 +93,7 @@ struct amdgpu_bo_list_entry;
 struct amdgpu_vm_pt {
 	struct amdgpu_bo	*bo;
 	uint64_t		addr;
+	bool			huge_page;
 
 	/* array of page tables, one for each directory entry */
 	struct amdgpu_vm_pt	*entries;
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* RE: [PATCH 6/7] drm/amdgpu: stop joining VM PTE updates
       [not found]     ` <1495012972-20698-6-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-05-17 10:08       ` Zhou, David(ChunMing)
       [not found]         ` <MWHPR1201MB02064FC493616E242D75A2C6B4E70-3iK1xFAIwjrUF/YbdlDdgWrFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Zhou, David(ChunMing) @ 2017-05-17 10:08 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


Patch#2: Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Patch#3: RB should be from Hawking, so Acked-by: Chunming Zhou <david1.zhou@amd.com>
Patch#5 #6, are Reviewed-by: Chunming Zhou <david1.zhou@amd.com>

For patch#7, further transfer +1 page table, I need a bit time of tomorrow.

Regards,
David Zhou

-----Original Message-----
From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf Of Christian K?nig
Sent: Wednesday, May 17, 2017 5:23 PM
To: amd-gfx@lists.freedesktop.org
Subject: [PATCH 6/7] drm/amdgpu: stop joining VM PTE updates

From: Christian König <christian.koenig@amd.com>

This isn't beneficial any more since VRAM allocations are now split so that they fits into a single page table.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 61 ++++------------------------------
 1 file changed, 7 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6a926f4..860a669 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1191,41 +1191,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 	struct amdgpu_device *adev = params->adev;
 	const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
 
-	uint64_t cur_pe_start, cur_nptes, cur_dst;
-	uint64_t addr; /* next GPU address to be updated */
+	uint64_t addr, pe_start;
 	struct amdgpu_bo *pt;
-	unsigned nptes; /* next number of ptes to be updated */
-	uint64_t next_pe_start;
-
-	/* initialize the variables */
-	addr = start;
-	pt = amdgpu_vm_get_pt(params, addr);
-	if (!pt) {
-		pr_err("PT not found, aborting update_ptes\n");
-		return -EINVAL;
-	}
-
-	if (params->shadow) {
-		if (!pt->shadow)
-			return 0;
-		pt = pt->shadow;
-	}
-	if ((addr & ~mask) == (end & ~mask))
-		nptes = end - addr;
-	else
-		nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
-
-	cur_pe_start = amdgpu_bo_gpu_offset(pt);
-	cur_pe_start += (addr & mask) * 8;
-	cur_nptes = nptes;
-	cur_dst = dst;
-
-	/* for next ptb*/
-	addr += nptes;
-	dst += nptes * AMDGPU_GPU_PAGE_SIZE;
+	unsigned nptes;
 
 	/* walk over the address space and update the page tables */
-	while (addr < end) {
+	for (addr = start; addr < end; addr += nptes) {
 		pt = amdgpu_vm_get_pt(params, addr);
 		if (!pt) {
 			pr_err("PT not found, aborting update_ptes\n"); @@ -1243,33 +1214,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		else
 			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
 
-		next_pe_start = amdgpu_bo_gpu_offset(pt);
-		next_pe_start += (addr & mask) * 8;
-
-		if ((cur_pe_start + 8 * cur_nptes) == next_pe_start &&
-		    ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) {
-			/* The next ptb is consecutive to current ptb.
-			 * Don't call the update function now.
-			 * Will update two ptbs together in future.
-			*/
-			cur_nptes += nptes;
-		} else {
-			params->func(params, cur_pe_start, cur_dst, cur_nptes,
-				     AMDGPU_GPU_PAGE_SIZE, flags);
+		pe_start = amdgpu_bo_gpu_offset(pt);
+		pe_start += (addr & mask) * 8;
 
-			cur_pe_start = next_pe_start;
-			cur_nptes = nptes;
-			cur_dst = dst;
-		}
+		params->func(params, pe_start, dst, nptes,
+			     AMDGPU_GPU_PAGE_SIZE, flags);
 
-		/* for next ptb*/
-		addr += nptes;
 		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 	}
 
-	params->func(params, cur_pe_start, cur_dst, cur_nptes,
-		     AMDGPU_GPU_PAGE_SIZE, flags);
-
 	return 0;
 }
 
--
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 7/7] drm/amdgpu: enable huge page handling in the VM
       [not found]     ` <1495012972-20698-7-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-05-18  5:30       ` zhoucm1
       [not found]         ` <591D3164.7070105-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: zhoucm1 @ 2017-05-18  5:30 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



On 2017年05月17日 17:22, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> The hardware can use huge pages to map 2MB of address space with only one PDE.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 96 +++++++++++++++++++++++++---------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  4 ++
>   2 files changed, 76 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 860a669..8be1d7b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -325,6 +325,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>   
>   			entry->bo = pt;
>   			entry->addr = 0;
> +			entry->huge_page = false;
>   		}
>   
>   		if (level < adev->vm_manager.num_level) {
> @@ -1014,7 +1015,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   
>   		pt = bo->tbo.mem.start << PAGE_SHIFT;
>   		pt = amdgpu_gart_get_vm_pde(adev, pt);
> -		if (parent->entries[pt_idx].addr == pt)
> +		if (parent->entries[pt_idx].addr == pt ||
> +		    parent->entries[pt_idx].huge_page)
>   			continue;
>   
>   		parent->entries[pt_idx].addr = pt;
> @@ -1146,29 +1148,70 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>   }
>   
>   /**
> - * amdgpu_vm_find_pt - find the page table for an address
> + * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
>    *
>    * @p: see amdgpu_pte_update_params definition
>    * @addr: virtual address in question
> + * @nptes: number of PTEs updated with this operation
> + * @dst: destination address where the PTEs should point to
> + * @flags: access flags fro the PTEs
> + * @bo: resulting page tables BO
>    *
> - * Find the page table BO for a virtual address, return NULL when none found.
> + * Check if we can update the PD with a huge page. Also finds the page table
> + * BO for a virtual address, returns -ENOENT when nothing found.
>    */
> -static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
> -					  uint64_t addr)
> +static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
> +				       uint64_t addr, unsigned nptes,
> +				       uint64_t dst, uint64_t flags,
> +				       struct amdgpu_bo **bo)
>   {
> -	struct amdgpu_vm_pt *entry = &p->vm->root;
> -	unsigned idx, level = p->adev->vm_manager.num_level;
> +	unsigned pt_idx, level = p->adev->vm_manager.num_level;
> +	struct amdgpu_vm_pt *entry = &p->vm->root, *parent;
> +	uint64_t pd_addr, pde, pt;
>   
> -	while (entry->entries) {
> -		idx = addr >> (p->adev->vm_manager.block_size * level--);
> -		idx %= amdgpu_bo_size(entry->bo) / 8;
> -		entry = &entry->entries[idx];
> -	}
> +	do {
> +		pt_idx = addr >> (p->adev->vm_manager.block_size * level--);
> +		pt_idx %= amdgpu_bo_size(entry->bo) / 8;
> +		parent = entry;
> +		entry = &entry->entries[pt_idx];
> +	} while (entry->entries);
>   
>   	if (level)
> -		return NULL;
> +		return -ENOENT;
> +
> +	*bo = entry->bo;
> +
> +	/* In the case of a mixed PT the PDE must point to it*/
> +	if (p->adev->asic_type < CHIP_VEGA10 ||
> +	    nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
> +	    p->func != amdgpu_vm_do_set_ptes ||
> +	    !(flags & AMDGPU_PTE_VALID)) {
> +
> +		pt = (*bo)->tbo.mem.start << PAGE_SHIFT;
> +		pt = amdgpu_gart_get_vm_pde(p->adev, pt);
> +		flags = AMDGPU_PTE_VALID;
This case should be handled when updating levels, so return directly?
> +	} else {
> +		pt = amdgpu_gart_get_vm_pde(p->adev, dst);
> +		flags |= AMDGPU_PDE_PTE;
> +	}
>   
> -	return entry->bo;
> +	if (entry->addr == pt &&
> +	    entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
> +		return 0;
> +
> +	entry->addr = pt;
> +	entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
> +
> +	if (parent->bo->shadow) {
> +		pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
> +		pde = pd_addr + pt_idx * 8;
> +		amdgpu_vm_do_set_ptes(p, pde, pt, 1, 0, flags);
 From the spec "any pde in the chain can itself take on the format of a 
PTE and point directly to an aligned block of logical address space by 
setting the P bit.",
So here should pass addr into PDE instead of pt.
> +	}
> +
> +	pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> +	pde = pd_addr + pt_idx * 8;
> +	amdgpu_vm_do_set_ptes(p, pde, pt, 1, 0, flags);
Should pass addr into PDE instead of pt as well.


> +	return 0;
>   }
>   
>   /**
> @@ -1194,14 +1237,20 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>   	uint64_t addr, pe_start;
>   	struct amdgpu_bo *pt;
>   	unsigned nptes;
> +	int r;
>   
>   	/* walk over the address space and update the page tables */
>   	for (addr = start; addr < end; addr += nptes) {
> -		pt = amdgpu_vm_get_pt(params, addr);
> -		if (!pt) {
> -			pr_err("PT not found, aborting update_ptes\n");
> -			return -EINVAL;
> -		}
> +
> +		if ((addr & ~mask) == (end & ~mask))
> +			nptes = end - addr;
> +		else
> +			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
> +
> +		r = amdgpu_vm_handle_huge_pages(params, addr, nptes,
> +						dst, flags, &pt);
If huge page happens, its sub PTEs don't need to update more, they 
cannot be indexed by page table when that PDE is PTE, right?

Btw: Is this BigK which people often said?

Regards,
David Zhou
> +		if (r)
> +			return r;
>   
>   		if (params->shadow) {
>   			if (!pt->shadow)
> @@ -1209,11 +1258,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>   			pt = pt->shadow;
>   		}
>   
> -		if ((addr & ~mask) == (end & ~mask))
> -			nptes = end - addr;
> -		else
> -			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
> -
>   		pe_start = amdgpu_bo_gpu_offset(pt);
>   		pe_start += (addr & mask) * 8;
>   
> @@ -1353,6 +1397,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	/* padding, etc. */
>   	ndw = 64;
>   
> +	/* one PDE write for each huge page */
> +	ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7;
> +
>   	if (src) {
>   		/* only copy commands needed */
>   		ndw += ncmds * 7;
> @@ -1437,6 +1484,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   
>   error_free:
>   	amdgpu_job_free(job);
> +	amdgpu_vm_invalidate_level(&vm->root);
>   	return r;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index afe9073..1c5e0ce 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -68,6 +68,9 @@ struct amdgpu_bo_list_entry;
>   /* TILED for VEGA10, reserved for older ASICs  */
>   #define AMDGPU_PTE_PRT		(1ULL << 51)
>   
> +/* PDE is handled as PTE for VEGA10 */
> +#define AMDGPU_PDE_PTE		(1ULL << 54)
> +
>   /* VEGA10 only */
>   #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
>   #define AMDGPU_PTE_MTYPE_MASK	AMDGPU_PTE_MTYPE(3ULL)
> @@ -90,6 +93,7 @@ struct amdgpu_bo_list_entry;
>   struct amdgpu_vm_pt {
>   	struct amdgpu_bo	*bo;
>   	uint64_t		addr;
> +	bool			huge_page;
>   
>   	/* array of page tables, one for each directory entry */
>   	struct amdgpu_vm_pt	*entries;

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/7] drm/amdgpu: cleanup adjust_mc_addr handling v2
       [not found] ` <1495012972-20698-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
                     ` (5 preceding siblings ...)
  2017-05-17  9:22   ` [PATCH 7/7] drm/amdgpu: enable huge page handling in the VM Christian König
@ 2017-05-18  5:33   ` Zhang, Jerry (Junwei)
       [not found]     ` <591D323A.5000304-5C7GfCeVMHo@public.gmane.org>
  6 siblings, 1 reply; 15+ messages in thread
From: Zhang, Jerry (Junwei) @ 2017-05-18  5:33 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 05/17/2017 05:22 PM, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Rename adjust_mc_addr to get_vm_pde and check the address bits in one place.
>
> v2: handle vcn as well, keep setting the valid bit manually,
>      add a BUG_ON() for GMC v6, v7 and v8 as well.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  5 +++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 26 +++++++++-----------------
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  |  6 ++----
>   drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c  |  7 +++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c  |  9 ++++++++-
>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c  |  9 ++++++++-
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 10 ++++++----
>   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  6 ++----
>   drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c  | 12 ++++--------
>   drivers/gpu/drm/amd/amdgpu/vce_v4_0.c  |  6 ++----
>   drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c  |  5 ++---
>   11 files changed, 53 insertions(+), 48 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index ae7e775..4acbca7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -309,8 +309,8 @@ struct amdgpu_gart_funcs {
>   	/* set pte flags based per asic */
>   	uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
>   				     uint32_t flags);
> -	/* adjust mc addr in fb for APU case */
> -	u64 (*adjust_mc_addr)(struct amdgpu_device *adev, u64 addr);
> +	/* get the pde for a given mc addr */
> +	u64 (*get_vm_pde)(struct amdgpu_device *adev, u64 addr);
>   	uint32_t (*get_invalidate_req)(unsigned int vm_id);
>   };
>
> @@ -1820,6 +1820,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
>   #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
>   #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
>   #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
> +#define amdgpu_gart_get_vm_pde(adev, addr) (adev)->gart.gart_funcs->get_vm_pde((adev), (addr))
>   #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
>   #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
>   #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 88420dc..344f943 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -682,16 +682,6 @@ static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring)
>   	return false;
>   }
>
> -static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr)
> -{
> -	u64 addr = mc_addr;
> -
> -	if (adev->gart.gart_funcs->adjust_mc_addr)
> -		addr = adev->gart.gart_funcs->adjust_mc_addr(adev, addr);
> -
> -	return addr;
> -}
> -
>   bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
>   				  struct amdgpu_job *job)
>   {
> @@ -1034,18 +1024,18 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   		    (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
>
>   			if (count) {
> -				uint64_t pt_addr =
> -					amdgpu_vm_adjust_mc_addr(adev, last_pt);
> +				uint64_t entry;
>
> +				entry = amdgpu_gart_get_vm_pde(adev, last_pt);
>   				if (shadow)
>   					amdgpu_vm_do_set_ptes(&params,
>   							      last_shadow,
> -							      pt_addr, count,
> +							      entry, count,
>   							      incr,
>   							      AMDGPU_PTE_VALID);
>
>   				amdgpu_vm_do_set_ptes(&params, last_pde,
> -						      pt_addr, count, incr,
> +						      entry, count, incr,
>   						      AMDGPU_PTE_VALID);
>   			}
>
> @@ -1059,13 +1049,15 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>   	}
>
>   	if (count) {
> -		uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt);
> +		uint64_t entry;
> +
> +		entry = amdgpu_gart_get_vm_pde(adev, last_pt);
>
>   		if (vm->root.bo->shadow)
> -			amdgpu_vm_do_set_ptes(&params, last_shadow, pt_addr,
> +			amdgpu_vm_do_set_ptes(&params, last_shadow, entry,
>   					      count, incr, AMDGPU_PTE_VALID);
>
> -		amdgpu_vm_do_set_ptes(&params, last_pde, pt_addr,
> +		amdgpu_vm_do_set_ptes(&params, last_pde, entry,
>   				      count, incr, AMDGPU_PTE_VALID);
>   	}
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 6dc75d2..e08a232 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3759,10 +3759,8 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>   	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
>   	unsigned eng = ring->vm_inv_eng;
>
> -	pd_addr = ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev, pd_addr);
> -	pd_addr = pd_addr | 0x1; /* valid bit */
> -	/* now only use physical base address of PDE and valid */
> -	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
> +	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
> +	pd_addr |= AMDGPU_PTE_VALID;
>
>   	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
>   				   hub->ctx0_ptb_addr_lo32 + (2 * vm_id),
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> index 1e6263a..d5f3d873 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> @@ -395,6 +395,12 @@ static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev,
>   	return pte_flag;
>   }
>
> +static uint64_t gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
> +{
> +	BUG_ON(addr & 0xFFFFF0000000FFFULL);
> +	return addr;
> +}

Just wonder why we didn't return the address as below?
adev->vm_manager.vram_base_offset + addr

Does that cause gmc6~gmc8 has no APU support officially?
Or I miss any info about them?

Jerry

> +
>   static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev,
>   					      bool value)
>   {
> @@ -1127,6 +1133,7 @@ static const struct amdgpu_gart_funcs gmc_v6_0_gart_funcs = {
>   	.flush_gpu_tlb = gmc_v6_0_gart_flush_gpu_tlb,
>   	.set_pte_pde = gmc_v6_0_gart_set_pte_pde,
>   	.set_prt = gmc_v6_0_set_prt,
> +	.get_vm_pde = gmc_v6_0_get_vm_pde,
>   	.get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags
>   };
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index 967505b..4f140e8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -472,6 +472,12 @@ static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev,
>   	return pte_flag;
>   }
>
> +static uint64_t gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
> +{
> +	BUG_ON(addr & 0xFFFFF0000000FFFULL);
> +	return addr;
> +}
> +
>   /**
>    * gmc_v8_0_set_fault_enable_default - update VM fault handling
>    *
> @@ -1293,7 +1299,8 @@ static const struct amdgpu_gart_funcs gmc_v7_0_gart_funcs = {
>   	.flush_gpu_tlb = gmc_v7_0_gart_flush_gpu_tlb,
>   	.set_pte_pde = gmc_v7_0_gart_set_pte_pde,
>   	.set_prt = gmc_v7_0_set_prt,
> -	.get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags
> +	.get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags,
> +	.get_vm_pde = gmc_v7_0_get_vm_pde
>   };
>
>   static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 3b5ea0f..f05c034 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -656,6 +656,12 @@ static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev,
>   	return pte_flag;
>   }
>
> +static uint64_t gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
> +{
> +	BUG_ON(addr & 0xFFF0000000000FFFULL);
> +	return addr;
> +}
> +
>   /**
>    * gmc_v8_0_set_fault_enable_default - update VM fault handling
>    *
> @@ -1612,7 +1618,8 @@ static const struct amdgpu_gart_funcs gmc_v8_0_gart_funcs = {
>   	.flush_gpu_tlb = gmc_v8_0_gart_flush_gpu_tlb,
>   	.set_pte_pde = gmc_v8_0_gart_set_pte_pde,
>   	.set_prt = gmc_v8_0_set_prt,
> -	.get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags
> +	.get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags,
> +	.get_vm_pde = gmc_v8_0_get_vm_pde
>   };
>
>   static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 19e1027..047b1a7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -358,17 +358,19 @@ static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev,
>   	return pte_flag;
>   }
>
> -static u64 gmc_v9_0_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr)
> +static u64 gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, u64 addr)
>   {
> -	return adev->vm_manager.vram_base_offset + mc_addr - adev->mc.vram_start;
> +	addr = adev->vm_manager.vram_base_offset + addr - adev->mc.vram_start;
> +	BUG_ON(addr & 0xFFFF00000000003FULL);
> +	return addr;
>   }
>
>   static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
>   	.flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb,
>   	.set_pte_pde = gmc_v9_0_gart_set_pte_pde,
> -	.get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
> -	.adjust_mc_addr = gmc_v9_0_adjust_mc_addr,
>   	.get_invalidate_req = gmc_v9_0_get_invalidate_req,
> +	.get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
> +	.get_vm_pde = gmc_v9_0_get_vm_pde
>   };
>
>   static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index 91cf7e6..fb6f4b3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -1143,10 +1143,8 @@ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>   	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
>   	unsigned eng = ring->vm_inv_eng;
>
> -	pd_addr = ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev, pd_addr);
> -	pd_addr = pd_addr | 0x1; /* valid bit */
> -	/* now only use physical base address of PDE and valid */
> -	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
> +	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
> +	pd_addr |= AMDGPU_PTE_VALID;
>
>   	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
>   			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> index 22f42f3..1997ec8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> @@ -1316,10 +1316,8 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>   	uint32_t data0, data1, mask;
>   	unsigned eng = ring->vm_inv_eng;
>
> -	pd_addr = ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev, pd_addr);
> -	pd_addr = pd_addr | 0x1; /* valid bit */
> -	/* now only use physical base address of PDE and valid */
> -	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
> +	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
> +	pd_addr |= AMDGPU_PTE_VALID;
>
>   	data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
>   	data1 = upper_32_bits(pd_addr);
> @@ -1358,10 +1356,8 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
>   	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
>   	unsigned eng = ring->vm_inv_eng;
>
> -	pd_addr = ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev, pd_addr);
> -	pd_addr = pd_addr | 0x1; /* valid bit */
> -	/* now only use physical base address of PDE and valid */
> -	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
> +	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
> +	pd_addr |= AMDGPU_PTE_VALID;
>
>   	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
>   	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> index 07b2ac7..8dbd0b7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> @@ -926,10 +926,8 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
>   	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
>   	unsigned eng = ring->vm_inv_eng;
>
> -	pd_addr = ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev, pd_addr);
> -	pd_addr = pd_addr | 0x1; /* valid bit */
> -	/* now only use physical base address of PDE and valid */
> -	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
> +	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
> +	pd_addr |= AMDGPU_PTE_VALID;
>
>   	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
>   	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> index ad1862f..b2d995b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> @@ -882,9 +882,8 @@ static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
>   	uint32_t data0, data1, mask;
>   	unsigned eng = ring->vm_inv_eng;
>
> -	pd_addr = pd_addr | 0x1; /* valid bit */
> -	/* now only use physical base address of PDE and valid */
> -	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
> +	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
> +	pd_addr |= AMDGPU_PTE_VALID;
>
>   	data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
>   	data1 = upper_32_bits(pd_addr);
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 6/7] drm/amdgpu: stop joining VM PTE updates
       [not found]         ` <MWHPR1201MB02064FC493616E242D75A2C6B4E70-3iK1xFAIwjrUF/YbdlDdgWrFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
@ 2017-05-18  5:35           ` Zhang, Jerry (Junwei)
  0 siblings, 0 replies; 15+ messages in thread
From: Zhang, Jerry (Junwei) @ 2017-05-18  5:35 UTC (permalink / raw)
  To: Zhou, David(ChunMing),
	Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 05/17/2017 06:08 PM, Zhou, David(ChunMing) wrote:
>
> Patch#2: Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
> Patch#3: RB should be from Hawking, so Acked-by: Chunming Zhou <david1.zhou@amd.com>
> Patch#5 #6, are Reviewed-by: Chunming Zhou <david1.zhou@amd.com>

Feel free to add my RB about Patch #2 ~ #5

Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>

Jerry

>
> For patch#7, further transfer +1 page table, I need a bit time of tomorrow.
>
> Regards,
> David Zhou
>
> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf Of Christian K?nig
> Sent: Wednesday, May 17, 2017 5:23 PM
> To: amd-gfx@lists.freedesktop.org
> Subject: [PATCH 6/7] drm/amdgpu: stop joining VM PTE updates
>
> From: Christian König <christian.koenig@amd.com>
>
> This isn't beneficial any more since VRAM allocations are now split so that they fits into a single page table.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 61 ++++------------------------------
>   1 file changed, 7 insertions(+), 54 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 6a926f4..860a669 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1191,41 +1191,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>   	struct amdgpu_device *adev = params->adev;
>   	const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
>
> -	uint64_t cur_pe_start, cur_nptes, cur_dst;
> -	uint64_t addr; /* next GPU address to be updated */
> +	uint64_t addr, pe_start;
>   	struct amdgpu_bo *pt;
> -	unsigned nptes; /* next number of ptes to be updated */
> -	uint64_t next_pe_start;
> -
> -	/* initialize the variables */
> -	addr = start;
> -	pt = amdgpu_vm_get_pt(params, addr);
> -	if (!pt) {
> -		pr_err("PT not found, aborting update_ptes\n");
> -		return -EINVAL;
> -	}
> -
> -	if (params->shadow) {
> -		if (!pt->shadow)
> -			return 0;
> -		pt = pt->shadow;
> -	}
> -	if ((addr & ~mask) == (end & ~mask))
> -		nptes = end - addr;
> -	else
> -		nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
> -
> -	cur_pe_start = amdgpu_bo_gpu_offset(pt);
> -	cur_pe_start += (addr & mask) * 8;
> -	cur_nptes = nptes;
> -	cur_dst = dst;
> -
> -	/* for next ptb*/
> -	addr += nptes;
> -	dst += nptes * AMDGPU_GPU_PAGE_SIZE;
> +	unsigned nptes;
>
>   	/* walk over the address space and update the page tables */
> -	while (addr < end) {
> +	for (addr = start; addr < end; addr += nptes) {
>   		pt = amdgpu_vm_get_pt(params, addr);
>   		if (!pt) {
>   			pr_err("PT not found, aborting update_ptes\n"); @@ -1243,33 +1214,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>   		else
>   			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
>
> -		next_pe_start = amdgpu_bo_gpu_offset(pt);
> -		next_pe_start += (addr & mask) * 8;
> -
> -		if ((cur_pe_start + 8 * cur_nptes) == next_pe_start &&
> -		    ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) {
> -			/* The next ptb is consecutive to current ptb.
> -			 * Don't call the update function now.
> -			 * Will update two ptbs together in future.
> -			*/
> -			cur_nptes += nptes;
> -		} else {
> -			params->func(params, cur_pe_start, cur_dst, cur_nptes,
> -				     AMDGPU_GPU_PAGE_SIZE, flags);
> +		pe_start = amdgpu_bo_gpu_offset(pt);
> +		pe_start += (addr & mask) * 8;
>
> -			cur_pe_start = next_pe_start;
> -			cur_nptes = nptes;
> -			cur_dst = dst;
> -		}
> +		params->func(params, pe_start, dst, nptes,
> +			     AMDGPU_GPU_PAGE_SIZE, flags);
>
> -		/* for next ptb*/
> -		addr += nptes;
>   		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
>   	}
>
> -	params->func(params, cur_pe_start, cur_dst, cur_nptes,
> -		     AMDGPU_GPU_PAGE_SIZE, flags);
> -
>   	return 0;
>   }
>
> --
> 2.7.4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 7/7] drm/amdgpu: enable huge page handling in the VM
       [not found]         ` <591D3164.7070105-5C7GfCeVMHo@public.gmane.org>
@ 2017-05-24  9:15           ` Christian König
       [not found]             ` <5efe2316-b0bb-799b-345e-120c040237ae-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Christian König @ 2017-05-24  9:15 UTC (permalink / raw)
  To: zhoucm1, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 18.05.2017 um 07:30 schrieb zhoucm1:
>
>
> On 2017年05月17日 17:22, Christian König wrote:
>> [SNIP]
>> +    /* In the case of a mixed PT the PDE must point to it*/
>> +    if (p->adev->asic_type < CHIP_VEGA10 ||
>> +        nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
>> +        p->func != amdgpu_vm_do_set_ptes ||
>> +        !(flags & AMDGPU_PTE_VALID)) {
>> +
>> +        pt = (*bo)->tbo.mem.start << PAGE_SHIFT;
>> +        pt = amdgpu_gart_get_vm_pde(p->adev, pt);
>> +        flags = AMDGPU_PTE_VALID;
> This case should be handled when updating levels, so return directly?

The problem is that when we switch from huge page back to a normal 
mapping because the BO was evicted we also need to reset the PDE.

>> +    } else {
>> +        pt = amdgpu_gart_get_vm_pde(p->adev, dst);
>> +        flags |= AMDGPU_PDE_PTE;
>> +    }
>>   -    return entry->bo;
>> +    if (entry->addr == pt &&
>> +        entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
>> +        return 0;
>> +
>> +    entry->addr = pt;
>> +    entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
>> +
>> +    if (parent->bo->shadow) {
>> +        pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
>> +        pde = pd_addr + pt_idx * 8;
>> +        amdgpu_vm_do_set_ptes(p, pde, pt, 1, 0, flags);
> From the spec "any pde in the chain can itself take on the format of a 
> PTE and point directly to an aligned block of logical address space by 
> setting the P bit.",
> So here should pass addr into PDE instead of pt.

The pt variable was overwritten with the address a few lines above. The 
code was indeed hard to understand, so I've fixed it.

>> +    }
>> +
>> +    pd_addr = amdgpu_bo_gpu_offset(parent->bo);
>> +    pde = pd_addr + pt_idx * 8;
>> +    amdgpu_vm_do_set_ptes(p, pde, pt, 1, 0, flags);
> Should pass addr into PDE instead of pt as well.
>
>
>> +    return 0;
>>   }
>>     /**
>> @@ -1194,14 +1237,20 @@ static int amdgpu_vm_update_ptes(struct 
>> amdgpu_pte_update_params *params,
>>       uint64_t addr, pe_start;
>>       struct amdgpu_bo *pt;
>>       unsigned nptes;
>> +    int r;
>>         /* walk over the address space and update the page tables */
>>       for (addr = start; addr < end; addr += nptes) {
>> -        pt = amdgpu_vm_get_pt(params, addr);
>> -        if (!pt) {
>> -            pr_err("PT not found, aborting update_ptes\n");
>> -            return -EINVAL;
>> -        }
>> +
>> +        if ((addr & ~mask) == (end & ~mask))
>> +            nptes = end - addr;
>> +        else
>> +            nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
>> +
>> +        r = amdgpu_vm_handle_huge_pages(params, addr, nptes,
>> +                        dst, flags, &pt);
> If huge page happens, its sub PTEs don't need to update more, they 
> cannot be indexed by page table when that PDE is PTE, right?

Right, but I wasn't sure if we don't need them for something. So I've 
kept the update for now.

Going to try dropping this today.

> Btw: Is this BigK which people often said?

Yes and no. I can explain more internally if you like.

Regards,
Christian.

>
> Regards,
> David Zhou
>> +        if (r)
>> +            return r;
>>             if (params->shadow) {
>>               if (!pt->shadow)
>> @@ -1209,11 +1258,6 @@ static int amdgpu_vm_update_ptes(struct 
>> amdgpu_pte_update_params *params,
>>               pt = pt->shadow;
>>           }
>>   -        if ((addr & ~mask) == (end & ~mask))
>> -            nptes = end - addr;
>> -        else
>> -            nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
>> -
>>           pe_start = amdgpu_bo_gpu_offset(pt);
>>           pe_start += (addr & mask) * 8;
>>   @@ -1353,6 +1397,9 @@ static int amdgpu_vm_bo_update_mapping(struct 
>> amdgpu_device *adev,
>>       /* padding, etc. */
>>       ndw = 64;
>>   +    /* one PDE write for each huge page */
>> +    ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7;
>> +
>>       if (src) {
>>           /* only copy commands needed */
>>           ndw += ncmds * 7;
>> @@ -1437,6 +1484,7 @@ static int amdgpu_vm_bo_update_mapping(struct 
>> amdgpu_device *adev,
>>     error_free:
>>       amdgpu_job_free(job);
>> +    amdgpu_vm_invalidate_level(&vm->root);
>>       return r;
>>   }
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index afe9073..1c5e0ce 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -68,6 +68,9 @@ struct amdgpu_bo_list_entry;
>>   /* TILED for VEGA10, reserved for older ASICs  */
>>   #define AMDGPU_PTE_PRT        (1ULL << 51)
>>   +/* PDE is handled as PTE for VEGA10 */
>> +#define AMDGPU_PDE_PTE        (1ULL << 54)
>> +
>>   /* VEGA10 only */
>>   #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
>>   #define AMDGPU_PTE_MTYPE_MASK    AMDGPU_PTE_MTYPE(3ULL)
>> @@ -90,6 +93,7 @@ struct amdgpu_bo_list_entry;
>>   struct amdgpu_vm_pt {
>>       struct amdgpu_bo    *bo;
>>       uint64_t        addr;
>> +    bool            huge_page;
>>         /* array of page tables, one for each directory entry */
>>       struct amdgpu_vm_pt    *entries;
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/7] drm/amdgpu: cleanup adjust_mc_addr handling v2
       [not found]     ` <591D323A.5000304-5C7GfCeVMHo@public.gmane.org>
@ 2017-05-24  9:18       ` Christian König
       [not found]         ` <b9c0c5b3-a42c-b2e8-0b84-718807d5e7cf-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Christian König @ 2017-05-24  9:18 UTC (permalink / raw)
  To: Zhang, Jerry (Junwei), amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 18.05.2017 um 07:33 schrieb Zhang, Jerry (Junwei):
> On 05/17/2017 05:22 PM, Christian König wrote:
>> [SNIP]
>> +static uint64_t gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, 
>> uint64_t addr)
>> +{
>> +    BUG_ON(addr & 0xFFFFF0000000FFFULL);
>> +    return addr;
>> +}
>
> Just wonder why we didn't return the address as below?
> adev->vm_manager.vram_base_offset + addr

That is a really good question and I couldn't figure out all the details 
either.

I've tested it on an Kaveri and it still seems to work fine, so the 
logic is indeed correct.

> Does that cause gmc6~gmc8 has no APU support officially?

No, only the pro driver doesn't support APUs. The open source driver 
works perfectly fine on them.

> Or I miss any info about them?

The documentation for gfx6-8 is very unclear on this.

It says that the registers and PDE should use physical addresses, but 
from my testing that isn't the case.

Going to ping the hardware dev on this once more when I have time.

Regards,
Christian.

>
> Jerry
>
>> +
>>   static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device 
>> *adev,
>>                             bool value)
>>   {
>> @@ -1127,6 +1133,7 @@ static const struct amdgpu_gart_funcs 
>> gmc_v6_0_gart_funcs = {
>>       .flush_gpu_tlb = gmc_v6_0_gart_flush_gpu_tlb,
>>       .set_pte_pde = gmc_v6_0_gart_set_pte_pde,
>>       .set_prt = gmc_v6_0_set_prt,
>> +    .get_vm_pde = gmc_v6_0_get_vm_pde,
>>       .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags
>>   };
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> index 967505b..4f140e8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> @@ -472,6 +472,12 @@ static uint64_t gmc_v7_0_get_vm_pte_flags(struct 
>> amdgpu_device *adev,
>>       return pte_flag;
>>   }
>>
>> +static uint64_t gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, 
>> uint64_t addr)
>> +{
>> +    BUG_ON(addr & 0xFFFFF0000000FFFULL);
>> +    return addr;
>> +}
>> +
>>   /**
>>    * gmc_v8_0_set_fault_enable_default - update VM fault handling
>>    *
>> @@ -1293,7 +1299,8 @@ static const struct amdgpu_gart_funcs 
>> gmc_v7_0_gart_funcs = {
>>       .flush_gpu_tlb = gmc_v7_0_gart_flush_gpu_tlb,
>>       .set_pte_pde = gmc_v7_0_gart_set_pte_pde,
>>       .set_prt = gmc_v7_0_set_prt,
>> -    .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags
>> +    .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags,
>> +    .get_vm_pde = gmc_v7_0_get_vm_pde
>>   };
>>
>>   static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> index 3b5ea0f..f05c034 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> @@ -656,6 +656,12 @@ static uint64_t gmc_v8_0_get_vm_pte_flags(struct 
>> amdgpu_device *adev,
>>       return pte_flag;
>>   }
>>
>> +static uint64_t gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, 
>> uint64_t addr)
>> +{
>> +    BUG_ON(addr & 0xFFF0000000000FFFULL);
>> +    return addr;
>> +}
>> +
>>   /**
>>    * gmc_v8_0_set_fault_enable_default - update VM fault handling
>>    *
>> @@ -1612,7 +1618,8 @@ static const struct amdgpu_gart_funcs 
>> gmc_v8_0_gart_funcs = {
>>       .flush_gpu_tlb = gmc_v8_0_gart_flush_gpu_tlb,
>>       .set_pte_pde = gmc_v8_0_gart_set_pte_pde,
>>       .set_prt = gmc_v8_0_set_prt,
>> -    .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags
>> +    .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags,
>> +    .get_vm_pde = gmc_v8_0_get_vm_pde
>>   };
>>
>>   static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index 19e1027..047b1a7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -358,17 +358,19 @@ static uint64_t 
>> gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev,
>>       return pte_flag;
>>   }
>>
>> -static u64 gmc_v9_0_adjust_mc_addr(struct amdgpu_device *adev, u64 
>> mc_addr)
>> +static u64 gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, u64 addr)
>>   {
>> -    return adev->vm_manager.vram_base_offset + mc_addr - 
>> adev->mc.vram_start;
>> +    addr = adev->vm_manager.vram_base_offset + addr - 
>> adev->mc.vram_start;
>> +    BUG_ON(addr & 0xFFFF00000000003FULL);
>> +    return addr;
>>   }
>>
>>   static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
>>       .flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb,
>>       .set_pte_pde = gmc_v9_0_gart_set_pte_pde,
>> -    .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
>> -    .adjust_mc_addr = gmc_v9_0_adjust_mc_addr,
>>       .get_invalidate_req = gmc_v9_0_get_invalidate_req,
>> +    .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
>> +    .get_vm_pde = gmc_v9_0_get_vm_pde
>>   };
>>
>>   static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>> index 91cf7e6..fb6f4b3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>> @@ -1143,10 +1143,8 @@ static void 
>> sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>>       uint32_t req = 
>> ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
>>       unsigned eng = ring->vm_inv_eng;
>>
>> -    pd_addr = 
>> ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev, pd_addr);
>> -    pd_addr = pd_addr | 0x1; /* valid bit */
>> -    /* now only use physical base address of PDE and valid */
>> -    BUG_ON(pd_addr & 0xFFFF00000000003EULL);
>> +    pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
>> +    pd_addr |= AMDGPU_PTE_VALID;
>>
>>       amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
>>                 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>> index 22f42f3..1997ec8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>> @@ -1316,10 +1316,8 @@ static void uvd_v7_0_ring_emit_vm_flush(struct 
>> amdgpu_ring *ring,
>>       uint32_t data0, data1, mask;
>>       unsigned eng = ring->vm_inv_eng;
>>
>> -    pd_addr = 
>> ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev, pd_addr);
>> -    pd_addr = pd_addr | 0x1; /* valid bit */
>> -    /* now only use physical base address of PDE and valid */
>> -    BUG_ON(pd_addr & 0xFFFF00000000003EULL);
>> +    pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
>> +    pd_addr |= AMDGPU_PTE_VALID;
>>
>>       data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
>>       data1 = upper_32_bits(pd_addr);
>> @@ -1358,10 +1356,8 @@ static void 
>> uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
>>       uint32_t req = 
>> ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
>>       unsigned eng = ring->vm_inv_eng;
>>
>> -    pd_addr = 
>> ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev, pd_addr);
>> -    pd_addr = pd_addr | 0x1; /* valid bit */
>> -    /* now only use physical base address of PDE and valid */
>> -    BUG_ON(pd_addr & 0xFFFF00000000003EULL);
>> +    pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
>> +    pd_addr |= AMDGPU_PTE_VALID;
>>
>>       amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
>>       amdgpu_ring_write(ring,    (hub->ctx0_ptb_addr_hi32 + vm_id * 
>> 2) << 2);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
>> index 07b2ac7..8dbd0b7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
>> @@ -926,10 +926,8 @@ static void vce_v4_0_emit_vm_flush(struct 
>> amdgpu_ring *ring,
>>       uint32_t req = 
>> ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
>>       unsigned eng = ring->vm_inv_eng;
>>
>> -    pd_addr = 
>> ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev, pd_addr);
>> -    pd_addr = pd_addr | 0x1; /* valid bit */
>> -    /* now only use physical base address of PDE and valid */
>> -    BUG_ON(pd_addr & 0xFFFF00000000003EULL);
>> +    pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
>> +    pd_addr |= AMDGPU_PTE_VALID;
>>
>>       amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
>>       amdgpu_ring_write(ring,    (hub->ctx0_ptb_addr_hi32 + vm_id * 
>> 2) << 2);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
>> index ad1862f..b2d995b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
>> @@ -882,9 +882,8 @@ static void 
>> vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
>>       uint32_t data0, data1, mask;
>>       unsigned eng = ring->vm_inv_eng;
>>
>> -    pd_addr = pd_addr | 0x1; /* valid bit */
>> -    /* now only use physical base address of PDE and valid */
>> -    BUG_ON(pd_addr & 0xFFFF00000000003EULL);
>> +    pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
>> +    pd_addr |= AMDGPU_PTE_VALID;
>>
>>       data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
>>       data1 = upper_32_bits(pd_addr);
>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 7/7] drm/amdgpu: enable huge page handling in the VM
       [not found]             ` <5efe2316-b0bb-799b-345e-120c040237ae-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-05-24  9:28               ` zhoucm1
  0 siblings, 0 replies; 15+ messages in thread
From: zhoucm1 @ 2017-05-24  9:28 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 5491 bytes --]



On 2017年05月24日 17:15, Christian König wrote:
> Am 18.05.2017 um 07:30 schrieb zhoucm1:
>>
>>
>> On 2017年05月17日 17:22, Christian König wrote:
>>> [SNIP]
>>> +    /* In the case of a mixed PT the PDE must point to it*/
>>> +    if (p->adev->asic_type < CHIP_VEGA10 ||
>>> +        nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
>>> +        p->func != amdgpu_vm_do_set_ptes ||
>>> +        !(flags & AMDGPU_PTE_VALID)) {
>>> +
>>> +        pt = (*bo)->tbo.mem.start << PAGE_SHIFT;
>>> +        pt = amdgpu_gart_get_vm_pde(p->adev, pt);
>>> +        flags = AMDGPU_PTE_VALID;
>> This case should be handled when updating levels, so return directly?
>
> The problem is that when we switch from huge page back to a normal 
> mapping because the BO was evicted we also need to reset the PDE.
>
>>> +    } else {
>>> +        pt = amdgpu_gart_get_vm_pde(p->adev, dst);
>>> +        flags |= AMDGPU_PDE_PTE;
>>> +    }
>>>   -    return entry->bo;
>>> +    if (entry->addr == pt &&
>>> +        entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
>>> +        return 0;
>>> +
>>> +    entry->addr = pt;
>>> +    entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
>>> +
>>> +    if (parent->bo->shadow) {
>>> +        pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
>>> +        pde = pd_addr + pt_idx * 8;
>>> +        amdgpu_vm_do_set_ptes(p, pde, pt, 1, 0, flags);
>> From the spec "any pde in the chain can itself take on the format of 
>> a PTE and point directly to an aligned block of logical address space 
>> by setting the P bit.",
>> So here should pass addr into PDE instead of pt.
>
> The pt variable was overwritten with the address a few lines above. 
> The code was indeed hard to understand, so I've fixed it.
this isn't my mean, if I understand spec correctly, it should be 
amdgpu_vm_do_set_ptes(p, pde, addr, 1, 0, flags);

>
>>> +    }
>>> +
>>> +    pd_addr = amdgpu_bo_gpu_offset(parent->bo);
>>> +    pde = pd_addr + pt_idx * 8;
>>> +    amdgpu_vm_do_set_ptes(p, pde, pt, 1, 0, flags);
>> Should pass addr into PDE instead of pt as well.
>>
>>
>>> +    return 0;
>>>   }
>>>     /**
>>> @@ -1194,14 +1237,20 @@ static int amdgpu_vm_update_ptes(struct 
>>> amdgpu_pte_update_params *params,
>>>       uint64_t addr, pe_start;
>>>       struct amdgpu_bo *pt;
>>>       unsigned nptes;
>>> +    int r;
>>>         /* walk over the address space and update the page tables */
>>>       for (addr = start; addr < end; addr += nptes) {
>>> -        pt = amdgpu_vm_get_pt(params, addr);
>>> -        if (!pt) {
>>> -            pr_err("PT not found, aborting update_ptes\n");
>>> -            return -EINVAL;
>>> -        }
>>> +
>>> +        if ((addr & ~mask) == (end & ~mask))
>>> +            nptes = end - addr;
>>> +        else
>>> +            nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
>>> +
>>> +        r = amdgpu_vm_handle_huge_pages(params, addr, nptes,
>>> +                        dst, flags, &pt);
>> If huge page happens, its sub PTEs don't need to update more, they 
>> cannot be indexed by page table when that PDE is PTE, right?
>
> Right, but I wasn't sure if we don't need them for something. So I've 
> kept the update for now.
>
> Going to try dropping this today.
>
>> Btw: Is this BigK which people often said?
>
> Yes and no. I can explain more internally if you like.
Welcome.

Regards,
David Zhou
>
> Regards,
> Christian.
>
>>
>> Regards,
>> David Zhou
>>> +        if (r)
>>> +            return r;
>>>             if (params->shadow) {
>>>               if (!pt->shadow)
>>> @@ -1209,11 +1258,6 @@ static int amdgpu_vm_update_ptes(struct 
>>> amdgpu_pte_update_params *params,
>>>               pt = pt->shadow;
>>>           }
>>>   -        if ((addr & ~mask) == (end & ~mask))
>>> -            nptes = end - addr;
>>> -        else
>>> -            nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
>>> -
>>>           pe_start = amdgpu_bo_gpu_offset(pt);
>>>           pe_start += (addr & mask) * 8;
>>>   @@ -1353,6 +1397,9 @@ static int 
>>> amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>>       /* padding, etc. */
>>>       ndw = 64;
>>>   +    /* one PDE write for each huge page */
>>> +    ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7;
>>> +
>>>       if (src) {
>>>           /* only copy commands needed */
>>>           ndw += ncmds * 7;
>>> @@ -1437,6 +1484,7 @@ static int amdgpu_vm_bo_update_mapping(struct 
>>> amdgpu_device *adev,
>>>     error_free:
>>>       amdgpu_job_free(job);
>>> +    amdgpu_vm_invalidate_level(&vm->root);
>>>       return r;
>>>   }
>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index afe9073..1c5e0ce 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -68,6 +68,9 @@ struct amdgpu_bo_list_entry;
>>>   /* TILED for VEGA10, reserved for older ASICs  */
>>>   #define AMDGPU_PTE_PRT        (1ULL << 51)
>>>   +/* PDE is handled as PTE for VEGA10 */
>>> +#define AMDGPU_PDE_PTE        (1ULL << 54)
>>> +
>>>   /* VEGA10 only */
>>>   #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
>>>   #define AMDGPU_PTE_MTYPE_MASK    AMDGPU_PTE_MTYPE(3ULL)
>>> @@ -90,6 +93,7 @@ struct amdgpu_bo_list_entry;
>>>   struct amdgpu_vm_pt {
>>>       struct amdgpu_bo    *bo;
>>>       uint64_t        addr;
>>> +    bool            huge_page;
>>>         /* array of page tables, one for each directory entry */
>>>       struct amdgpu_vm_pt    *entries;
>>
>


[-- Attachment #1.2: Type: text/html, Size: 10176 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/7] drm/amdgpu: cleanup adjust_mc_addr handling v2
       [not found]         ` <b9c0c5b3-a42c-b2e8-0b84-718807d5e7cf-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-05-25  2:10           ` Zhang, Jerry (Junwei)
  0 siblings, 0 replies; 15+ messages in thread
From: Zhang, Jerry (Junwei) @ 2017-05-25  2:10 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 05/24/2017 05:18 PM, Christian König wrote:
> Am 18.05.2017 um 07:33 schrieb Zhang, Jerry (Junwei):
>> On 05/17/2017 05:22 PM, Christian König wrote:
>>> [SNIP]
>>> +static uint64_t gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
>>> +{
>>> +    BUG_ON(addr & 0xFFFFF0000000FFFULL);
>>> +    return addr;
>>> +}
>>
>> Just wonder why we didn't return the address as below?
>> adev->vm_manager.vram_base_offset + addr
>
> That is a really good question and I couldn't figure out all the details either.
>
> I've tested it on an Kaveri and it still seems to work fine, so the logic is
> indeed correct.

Thanks for your verification.

Then it's a makeshift change for now, although it looks a little suspicious.
Let's dig it more in the future with HW team.
Please feel free to add my RB for the v3 one.

Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>

Jerry

>
>> Does that cause gmc6~gmc8 has no APU support officially?
>
> No, only the pro driver doesn't support APUs. The open source driver works
> perfectly fine on them.
>
>> Or I miss any info about them?
>
> The documentation for gfx6-8 is very unclear on this.
>
> It says that the registers and PDE should use physical addresses, but from my
> testing that isn't the case.
>
> Going to ping the hardware dev on this once more when I have time.
>
> Regards,
> Christian.
>
>>
>> Jerry
>>
>>> +
>>>   static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev,
>>>                             bool value)
>>>   {
>>> @@ -1127,6 +1133,7 @@ static const struct amdgpu_gart_funcs
>>> gmc_v6_0_gart_funcs = {
>>>       .flush_gpu_tlb = gmc_v6_0_gart_flush_gpu_tlb,
>>>       .set_pte_pde = gmc_v6_0_gart_set_pte_pde,
>>>       .set_prt = gmc_v6_0_set_prt,
>>> +    .get_vm_pde = gmc_v6_0_get_vm_pde,
>>>       .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags
>>>   };
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>> index 967505b..4f140e8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>> @@ -472,6 +472,12 @@ static uint64_t gmc_v7_0_get_vm_pte_flags(struct
>>> amdgpu_device *adev,
>>>       return pte_flag;
>>>   }
>>>
>>> +static uint64_t gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
>>> +{
>>> +    BUG_ON(addr & 0xFFFFF0000000FFFULL);
>>> +    return addr;
>>> +}
>>> +
>>>   /**
>>>    * gmc_v8_0_set_fault_enable_default - update VM fault handling
>>>    *
>>> @@ -1293,7 +1299,8 @@ static const struct amdgpu_gart_funcs
>>> gmc_v7_0_gart_funcs = {
>>>       .flush_gpu_tlb = gmc_v7_0_gart_flush_gpu_tlb,
>>>       .set_pte_pde = gmc_v7_0_gart_set_pte_pde,
>>>       .set_prt = gmc_v7_0_set_prt,
>>> -    .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags
>>> +    .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags,
>>> +    .get_vm_pde = gmc_v7_0_get_vm_pde
>>>   };
>>>
>>>   static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = {
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>> index 3b5ea0f..f05c034 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>> @@ -656,6 +656,12 @@ static uint64_t gmc_v8_0_get_vm_pte_flags(struct
>>> amdgpu_device *adev,
>>>       return pte_flag;
>>>   }
>>>
>>> +static uint64_t gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
>>> +{
>>> +    BUG_ON(addr & 0xFFF0000000000FFFULL);
>>> +    return addr;
>>> +}
>>> +
>>>   /**
>>>    * gmc_v8_0_set_fault_enable_default - update VM fault handling
>>>    *
>>> @@ -1612,7 +1618,8 @@ static const struct amdgpu_gart_funcs
>>> gmc_v8_0_gart_funcs = {
>>>       .flush_gpu_tlb = gmc_v8_0_gart_flush_gpu_tlb,
>>>       .set_pte_pde = gmc_v8_0_gart_set_pte_pde,
>>>       .set_prt = gmc_v8_0_set_prt,
>>> -    .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags
>>> +    .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags,
>>> +    .get_vm_pde = gmc_v8_0_get_vm_pde
>>>   };
>>>
>>>   static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = {
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> index 19e1027..047b1a7 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> @@ -358,17 +358,19 @@ static uint64_t gmc_v9_0_get_vm_pte_flags(struct
>>> amdgpu_device *adev,
>>>       return pte_flag;
>>>   }
>>>
>>> -static u64 gmc_v9_0_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr)
>>> +static u64 gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, u64 addr)
>>>   {
>>> -    return adev->vm_manager.vram_base_offset + mc_addr - adev->mc.vram_start;
>>> +    addr = adev->vm_manager.vram_base_offset + addr - adev->mc.vram_start;
>>> +    BUG_ON(addr & 0xFFFF00000000003FULL);
>>> +    return addr;
>>>   }
>>>
>>>   static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
>>>       .flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb,
>>>       .set_pte_pde = gmc_v9_0_gart_set_pte_pde,
>>> -    .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
>>> -    .adjust_mc_addr = gmc_v9_0_adjust_mc_addr,
>>>       .get_invalidate_req = gmc_v9_0_get_invalidate_req,
>>> +    .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
>>> +    .get_vm_pde = gmc_v9_0_get_vm_pde
>>>   };
>>>
>>>   static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> index 91cf7e6..fb6f4b3 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> @@ -1143,10 +1143,8 @@ static void sdma_v4_0_ring_emit_vm_flush(struct
>>> amdgpu_ring *ring,
>>>       uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
>>>       unsigned eng = ring->vm_inv_eng;
>>>
>>> -    pd_addr = ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev,
>>> pd_addr);
>>> -    pd_addr = pd_addr | 0x1; /* valid bit */
>>> -    /* now only use physical base address of PDE and valid */
>>> -    BUG_ON(pd_addr & 0xFFFF00000000003EULL);
>>> +    pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
>>> +    pd_addr |= AMDGPU_PTE_VALID;
>>>
>>>       amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
>>>                 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>> index 22f42f3..1997ec8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>> @@ -1316,10 +1316,8 @@ static void uvd_v7_0_ring_emit_vm_flush(struct
>>> amdgpu_ring *ring,
>>>       uint32_t data0, data1, mask;
>>>       unsigned eng = ring->vm_inv_eng;
>>>
>>> -    pd_addr = ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev,
>>> pd_addr);
>>> -    pd_addr = pd_addr | 0x1; /* valid bit */
>>> -    /* now only use physical base address of PDE and valid */
>>> -    BUG_ON(pd_addr & 0xFFFF00000000003EULL);
>>> +    pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
>>> +    pd_addr |= AMDGPU_PTE_VALID;
>>>
>>>       data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
>>>       data1 = upper_32_bits(pd_addr);
>>> @@ -1358,10 +1356,8 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct
>>> amdgpu_ring *ring,
>>>       uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
>>>       unsigned eng = ring->vm_inv_eng;
>>>
>>> -    pd_addr = ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev,
>>> pd_addr);
>>> -    pd_addr = pd_addr | 0x1; /* valid bit */
>>> -    /* now only use physical base address of PDE and valid */
>>> -    BUG_ON(pd_addr & 0xFFFF00000000003EULL);
>>> +    pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
>>> +    pd_addr |= AMDGPU_PTE_VALID;
>>>
>>>       amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
>>>       amdgpu_ring_write(ring,    (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
>>> index 07b2ac7..8dbd0b7 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
>>> @@ -926,10 +926,8 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring
>>> *ring,
>>>       uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
>>>       unsigned eng = ring->vm_inv_eng;
>>>
>>> -    pd_addr = ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev,
>>> pd_addr);
>>> -    pd_addr = pd_addr | 0x1; /* valid bit */
>>> -    /* now only use physical base address of PDE and valid */
>>> -    BUG_ON(pd_addr & 0xFFFF00000000003EULL);
>>> +    pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
>>> +    pd_addr |= AMDGPU_PTE_VALID;
>>>
>>>       amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
>>>       amdgpu_ring_write(ring,    (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
>>> index ad1862f..b2d995b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
>>> @@ -882,9 +882,8 @@ static void vcn_v1_0_dec_ring_emit_vm_flush(struct
>>> amdgpu_ring *ring,
>>>       uint32_t data0, data1, mask;
>>>       unsigned eng = ring->vm_inv_eng;
>>>
>>> -    pd_addr = pd_addr | 0x1; /* valid bit */
>>> -    /* now only use physical base address of PDE and valid */
>>> -    BUG_ON(pd_addr & 0xFFFF00000000003EULL);
>>> +    pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
>>> +    pd_addr |= AMDGPU_PTE_VALID;
>>>
>>>       data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
>>>       data1 = upper_32_bits(pd_addr);
>>>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2017-05-25  2:10 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-17  9:22 [PATCH 1/7] drm/amdgpu: cleanup adjust_mc_addr handling v2 Christian König
     [not found] ` <1495012972-20698-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-05-17  9:22   ` [PATCH 2/7] drm/amdgpu: add some extra VM error handling Christian König
2017-05-17  9:22   ` [PATCH 3/7] drm/amdgpu: fix another fundamental VM bug Christian König
2017-05-17  9:22   ` [PATCH 4/7] drm/amdgpu: Return EINVAL if no PT BO Christian König
2017-05-17  9:22   ` [PATCH 5/7] drm/amdgpu: cache the complete pde Christian König
2017-05-17  9:22   ` [PATCH 6/7] drm/amdgpu: stop joining VM PTE updates Christian König
     [not found]     ` <1495012972-20698-6-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-05-17 10:08       ` Zhou, David(ChunMing)
     [not found]         ` <MWHPR1201MB02064FC493616E242D75A2C6B4E70-3iK1xFAIwjrUF/YbdlDdgWrFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
2017-05-18  5:35           ` Zhang, Jerry (Junwei)
2017-05-17  9:22   ` [PATCH 7/7] drm/amdgpu: enable huge page handling in the VM Christian König
     [not found]     ` <1495012972-20698-7-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-05-18  5:30       ` zhoucm1
     [not found]         ` <591D3164.7070105-5C7GfCeVMHo@public.gmane.org>
2017-05-24  9:15           ` Christian König
     [not found]             ` <5efe2316-b0bb-799b-345e-120c040237ae-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-05-24  9:28               ` zhoucm1
2017-05-18  5:33   ` [PATCH 1/7] drm/amdgpu: cleanup adjust_mc_addr handling v2 Zhang, Jerry (Junwei)
     [not found]     ` <591D323A.5000304-5C7GfCeVMHo@public.gmane.org>
2017-05-24  9:18       ` Christian König
     [not found]         ` <b9c0c5b3-a42c-b2e8-0b84-718807d5e7cf-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-05-25  2:10           ` Zhang, Jerry (Junwei)

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.