All of lore.kernel.org
 help / color / mirror / Atom feed
* Huge page support for Vega10
@ 2017-05-23 16:52 Christian König
       [not found] ` <1495558374-3513-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Christian König @ 2017-05-23 16:52 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

another round of this patch set, I've dropped all not necessary patches.

Going to handle all the cleanups later on when I have more time to test.

Anyway huge pages now at least not cause any any performance regression any more, but they still doesn't seem to have the desired effect speedup effect. I probably need to dig deeper into TLB handling.

Additional to that I didn't had time to answer all the questions on the last set in detail, going to do so tomorrow.

Regards,
Christian.

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/7] drm/amdgpu: cleanup adjust_mc_addr handling v3
       [not found] ` <1495558374-3513-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-05-23 16:52   ` Christian König
       [not found]     ` <1495558374-3513-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-05-23 16:52   ` [PATCH 2/7] drm/amdgpu: add some extra VM error handling Christian König
                     ` (5 subsequent siblings)
  6 siblings, 1 reply; 12+ messages in thread
From: Christian König @ 2017-05-23 16:52 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

Rename adjust_mc_addr to get_vm_pde and check the address bits in one place.

v2: handle vcn as well, keep setting the valid bit manually,
    add a BUG_ON() for GMC v6, v7 and v8 as well.
v3: handle vcn_v1_0_enc_ring_emit_vm_flush as well.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 26 +++++++++-----------------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  |  6 ++----
 drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c  |  7 +++++++
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c  |  9 ++++++++-
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c  |  9 ++++++++-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 10 ++++++----
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  6 ++----
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c  | 12 ++++--------
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c  |  6 ++----
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c  | 12 ++++--------
 11 files changed, 55 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5f7c408..72f4fad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -309,8 +309,8 @@ struct amdgpu_gart_funcs {
 	/* set pte flags based per asic */
 	uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
 				     uint32_t flags);
-	/* adjust mc addr in fb for APU case */
-	u64 (*adjust_mc_addr)(struct amdgpu_device *adev, u64 addr);
+	/* get the pde for a given mc addr */
+	u64 (*get_vm_pde)(struct amdgpu_device *adev, u64 addr);
 	uint32_t (*get_invalidate_req)(unsigned int vm_id);
 };
 
@@ -1820,6 +1820,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
 #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
 #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
+#define amdgpu_gart_get_vm_pde(adev, addr) (adev)->gart.gart_funcs->get_vm_pde((adev), (addr))
 #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
 #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 3ecde81..c119032 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -682,16 +682,6 @@ static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring)
 	return false;
 }
 
-static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr)
-{
-	u64 addr = mc_addr;
-
-	if (adev->gart.gart_funcs->adjust_mc_addr)
-		addr = adev->gart.gart_funcs->adjust_mc_addr(adev, addr);
-
-	return addr;
-}
-
 bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
 				  struct amdgpu_job *job)
 {
@@ -1033,18 +1023,18 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 		    (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
 
 			if (count) {
-				uint64_t pt_addr =
-					amdgpu_vm_adjust_mc_addr(adev, last_pt);
+				uint64_t entry;
 
+				entry = amdgpu_gart_get_vm_pde(adev, last_pt);
 				if (shadow)
 					amdgpu_vm_do_set_ptes(&params,
 							      last_shadow,
-							      pt_addr, count,
+							      entry, count,
 							      incr,
 							      AMDGPU_PTE_VALID);
 
 				amdgpu_vm_do_set_ptes(&params, last_pde,
-						      pt_addr, count, incr,
+						      entry, count, incr,
 						      AMDGPU_PTE_VALID);
 			}
 
@@ -1058,13 +1048,15 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 	}
 
 	if (count) {
-		uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt);
+		uint64_t entry;
+
+		entry = amdgpu_gart_get_vm_pde(adev, last_pt);
 
 		if (vm->root.bo->shadow)
-			amdgpu_vm_do_set_ptes(&params, last_shadow, pt_addr,
+			amdgpu_vm_do_set_ptes(&params, last_shadow, entry,
 					      count, incr, AMDGPU_PTE_VALID);
 
-		amdgpu_vm_do_set_ptes(&params, last_pde, pt_addr,
+		amdgpu_vm_do_set_ptes(&params, last_pde, entry,
 				      count, incr, AMDGPU_PTE_VALID);
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index e7130cd..843578e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3771,10 +3771,8 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev, pd_addr);
-	pd_addr = pd_addr | 0x1; /* valid bit */
-	/* now only use physical base address of PDE and valid */
-	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
+	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
+	pd_addr |= AMDGPU_PTE_VALID;
 
 	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
 				   hub->ctx0_ptb_addr_lo32 + (2 * vm_id),
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 1e6263a..d5f3d873 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -395,6 +395,12 @@ static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev,
 	return pte_flag;
 }
 
+static uint64_t gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
+{
+	BUG_ON(addr & 0xFFFFF0000000FFFULL);
+	return addr;
+}
+
 static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev,
 					      bool value)
 {
@@ -1127,6 +1133,7 @@ static const struct amdgpu_gart_funcs gmc_v6_0_gart_funcs = {
 	.flush_gpu_tlb = gmc_v6_0_gart_flush_gpu_tlb,
 	.set_pte_pde = gmc_v6_0_gart_set_pte_pde,
 	.set_prt = gmc_v6_0_set_prt,
+	.get_vm_pde = gmc_v6_0_get_vm_pde,
 	.get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 967505b..4f140e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -472,6 +472,12 @@ static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev,
 	return pte_flag;
 }
 
+static uint64_t gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
+{
+	BUG_ON(addr & 0xFFFFF0000000FFFULL);
+	return addr;
+}
+
 /**
  * gmc_v8_0_set_fault_enable_default - update VM fault handling
  *
@@ -1293,7 +1299,8 @@ static const struct amdgpu_gart_funcs gmc_v7_0_gart_funcs = {
 	.flush_gpu_tlb = gmc_v7_0_gart_flush_gpu_tlb,
 	.set_pte_pde = gmc_v7_0_gart_set_pte_pde,
 	.set_prt = gmc_v7_0_set_prt,
-	.get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags
+	.get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags,
+	.get_vm_pde = gmc_v7_0_get_vm_pde
 };
 
 static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 3b5ea0f..f05c034 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -656,6 +656,12 @@ static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev,
 	return pte_flag;
 }
 
+static uint64_t gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
+{
+	BUG_ON(addr & 0xFFF0000000000FFFULL);
+	return addr;
+}
+
 /**
  * gmc_v8_0_set_fault_enable_default - update VM fault handling
  *
@@ -1612,7 +1618,8 @@ static const struct amdgpu_gart_funcs gmc_v8_0_gart_funcs = {
 	.flush_gpu_tlb = gmc_v8_0_gart_flush_gpu_tlb,
 	.set_pte_pde = gmc_v8_0_gart_set_pte_pde,
 	.set_prt = gmc_v8_0_set_prt,
-	.get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags
+	.get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags,
+	.get_vm_pde = gmc_v8_0_get_vm_pde
 };
 
 static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 19e1027..047b1a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -358,17 +358,19 @@ static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev,
 	return pte_flag;
 }
 
-static u64 gmc_v9_0_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr)
+static u64 gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, u64 addr)
 {
-	return adev->vm_manager.vram_base_offset + mc_addr - adev->mc.vram_start;
+	addr = adev->vm_manager.vram_base_offset + addr - adev->mc.vram_start;
+	BUG_ON(addr & 0xFFFF00000000003FULL);
+	return addr;
 }
 
 static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
 	.flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb,
 	.set_pte_pde = gmc_v9_0_gart_set_pte_pde,
-	.get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
-	.adjust_mc_addr = gmc_v9_0_adjust_mc_addr,
 	.get_invalidate_req = gmc_v9_0_get_invalidate_req,
+	.get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
+	.get_vm_pde = gmc_v9_0_get_vm_pde
 };
 
 static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 332f2fd..73003fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1143,10 +1143,8 @@ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev, pd_addr);
-	pd_addr = pd_addr | 0x1; /* valid bit */
-	/* now only use physical base address of PDE and valid */
-	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
+	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
+	pd_addr |= AMDGPU_PTE_VALID;
 
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
 			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 74e3f23..dd9ec81 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -1316,10 +1316,8 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	uint32_t data0, data1, mask;
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev, pd_addr);
-	pd_addr = pd_addr | 0x1; /* valid bit */
-	/* now only use physical base address of PDE and valid */
-	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
+	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
+	pd_addr |= AMDGPU_PTE_VALID;
 
 	data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
 	data1 = upper_32_bits(pd_addr);
@@ -1358,10 +1356,8 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev, pd_addr);
-	pd_addr = pd_addr | 0x1; /* valid bit */
-	/* now only use physical base address of PDE and valid */
-	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
+	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
+	pd_addr |= AMDGPU_PTE_VALID;
 
 	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
 	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 0012835..0b7fcc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -926,10 +926,8 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev, pd_addr);
-	pd_addr = pd_addr | 0x1; /* valid bit */
-	/* now only use physical base address of PDE and valid */
-	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
+	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
+	pd_addr |= AMDGPU_PTE_VALID;
 
 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
 	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index e8c8c78e..ec33e8f 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -882,10 +882,8 @@ static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	uint32_t data0, data1, mask;
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev, pd_addr);
-	pd_addr = pd_addr | 0x1; /* valid bit */
-	/* now only use physical base address of PDE and valid */
-	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
+	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
+	pd_addr |= AMDGPU_PTE_VALID;
 
 	data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
 	data1 = upper_32_bits(pd_addr);
@@ -1015,10 +1013,8 @@ static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = ring->adev->gart.gart_funcs->adjust_mc_addr(ring->adev, pd_addr);
-	pd_addr = pd_addr | 0x1; /* valid bit */
-	/* now only use physical base address of PDE and valid */
-	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
+	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
+	pd_addr |= AMDGPU_PTE_VALID;
 
 	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
 	amdgpu_ring_write(ring,
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/7] drm/amdgpu: add some extra VM error handling
       [not found] ` <1495558374-3513-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-05-23 16:52   ` [PATCH 1/7] drm/amdgpu: cleanup adjust_mc_addr handling v3 Christian König
@ 2017-05-23 16:52   ` Christian König
  2017-05-23 16:52   ` [PATCH 3/7] drm/amdgpu: Return EINVAL if no PT BO Christian König
                     ` (4 subsequent siblings)
  6 siblings, 0 replies; 12+ messages in thread
From: Christian König @ 2017-05-23 16:52 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

If updating the PDs fails we now invalidate all entries to try again later.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c119032..6af2d3c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1104,6 +1104,32 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 }
 
 /*
+ * amdgpu_vm_invalidate_level - mark all PD levels as invalid
+ *
+ * @parent: parent PD
+ *
+ * Mark all PD level as invalid after an error.
+ */
+static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
+{
+	unsigned pt_idx;
+
+	/*
+	 * Recurse into the subdirectories. This recursion is harmless because
+	 * we only have a maximum of 5 layers.
+	 */
+	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
+		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
+
+		if (!entry->bo)
+			continue;
+
+		entry->addr = ~0ULL;
+		amdgpu_vm_invalidate_level(entry);
+	}
+}
+
+/*
  * amdgpu_vm_update_directories - make sure that all directories are valid
  *
  * @adev: amdgpu_device pointer
@@ -1115,7 +1141,13 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 				 struct amdgpu_vm *vm)
 {
-	return amdgpu_vm_update_level(adev, vm, &vm->root, 0);
+	int r;
+
+	r = amdgpu_vm_update_level(adev, vm, &vm->root, 0);
+	if (r)
+		amdgpu_vm_invalidate_level(&vm->root);
+
+	return r;
 }
 
 /**
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 3/7] drm/amdgpu: Return EINVAL if no PT BO
       [not found] ` <1495558374-3513-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-05-23 16:52   ` [PATCH 1/7] drm/amdgpu: cleanup adjust_mc_addr handling v3 Christian König
  2017-05-23 16:52   ` [PATCH 2/7] drm/amdgpu: add some extra VM error handling Christian König
@ 2017-05-23 16:52   ` Christian König
  2017-05-23 16:52   ` [PATCH 4/7] drm/amdgpu: cache the complete pde Christian König
                     ` (3 subsequent siblings)
  6 siblings, 0 replies; 12+ messages in thread
From: Christian König @ 2017-05-23 16:52 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>

This change is also useful for the upcoming changes where page tables
can be updated by CPU.

Change-Id: I07510ed60c94cf1944ee96bb4b16c40ec88ea17c
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 48 +++++++++++++++++++++-------------
 1 file changed, 30 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6af2d3c..b2384b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1187,8 +1187,9 @@ static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
  * @flags: mapping flags
  *
  * Update the page tables in the range @start - @end.
+ * Returns 0 for success, -EINVAL for failure.
  */
-static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
+static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 				  uint64_t start, uint64_t end,
 				  uint64_t dst, uint64_t flags)
 {
@@ -1206,12 +1207,12 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 	pt = amdgpu_vm_get_pt(params, addr);
 	if (!pt) {
 		pr_err("PT not found, aborting update_ptes\n");
-		return;
+		return -EINVAL;
 	}
 
 	if (params->shadow) {
 		if (!pt->shadow)
-			return;
+			return 0;
 		pt = pt->shadow;
 	}
 	if ((addr & ~mask) == (end & ~mask))
@@ -1233,12 +1234,12 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		pt = amdgpu_vm_get_pt(params, addr);
 		if (!pt) {
 			pr_err("PT not found, aborting update_ptes\n");
-			return;
+			return -EINVAL;
 		}
 
 		if (params->shadow) {
 			if (!pt->shadow)
-				return;
+				return 0;
 			pt = pt->shadow;
 		}
 
@@ -1273,6 +1274,8 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 
 	params->func(params, cur_pe_start, cur_dst, cur_nptes,
 		     AMDGPU_GPU_PAGE_SIZE, flags);
+
+	return 0;
 }
 
 /*
@@ -1284,11 +1287,14 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
  * @end: last PTE to handle
  * @dst: addr those PTEs should point to
  * @flags: hw mapping flags
+ * Returns 0 for success, -EINVAL for failure.
  */
-static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
+static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 				uint64_t start, uint64_t end,
 				uint64_t dst, uint64_t flags)
 {
+	int r;
+
 	/**
 	 * The MC L1 TLB supports variable sized pages, based on a fragment
 	 * field in the PTE. When this field is set to a non-zero value, page
@@ -1317,28 +1323,30 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 
 	/* system pages are non continuously */
 	if (params->src || !(flags & AMDGPU_PTE_VALID) ||
-	    (frag_start >= frag_end)) {
-
-		amdgpu_vm_update_ptes(params, start, end, dst, flags);
-		return;
-	}
+	    (frag_start >= frag_end))
+		return amdgpu_vm_update_ptes(params, start, end, dst, flags);
 
 	/* handle the 4K area at the beginning */
 	if (start != frag_start) {
-		amdgpu_vm_update_ptes(params, start, frag_start,
-				      dst, flags);
+		r = amdgpu_vm_update_ptes(params, start, frag_start,
+					  dst, flags);
+		if (r)
+			return r;
 		dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
 	}
 
 	/* handle the area in the middle */
-	amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
-			      flags | frag_flags);
+	r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
+				  flags | frag_flags);
+	if (r)
+		return r;
 
 	/* handle the 4K area at the end */
 	if (frag_end != end) {
 		dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
-		amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
+		r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
 	}
+	return r;
 }
 
 /**
@@ -1459,9 +1467,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		goto error_free;
 
 	params.shadow = true;
-	amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
+	r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
+	if (r)
+		goto error_free;
 	params.shadow = false;
-	amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
+	r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
+	if (r)
+		goto error_free;
 
 	amdgpu_ring_pad_ib(ring, params.ib);
 	WARN_ON(params.ib->length_dw > ndw);
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 4/7] drm/amdgpu: cache the complete pde
       [not found] ` <1495558374-3513-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
                     ` (2 preceding siblings ...)
  2017-05-23 16:52   ` [PATCH 3/7] drm/amdgpu: Return EINVAL if no PT BO Christian König
@ 2017-05-23 16:52   ` Christian König
  2017-05-23 16:52   ` [PATCH 5/7] drm/amdgpu: stop joining VM PTE updates Christian König
                     ` (2 subsequent siblings)
  6 siblings, 0 replies; 12+ messages in thread
From: Christian König @ 2017-05-23 16:52 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

Makes it easier to update the PDE with huge pages.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b2384b8..ecb2814 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1012,6 +1012,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 		}
 
 		pt = amdgpu_bo_gpu_offset(bo);
+		pt = amdgpu_gart_get_vm_pde(adev, pt);
 		if (parent->entries[pt_idx].addr == pt)
 			continue;
 
@@ -1023,18 +1024,15 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 		    (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
 
 			if (count) {
-				uint64_t entry;
-
-				entry = amdgpu_gart_get_vm_pde(adev, last_pt);
 				if (shadow)
 					amdgpu_vm_do_set_ptes(&params,
 							      last_shadow,
-							      entry, count,
+							      last_pt, count,
 							      incr,
 							      AMDGPU_PTE_VALID);
 
 				amdgpu_vm_do_set_ptes(&params, last_pde,
-						      entry, count, incr,
+						      last_pt, count, incr,
 						      AMDGPU_PTE_VALID);
 			}
 
@@ -1048,15 +1046,11 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 	}
 
 	if (count) {
-		uint64_t entry;
-
-		entry = amdgpu_gart_get_vm_pde(adev, last_pt);
-
 		if (vm->root.bo->shadow)
-			amdgpu_vm_do_set_ptes(&params, last_shadow, entry,
+			amdgpu_vm_do_set_ptes(&params, last_shadow, last_pt,
 					      count, incr, AMDGPU_PTE_VALID);
 
-		amdgpu_vm_do_set_ptes(&params, last_pde, entry,
+		amdgpu_vm_do_set_ptes(&params, last_pde, last_pt,
 				      count, incr, AMDGPU_PTE_VALID);
 	}
 
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 5/7] drm/amdgpu: stop joining VM PTE updates
       [not found] ` <1495558374-3513-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
                     ` (3 preceding siblings ...)
  2017-05-23 16:52   ` [PATCH 4/7] drm/amdgpu: cache the complete pde Christian König
@ 2017-05-23 16:52   ` Christian König
  2017-05-23 16:52   ` [PATCH 6/7] drm/amdgpu: increase fragmentation size for Vega10 Christian König
  2017-05-23 16:52   ` [PATCH 7/7] drm/amdgpu: enable huge page handling in the VM v2 Christian König
  6 siblings, 0 replies; 12+ messages in thread
From: Christian König @ 2017-05-23 16:52 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

This isn't beneficial any more since VRAM allocations are now split
so that they fits into a single page table.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 61 ++++------------------------------
 1 file changed, 7 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ecb2814..062dadc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1190,41 +1190,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 	struct amdgpu_device *adev = params->adev;
 	const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
 
-	uint64_t cur_pe_start, cur_nptes, cur_dst;
-	uint64_t addr; /* next GPU address to be updated */
+	uint64_t addr, pe_start;
 	struct amdgpu_bo *pt;
-	unsigned nptes; /* next number of ptes to be updated */
-	uint64_t next_pe_start;
-
-	/* initialize the variables */
-	addr = start;
-	pt = amdgpu_vm_get_pt(params, addr);
-	if (!pt) {
-		pr_err("PT not found, aborting update_ptes\n");
-		return -EINVAL;
-	}
-
-	if (params->shadow) {
-		if (!pt->shadow)
-			return 0;
-		pt = pt->shadow;
-	}
-	if ((addr & ~mask) == (end & ~mask))
-		nptes = end - addr;
-	else
-		nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
-
-	cur_pe_start = amdgpu_bo_gpu_offset(pt);
-	cur_pe_start += (addr & mask) * 8;
-	cur_nptes = nptes;
-	cur_dst = dst;
-
-	/* for next ptb*/
-	addr += nptes;
-	dst += nptes * AMDGPU_GPU_PAGE_SIZE;
+	unsigned nptes;
 
 	/* walk over the address space and update the page tables */
-	while (addr < end) {
+	for (addr = start; addr < end; addr += nptes) {
 		pt = amdgpu_vm_get_pt(params, addr);
 		if (!pt) {
 			pr_err("PT not found, aborting update_ptes\n");
@@ -1242,33 +1213,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		else
 			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
 
-		next_pe_start = amdgpu_bo_gpu_offset(pt);
-		next_pe_start += (addr & mask) * 8;
-
-		if ((cur_pe_start + 8 * cur_nptes) == next_pe_start &&
-		    ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) {
-			/* The next ptb is consecutive to current ptb.
-			 * Don't call the update function now.
-			 * Will update two ptbs together in future.
-			*/
-			cur_nptes += nptes;
-		} else {
-			params->func(params, cur_pe_start, cur_dst, cur_nptes,
-				     AMDGPU_GPU_PAGE_SIZE, flags);
+		pe_start = amdgpu_bo_gpu_offset(pt);
+		pe_start += (addr & mask) * 8;
 
-			cur_pe_start = next_pe_start;
-			cur_nptes = nptes;
-			cur_dst = dst;
-		}
+		params->func(params, pe_start, dst, nptes,
+			     AMDGPU_GPU_PAGE_SIZE, flags);
 
-		/* for next ptb*/
-		addr += nptes;
 		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 	}
 
-	params->func(params, cur_pe_start, cur_dst, cur_nptes,
-		     AMDGPU_GPU_PAGE_SIZE, flags);
-
 	return 0;
 }
 
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 6/7] drm/amdgpu: increase fragmentation size for Vega10
       [not found] ` <1495558374-3513-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
                     ` (4 preceding siblings ...)
  2017-05-23 16:52   ` [PATCH 5/7] drm/amdgpu: stop joining VM PTE updates Christian König
@ 2017-05-23 16:52   ` Christian König
  2017-05-23 16:52   ` [PATCH 7/7] drm/amdgpu: enable huge page handling in the VM v2 Christian König
  6 siblings, 0 replies; 12+ messages in thread
From: Christian König @ 2017-05-23 16:52 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

The fragment bits work differently for Vega10 compared to previous generations.

Increase the fragment size to 2MB for now to better handle that.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  | 4 +++-
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index f6edf68..83c4d0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -548,8 +548,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
 		dev_info.virtual_address_max = (uint64_t)adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
 		dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
-		dev_info.pte_fragment_size = (1 << AMDGPU_LOG2_PAGES_PER_FRAG) *
-					     AMDGPU_GPU_PAGE_SIZE;
+		dev_info.pte_fragment_size =
+			(1 << AMDGPU_LOG2_PAGES_PER_FRAG(adev)) *
+			AMDGPU_GPU_PAGE_SIZE;
 		dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE;
 
 		dev_info.cu_active_number = adev->gfx.cu_info.number;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 062dadc..f7afdfa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1262,8 +1262,9 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 	 */
 
 	/* SI and newer are optimized for 64KB */
-	uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG);
-	uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG;
+	unsigned pages_per_frag = AMDGPU_LOG2_PAGES_PER_FRAG(params->adev);
+	uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
+	uint64_t frag_align = 1 << pages_per_frag;
 
 	uint64_t frag_start = ALIGN(start, frag_align);
 	uint64_t frag_end = end & ~(frag_align - 1);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 8309bc7..0f83fe3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -51,7 +51,9 @@ struct amdgpu_bo_list_entry;
 #define AMDGPU_VM_PTB_ALIGN_SIZE   32768
 
 /* LOG2 number of continuous pages for the fragment field */
-#define AMDGPU_LOG2_PAGES_PER_FRAG 4
+#define AMDGPU_LOG2_PAGES_PER_FRAG(adev) \
+	((adev)->asic_type < CHIP_VEGA10 ? 4 : \
+	 (adev)->vm_manager.block_size)
 
 #define AMDGPU_PTE_VALID	(1ULL << 0)
 #define AMDGPU_PTE_SYSTEM	(1ULL << 1)
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 7/7] drm/amdgpu: enable huge page handling in the VM v2
       [not found] ` <1495558374-3513-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
                     ` (5 preceding siblings ...)
  2017-05-23 16:52   ` [PATCH 6/7] drm/amdgpu: increase fragmentation size for Vega10 Christian König
@ 2017-05-23 16:52   ` Christian König
       [not found]     ` <1495558374-3513-8-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  6 siblings, 1 reply; 12+ messages in thread
From: Christian König @ 2017-05-23 16:52 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

The hardware can use huge pages to map 2MB of address space with only one PDE.

v2: few cleanups and rebased

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 95 +++++++++++++++++++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  4 ++
 2 files changed, 75 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f7afdfa..f07c9b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -325,6 +325,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 
 			entry->bo = pt;
 			entry->addr = 0;
+			entry->huge_page = false;
 		}
 
 		if (level < adev->vm_manager.num_level) {
@@ -1013,7 +1014,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 
 		pt = amdgpu_bo_gpu_offset(bo);
 		pt = amdgpu_gart_get_vm_pde(adev, pt);
-		if (parent->entries[pt_idx].addr == pt)
+		if (parent->entries[pt_idx].addr == pt ||
+		    parent->entries[pt_idx].huge_page)
 			continue;
 
 		parent->entries[pt_idx].addr = pt;
@@ -1145,29 +1147,69 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 }
 
 /**
- * amdgpu_vm_find_pt - find the page table for an address
+ * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
  *
  * @p: see amdgpu_pte_update_params definition
  * @addr: virtual address in question
+ * @nptes: number of PTEs updated with this operation
+ * @dst: destination address where the PTEs should point to
+ * @flags: access flags fro the PTEs
+ * @bo: resulting page tables BO
  *
- * Find the page table BO for a virtual address, return NULL when none found.
+ * Check if we can update the PD with a huge page. Also finds the page table
+ * BO for a virtual address, returns -ENOENT when nothing found.
  */
-static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
-					  uint64_t addr)
+static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
+				       uint64_t addr, unsigned nptes,
+				       uint64_t dst, uint64_t flags,
+				       struct amdgpu_bo **bo)
 {
-	struct amdgpu_vm_pt *entry = &p->vm->root;
-	unsigned idx, level = p->adev->vm_manager.num_level;
+	unsigned pt_idx, level = p->adev->vm_manager.num_level;
+	struct amdgpu_vm_pt *entry = &p->vm->root, *parent;
+	uint64_t pd_addr, pde;
 
-	while (entry->entries) {
-		idx = addr >> (p->adev->vm_manager.block_size * level--);
-		idx %= amdgpu_bo_size(entry->bo) / 8;
-		entry = &entry->entries[idx];
-	}
+	do {
+		pt_idx = addr >> (p->adev->vm_manager.block_size * level--);
+		pt_idx %= amdgpu_bo_size(entry->bo) / 8;
+		parent = entry;
+		entry = &entry->entries[pt_idx];
+	} while (entry->entries);
 
 	if (level)
-		return NULL;
+		return -ENOENT;
+
+	*bo = entry->bo;
+
+	/* In the case of a mixed PT the PDE must point to it*/
+	if (p->adev->asic_type < CHIP_VEGA10 ||
+	    nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
+	    p->func != amdgpu_vm_do_set_ptes ||
+	    !(flags & AMDGPU_PTE_VALID)) {
+
+		dst = amdgpu_bo_gpu_offset(*bo);
+		dst = amdgpu_gart_get_vm_pde(p->adev, dst);
+		flags = AMDGPU_PTE_VALID;
+	} else {
+		flags |= AMDGPU_PDE_PTE;
+	}
 
-	return entry->bo;
+	if (entry->addr == dst &&
+	    entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
+		return 0;
+
+	entry->addr = dst;
+	entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
+
+	if (parent->bo->shadow) {
+		pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
+		pde = pd_addr + pt_idx * 8;
+		amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
+	}
+
+	pd_addr = amdgpu_bo_gpu_offset(parent->bo);
+	pde = pd_addr + pt_idx * 8;
+	amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
+	return 0;
 }
 
 /**
@@ -1193,14 +1235,20 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 	uint64_t addr, pe_start;
 	struct amdgpu_bo *pt;
 	unsigned nptes;
+	int r;
 
 	/* walk over the address space and update the page tables */
 	for (addr = start; addr < end; addr += nptes) {
-		pt = amdgpu_vm_get_pt(params, addr);
-		if (!pt) {
-			pr_err("PT not found, aborting update_ptes\n");
-			return -EINVAL;
-		}
+
+		if ((addr & ~mask) == (end & ~mask))
+			nptes = end - addr;
+		else
+			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
+
+		r = amdgpu_vm_handle_huge_pages(params, addr, nptes,
+						dst, flags, &pt);
+		if (r)
+			return r;
 
 		if (params->shadow) {
 			if (!pt->shadow)
@@ -1208,11 +1256,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 			pt = pt->shadow;
 		}
 
-		if ((addr & ~mask) == (end & ~mask))
-			nptes = end - addr;
-		else
-			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
-
 		pe_start = amdgpu_bo_gpu_offset(pt);
 		pe_start += (addr & mask) * 8;
 
@@ -1353,6 +1396,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	/* padding, etc. */
 	ndw = 64;
 
+	/* one PDE write for each huge page */
+	ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7;
+
 	if (src) {
 		/* only copy commands needed */
 		ndw += ncmds * 7;
@@ -1437,6 +1483,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 error_free:
 	amdgpu_job_free(job);
+	amdgpu_vm_invalidate_level(&vm->root);
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 0f83fe3..2940d66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -70,6 +70,9 @@ struct amdgpu_bo_list_entry;
 /* TILED for VEGA10, reserved for older ASICs  */
 #define AMDGPU_PTE_PRT		(1ULL << 51)
 
+/* PDE is handled as PTE for VEGA10 */
+#define AMDGPU_PDE_PTE		(1ULL << 54)
+
 /* VEGA10 only */
 #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
 #define AMDGPU_PTE_MTYPE_MASK	AMDGPU_PTE_MTYPE(3ULL)
@@ -92,6 +95,7 @@ struct amdgpu_bo_list_entry;
 struct amdgpu_vm_pt {
 	struct amdgpu_bo	*bo;
 	uint64_t		addr;
+	bool			huge_page;
 
 	/* array of page tables, one for each directory entry */
 	struct amdgpu_vm_pt	*entries;
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/7] drm/amdgpu: cleanup adjust_mc_addr handling v3
       [not found]     ` <1495558374-3513-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-05-23 19:03       ` Felix Kuehling
       [not found]         ` <b5b14e2f-c587-a83b-25ff-22f7197c3da9-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Felix Kuehling @ 2017-05-23 19:03 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Christian König


On 17-05-23 12:52 PM, Christian König wrote:
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> @@ -395,7 +395,13 @@
>  static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev, 
>  	return pte_flag; 
>  }   
> +static uint64_t gmc_v6_0_get_vm_pde(struct amdgpu_device *adev,
> uint64_t addr)
> +{
> + BUG_ON(addr & 0xFFFFF0000000FFFULL);
> + return addr;
> +}
> +

The mask in the BUG_ON leaves out the highest 4 bits (63-60). Is that
intentional? Same for gmc_v7_0.c.

> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 3b5ea0f..f05c034 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -656,7 +656,13 @@
>  static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev, 
>  	return pte_flag; 
>  } 
>   
> +static uint64_t gmc_v8_0_get_vm_pde(struct amdgpu_device *adev,
> uint64_t addr)
> +{
> + BUG_ON(addr & 0xFFF0000000000FFFULL);
> + return addr;
> +}
> +

This looks like you're allowing 52bit physical addresses for GFX 8.
AFAIK the HW only supports 40 bits. Is that intentional?

Other than that, the patch is Reviewed-by: Felix Kuehling
<Felix.Kuehling@amd.com>

Regards,
  Felix
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 7/7] drm/amdgpu: enable huge page handling in the VM v2
       [not found]     ` <1495558374-3513-8-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-05-23 19:29       ` Felix Kuehling
       [not found]         ` <bed31c06-f9de-b31e-9972-b4c719f8982e-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Felix Kuehling @ 2017-05-23 19:29 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Christian König

Do you need something in amdgpu_vm_update_level to stop it from
overwriting huge page PTEs?

Regards,
  Felix


On 17-05-23 12:52 PM, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> The hardware can use huge pages to map 2MB of address space with only one PDE.
>
> v2: few cleanups and rebased
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 95 +++++++++++++++++++++++++---------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  4 ++
>  2 files changed, 75 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index f7afdfa..f07c9b8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -325,6 +325,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>  
>  			entry->bo = pt;
>  			entry->addr = 0;
> +			entry->huge_page = false;
>  		}
>  
>  		if (level < adev->vm_manager.num_level) {
> @@ -1013,7 +1014,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>  
>  		pt = amdgpu_bo_gpu_offset(bo);
>  		pt = amdgpu_gart_get_vm_pde(adev, pt);
> -		if (parent->entries[pt_idx].addr == pt)
> +		if (parent->entries[pt_idx].addr == pt ||
> +		    parent->entries[pt_idx].huge_page)
>  			continue;
>  
>  		parent->entries[pt_idx].addr = pt;
> @@ -1145,29 +1147,69 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>  }
>  
>  /**
> - * amdgpu_vm_find_pt - find the page table for an address
> + * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
>   *
>   * @p: see amdgpu_pte_update_params definition
>   * @addr: virtual address in question
> + * @nptes: number of PTEs updated with this operation
> + * @dst: destination address where the PTEs should point to
> + * @flags: access flags fro the PTEs
> + * @bo: resulting page tables BO
>   *
> - * Find the page table BO for a virtual address, return NULL when none found.
> + * Check if we can update the PD with a huge page. Also finds the page table
> + * BO for a virtual address, returns -ENOENT when nothing found.
>   */
> -static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
> -					  uint64_t addr)
> +static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
> +				       uint64_t addr, unsigned nptes,
> +				       uint64_t dst, uint64_t flags,
> +				       struct amdgpu_bo **bo)
>  {
> -	struct amdgpu_vm_pt *entry = &p->vm->root;
> -	unsigned idx, level = p->adev->vm_manager.num_level;
> +	unsigned pt_idx, level = p->adev->vm_manager.num_level;
> +	struct amdgpu_vm_pt *entry = &p->vm->root, *parent;
> +	uint64_t pd_addr, pde;
>  
> -	while (entry->entries) {
> -		idx = addr >> (p->adev->vm_manager.block_size * level--);
> -		idx %= amdgpu_bo_size(entry->bo) / 8;
> -		entry = &entry->entries[idx];
> -	}
> +	do {
> +		pt_idx = addr >> (p->adev->vm_manager.block_size * level--);
> +		pt_idx %= amdgpu_bo_size(entry->bo) / 8;
> +		parent = entry;
> +		entry = &entry->entries[pt_idx];
> +	} while (entry->entries);
>  
>  	if (level)
> -		return NULL;
> +		return -ENOENT;
> +
> +	*bo = entry->bo;
> +
> +	/* In the case of a mixed PT the PDE must point to it*/
> +	if (p->adev->asic_type < CHIP_VEGA10 ||
> +	    nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
> +	    p->func != amdgpu_vm_do_set_ptes ||
> +	    !(flags & AMDGPU_PTE_VALID)) {
> +
> +		dst = amdgpu_bo_gpu_offset(*bo);
> +		dst = amdgpu_gart_get_vm_pde(p->adev, dst);
> +		flags = AMDGPU_PTE_VALID;
> +	} else {
> +		flags |= AMDGPU_PDE_PTE;
> +	}
>  
> -	return entry->bo;
> +	if (entry->addr == dst &&
> +	    entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
> +		return 0;
> +
> +	entry->addr = dst;
> +	entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
> +
> +	if (parent->bo->shadow) {
> +		pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
> +		pde = pd_addr + pt_idx * 8;
> +		amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
> +	}
> +
> +	pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> +	pde = pd_addr + pt_idx * 8;
> +	amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
> +	return 0;
>  }
>  
>  /**
> @@ -1193,14 +1235,20 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>  	uint64_t addr, pe_start;
>  	struct amdgpu_bo *pt;
>  	unsigned nptes;
> +	int r;
>  
>  	/* walk over the address space and update the page tables */
>  	for (addr = start; addr < end; addr += nptes) {
> -		pt = amdgpu_vm_get_pt(params, addr);
> -		if (!pt) {
> -			pr_err("PT not found, aborting update_ptes\n");
> -			return -EINVAL;
> -		}
> +
> +		if ((addr & ~mask) == (end & ~mask))
> +			nptes = end - addr;
> +		else
> +			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
> +
> +		r = amdgpu_vm_handle_huge_pages(params, addr, nptes,
> +						dst, flags, &pt);
> +		if (r)
> +			return r;
>  
>  		if (params->shadow) {
>  			if (!pt->shadow)
> @@ -1208,11 +1256,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>  			pt = pt->shadow;
>  		}
>  
> -		if ((addr & ~mask) == (end & ~mask))
> -			nptes = end - addr;
> -		else
> -			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
> -
>  		pe_start = amdgpu_bo_gpu_offset(pt);
>  		pe_start += (addr & mask) * 8;
>  
> @@ -1353,6 +1396,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>  	/* padding, etc. */
>  	ndw = 64;
>  
> +	/* one PDE write for each huge page */
> +	ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7;
> +
>  	if (src) {
>  		/* only copy commands needed */
>  		ndw += ncmds * 7;
> @@ -1437,6 +1483,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>  
>  error_free:
>  	amdgpu_job_free(job);
> +	amdgpu_vm_invalidate_level(&vm->root);
>  	return r;
>  }
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 0f83fe3..2940d66 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -70,6 +70,9 @@ struct amdgpu_bo_list_entry;
>  /* TILED for VEGA10, reserved for older ASICs  */
>  #define AMDGPU_PTE_PRT		(1ULL << 51)
>  
> +/* PDE is handled as PTE for VEGA10 */
> +#define AMDGPU_PDE_PTE		(1ULL << 54)
> +
>  /* VEGA10 only */
>  #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
>  #define AMDGPU_PTE_MTYPE_MASK	AMDGPU_PTE_MTYPE(3ULL)
> @@ -92,6 +95,7 @@ struct amdgpu_bo_list_entry;
>  struct amdgpu_vm_pt {
>  	struct amdgpu_bo	*bo;
>  	uint64_t		addr;
> +	bool			huge_page;
>  
>  	/* array of page tables, one for each directory entry */
>  	struct amdgpu_vm_pt	*entries;

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/7] drm/amdgpu: cleanup adjust_mc_addr handling v3
       [not found]         ` <b5b14e2f-c587-a83b-25ff-22f7197c3da9-5C7GfCeVMHo@public.gmane.org>
@ 2017-05-24 11:22           ` Christian König
  0 siblings, 0 replies; 12+ messages in thread
From: Christian König @ 2017-05-24 11:22 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 23.05.2017 um 21:03 schrieb Felix Kuehling:
> On 17-05-23 12:52 PM, Christian König wrote:
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
>> @@ -395,7 +395,13 @@
>>   static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev,
>>   	return pte_flag;
>>   }
>> +static uint64_t gmc_v6_0_get_vm_pde(struct amdgpu_device *adev,
>> uint64_t addr)
>> +{
>> + BUG_ON(addr & 0xFFFFF0000000FFFULL);
>> + return addr;
>> +}
>> +
> The mask in the BUG_ON leaves out the highest 4 bits (63-60). Is that
> intentional? Same for gmc_v7_0.c.
>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> index 3b5ea0f..f05c034 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> @@ -656,7 +656,13 @@
>>   static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev,
>>   	return pte_flag;
>>   }
>>    
>> +static uint64_t gmc_v8_0_get_vm_pde(struct amdgpu_device *adev,
>> uint64_t addr)
>> +{
>> + BUG_ON(addr & 0xFFF0000000000FFFULL);
>> + return addr;
>> +}
>> +
> This looks like you're allowing 52bit physical addresses for GFX 8.
> AFAIK the HW only supports 40 bits. Is that intentional?

No, both issues are just typos caused by to much things to do in parallel.

Thanks for pointing that out,
Christian.

>
> Other than that, the patch is Reviewed-by: Felix Kuehling
> <Felix.Kuehling@amd.com>
>
> Regards,
>    Felix
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: SPAM //Re: [PATCH 7/7] drm/amdgpu: enable huge page handling in the VM v2
       [not found]         ` <bed31c06-f9de-b31e-9972-b4c719f8982e-5C7GfCeVMHo@public.gmane.org>
@ 2017-05-30 11:51           ` Christian König
  0 siblings, 0 replies; 12+ messages in thread
From: Christian König @ 2017-05-30 11:51 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Sorry for the delayed reply, just had to fish this mail out of my spam 
folder.

> Do you need something in amdgpu_vm_update_level to stop it from
> overwriting huge page PTEs?

Yes, this is exactly what this chunk added to amdgpu_vm_update_level is 
doing:
> -		if (parent->entries[pt_idx].addr == pt)
> +		if (parent->entries[pt_idx].addr == pt ||
> +		    parent->entries[pt_idx].huge_page)
>   			continue;

Regards,
Christian.

Am 23.05.2017 um 21:29 schrieb Felix Kuehling:
> Do you need something in amdgpu_vm_update_level to stop it from
> overwriting huge page PTEs?
>
> Regards,
>    Felix
>
>
> On 17-05-23 12:52 PM, Christian König wrote:
>> From: Christian König <christian.koenig@amd.com>
>>
>> The hardware can use huge pages to map 2MB of address space with only one PDE.
>>
>> v2: few cleanups and rebased
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 95 +++++++++++++++++++++++++---------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  4 ++
>>   2 files changed, 75 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index f7afdfa..f07c9b8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -325,6 +325,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>>   
>>   			entry->bo = pt;
>>   			entry->addr = 0;
>> +			entry->huge_page = false;
>>   		}
>>   
>>   		if (level < adev->vm_manager.num_level) {
>> @@ -1013,7 +1014,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>>   
>>   		pt = amdgpu_bo_gpu_offset(bo);
>>   		pt = amdgpu_gart_get_vm_pde(adev, pt);
>> -		if (parent->entries[pt_idx].addr == pt)
>> +		if (parent->entries[pt_idx].addr == pt ||
>> +		    parent->entries[pt_idx].huge_page)
>>   			continue;
>>   
>>   		parent->entries[pt_idx].addr = pt;
>> @@ -1145,29 +1147,69 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>>   }
>>   
>>   /**
>> - * amdgpu_vm_find_pt - find the page table for an address
>> + * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
>>    *
>>    * @p: see amdgpu_pte_update_params definition
>>    * @addr: virtual address in question
>> + * @nptes: number of PTEs updated with this operation
>> + * @dst: destination address where the PTEs should point to
>> + * @flags: access flags fro the PTEs
>> + * @bo: resulting page tables BO
>>    *
>> - * Find the page table BO for a virtual address, return NULL when none found.
>> + * Check if we can update the PD with a huge page. Also finds the page table
>> + * BO for a virtual address, returns -ENOENT when nothing found.
>>    */
>> -static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
>> -					  uint64_t addr)
>> +static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
>> +				       uint64_t addr, unsigned nptes,
>> +				       uint64_t dst, uint64_t flags,
>> +				       struct amdgpu_bo **bo)
>>   {
>> -	struct amdgpu_vm_pt *entry = &p->vm->root;
>> -	unsigned idx, level = p->adev->vm_manager.num_level;
>> +	unsigned pt_idx, level = p->adev->vm_manager.num_level;
>> +	struct amdgpu_vm_pt *entry = &p->vm->root, *parent;
>> +	uint64_t pd_addr, pde;
>>   
>> -	while (entry->entries) {
>> -		idx = addr >> (p->adev->vm_manager.block_size * level--);
>> -		idx %= amdgpu_bo_size(entry->bo) / 8;
>> -		entry = &entry->entries[idx];
>> -	}
>> +	do {
>> +		pt_idx = addr >> (p->adev->vm_manager.block_size * level--);
>> +		pt_idx %= amdgpu_bo_size(entry->bo) / 8;
>> +		parent = entry;
>> +		entry = &entry->entries[pt_idx];
>> +	} while (entry->entries);
>>   
>>   	if (level)
>> -		return NULL;
>> +		return -ENOENT;
>> +
>> +	*bo = entry->bo;
>> +
>> +	/* In the case of a mixed PT the PDE must point to it*/
>> +	if (p->adev->asic_type < CHIP_VEGA10 ||
>> +	    nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
>> +	    p->func != amdgpu_vm_do_set_ptes ||
>> +	    !(flags & AMDGPU_PTE_VALID)) {
>> +
>> +		dst = amdgpu_bo_gpu_offset(*bo);
>> +		dst = amdgpu_gart_get_vm_pde(p->adev, dst);
>> +		flags = AMDGPU_PTE_VALID;
>> +	} else {
>> +		flags |= AMDGPU_PDE_PTE;
>> +	}
>>   
>> -	return entry->bo;
>> +	if (entry->addr == dst &&
>> +	    entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
>> +		return 0;
>> +
>> +	entry->addr = dst;
>> +	entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
>> +
>> +	if (parent->bo->shadow) {
>> +		pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
>> +		pde = pd_addr + pt_idx * 8;
>> +		amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
>> +	}
>> +
>> +	pd_addr = amdgpu_bo_gpu_offset(parent->bo);
>> +	pde = pd_addr + pt_idx * 8;
>> +	amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
>> +	return 0;
>>   }
>>   
>>   /**
>> @@ -1193,14 +1235,20 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>>   	uint64_t addr, pe_start;
>>   	struct amdgpu_bo *pt;
>>   	unsigned nptes;
>> +	int r;
>>   
>>   	/* walk over the address space and update the page tables */
>>   	for (addr = start; addr < end; addr += nptes) {
>> -		pt = amdgpu_vm_get_pt(params, addr);
>> -		if (!pt) {
>> -			pr_err("PT not found, aborting update_ptes\n");
>> -			return -EINVAL;
>> -		}
>> +
>> +		if ((addr & ~mask) == (end & ~mask))
>> +			nptes = end - addr;
>> +		else
>> +			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
>> +
>> +		r = amdgpu_vm_handle_huge_pages(params, addr, nptes,
>> +						dst, flags, &pt);
>> +		if (r)
>> +			return r;
>>   
>>   		if (params->shadow) {
>>   			if (!pt->shadow)
>> @@ -1208,11 +1256,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>>   			pt = pt->shadow;
>>   		}
>>   
>> -		if ((addr & ~mask) == (end & ~mask))
>> -			nptes = end - addr;
>> -		else
>> -			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
>> -
>>   		pe_start = amdgpu_bo_gpu_offset(pt);
>>   		pe_start += (addr & mask) * 8;
>>   
>> @@ -1353,6 +1396,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>   	/* padding, etc. */
>>   	ndw = 64;
>>   
>> +	/* one PDE write for each huge page */
>> +	ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7;
>> +
>>   	if (src) {
>>   		/* only copy commands needed */
>>   		ndw += ncmds * 7;
>> @@ -1437,6 +1483,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>   
>>   error_free:
>>   	amdgpu_job_free(job);
>> +	amdgpu_vm_invalidate_level(&vm->root);
>>   	return r;
>>   }
>>   
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 0f83fe3..2940d66 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -70,6 +70,9 @@ struct amdgpu_bo_list_entry;
>>   /* TILED for VEGA10, reserved for older ASICs  */
>>   #define AMDGPU_PTE_PRT		(1ULL << 51)
>>   
>> +/* PDE is handled as PTE for VEGA10 */
>> +#define AMDGPU_PDE_PTE		(1ULL << 54)
>> +
>>   /* VEGA10 only */
>>   #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
>>   #define AMDGPU_PTE_MTYPE_MASK	AMDGPU_PTE_MTYPE(3ULL)
>> @@ -92,6 +95,7 @@ struct amdgpu_bo_list_entry;
>>   struct amdgpu_vm_pt {
>>   	struct amdgpu_bo	*bo;
>>   	uint64_t		addr;
>> +	bool			huge_page;
>>   
>>   	/* array of page tables, one for each directory entry */
>>   	struct amdgpu_vm_pt	*entries;


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2017-05-30 11:51 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-23 16:52 Huge page support for Vega10 Christian König
     [not found] ` <1495558374-3513-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-05-23 16:52   ` [PATCH 1/7] drm/amdgpu: cleanup adjust_mc_addr handling v3 Christian König
     [not found]     ` <1495558374-3513-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-05-23 19:03       ` Felix Kuehling
     [not found]         ` <b5b14e2f-c587-a83b-25ff-22f7197c3da9-5C7GfCeVMHo@public.gmane.org>
2017-05-24 11:22           ` Christian König
2017-05-23 16:52   ` [PATCH 2/7] drm/amdgpu: add some extra VM error handling Christian König
2017-05-23 16:52   ` [PATCH 3/7] drm/amdgpu: Return EINVAL if no PT BO Christian König
2017-05-23 16:52   ` [PATCH 4/7] drm/amdgpu: cache the complete pde Christian König
2017-05-23 16:52   ` [PATCH 5/7] drm/amdgpu: stop joining VM PTE updates Christian König
2017-05-23 16:52   ` [PATCH 6/7] drm/amdgpu: increase fragmentation size for Vega10 Christian König
2017-05-23 16:52   ` [PATCH 7/7] drm/amdgpu: enable huge page handling in the VM v2 Christian König
     [not found]     ` <1495558374-3513-8-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-05-23 19:29       ` Felix Kuehling
     [not found]         ` <bed31c06-f9de-b31e-9972-b4c719f8982e-5C7GfCeVMHo@public.gmane.org>
2017-05-30 11:51           ` SPAM //Re: " Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.