* [PATCH 0/3] Fixes for multi-level page tables
@ 2017-03-29  1:00 Felix Kuehling
From: Felix Kuehling @ 2017-03-29  1:00 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Christian.Koenig-5C7GfCeVMHo, David1.Zhou-5C7GfCeVMHo
  Cc: Alexander.Deucher-5C7GfCeVMHo, Felix Kuehling, Kent.Russell-5C7GfCeVMHo

I worked on these fixes on amd-kfd-staging with a merge of the recent multi-
level page table changes. With these changes KFDTest passes on Vega10,
including some tests that tend to stress VM memory management.

They applied cleanly to current amd-staging-4.9 but are only compile-tested
on that branch.

Felix Kuehling (3):
  drm/amdgpu: Make max_pfn 64-bit
  drm/amdgpu: Fix Vega10 VM initialization
  drm/amdgpu: Fix multi-level page table bugs for large BOs

 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  6 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 17 ++++++++++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c   |  5 +++--
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      | 17 +++++++----------
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c    |  5 +++--
 6 files changed, 29 insertions(+), 23 deletions(-)

-- 
1.9.1


* [PATCH 1/3] drm/amdgpu: Make max_pfn 64-bit
From: Felix Kuehling @ 2017-03-29  1:00 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Christian.Koenig-5C7GfCeVMHo, David1.Zhou-5C7GfCeVMHo
  Cc: Alexander.Deucher-5C7GfCeVMHo, Felix Kuehling, Kent.Russell-5C7GfCeVMHo

With 4-level page tables the maximum VM size is 256TB. That's 64G
pages, which can't be represented in 32-bit.
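
To put numbers on it, a quick user-space sketch (just the arithmetic,
not the driver code; 4096 matches AMDGPU_GPU_PAGE_SIZE):

  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
      /* 256TB of VM space expressed in 4KB GPU pages */
      uint64_t max_pfn = (256ULL << 40) / 4096;

      /* 2^36 pages: does not fit in 32 bits */
      printf("max_pfn = 0x%llx\n", (unsigned long long)max_pfn);

      /* what the END_ADDR registers take: the last pfn split into
       * low and high 32-bit halves */
      printf("lo32=0x%08x hi32=0x%08x\n",
             (uint32_t)(max_pfn - 1),
             (uint32_t)((max_pfn - 1) >> 32));
      return 0;
  }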

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h   | 2 +-
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 5 +++--
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  | 5 +++--
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f4cb7de..818747f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -336,7 +336,7 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 			struct amdgpu_vm *vm,
 			uint64_t saddr, uint64_t size)
 {
-	unsigned last_pfn;
+	uint64_t last_pfn;
 	uint64_t eaddr;
 
 	/* validate the parameters */
@@ -346,7 +346,7 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 	eaddr = saddr + size - 1;
 	last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
 	if (last_pfn >= adev->vm_manager.max_pfn) {
-		dev_err(adev->dev, "va above limit (0x%08X >= 0x%08X)\n",
+		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
 			last_pfn, adev->vm_manager.max_pfn);
 		return -EINVAL;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index b5e5cdd..102b1f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -153,7 +153,7 @@ struct amdgpu_vm_manager {
 	u64					fence_context;
 	unsigned				seqno[AMDGPU_MAX_RINGS];
 
-	uint32_t				max_pfn;
+	uint64_t				max_pfn;
 	uint32_t				num_level;
 	/* vram base address for page table entry  */
 	u64					vram_base_offset;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 5604a53..dd48637 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -227,8 +227,9 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
 		WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0);
 		WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0);
 		WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32) + i*2,
-				adev->vm_manager.max_pfn - 1);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2, 0);
+			lower_32_bits(adev->vm_manager.max_pfn - 1));
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2,
+			upper_32_bits(adev->vm_manager.max_pfn - 1));
 	}
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 5903bb0..c1dc8c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -247,8 +247,9 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
 		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0);
 		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0);
 		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32) + i*2,
-				adev->vm_manager.max_pfn - 1);
-		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2, 0);
+			lower_32_bits(adev->vm_manager.max_pfn - 1));
+		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2,
+			upper_32_bits(adev->vm_manager.max_pfn - 1));
 	}
 
 	return 0;
-- 
1.9.1


* [PATCH 2/3] drm/amdgpu: Fix Vega10 VM initialization
From: Felix Kuehling @ 2017-03-29  1:00 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Christian.Koenig-5C7GfCeVMHo, David1.Zhou-5C7GfCeVMHo
  Cc: Alexander.Deucher-5C7GfCeVMHo, Felix Kuehling, Kent.Russell-5C7GfCeVMHo

adev->family is not initialized yet when amdgpu_get_block_size is
called. Use adev->asic_type instead.

Minimum VM size is 512GB, not 256GB, for a single page table entry
in the root page table.

gmc_v9_0_vm_init is called after adev->vm_manager.max_pfn is
initialized. Move the minimum VM-size enforcement ahead of max_pfn
initialization. Cast to 64-bit before the left-shift.
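
The cast matters because amdgpu_vm_size is a 32-bit module parameter. A
minimal sketch of the failure mode (assuming a plain int for the
parameter, as in the driver):

  #include <stdint.h>

  static int amdgpu_vm_size = 256 * 1024;    /* 256TB expressed in GB */

  static uint64_t max_pfn_broken(void)
  {
      /* BROKEN: the shift is evaluated in 32-bit arithmetic and
       * overflows (2^18 << 18 = 2^36 does not fit in an int) before
       * the widening to 64-bit happens */
      return amdgpu_vm_size << 18;
  }

  static uint64_t max_pfn_fixed(void)
  {
      /* FIXED: widen first, then shift in 64-bit */
      return (uint64_t)amdgpu_vm_size << 18;
  }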

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  6 +++---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      | 17 +++++++----------
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3500da3..57ccac4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1042,10 +1042,10 @@ static bool amdgpu_check_pot_argument(int arg)
 static void amdgpu_get_block_size(struct amdgpu_device *adev)
 {
 	/* from AI, asic starts to support multiple level VMPT */
-	if (adev->family >= AMDGPU_FAMILY_AI) {
+	if (adev->asic_type >= CHIP_VEGA10) {
 		if (amdgpu_vm_block_size != 9)
-			dev_warn(adev->dev, "Multi-VMPT limits block size to"
-				 "one page!\n");
+			dev_warn(adev->dev,
+				 "Multi-VMPT limits block size to one page!\n");
 		amdgpu_vm_block_size = 9;
 		return;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 1e4734d..df69aae 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -511,12 +511,6 @@ static int gmc_v9_0_vm_init(struct amdgpu_device *adev)
 	 * amdkfd will use VMIDs 8-15
 	 */
 	adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
-	/* Because of four level VMPTs, vm size at least is 256GB.
-	256TB is OK as well */
-	if (amdgpu_vm_size < 256) {
-		DRM_WARN("vm size at least is 256GB!\n");
-		amdgpu_vm_size = 256;
-	}
 	adev->vm_manager.num_level = 3;
 	amdgpu_vm_manager_init(adev);
 
@@ -563,11 +557,14 @@ static int gmc_v9_0_sw_init(void *handle)
 	if (r)
 		return r;
 
-	/* Adjust VM size here.
-	 * Currently default to 64GB ((16 << 20) 4k pages).
-	 * Max GPUVM size is 48 bits.
+	/* Because of four level VMPTs, vm size is at least 512GB.
+	 * The maximum size is 256TB (48bit).
 	 */
-	adev->vm_manager.max_pfn = amdgpu_vm_size << 18;
+	if (amdgpu_vm_size < 512) {
+		DRM_WARN("VM size is at least 512GB!\n");
+		amdgpu_vm_size = 512;
+	}
+	adev->vm_manager.max_pfn = (uint64_t)amdgpu_vm_size << 18;
 
 	/* Set the internal MC address mask
 	 * This is the max address of the GPU's
-- 
1.9.1


* [PATCH 3/3] drm/amdgpu: Fix multi-level page table bugs for large BOs
From: Felix Kuehling @ 2017-03-29  1:00 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Christian.Koenig-5C7GfCeVMHo, David1.Zhou-5C7GfCeVMHo
  Cc: Alexander.Deucher-5C7GfCeVMHo, Felix Kuehling, Kent.Russell-5C7GfCeVMHo

Fix the start/end address calculation for address ranges that span
multiple page directories in amdgpu_vm_alloc_levels.

Add WARN_ONs if page tables aren't found. Otherwise the page table
update would just fail silently.
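
The shape of the fix, as a simplified self-contained sketch (entry
allocation, locking and error handling omitted; 512 entries per level
and the 9-bit block size are assumed):

  #include <stdint.h>

  #define NUM_LEVEL 3    /* levels 0..2 are PDs, level 3 is the PT */

  static void alloc_levels(uint64_t saddr, uint64_t eaddr, unsigned level)
  {
      /* 9 address bits per level below this one */
      unsigned shift = 9 * (NUM_LEVEL - level);
      unsigned from = (saddr >> shift) % 512;
      unsigned to = (eaddr >> shift) % 512;
      unsigned pt_idx;

      for (pt_idx = from; pt_idx <= to; ++pt_idx) {
          if (level < NUM_LEVEL) {
              /* Clamp the range handed down: only the first child
               * starts at saddr and only the last one ends at eaddr;
               * the children in between span their whole range. */
              uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
              uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
                  (((uint64_t)1 << shift) - 1);

              alloc_levels(sub_saddr, sub_eaddr, level + 1);
          }
      }
  }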

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 818747f..44aa039 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -278,6 +278,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 	from = (saddr >> shift) % amdgpu_vm_num_entries(adev, level);
 	to = (eaddr >> shift) % amdgpu_vm_num_entries(adev, level);
 
+	WARN_ON(from > to);
+
 	if (to > parent->last_entry_used)
 		parent->last_entry_used = to;
 
@@ -312,8 +314,11 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 		}
 
 		if (level < adev->vm_manager.num_level) {
-			r = amdgpu_vm_alloc_levels(adev, vm, entry, saddr,
-						   eaddr, level);
+			uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
+			uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
+				((1 << shift) - 1);
+			r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
+						   sub_eaddr, level);
 			if (r)
 				return r;
 		}
@@ -957,9 +962,11 @@ static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
 		entry = &entry->entries[idx];
 	}
 
-	if (level)
+	if (WARN_ON(level))
 		return NULL;
 
+	WARN_ON(!entry->bo);
+
 	return entry->bo;
 }
 
-- 
1.9.1


* Re: [PATCH 2/3] drm/amdgpu: Fix Vega10 VM initialization
From: Zhang, Jerry (Junwei) @ 2017-03-29  1:39 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Christian.Koenig-5C7GfCeVMHo, David1.Zhou-5C7GfCeVMHo
  Cc: Alexander.Deucher-5C7GfCeVMHo, Kent.Russell-5C7GfCeVMHo

On 03/29/2017 09:00 AM, Felix Kuehling wrote:
> adev->family is not initialized yet when amdgpu_get_block_size is
> called. Use adev->asic_type instead.
>
> Minimum VM size is 512GB, not 256GB, for a single page table entry
> in the root page table.
>
> gmc_v9_0_vm_init is called after adev->vm_manager.max_pfn is
> initialized. Move the minimum VM-size enforcement ahead of max_pfn
> initialization. Cast to 64-bit before the left-shift.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>

Just a note:
For now, it's OK to set the minimum vm size to 256G;
in this case, there is no PD bit anyway.

Christian also mentioned that PD should be 4k size, then 256T was required.
When we reach an agreement, we may update them all.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  6 +++---
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      | 17 +++++++----------
>   2 files changed, 10 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 3500da3..57ccac4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1042,10 +1042,10 @@ static bool amdgpu_check_pot_argument(int arg)
>   static void amdgpu_get_block_size(struct amdgpu_device *adev)
>   {
>   	/* from AI, asic starts to support multiple level VMPT */
> -	if (adev->family >= AMDGPU_FAMILY_AI) {
> +	if (adev->asic_type >= CHIP_VEGA10) {
>   		if (amdgpu_vm_block_size != 9)
> -			dev_warn(adev->dev, "Multi-VMPT limits block size to"
> -				 "one page!\n");
> +			dev_warn(adev->dev,
> +				 "Multi-VMPT limits block size to one page!\n");
>   		amdgpu_vm_block_size = 9;
>   		return;
>   	}

Nice catch.

> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 1e4734d..df69aae 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -511,12 +511,6 @@ static int gmc_v9_0_vm_init(struct amdgpu_device *adev)
>   	 * amdkfd will use VMIDs 8-15
>   	 */
>   	adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
> -	/* Because of four level VMPTs, vm size at least is 256GB.
> -	256TB is OK as well */
> -	if (amdgpu_vm_size < 256) {
> -		DRM_WARN("vm size at least is 256GB!\n");
> -		amdgpu_vm_size = 256;
> -	}

David had a patch to fix it yesterday.
But your patch covers the vm size checking as well. :)

>   	adev->vm_manager.num_level = 3;
>   	amdgpu_vm_manager_init(adev);
>
> @@ -563,11 +557,14 @@ static int gmc_v9_0_sw_init(void *handle)
>   	if (r)
>   		return r;
>
> -	/* Adjust VM size here.
> -	 * Currently default to 64GB ((16 << 20) 4k pages).
> -	 * Max GPUVM size is 48 bits.
> +	/* Because of four level VMPTs, vm size is at least 512GB.
> +	 * The maximum size is 256TB (48bit).
>   	 */
> -	adev->vm_manager.max_pfn = amdgpu_vm_size << 18;
> +	if (amdgpu_vm_size < 512) {
> +		DRM_WARN("VM size is at least 512GB!\n");
> +		amdgpu_vm_size = 512;
> +	}
> +	adev->vm_manager.max_pfn = (uint64_t)amdgpu_vm_size << 18;
>
>   	/* Set the internal MC address mask
>   	 * This is the max address of the GPU's
>

* Re: [PATCH 2/3] drm/amdgpu: Fix Vega10 VM initialization
From: Felix Kuehling @ 2017-03-29  1:48 UTC (permalink / raw)
  To: Zhang, Jerry (Junwei),
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Christian.Koenig-5C7GfCeVMHo, David1.Zhou-5C7GfCeVMHo
  Cc: Alexander.Deucher-5C7GfCeVMHo, Kent.Russell-5C7GfCeVMHo

On 17-03-28 09:39 PM, Zhang, Jerry (Junwei) wrote:
> On 03/29/2017 09:00 AM, Felix Kuehling wrote:
>> adev->family is not initialized yet when amdgpu_get_block_size is
>> called. Use adev->asic_type instead.
>>
>> Minimum VM size is 512GB, not 256GB, for a single page table entry
>> in the root page table.
>>
>> gmc_v9_0_vm_init is called after adev->vm_manager.max_pfn is
>> initialized. Move the minimum VM-size enforcement ahead of max_pfn
>> initialization. Cast to 64-bit before the left-shift.
>>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
>
> Just a note:
> For now, it's OK to set the minimum vm size to 256G;
> in this case, there is no PD bit anyway.

With a VM size of 256GB, amdgpu_vm_bo_size(adev, 0) calculates a size of
0, and the page directory allocation with amdgpu_bo_create fails in
amdgpu_vm_init.

In other words, amdgpu_vm_num_entries(adev, 0) needs to return at least
1. That means vm_size needs to be at least 512GB.
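
Spelled out as a sketch (assuming 8 bytes per PDE, with one root-PD
entry covering 512GB):

  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
      uint64_t vm_size_gb = 256;
      uint64_t num_entries = vm_size_gb / 512;    /* = 0 */
      uint64_t bo_size = num_entries * 8;         /* = 0 */

      /* a zero-sized BO request is what makes amdgpu_bo_create fail */
      printf("root PD entries=%llu, bo size=%llu\n",
             (unsigned long long)num_entries,
             (unsigned long long)bo_size);
      return 0;
  }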

Regards,
  Felix

>
> Christian also mentioned that PD should be 4k size, then 256T was
> required.
> When we reach an agreement, we may update them all.
>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  6 +++---
>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      | 17 +++++++----------
>>   2 files changed, 10 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 3500da3..57ccac4 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -1042,10 +1042,10 @@ static bool amdgpu_check_pot_argument(int arg)
>>   static void amdgpu_get_block_size(struct amdgpu_device *adev)
>>   {
>>       /* from AI, asic starts to support multiple level VMPT */
>> -    if (adev->family >= AMDGPU_FAMILY_AI) {
>> +    if (adev->asic_type >= CHIP_VEGA10) {
>>           if (amdgpu_vm_block_size != 9)
>> -            dev_warn(adev->dev, "Multi-VMPT limits block size to"
>> -                 "one page!\n");
>> +            dev_warn(adev->dev,
>> +                 "Multi-VMPT limits block size to one page!\n");
>>           amdgpu_vm_block_size = 9;
>>           return;
>>       }
>
> Nice catch.
>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index 1e4734d..df69aae 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -511,12 +511,6 @@ static int gmc_v9_0_vm_init(struct amdgpu_device
>> *adev)
>>        * amdkfd will use VMIDs 8-15
>>        */
>>       adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
>> -    /* Because of four level VMPTs, vm size at least is 256GB.
>> -    256TB is OK as well */
>> -    if (amdgpu_vm_size < 256) {
>> -        DRM_WARN("vm size at least is 256GB!\n");
>> -        amdgpu_vm_size = 256;
>> -    }
>
> David had a patch to fix it yesterday.
> But your patch involves by vm size checking. :)
>
>>       adev->vm_manager.num_level = 3;
>>       amdgpu_vm_manager_init(adev);
>>
>> @@ -563,11 +557,14 @@ static int gmc_v9_0_sw_init(void *handle)
>>       if (r)
>>           return r;
>>
>> -    /* Adjust VM size here.
>> -     * Currently default to 64GB ((16 << 20) 4k pages).
>> -     * Max GPUVM size is 48 bits.
>> +    /* Because of four level VMPTs, vm size is at least 512GB.
>> +     * The maximum size is 256TB (48bit).
>>        */
>> -    adev->vm_manager.max_pfn = amdgpu_vm_size << 18;
>> +    if (amdgpu_vm_size < 512) {
>> +        DRM_WARN("VM size is at least 512GB!\n");
>> +        amdgpu_vm_size = 512;
>> +    }
>> +    adev->vm_manager.max_pfn = (uint64_t)amdgpu_vm_size << 18;
>>
>>       /* Set the internal MC address mask
>>        * This is the max address of the GPU's
>>


* Re: [PATCH 0/3] Fixes for multi-level page tables
From: zhoucm1 @ 2017-03-29  2:34 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Christian.Koenig-5C7GfCeVMHo
  Cc: Alexander.Deucher-5C7GfCeVMHo, Kent.Russell-5C7GfCeVMHo

Reviewed-by: Chunming Zhou <david1.zhou@amd.com> for the series.

The sub-level saddr/eaddr calculation is especially nice.

Regards,
David Zhou

On 2017-03-29 09:00, Felix Kuehling wrote:
> I worked on these fixes on amd-kfd-staging with a merge of the recent multi-
> level page table changes. With these changes KFDTest passes on Vega10,
> including some tests that tend to stress VM memory management.
>
> They applied cleanly to current amd-staging-4.9 but are only compile-tested
> on that branch.
>
> Felix Kuehling (3):
>    drm/amdgpu: Make max_pfn 64-bit
>    drm/amdgpu: Fix Vega10 VM initialization
>    drm/amdgpu: Fix multi-level page table bugs for large BOs
>
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  6 +++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 17 ++++++++++++-----
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     |  2 +-
>   drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c   |  5 +++--
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      | 17 +++++++----------
>   drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c    |  5 +++--
>   6 files changed, 29 insertions(+), 23 deletions(-)
>


* Re: [PATCH 3/3] drm/amdgpu: Fix multi-level page table bugs for large BOs
From: Zhang, Jerry (Junwei) @ 2017-03-29  2:54 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Christian.Koenig-5C7GfCeVMHo, David1.Zhou-5C7GfCeVMHo
  Cc: Alexander.Deucher-5C7GfCeVMHo, Kent.Russell-5C7GfCeVMHo

On 03/29/2017 09:00 AM, Felix Kuehling wrote:
> Fix the start/end address calculation for address ranges that span
> multiple page directories in amdgpu_vm_alloc_levels.
>
> Add WARN_ONs if page tables aren't found. Otherwise the page table
> update would just fail silently.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 13 ++++++++++---
>   1 file changed, 10 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 818747f..44aa039 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -278,6 +278,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>   	from = (saddr >> shift) % amdgpu_vm_num_entries(adev, level);
>   	to = (eaddr >> shift) % amdgpu_vm_num_entries(adev, level);
>
> +	WARN_ON(from > to);
> +
>   	if (to > parent->last_entry_used)
>   		parent->last_entry_used = to;
>
> @@ -312,8 +314,11 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>   		}
>
>   		if (level < adev->vm_manager.num_level) {
> -			r = amdgpu_vm_alloc_levels(adev, vm, entry, saddr,
> -						   eaddr, level);
> +			uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
> +			uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
> +				((1 << shift) - 1);
> +			r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
> +						   sub_eaddr, level);

Do you mean to create a full sub-PT if pt_idx != from and pt_idx != to?
I didn't quite follow that.

From my perspective, we may need to clear the upper-level bits of saddr
and eaddr before going on to allocate the next-level PT.
Otherwise the index of the next-level PT entry will be incorrect (larger
than num_entries), because the upper-level PT bits are still included in
the calculation.

e.g.
an address is composed of
PD + PT1 + PT2 + PT3 indices.

For the first round, we get the correct "from" and "to" with the right
"shift" (3 blocks), and walk over the address space in the PD range.

But for the 2nd round, we cannot get the expected "from" and "to" with
"shift" (2 blocks), because it walks over the address space in the
PD + PT1 range, while we'd actually like it to be in the PT1 range only.

The same fault continues in the later rounds.

Perhaps that's the root cause of the issue you are facing.
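
(To spell out the decomposition I mean, a rough user-space sketch of
how a 48-bit GPU VA splits with 4KB pages and 9 bits per level; the
constants are illustrative, not taken from the driver:)

  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
      uint64_t va = 0xA0000000ULL;    /* arbitrary example VA (2.5GB) */
      uint64_t pfn = va >> 12;        /* 4KB pages */

      printf("PD  index: %llu\n", (unsigned long long)((pfn >> 27) & 511));
      printf("PT1 index: %llu\n", (unsigned long long)((pfn >> 18) & 511));
      printf("PT2 index: %llu\n", (unsigned long long)((pfn >> 9) & 511));
      printf("PT3 index: %llu\n", (unsigned long long)(pfn & 511));
      return 0;
  }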

Jerry.

>   			if (r)
>   				return r;
>   		}
> @@ -957,9 +962,11 @@ static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
>   		entry = &entry->entries[idx];
>   	}
>
> -	if (level)
> +	if (WARN_ON(level))
>   		return NULL;
>
> +	WARN_ON(!entry->bo);
> +
>   	return entry->bo;
>   }
>
>

* Re: [PATCH 2/3] drm/amdgpu: Fix Vega10 VM initialization
From: Zhang, Jerry (Junwei) @ 2017-03-29  2:56 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Christian.Koenig-5C7GfCeVMHo, David1.Zhou-5C7GfCeVMHo
  Cc: Alexander.Deucher-5C7GfCeVMHo, Kent.Russell-5C7GfCeVMHo

On 03/29/2017 09:48 AM, Felix Kuehling wrote:
> On 17-03-28 09:39 PM, Zhang, Jerry (Junwei) wrote:
>> On 03/29/2017 09:00 AM, Felix Kuehling wrote:
>>> adev->family is not initialized yet when amdgpu_get_block_size is
>>> called. Use adev->asic_type instead.
>>>
>>> Minimum VM size is 512GB, not 256GB, for a single page table entry
>>> in the root page table.
>>>
>>> gmc_v9_0_vm_init is called after adev->vm_manager.max_pfn is
>>> initialized. Move the minimum VM-size enforcement ahead of max_pfn
>>> initialization. Cast to 64-bit before the left-shift.
>>>
>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>> Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
>>
>> Just a note:
>> For now, it's OK to set the minimum vm size to 256G;
>> in this case, there is no PD bit anyway.
>
> With a VM size of 256GB, amdgpu_vm_bo_size(adev, 0) calculates a size of
> 0, and the page directory allocation with amdgpu_bo_create fails in
> amdgpu_vm_init.
>
> In other words, amdgpu_vm_num_entries(adev, 0) needs to return at least
> 1. That means vm_size needs to be at least 512GB.

Sorry for the typo; I meant to say 512GB.
Maybe it will become 256TB, depending on future discussion.

Jerry

>
> Regards,
>    Felix
>
>>
>> Christian also mentioned that PD should be 4k size, then 256T was
>> required.
>> When we reach an agreement, we may update them all.
>>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  6 +++---
>>>    drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      | 17 +++++++----------
>>>    2 files changed, 10 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index 3500da3..57ccac4 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -1042,10 +1042,10 @@ static bool amdgpu_check_pot_argument(int arg)
>>>    static void amdgpu_get_block_size(struct amdgpu_device *adev)
>>>    {
>>>        /* from AI, asic starts to support multiple level VMPT */
>>> -    if (adev->family >= AMDGPU_FAMILY_AI) {
>>> +    if (adev->asic_type >= CHIP_VEGA10) {
>>>            if (amdgpu_vm_block_size != 9)
>>> -            dev_warn(adev->dev, "Multi-VMPT limits block size to"
>>> -                 "one page!\n");
>>> +            dev_warn(adev->dev,
>>> +                 "Multi-VMPT limits block size to one page!\n");
>>>            amdgpu_vm_block_size = 9;
>>>            return;
>>>        }
>>
>> Nice catch.
>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> index 1e4734d..df69aae 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> @@ -511,12 +511,6 @@ static int gmc_v9_0_vm_init(struct amdgpu_device
>>> *adev)
>>>         * amdkfd will use VMIDs 8-15
>>>         */
>>>        adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
>>> -    /* Because of four level VMPTs, vm size at least is 256GB.
>>> -    256TB is OK as well */
>>> -    if (amdgpu_vm_size < 256) {
>>> -        DRM_WARN("vm size at least is 256GB!\n");
>>> -        amdgpu_vm_size = 256;
>>> -    }
>>
>> David had a patch to fix it yesterday.
>> But your patch involves by vm size checking. :)
>>
>>>        adev->vm_manager.num_level = 3;
>>>        amdgpu_vm_manager_init(adev);
>>>
>>> @@ -563,11 +557,14 @@ static int gmc_v9_0_sw_init(void *handle)
>>>        if (r)
>>>            return r;
>>>
>>> -    /* Adjust VM size here.
>>> -     * Currently default to 64GB ((16 << 20) 4k pages).
>>> -     * Max GPUVM size is 48 bits.
>>> +    /* Because of four level VMPTs, vm size is at least 512GB.
>>> +     * The maximum size is 256TB (48bit).
>>>         */
>>> -    adev->vm_manager.max_pfn = amdgpu_vm_size << 18;
>>> +    if (amdgpu_vm_size < 512) {
>>> +        DRM_WARN("VM size is at least 512GB!\n");
>>> +        amdgpu_vm_size = 512;
>>> +    }
>>> +    adev->vm_manager.max_pfn = (uint64_t)amdgpu_vm_size << 18;
>>>
>>>        /* Set the internal MC address mask
>>>         * This is the max address of the GPU's
>>>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3] drm/amdgpu: Fix multi-level page table bugs for large BOs
From: Kuehling, Felix @ 2017-03-29  3:24 UTC (permalink / raw)
  To: Zhang, Jerry, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Koenig,
	Christian, Zhou, David(ChunMing)
  Cc: Deucher, Alexander, Russell, Kent


Hi Jerry,

Let me clarify my understanding of the problem and the intention of the fix.

Address range per page: 4KB
Address range per PT (level 3): 2MB
Address range per PD (level 2): 1GB
Address range per PD (level 1): 512GB
Address range per PD (level 0): 256TB

Imagine for example a mapping that spans multiple level 2 page directories. Say from 0.5GB to 2.5GB.

At level 0:
from = 0
to = 0

At level 1:
from = 0
to = 2

So for level 2 amdgpu_vm_alloc_levels gets called 3 times (pt_idx = [0..2]). Without my change, it gets called with the same saddr and eaddr 3 times. So it calculates the same "from" and "to" each time:
from = 256 % 512 = 256
to = 767 % 512 = 255

Obviously that doesn't work. What we really want is this (from..to in level 2 when called for pt_idx values from level 1):

For pt_idx=0 we want to fill the second half of the level 2 page directory with page tables:
from = 256
to = 511

For pt_idx=1 we need to fill the entire level 2 page directory with page tables:
from = 0
to = 511

For pt_idx=2 we want to fill the first half of the level 2 page directory with page tables:
from = 0
to = 255

This creates a contiguous range of page tables across the three level 2 page directories that are spanned by the allocation. My change achieves that.

I think you're right, there are some extra high bits in saddr and eaddr, but they get discarded by the modulo-division in the next recursion level. For added clarity the modulo division (or masking of high bits) could be moved up to the caller.

Regards,
  Felix


--
F e l i x   K u e h l i n g
SMTS Software Development Engineer | Vertical Workstation/Compute
1 Commerce Valley Dr. East, Markham, ON L3T 7X6 Canada
(O) +1(289)695-1597
   _     _   _   _____   _____
  / \   | \ / | |  _  \  \ _  |
 / A \  | \M/ | | |D) )  /|_| |
/_/ \_\ |_| |_| |_____/ |__/ \|   facebook.com/AMD | amd.com
________________________________
From: Zhang, Jerry
Sent: Tuesday, March 28, 2017 10:54:03 PM
To: Kuehling, Felix; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org; Koenig, Christian; Zhou, David(ChunMing)
Cc: Deucher, Alexander; Russell, Kent
Subject: Re: [PATCH 3/3] drm/amdgpu: Fix multi-level page table bugs for large BOs

On 03/29/2017 09:00 AM, Felix Kuehling wrote:
> Fix the start/end address calculation for address ranges that span
> multiple page directories in amdgpu_vm_alloc_levels.
>
> Add WARN_ONs if page tables aren't found. Otherwise the page table
> update would just fail silently.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 13 ++++++++++---
>   1 file changed, 10 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 818747f..44aa039 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -278,6 +278,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>        from = (saddr >> shift) % amdgpu_vm_num_entries(adev, level);
>        to = (eaddr >> shift) % amdgpu_vm_num_entries(adev, level);
>
> +     WARN_ON(from > to);
> +
>        if (to > parent->last_entry_used)
>                parent->last_entry_used = to;
>
> @@ -312,8 +314,11 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>                }
>
>                if (level < adev->vm_manager.num_level) {
> -                     r = amdgpu_vm_alloc_levels(adev, vm, entry, saddr,
> -                                                eaddr, level);
> +                     uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
> +                     uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
> +                             ((1 << shift) - 1);
> +                     r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
> +                                                sub_eaddr, level);

Do you mean to create a full sub-PT if pt_idx != from and pt_idx != to?
I didn't quite follow that.

From my perspective, we may need to clear the upper-level bits of saddr
and eaddr before going on to allocate the next-level PT.
Otherwise the index of the next-level PT entry will be incorrect (larger
than num_entries), because the upper-level PT bits are still included in
the calculation.

e.g.
an address is composed of
PD + PT1 + PT2 + PT3 indices.

For the first round, we get the correct "from" and "to" with the right
"shift" (3 blocks), and walk over the address space in the PD range.

But for the 2nd round, we cannot get the expected "from" and "to" with
"shift" (2 blocks), because it walks over the address space in the
PD + PT1 range, while we'd actually like it to be in the PT1 range only.

The same fault continues in the later rounds.

Perhaps that's the root cause of the issue you are facing.

Jerry.

>                        if (r)
>                                return r;
>                }
> @@ -957,9 +962,11 @@ static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
>                entry = &entry->entries[idx];
>        }
>
> -     if (level)
> +     if (WARN_ON(level))
>                return NULL;
>
> +     WARN_ON(!entry->bo);
> +
>        return entry->bo;
>   }
>
>


* Re: [PATCH 3/3] drm/amdgpu: Fix multi-level page table bugs for large BOs
From: Zhang, Jerry (Junwei) @ 2017-03-29  5:58 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Koenig, Christian, Zhou, David(ChunMing)
  Cc: Deucher, Alexander, Russell, Kent

Hi Felix,

Thanks for your patient illustration.
I got your meaning now; thanks for fixing it.

 > I think you're right, there are some extra high bits in saddr and eaddr, but
 > they get discarded by the modulo-division in the next recursion level. For
 > added clarity the modulo division (or masking of high bits) could be moved up
 > to the caller.

It may be clearer if the modulo-division is done in the caller.
Anyway,
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>


On 03/29/2017 11:24 AM, Kuehling, Felix wrote:
> Hi Jerry,
>
> Let me clarify my understanding of the problem and the intention of the fix.
>
> Address range per page: 4KB
> Address range per PT (level 3): 2MB
> Address range per PD (level 2): 1GB
> Address range per PD (level 1): 512GB
> Address range per PD (level 0): 256TB
>
> Imagine for example a mapping that spans multiple level 2 page directories. Say
> from 0.5GB to 2.5GB.
>
> At level 0:
> from = 0
> to = 0
>
> At level 1:
> from = 0
> to = 2
>
> So for level 2 amdgpu_vm_alloc_levels gets called 3 times (pt_idx = [0..2]).
> Without my change, it gets called with the same saddr and eaddr 3 times. So it
> calculates the same "from" and "to" each time:
> from = 256 % 512 = 256
> to = 767 % 512 = 255
>
> Obviously that doesn't work. What we really want is this (from..to in level 2
> when called for pt_idx values from level 1):
>
> For pt_idx=0 we want to fill the second half of the level 2 page directory with
> page tables:
> from = 256
> to = 511
>
> For pt_idx=1 we need to fill the entire level 2 page directory with
> page tables:
> from = 0
> to = 511
>
> For pt_idx=2 we want to fill the first half of the level 2 page directory with
> page tables:
> from = 0
> to = 255
>
> This creates a contiguous range of page tables across the three level 2 page
> directories that are spanned by the allocation. My change achieves that.
>
> I think you're right, there are some extra high bits in saddr and eaddr, but
> they get discarded by the modulo-division in the next recursion level. For
> added clarity the modulo division (or masking of high bits) could be moved up
> to the caller.
>
> Regards,
>    Felix
>
>
> --
> F e l i x   K u e h l i n g
> SMTS Software Development Engineer | Vertical Workstation/Compute
> 1 Commerce Valley Dr. East, Markham, ON L3T 7X6 Canada
> (O) +1(289)695-1597
>     _     _   _   _____   _____
>    / \   | \ / | |  _  \  \ _  |
>   / A \  | \M/ | | |D) )  /|_| |
> /_/ \_\ |_| |_| |_____/ |__/ \|   facebook.com/AMD | amd.com
> -------------------------------------------------------------------------------
> *From:* Zhang, Jerry
> *Sent:* Tuesday, March 28, 2017 10:54:03 PM
> *To:* Kuehling, Felix; amd-gfx@lists.freedesktop.org; Koenig, Christian; Zhou,
> David(ChunMing)
> *Cc:* Deucher, Alexander; Russell, Kent
> *Subject:* Re: [PATCH 3/3] drm/amdgpu: Fix multi-level page table bugs for
> large BOs
> On 03/29/2017 09:00 AM, Felix Kuehling wrote:
>> Fix the start/end address calculation for address ranges that span
>> multiple page directories in amdgpu_vm_alloc_levels.
>>
>> Add WARN_ONs if page tables aren't found. Otherwise the page table
>> update would just fail silently.
>>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 13 ++++++++++---
>>   1 file changed, 10 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 818747f..44aa039 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -278,6 +278,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>>        from = (saddr >> shift) % amdgpu_vm_num_entries(adev, level);
>>        to = (eaddr >> shift) % amdgpu_vm_num_entries(adev, level);
>>
>> +     WARN_ON(from > to);
>> +
>>        if (to > parent->last_entry_used)
>>                parent->last_entry_used = to;
>>
>> @@ -312,8 +314,11 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
>>                }
>>
>>                if (level < adev->vm_manager.num_level) {
>> -                     r = amdgpu_vm_alloc_levels(adev, vm, entry, saddr,
>> -                                                eaddr, level);
>> +                     uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
>> +                     uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
>> +                             ((1 << shift) - 1);
>> +                     r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
>> +                                                sub_eaddr, level);
>
> Do you mean to create a full sub-PT if pt_idx != from and pt_idx != to?
> I didn't quite follow that.
>
> From my perspective, we may need to clear the upper-level bits of saddr
> and eaddr before going on to allocate the next-level PT.
> Otherwise the index of the next-level PT entry will be incorrect (larger
> than num_entries), because the upper-level PT bits are still included in
> the calculation.
>
> e.g.
> an address is composed of
> PD + PT1 + PT2 + PT3 indices.
>
> For the first round, we get the correct "from" and "to" with the right
> "shift" (3 blocks), and walk over the address space in the PD range.
>
> But for the 2nd round, we cannot get the expected "from" and "to" with
> "shift" (2 blocks), because it walks over the address space in the
> PD + PT1 range, while we'd actually like it to be in the PT1 range only.
>
> The same fault continues in the later rounds.
>
> Perhaps that's the root cause of the issue you are facing.
>
> Jerry.
>
>>                        if (r)
>>                                return r;
>>                }
>> @@ -957,9 +962,11 @@ static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
>>                entry = &entry->entries[idx];
>>        }
>>
>> -     if (level)
>> +     if (WARN_ON(level))
>>                return NULL;
>>
>> +     WARN_ON(!entry->bo);
>> +
>>        return entry->bo;
>>   }
>>
>>

* Re: [PATCH 0/3] Fixes for multi-level page tables
From: Zhang, Jerry (Junwei) @ 2017-03-29  6:00 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Christian.Koenig-5C7GfCeVMHo, David1.Zhou-5C7GfCeVMHo
  Cc: Alexander.Deucher-5C7GfCeVMHo, Kent.Russell-5C7GfCeVMHo

On 03/29/2017 09:00 AM, Felix Kuehling wrote:
> I worked on these fixes on amd-kfd-staging with a merge of the recent multi-
> level page table changes. With these changes KFDTest passes on Vega10,
> including some tests that tend to stress VM memory management.
>
> They applied cleanly to current amd-staging-4.9 but are only compile-tested
> on that branch.

Patches 1 and 3 are
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>

Sorry, I didn't get patch 2 in my mailbox.
Am I missing anything?

>
> Felix Kuehling (3):
>    drm/amdgpu: Make max_pfn 64-bit
>    drm/amdgpu: Fix Vega10 VM initialization
>    drm/amdgpu: Fix multi-level page table bugs for large BOs
>
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  6 +++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 17 ++++++++++++-----
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     |  2 +-
>   drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c   |  5 +++--
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      | 17 +++++++----------
>   drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c    |  5 +++--
>   6 files changed, 29 insertions(+), 23 deletions(-)
>

* Re: [PATCH 1/3] drm/amdgpu: Make max_pfn 64-bit
From: Christian König @ 2017-03-29  6:44 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Christian.Koenig-5C7GfCeVMHo, David1.Zhou-5C7GfCeVMHo
  Cc: Alexander.Deucher-5C7GfCeVMHo, Kent.Russell-5C7GfCeVMHo

On 29.03.2017 at 03:00, Felix Kuehling wrote:
> With 4-level page tables the maximum VM size is 256TB. That's 64G
> pages, which can't be represented in 32-bit.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 4 ++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h   | 2 +-
>   drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 5 +++--
>   drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  | 5 +++--
>   4 files changed, 9 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index f4cb7de..818747f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -336,7 +336,7 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>   			struct amdgpu_vm *vm,
>   			uint64_t saddr, uint64_t size)
>   {
> -	unsigned last_pfn;
> +	uint64_t last_pfn;
>   	uint64_t eaddr;
>   
>   	/* validate the parameters */
> @@ -346,7 +346,7 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>   	eaddr = saddr + size - 1;
>   	last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
>   	if (last_pfn >= adev->vm_manager.max_pfn) {
> -		dev_err(adev->dev, "va above limit (0x%08X >= 0x%08X)\n",
> +		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
>   			last_pfn, adev->vm_manager.max_pfn);
>   		return -EINVAL;
>   	}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index b5e5cdd..102b1f7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -153,7 +153,7 @@ struct amdgpu_vm_manager {
>   	u64					fence_context;
>   	unsigned				seqno[AMDGPU_MAX_RINGS];
>   
> -	uint32_t				max_pfn;
> +	uint64_t				max_pfn;
>   	uint32_t				num_level;
>   	/* vram base address for page table entry  */
>   	u64					vram_base_offset;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
> index 5604a53..dd48637 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
> @@ -227,8 +227,9 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
>   		WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0);
>   		WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0);
>   		WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32) + i*2,
> -				adev->vm_manager.max_pfn - 1);
> -		WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2, 0);
> +			lower_32_bits(adev->vm_manager.max_pfn - 1));
> +		WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2,
> +			upper_32_bits(adev->vm_manager.max_pfn - 1));
>   	}
>   
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> index 5903bb0..c1dc8c4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> @@ -247,8 +247,9 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
>   		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0);
>   		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0);
>   		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32) + i*2,
> -				adev->vm_manager.max_pfn - 1);
> -		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2, 0);
> +			lower_32_bits(adev->vm_manager.max_pfn - 1));
> +		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2,
> +			upper_32_bits(adev->vm_manager.max_pfn - 1));
>   	}
>   
>   	return 0;



* Re: [PATCH 2/3] drm/amdgpu: Fix Vega10 VM initialization
From: Christian König @ 2017-03-29  6:47 UTC (permalink / raw)
  To: Felix Kuehling, Zhang, Jerry (Junwei),
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	David1.Zhou-5C7GfCeVMHo
  Cc: Alexander.Deucher-5C7GfCeVMHo, Kent.Russell-5C7GfCeVMHo

On 29.03.2017 at 03:48, Felix Kuehling wrote:
> On 17-03-28 09:39 PM, Zhang, Jerry (Junwei) wrote:
>> On 03/29/2017 09:00 AM, Felix Kuehling wrote:
>>> adev->family is not initialized yet when amdgpu_get_block_size is
>>> called. Use adev->asic_type instead.
>>>
>>> Minimum VM size is 512GB, not 256GB, for a single page table entry
>>> in the root page table.
>>>
>>> gmc_v9_0_vm_init is called after adev->vm_manager.max_pfn is
>>> initialized. Move the minimum VM-size enforcement ahead of max_pfn
>>> initialization. Cast to 64-bit before the left-shift.
>>>
>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>> Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
>>
>> Just a note:
>> For now, it's OK to set the minimum vm size to 256G;
>> in this case, there is no PD bit anyway.
> With a VM size of 256GB, amdgpu_vm_bo_size(adev, 0) calculates a size of
> 0, and the page directory allocation with amdgpu_bo_create fails in
> amdgpu_vm_init.
>
> In other words, amdgpu_vm_num_entries(adev, 0) needs to return at least
> 1. That means vm_size needs to be at least 512GB.

Those fixes are correct, but they don't address the real problem.

The intention of amdgpu_vm_size and amdgpu_vm_block_size is to save 
memory by reducing the size of the PD/PTs.

Since we now use 4-level PDs/PTs, the size of each is 4KB, which is 
usually the smallest allocation size anyway.
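
(For reference, the size arithmetic behind that, assuming 8-byte
PDEs/PTEs with the 9-bit block size:)

  #include <stdio.h>

  int main(void)
  {
      /* 2^9 entries per PD/PT times 8 bytes per entry */
      printf("per-level table size = %d bytes\n", (1 << 9) * 8);
      return 0;    /* prints 4096: exactly one page */
  }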

So I suggest we just drop support for amdgpu_vm_size for Vega10 and 
always use 256TB instead.

Jerry, do you want to take care of this or should I look into it? It 
should be trivial to do.

Regards,
Christian.

>
> Regards,
>    Felix
>
>> Christian also mentioned that PD should be 4k size, then 256T was
>> required.
>> When we reach an agreement, we may update them all.
>>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  6 +++---
>>>    drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      | 17 +++++++----------
>>>    2 files changed, 10 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index 3500da3..57ccac4 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -1042,10 +1042,10 @@ static bool amdgpu_check_pot_argument(int arg)
>>>    static void amdgpu_get_block_size(struct amdgpu_device *adev)
>>>    {
>>>        /* from AI, asic starts to support multiple level VMPT */
>>> -    if (adev->family >= AMDGPU_FAMILY_AI) {
>>> +    if (adev->asic_type >= CHIP_VEGA10) {
>>>            if (amdgpu_vm_block_size != 9)
>>> -            dev_warn(adev->dev, "Multi-VMPT limits block size to"
>>> -                 "one page!\n");
>>> +            dev_warn(adev->dev,
>>> +                 "Multi-VMPT limits block size to one page!\n");
>>>            amdgpu_vm_block_size = 9;
>>>            return;
>>>        }
>> Nice catch.
>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> index 1e4734d..df69aae 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> @@ -511,12 +511,6 @@ static int gmc_v9_0_vm_init(struct amdgpu_device
>>> *adev)
>>>         * amdkfd will use VMIDs 8-15
>>>         */
>>>        adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
>>> -    /* Because of four level VMPTs, vm size at least is 256GB.
>>> -    256TB is OK as well */
>>> -    if (amdgpu_vm_size < 256) {
>>> -        DRM_WARN("vm size at least is 256GB!\n");
>>> -        amdgpu_vm_size = 256;
>>> -    }
>> David had a patch to fix it yesterday.
>> But your patch involves by vm size checking. :)
>>
>>>        adev->vm_manager.num_level = 3;
>>>        amdgpu_vm_manager_init(adev);
>>>
>>> @@ -563,11 +557,14 @@ static int gmc_v9_0_sw_init(void *handle)
>>>        if (r)
>>>            return r;
>>>
>>> -    /* Adjust VM size here.
>>> -     * Currently default to 64GB ((16 << 20) 4k pages).
>>> -     * Max GPUVM size is 48 bits.
>>> +    /* Because of four level VMPTs, vm size is at least 512GB.
>>> +     * The maximum size is 256TB (48bit).
>>>         */
>>> -    adev->vm_manager.max_pfn = amdgpu_vm_size << 18;
>>> +    if (amdgpu_vm_size < 512) {
>>> +        DRM_WARN("VM size is at least 512GB!\n");
>>> +        amdgpu_vm_size = 512;
>>> +    }
>>> +    adev->vm_manager.max_pfn = (uint64_t)amdgpu_vm_size << 18;
>>>
>>>        /* Set the internal MC address mask
>>>         * This is the max address of the GPU's
>>>


* Re: [PATCH 3/3] drm/amdgpu: Fix multi-level page table bugs for large BOs
From: Christian König @ 2017-03-29  6:52 UTC (permalink / raw)
  To: Zhang, Jerry (Junwei),
	Kuehling, Felix, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Koenig, Christian, Zhou, David(ChunMing)
  Cc: Deucher, Alexander, Russell, Kent

On 29.03.2017 at 07:58, Zhang, Jerry (Junwei) wrote:
> Hi Felix,
>
> Thanks for your patient illustration.
> I got your meaning now; thanks for fixing it.
>
> > I think you're right, there are some extra high bits in saddr and 
> eaddr, but
> > they get discarded by the modulo-division in the next recursion 
> level. For
> > added clarity the modulo division (or masking of high bits) could be 
> moved up
> > to the caller.
>
> It may be clearer if the modulo-division is done in the caller.

Yeah, indeed that isn't easy to get at first glance.

Please clean that up and also remove all those WARN_ON(), we don't want 
to spam the system log with backtraces like this.

Regards,
Christian.

> Anyway,
> Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
>
>
> On 03/29/2017 11:24 AM, Kuehling, Felix wrote:
>> Hi Jerry,
>>
>> Let me clarify my understanding of the problem and the intention of 
>> the fix.
>>
>> Address range per page: 4KB
>> Address range per PT (level 3): 2MB
>> Address range per PD (level 2): 1GB
>> Address range per PD (level 1): 512GB
>> Address range per PD (level 0): 256TB
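>>
>> (Each level resolves 9 bits, i.e. a factor of 512: 4KB x 512 = 2MB,
>> 2MB x 512 = 1GB, 1GB x 512 = 512GB, 512GB x 512 = 256TB.)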
>>
>> Imagine for example a mapping that spans multiple level 2 page 
>> directories. Say
>> from 0.5GB to 2.5GB.
>>
>> At level 0:
>> from = 0
>> to = 0
>>
>> At level 1:
>> from = 0
>> to = 2
>>
>> So for level 2 amdgpu_vm_alloc_levels gets called 3 times (pt_idx = 
>> [0..2]).
>> Without my change, it gets called with the same saddr and eaddr 3 
>> times. So it
>> calculates the same "from" and "to" each time:
>> from = 256 % 512 = 256
>> to = 767 % 512 = 255
>>
>> Obviously that doesn't work. What we really want is this (from..to in 
>> level 2
>> when called for pt_idx values from level 1):
>>
>> For pt_idx=0 we want to fill the second half of the level 2 page 
>> directory with
>> page tables:
>> from = 256
>> to = 511
>>
>> For pt_idx=1 we need to fill the entire level 2 page directory with
>> page tables:
>> from = 0
>> to = 511
>>
>> For pt_idx=2 we want to fill the first half of the level 2 page 
>> directory with
>> page tables:
>> from = 0
>> to = 255
>>
>> This creates a contiguous range of page tables across the three level 
>> 2 page
>> directories that are spanned by the allocation. My change achieves that.
>>
>> I think you're right, there are some extra high bits in saddr and 
>> eaddr, but
>> they get discarded by the modulo-division in the next recursion 
>> level. For
>> added clarity the modulo division (or masking of high bits) could be 
>> moved up
>> to the caller.
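>>
>> In code, the fix amounts to this (a simplified sketch of the hunk
>> quoted below, with "shift", "from" and "to" as computed in
>> amdgpu_vm_alloc_levels):
>>
>>     for (pt_idx = from; pt_idx <= to; ++pt_idx) {
>>             /* Only the first and last entries get a partial range;
>>              * every entry in between covers its whole lower-level
>>              * directory. */
>>             uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
>>             uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
>>                     ((1 << shift) - 1);
>>             /* recurse into level + 1 with sub_saddr..sub_eaddr */
>>     }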
>>
>> Regards,
>>    Felix
>>
>>
>> -- 
>> F e l i x   K u e h l i n g
>> SMTS Software Development Engineer | Vertical Workstation/Compute
>> 1 Commerce Valley Dr. East, Markham, ON L3T 7X6 Canada
>> (O) +1(289)695-1597
>>     _     _   _   _____   _____
>>    / \   | \ / | |  _  \  \ _  |
>>   / A \  | \M/ | | |D) )  /|_| |
>> /_/ \_\ |_| |_| |_____/ |__/ \|   facebook.com/AMD | amd.com
>> ------------------------------------------------------------------------------- 
>>
>> *From:* Zhang, Jerry
>> *Sent:* Tuesday, March 28, 2017 10:54:03 PM
>> *To:* Kuehling, Felix; amd-gfx@lists.freedesktop.org; Koenig, 
>> Christian; Zhou,
>> David(ChunMing)
>> *Cc:* Deucher, Alexander; Russell, Kent
>> *Subject:* Re: [PATCH 3/3] drm/amdgpu: Fix multi-level page table 
>> bugs for
>> large BOs
>> On 03/29/2017 09:00 AM, Felix Kuehling wrote:
>>> Fix the start/end address calculation for address ranges that span
>>> multiple page directories in amdgpu_vm_alloc_levels.
>>>
>>> Add WARN_ONs if page tables aren't found. Otherwise the page table
>>> update would just fail silently.
>>>
>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 13 ++++++++++---
>>>   1 file changed, 10 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 818747f..44aa039 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -278,6 +278,8 @@ static int amdgpu_vm_alloc_levels(struct 
>>> amdgpu_device *adev,
>>>        from = (saddr >> shift) % amdgpu_vm_num_entries(adev, level);
>>>        to = (eaddr >> shift) % amdgpu_vm_num_entries(adev, level);
>>>
>>> +     WARN_ON(from > to);
>>> +
>>>        if (to > parent->last_entry_used)
>>>                parent->last_entry_used = to;
>>>
>>> @@ -312,8 +314,11 @@ static int amdgpu_vm_alloc_levels(struct 
>>> amdgpu_device *adev,
>>>                }
>>>
>>>                if (level < adev->vm_manager.num_level) {
>>> -                     r = amdgpu_vm_alloc_levels(adev, vm, entry, 
>>> saddr,
>>> -                                                eaddr, level);
>>> +                     uint64_t sub_saddr = (pt_idx == from) ? saddr 
>>> : 0;
>>> +                     uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
>>> +                             ((1 << shift) - 1);
>>> +                     r = amdgpu_vm_alloc_levels(adev, vm, entry, 
>>> sub_saddr,
>>> +                                                sub_eaddr, level);
>>
>> Do you mean to create a full sub-PT if pt_idx != from and pt_idx != to?
>> I didn't quite catch that.
>>
>> From my perspective, we may need to clear the upper-level bits of
>> saddr and eaddr before going on to allocate the next-level PT.
>> Otherwise the next-level PT entry index will be incorrect (larger
>> than num_entries), because the upper-level bits are still included
>> in the calculation.
>>
>> e.g.
>> an address breaks down into
>> PD + PT1 + PT2 + PT3
>>
>> For the first round, we get the correct "from" and "to" with the
>> right "shift" (3 blocks), then walk over the address space in the
>> PD range.
>>
>> Then for the 2nd round, we cannot get the expected "from" and "to"
>> with "shift" (2 blocks), as it walks over the address space in the
>> PD + PT1 range, while we'd really want it to be within the PT1 range.
>>
>> The faulty calculation carries on in the later rounds.
>>
>> Perhaps that's the root cause of the issue you're facing.
>>
>> Jerry.
>>
>>>                        if (r)
>>>                                return r;
>>>                }
>>> @@ -957,9 +962,11 @@ static struct amdgpu_bo 
>>> *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
>>>                entry = &entry->entries[idx];
>>>        }
>>>
>>> -     if (level)
>>> +     if (WARN_ON(level))
>>>                return NULL;
>>>
>>> +     WARN_ON(!entry->bo);
>>> +
>>>        return entry->bo;
>>>   }
>>>
>>>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/3] drm/amdgpu: Fix Vega10 VM initialization
       [not found]                 ` <b0d2a78d-ab7d-f5b7-a35e-a9d6c80771a7-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-29  7:09                   ` Zhang, Jerry (Junwei)
  0 siblings, 0 replies; 22+ messages in thread
From: Zhang, Jerry (Junwei) @ 2017-03-29  7:09 UTC (permalink / raw)
  To: Christian König, Felix Kuehling,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	David1.Zhou-5C7GfCeVMHo
  Cc: Alexander.Deucher-5C7GfCeVMHo, Kent.Russell-5C7GfCeVMHo

On 03/29/2017 02:47 PM, Christian König wrote:
> On 29.03.2017 at 03:48, Felix Kuehling wrote:
>> On 17-03-28 09:39 PM, Zhang, Jerry (Junwei) wrote:
>>> On 03/29/2017 09:00 AM, Felix Kuehling wrote:
>>>> adev->family is not initialized yet when amdgpu_get_block_size is
>>>> called. Use adev->asic_type instead.
>>>>
>>>> Minimum VM size is 512GB, not 256GB, for a single page table entry
>>>> in the root page table.
>>>>
>>>> gmc_v9_0_vm_init is called after adev->vm_manager.max_pfn is
>>>> initialized. Move the minimum VM-size enforcement ahead of max_pfn
>>>> initialization. Cast to 64-bit before the left-shift.
>>>>
>>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>> Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
>>>
>>> Just a note:
>>> For now, it's OK to set the minimum vm size to 256G;
>>> in this case, there is no PD bit anyway.
>> With a VM size of 256GB, amdgpu_vm_bo_size(adev, 0) calculates 0, and
>> the page directory allocation with amdgpu_bo_create fails in amdgpu_vm_init.
>>
>> In other words, amdgpu_vm_num_entries(adev, 0) needs to return at least
>> 1. That means vm_size needs to be at least 512GB.
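>>
>> (Back-of-the-envelope, assuming 9 bits per level and 4KB pages: one
>> root-PD entry covers 2^(12 + 3 * 9) bytes = 512GB, so
>> amdgpu_vm_num_entries(adev, 0) is vm_size / 512GB: 0 for a 256GB VM,
>> 1 for a 512GB VM.)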
>
> Those fixes are correct, but they don't address the real problem.
>
> The intention of amdgpu_vm_size and amdgpu_vm_block_size is to save memory by
> reducing the size of the PD/PTs.
>
> Since we now use 4-level PDs/PTs, the size of each is 4KB, which is usually the
> smallest allocation size anyway.

I remember we use 512B (9-bit) for each PD/PT for Vega10.
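
(With 8-byte entries that would be 512 entries x 8 bytes = 4KB per
PD/PT, so the two figures may describe the same thing: 512 entries
vs. 4KB of memory.)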

>
> So I suggest just dropping support for amdgpu_vm_size on Vega10 and always using
> 256TB instead.
>
> Jerry, do you want to take care of this, or should I look into it? Should be
> trivial to do.

Do you mean to fix the vm_size for Vega10?
Yes, I will take it over.

Jerry

>
> Regards,
> Christian.
>
>>
>> Regards,
>>    Felix
>>
>>> Christian also mentioned that the PD should be 4k in size, in which case
>>> 256T would be required.
>>> Once we reach an agreement, we may update them all.
>>>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  6 +++---
>>>>    drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      | 17 +++++++----------
>>>>    2 files changed, 10 insertions(+), 13 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> index 3500da3..57ccac4 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> @@ -1042,10 +1042,10 @@ static bool amdgpu_check_pot_argument(int arg)
>>>>    static void amdgpu_get_block_size(struct amdgpu_device *adev)
>>>>    {
>>>>        /* from AI, asic starts to support multiple level VMPT */
>>>> -    if (adev->family >= AMDGPU_FAMILY_AI) {
>>>> +    if (adev->asic_type >= CHIP_VEGA10) {
>>>>            if (amdgpu_vm_block_size != 9)
>>>> -            dev_warn(adev->dev, "Multi-VMPT limits block size to"
>>>> -                 "one page!\n");
>>>> +            dev_warn(adev->dev,
>>>> +                 "Multi-VMPT limits block size to one page!\n");
>>>>            amdgpu_vm_block_size = 9;
>>>>            return;
>>>>        }
>>> Nice catch.
>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>> index 1e4734d..df69aae 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>> @@ -511,12 +511,6 @@ static int gmc_v9_0_vm_init(struct amdgpu_device
>>>> *adev)
>>>>         * amdkfd will use VMIDs 8-15
>>>>         */
>>>>        adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
>>>> -    /* Because of four level VMPTs, vm size at least is 256GB.
>>>> -    256TB is OK as well */
>>>> -    if (amdgpu_vm_size < 256) {
>>>> -        DRM_WARN("vm size at least is 256GB!\n");
>>>> -        amdgpu_vm_size = 256;
>>>> -    }
>>> David had a patch to fix it yesterday.
>>> But your patch covers it via the vm size check. :)
>>>
>>>>        adev->vm_manager.num_level = 3;
>>>>        amdgpu_vm_manager_init(adev);
>>>>
>>>> @@ -563,11 +557,14 @@ static int gmc_v9_0_sw_init(void *handle)
>>>>        if (r)
>>>>            return r;
>>>>
>>>> -    /* Adjust VM size here.
>>>> -     * Currently default to 64GB ((16 << 20) 4k pages).
>>>> -     * Max GPUVM size is 48 bits.
>>>> +    /* Because of four level VMPTs, vm size is at least 512GB.
>>>> +     * The maximum size is 256TB (48bit).
>>>>         */
>>>> -    adev->vm_manager.max_pfn = amdgpu_vm_size << 18;
>>>> +    if (amdgpu_vm_size < 512) {
>>>> +        DRM_WARN("VM size is at least 512GB!\n");
>>>> +        amdgpu_vm_size = 512;
>>>> +    }
>>>> +    adev->vm_manager.max_pfn = (uint64_t)amdgpu_vm_size << 18;
>>>>
>>>>        /* Set the internal MC address mask
>>>>         * This is the max address of the GPU's
>>>>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/3] Fixes for multi-level page tables
       [not found]     ` <58DB4D73.5000408-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-29  7:10       ` Zhang, Jerry (Junwei)
  0 siblings, 0 replies; 22+ messages in thread
From: Zhang, Jerry (Junwei) @ 2017-03-29  7:10 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Christian.Koenig-5C7GfCeVMHo, David1.Zhou-5C7GfCeVMHo
  Cc: Alexander.Deucher-5C7GfCeVMHo, Kent.Russell-5C7GfCeVMHo


On 03/29/2017 02:00 PM, Zhang, Jerry (Junwei) wrote:
> On 03/29/2017 09:00 AM, Felix Kuehling wrote:
>> I worked on these fixes on amd-kfd-staging with a merge of the recent multi-
>> level page table changes. With these changes KFDTest passes on Vega10,
>> including some tests that tend to stress VM memory management.
>>
>> They applied cleanly to current amd-staging-4.9 but are only compile-tested
>> on that branch.
>
> Patches 1 and 3
> Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
>
> Sorry, I didn't get patch 2 in my mailbox.
> Am I missing something?

I found patch 2 in my vega10 folder.
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>

Jerry

>
>>
>> Felix Kuehling (3):
>>    drm/amdgpu: Make max_pfn 64-bit
>>    drm/amdgpu: Fix Vega10 VM initialization
>>    drm/amdgpu: Fix multi-level page table bugs for large BOs
>>
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  6 +++---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 17 ++++++++++++-----
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c   |  5 +++--
>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      | 17 +++++++----------
>>   drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c    |  5 +++--
>>   6 files changed, 29 insertions(+), 23 deletions(-)
>>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3] drm/amdgpu: Fix multi-level page table bugs for large BOs
       [not found]                     ` <8acdbd65-f99d-3e0e-b61b-318cf27d7a3b-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-03-29  7:18                       ` Zhang, Jerry (Junwei)
  2017-03-29 14:32                       ` Felix Kuehling
  1 sibling, 0 replies; 22+ messages in thread
From: Zhang, Jerry (Junwei) @ 2017-03-29  7:18 UTC (permalink / raw)
  To: Christian König, Kuehling, Felix,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Koenig, Christian,
	Zhou, David(ChunMing)
  Cc: Deucher, Alexander, Russell, Kent

On 03/29/2017 02:52 PM, Christian König wrote:
> On 29.03.2017 at 07:58, Zhang, Jerry (Junwei) wrote:
>> Hi Felix,
>>
>> Thanks for your patient illustration.
>> I see what you mean now, and thanks for fixing it.
>>
>> > I think you're right, there are some extra high bits in saddr and eaddr, but
>> > they get discarded by the modulo-division in the next recursion level. For
>> > added clarity the modulo division (or masking of high bits) could be moved up
>> > to the caller.
>>
>> It may be clearer if the modulo-division is done in the caller.
>
> Yeah, indeed that isn't easy to get at first glance.

Understood :)

>
> Please clean that up and also remove all those WARN_ON(), we don't want to spam
> the system log with backtraces like this.
>
> Regards,
> Christian.
>
>> Anyway,
>> Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
>>
>>
>> On 03/29/2017 11:24 AM, Kuehling, Felix wrote:
>>> Hi Jerry,
>>>
>>> Let me clarify my understanding of the problem and the intention of the fix.
>>>
>>> Address range per page: 4KB
>>> Address range per PT (level 3): 2MB
>>> Address range per PD (level 2): 1GB
>>> Address range per PD (level 1): 512GB
>>> Address range per PD (level 0): 256TB
>>>
>>> Imagine for example a mapping that spans multiple level 2 page directories. Say
>>> from 0.5GB to 2.5GB.
>>>
>>> At level 0:
>>> from = 0
>>> to = 0
>>>
>>> At level 1:
>>> from = 0
>>> to = 2
>>>
>>> So for level 2 amdgpu_vm_alloc_levels gets called 3 times (pt_idx = [0..2]).
>>> Without my change, it gets called with the same saddr and eaddr 3 times. So it
>>> calculates the same "from" and "to" each time:
>>> from = 256 % 512 = 256
>>> to = 767 % 512 = 255
>>>
>>> Obviously that doesn't work. What we really want is this (from..to in level 2
>>> when called for pt_idx values from level 1):
>>>
>>> For pt_idx=0 we want to fill the second half of the level 2 page directory with
>>> page tables:
>>> from = 256
>>> to = 511
>>>
>>> For pt_idx=1 we need to fill the entire level 2 page directory with page
>>> tables:
>>> from = 0
>>> to = 511
>>>
>>> For pt_idx=2 we want to fill the first half of the level 2 page directory with
>>> page tables:
>>> from = 0
>>> to = 255
>>>
>>> This creates a contiguous range of page tables across the three level 2 page
>>> directories that are spanned by the allocation. My change achieves that.
>>>
>>> I think you're right, there are some extra high bits in saddr and eaddr, but
>>> they get discarded by the modulo-division in the next recursion level. For
>>> added clarity the modulo division (or masking of high bits) could be moved up
>>> to the caller.
>>>
>>> Regards,
>>>    Felix
>>>
>>>
>>> --
>>> F e l i x   K u e h l i n g
>>> SMTS Software Development Engineer | Vertical Workstation/Compute
>>> 1 Commerce Valley Dr. East, Markham, ON L3T 7X6 Canada
>>> (O) +1(289)695-1597
>>>     _     _   _   _____   _____
>>>    / \   | \ / | |  _  \  \ _  |
>>>   / A \  | \M/ | | |D) )  /|_| |
>>> /_/ \_\ |_| |_| |_____/ |__/ \|   facebook.com/AMD | amd.com
>>> -------------------------------------------------------------------------------
>>> *From:* Zhang, Jerry
>>> *Sent:* Tuesday, March 28, 2017 10:54:03 PM
>>> *To:* Kuehling, Felix; amd-gfx@lists.freedesktop.org; Koenig, Christian; Zhou,
>>> David(ChunMing)
>>> *Cc:* Deucher, Alexander; Russell, Kent
>>> *Subject:* Re: [PATCH 3/3] drm/amdgpu: Fix multi-level page table bugs for
>>> large BOs
>>> On 03/29/2017 09:00 AM, Felix Kuehling wrote:
>>>> Fix the start/end address calculation for address ranges that span
>>>> multiple page directories in amdgpu_vm_alloc_levels.
>>>>
>>>> Add WARN_ONs if page tables aren't found. Otherwise the page table
>>>> update would just fail silently.
>>>>
>>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 13 ++++++++++---
>>>>   1 file changed, 10 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index 818747f..44aa039 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -278,6 +278,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device
>>>> *adev,
>>>>        from = (saddr >> shift) % amdgpu_vm_num_entries(adev, level);
>>>>        to = (eaddr >> shift) % amdgpu_vm_num_entries(adev, level);
>>>>
>>>> +     WARN_ON(from > to);
>>>> +
>>>>        if (to > parent->last_entry_used)
>>>>                parent->last_entry_used = to;
>>>>
>>>> @@ -312,8 +314,11 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device
>>>> *adev,
>>>>                }
>>>>
>>>>                if (level < adev->vm_manager.num_level) {
>>>> -                     r = amdgpu_vm_alloc_levels(adev, vm, entry, saddr,
>>>> -                                                eaddr, level);
>>>> +                     uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
>>>> +                     uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
>>>> +                             ((1 << shift) - 1);
>>>> +                     r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
>>>> +                                                sub_eaddr, level);
>>>
>>> Do you mean to create a full sub-PT if pt_idx != from and pt_idx != to?
>>> I didn't quite catch that.
>>>
>>> From my perspective, we may need to clear the upper-level bits of saddr and
>>> eaddr before going on to allocate the next-level PT.
>>> Otherwise the next-level PT entry index will be incorrect (larger than
>>> num_entries), because the upper-level bits are still included in the calculation.
>>>
>>> e.g.
>>> an address breaks down into
>>> PD + PT1 + PT2 + PT3
>>>
>>> For the first round, we get the correct "from" and "to" with the right
>>> "shift" (3 blocks), then walk over the address space in the PD range.
>>>
>>> Then for the 2nd round, we cannot get the expected "from" and "to" with
>>> "shift" (2 blocks), as it walks over the address space in the PD + PT1
>>> range, while we'd really want it to be within the PT1 range.
>>>
>>> The faulty calculation carries on in the later rounds.
>>>
>>> Perhaps that's the root cause of the issue you're facing.
>>>
>>> Jerry.
>>>
>>>>                        if (r)
>>>>                                return r;
>>>>                }
>>>> @@ -957,9 +962,11 @@ static struct amdgpu_bo *amdgpu_vm_get_pt(struct
>>>> amdgpu_pte_update_params *p,
>>>>                entry = &entry->entries[idx];
>>>>        }
>>>>
>>>> -     if (level)
>>>> +     if (WARN_ON(level))
>>>>                return NULL;
>>>>
>>>> +     WARN_ON(!entry->bo);
>>>> +
>>>>        return entry->bo;
>>>>   }
>>>>
>>>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3] drm/amdgpu: Fix multi-level page table bugs for large BOs
       [not found]                     ` <8acdbd65-f99d-3e0e-b61b-318cf27d7a3b-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-03-29  7:18                       ` Zhang, Jerry (Junwei)
@ 2017-03-29 14:32                       ` Felix Kuehling
       [not found]                         ` <bdbacee3-2c61-ebcb-1421-5c0a77dc27e0-5C7GfCeVMHo@public.gmane.org>
  1 sibling, 1 reply; 22+ messages in thread
From: Felix Kuehling @ 2017-03-29 14:32 UTC (permalink / raw)
  To: Christian König, Zhang, Jerry (Junwei),
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Koenig, Christian,
	Zhou, David(ChunMing)
  Cc: Deucher, Alexander, Russell, Kent

On 17-03-29 02:52 AM, Christian König wrote:
> On 29.03.2017 at 07:58, Zhang, Jerry (Junwei) wrote:
>> Hi Felix,
>>
>> Thanks for your patient illustration.
>> I see what you mean now, and thanks for fixing it.
>>
>> > I think you're right, there are some extra high bits in saddr and
>> eaddr, but
>> > they get discarded by the modulo-division in the next recursion
>> level. For
>> > added clarity the modulo division (or masking of high bits) could
>> be moved up
>> > to the caller.
>>
>> It may be clearer if the modulo-division is done in the caller.
>
> Yeah, indeed that isn't easy to get at first glance.

OK, I'll fix that.

>
> Please clean that up and also remove all those WARN_ON(), we don't
> want to spam the system log with backtraces like this.

I disagree. When you see the backtraces from WARN_ONs in
amdgpu_vm_get_pt, it means the driver is trying to update a page table
that hasn't been allocated. Without those WARN_ONs there is absolutely
no warning of any kind - amdgpu_vm_update_pte just fails silently (it's
a void function).

Therefore I wouldn't call those warnings spam. Given that the VM PT
allocation code for multi-levels is non-trivial, I think that's a good
sanity check to have and really helpful to see in a log when users report
random VM faults.

Regards,
  Felix

>
> Regards,
> Christian.
>
>> Anyway,
>> Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
>>
>>
>> On 03/29/2017 11:24 AM, Kuehling, Felix wrote:
>>> Hi Jerry,
>>>
>>> Let me clarify my understanding of the problem and the intention of
>>> the fix.
>>>
>>> Address range per page: 4KB
>>> Address range per PT (level 3): 2MB
>>> Address range per PD (level 2): 1GB
>>> Address range per PD (level 1): 512GB
>>> Address range per PD (level 0): 256TB
>>>
>>> Imagine for example a mapping that spans multiple level 2 page
>>> directories. Say
>>> from 0.5GB to 2.5GB.
>>>
>>> At level 0:
>>> from = 0
>>> to = 0
>>>
>>> At level 1:
>>> from = 0
>>> to = 2
>>>
>>> So for level 2 amdgpu_vm_alloc_levels gets called 3 times (pt_idx =
>>> [0..2]).
>>> Without my change, it gets called with the same saddr and eaddr 3
>>> times. So it
>>> calculates the same "from" and "to" each time:
>>> from = 256 % 512 = 256
>>> to = 767 % 512 = 255
>>>
>>> Obviously that doesn't work. What we really want is this (from..to
>>> in level 2
>>> when called for pt_idx values from level 1):
>>>
>>> For pt_idx=0 we want to fill the second half of the level 2 page
>>> directory with
>>> page tables:
>>> from = 256
>>> to = 511
>>>
>>> For pt_idx=1 we need to fill the entire level 2 page directory
>>> with page tables:
>>> from = 0
>>> to = 511
>>>
>>> For pt_idx=2 we want to fill the first half of the level 2 page
>>> directory with
>>> page tables:
>>> from = 0
>>> to = 255
>>>
>>> This creates a contiguous range of page tables across the three
>>> level 2 page
>>> directories that are spanned by the allocation. My change achieves
>>> that.
>>>
>>> I think you're right, there are some extra high bits in saddr and
>>> eaddr, but
>>> they get discarded by the modulo-division in the next recursion
>>> level. For
>>> added clarity the modulo division (or masking of high bits) could be
>>> moved up
>>> to the caller.
>>>
>>> Regards,
>>>    Felix
>>>
>>>
>>> -- 
>>> F e l i x   K u e h l i n g
>>> SMTS Software Development Engineer | Vertical Workstation/Compute
>>> 1 Commerce Valley Dr. East, Markham, ON L3T 7X6 Canada
>>> (O) +1(289)695-1597
>>>     _     _   _   _____   _____
>>>    / \   | \ / | |  _  \  \ _  |
>>>   / A \  | \M/ | | |D) )  /|_| |
>>> /_/ \_\ |_| |_| |_____/ |__/ \|   facebook.com/AMD | amd.com
>>> -------------------------------------------------------------------------------
>>>
>>> *From:* Zhang, Jerry
>>> *Sent:* Tuesday, March 28, 2017 10:54:03 PM
>>> *To:* Kuehling, Felix; amd-gfx@lists.freedesktop.org; Koenig,
>>> Christian; Zhou,
>>> David(ChunMing)
>>> *Cc:* Deucher, Alexander; Russell, Kent
>>> *Subject:* Re: [PATCH 3/3] drm/amdgpu: Fix multi-level page table
>>> bugs for
>>> large BOs
>>> On 03/29/2017 09:00 AM, Felix Kuehling wrote:
>>>> Fix the start/end address calculation for address ranges that span
>>>> multiple page directories in amdgpu_vm_alloc_levels.
>>>>
>>>> Add WARN_ONs if page tables aren't found. Otherwise the page table
>>>> update would just fail silently.
>>>>
>>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 13 ++++++++++---
>>>>   1 file changed, 10 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index 818747f..44aa039 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -278,6 +278,8 @@ static int amdgpu_vm_alloc_levels(struct
>>>> amdgpu_device *adev,
>>>>        from = (saddr >> shift) % amdgpu_vm_num_entries(adev, level);
>>>>        to = (eaddr >> shift) % amdgpu_vm_num_entries(adev, level);
>>>>
>>>> +     WARN_ON(from > to);
>>>> +
>>>>        if (to > parent->last_entry_used)
>>>>                parent->last_entry_used = to;
>>>>
>>>> @@ -312,8 +314,11 @@ static int amdgpu_vm_alloc_levels(struct
>>>> amdgpu_device *adev,
>>>>                }
>>>>
>>>>                if (level < adev->vm_manager.num_level) {
>>>> -                     r = amdgpu_vm_alloc_levels(adev, vm, entry,
>>>> saddr,
>>>> -                                                eaddr, level);
>>>> +                     uint64_t sub_saddr = (pt_idx == from) ? saddr
>>>> : 0;
>>>> +                     uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
>>>> +                             ((1 << shift) - 1);
>>>> +                     r = amdgpu_vm_alloc_levels(adev, vm, entry,
>>>> sub_saddr,
>>>> +                                                sub_eaddr, level);
>>>
>>> Do you mean to create a full sub-PT if pt_idx != from and pt_idx != to?
>>> I didn't quite catch that.
>>>
>>> From my perspective, we may need to clear the upper-level bits of
>>> saddr and eaddr before going on to allocate the next-level PT.
>>> Otherwise the next-level PT entry index will be incorrect (larger
>>> than num_entries), because the upper-level bits are still included
>>> in the calculation.
>>>
>>> e.g.
>>> an address breaks down into
>>> PD + PT1 + PT2 + PT3
>>>
>>> For the first round, we get the correct "from" and "to" with the
>>> right "shift" (3 blocks), then walk over the address space in the
>>> PD range.
>>>
>>> Then for the 2nd round, we cannot get the expected "from" and "to"
>>> with "shift" (2 blocks), as it walks over the address space in the
>>> PD + PT1 range, while we'd really want it to be within the PT1 range.
>>>
>>> The faulty calculation carries on in the later rounds.
>>>
>>> Perhaps that's the root cause of the issue you're facing.
>>>
>>> Jerry.
>>>
>>>>                        if (r)
>>>>                                return r;
>>>>                }
>>>> @@ -957,9 +962,11 @@ static struct amdgpu_bo
>>>> *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
>>>>                entry = &entry->entries[idx];
>>>>        }
>>>>
>>>> -     if (level)
>>>> +     if (WARN_ON(level))
>>>>                return NULL;
>>>>
>>>> +     WARN_ON(!entry->bo);
>>>> +
>>>>        return entry->bo;
>>>>   }
>>>>
>>>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3] drm/amdgpu: Fix multi-level page table bugs for large BOs
       [not found]                         ` <bdbacee3-2c61-ebcb-1421-5c0a77dc27e0-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-29 14:46                           ` Michel Dänzer
       [not found]                             ` <a2d6c20e-d35f-e135-342b-47f86683c3e3-otUistvHUpPR7s880joybQ@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Michel Dänzer @ 2017-03-29 14:46 UTC (permalink / raw)
  To: Felix Kuehling, Christian König, Zhang, Jerry (Junwei),
	Koenig, Christian, Zhou, David(ChunMing)
  Cc: Deucher, Alexander, Russell, Kent,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 29/03/17 11:32 PM, Felix Kuehling wrote:
> On 17-03-29 02:52 AM, Christian König wrote:
>>
>> Please clean that up and also remove all those WARN_ON(), we don't
>> want to spam the system log with backtraces like this.
> 
> I disagree. When you see the backtraces from WARN_ONs in
> amdgpu_vm_get_pt, it means the driver is trying to update a page table
> that hasn't been allocated. Without those WARN_ONs there is absolutely
> no warning of any kind - amdgpu_vm_update_pte just fails silently (it's
> a void function).
> 
> Therefore I wouldn't call those warnings spam. Given that the VM PT
> allocation code for multi-levels is non-trivial, I think that's a good
> sanity check to have and really helpful to see in a log when users report
> random VM faults.

WARN_ON_ONCE might be a compromise. We'll still get a backtrace, but it
won't cause crazy log spam if the bad condition is consistently hit.
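
For the checks in patch 3 that would look like this (same logic, just
the _ONCE variant; WARN_ON_ONCE() also returns the condition, so it
can be used inline like WARN_ON()):

        if (WARN_ON_ONCE(level))
                return NULL;

        WARN_ON_ONCE(!entry->bo);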


-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3] drm/amdgpu: Fix multi-level page table bugs for large BOs
       [not found]                             ` <a2d6c20e-d35f-e135-342b-47f86683c3e3-otUistvHUpPR7s880joybQ@public.gmane.org>
@ 2017-03-29 15:22                               ` Christian König
       [not found]                                 ` <d7279f4d-023e-a8fb-c08f-4e85fec87bd6-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Christian König @ 2017-03-29 15:22 UTC (permalink / raw)
  To: Michel Dänzer, Felix Kuehling, Christian König, Zhang,
	Jerry (Junwei), Zhou, David(ChunMing)
  Cc: Deucher, Alexander, Russell, Kent,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 29.03.2017 at 16:46, Michel Dänzer wrote:
> On 29/03/17 11:32 PM, Felix Kuehling wrote:
>> On 17-03-29 02:52 AM, Christian König wrote:
>>> Please clean that up and also remove all those WARN_ON(), we don't
>>> want to spam the system log with backtraces like this.
>> I disagree. When you see the backtraces from WARN_ONs in
>> amdgpu_vm_get_pt, it means the driver is trying to update a page table
>> that hasn't been allocated. Without those WARN_ONs there is absolutely
>> no warning of any kind - amdgpu_vm_update_pte just fails silently (it's
>> a void function).
>>
>> Therefore I wouldn't call those warnings spam. Given that the VM PT
>> allocation code for multi-levels is non-trivial, I think that's a good
>> sanity check to have and really helpful to see in a log when users report
>> random VM faults.
> WARN_ON_ONCE might be a compromise. We'll still get a backtrace, but it
> won't cause crazy log spam if the bad condition is consistently hit.
Good idea. We should also only add one at the beginning of the function 
and not multiple catches for each dependent variable.

Regards,
Christian.
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3] drm/amdgpu: Fix multi-level page table bugs for large BOs
       [not found]                                 ` <d7279f4d-023e-a8fb-c08f-4e85fec87bd6-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-29 15:42                                   ` Felix Kuehling
  0 siblings, 0 replies; 22+ messages in thread
From: Felix Kuehling @ 2017-03-29 15:42 UTC (permalink / raw)
  To: Christian König, Michel Dänzer, Christian König,
	Zhang, Jerry (Junwei), Zhou, David(ChunMing)
  Cc: Deucher, Alexander, Russell, Kent,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 17-03-29 11:22 AM, Christian König wrote:
> On 29.03.2017 at 16:46, Michel Dänzer wrote:
>> On 29/03/17 11:32 PM, Felix Kuehling wrote:
>>> On 17-03-29 02:52 AM, Christian König wrote:
>>>> Please clean that up and also remove all those WARN_ON(), we don't
>>>> want to spam the system log with backtraces like this.
>>> I disagree. When you see the backtraces from WARN_ONs in
>>> amdgpu_vm_get_pt, it means the driver is trying to update a page table
>>> that hasn't been allocated. Without those WARN_ONs there is absolutely
>>> no warning of any kind - amdgpu_vm_update_pte just fails silently (it's
>>> a void function).
>>>
>>> Therefore I wouldn't call those warnings spam. Given that the VM PT
>>> allocation code for multi-levels is non-trivial, I think that's a good
>>> sanity check to have and really helpful to see in a log when users
>>> report
>>> random VM faults.
>> WARN_ON_ONCE might be a compromise. We'll still get a backtrace, but it
>> won't cause crazy log spam if the bad condition is consistently hit.
> Good idea. We should also only add one at the beginning of the
> function and not multiple catches for each dependent variable.

I don't understand what you mean here. Are we still talking about
amdgpu_vm_update_pte? You'll have to walk the page table before you know
whether a PT BO actually exists. So I don't understand what check would
make sense at the start (before the page table walk).

I removed the WARN_ON from amdgpu_vm_alloc_levels and instead return an
error if "from" or "to" are out of range. I'll send out v2 of my patch
in a minute.
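
Roughly along these lines (a sketch only, v2 itself isn't in this
thread yet):

        if (from > to ||
            to >= amdgpu_vm_num_entries(adev, level))
                return -EINVAL;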

Regards,
  Felix

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2017-03-29 15:42 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-29  1:00 [PATCH 0/3] Fixes for multi-level page tables Felix Kuehling
     [not found] ` <1490749231-16157-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
2017-03-29  1:00   ` [PATCH 1/3] drm/amdgpu: Make max_pfn 64-bit Felix Kuehling
     [not found]     ` <1490749231-16157-2-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
2017-03-29  6:44       ` Christian König
2017-03-29  1:00   ` [PATCH 2/3] drm/amdgpu: Fix Vega10 VM initialization Felix Kuehling
     [not found]     ` <1490749231-16157-3-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
2017-03-29  1:39       ` Zhang, Jerry (Junwei)
     [not found]         ` <58DB1061.7000403-5C7GfCeVMHo@public.gmane.org>
2017-03-29  1:48           ` Felix Kuehling
     [not found]             ` <0091f779-a045-7009-3746-6957ccacd62f-5C7GfCeVMHo@public.gmane.org>
2017-03-29  2:56               ` Zhang, Jerry (Junwei)
2017-03-29  6:47               ` Christian König
     [not found]                 ` <b0d2a78d-ab7d-f5b7-a35e-a9d6c80771a7-5C7GfCeVMHo@public.gmane.org>
2017-03-29  7:09                   ` Zhang, Jerry (Junwei)
2017-03-29  1:00   ` [PATCH 3/3] drm/amdgpu: Fix multi-level page table bugs for large BOs Felix Kuehling
     [not found]     ` <1490749231-16157-4-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
2017-03-29  2:54       ` Zhang, Jerry (Junwei)
     [not found]         ` <58DB21CB.4090800-5C7GfCeVMHo@public.gmane.org>
2017-03-29  3:24           ` Kuehling, Felix
     [not found]             ` <DM5PR1201MB0235AB2363725D0D8320265292350-grEf7a3NxMBd8L2jMOIKKmrFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
2017-03-29  5:58               ` Zhang, Jerry (Junwei)
     [not found]                 ` <58DB4D02.7020903-5C7GfCeVMHo@public.gmane.org>
2017-03-29  6:52                   ` Christian König
     [not found]                     ` <8acdbd65-f99d-3e0e-b61b-318cf27d7a3b-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-03-29  7:18                       ` Zhang, Jerry (Junwei)
2017-03-29 14:32                       ` Felix Kuehling
     [not found]                         ` <bdbacee3-2c61-ebcb-1421-5c0a77dc27e0-5C7GfCeVMHo@public.gmane.org>
2017-03-29 14:46                           ` Michel Dänzer
     [not found]                             ` <a2d6c20e-d35f-e135-342b-47f86683c3e3-otUistvHUpPR7s880joybQ@public.gmane.org>
2017-03-29 15:22                               ` Christian König
     [not found]                                 ` <d7279f4d-023e-a8fb-c08f-4e85fec87bd6-5C7GfCeVMHo@public.gmane.org>
2017-03-29 15:42                                   ` Felix Kuehling
2017-03-29  2:34   ` [PATCH 0/3] Fixes for multi-level page tables zhoucm1
2017-03-29  6:00   ` Zhang, Jerry (Junwei)
     [not found]     ` <58DB4D73.5000408-5C7GfCeVMHo@public.gmane.org>
2017-03-29  7:10       ` Zhang, Jerry (Junwei)
