All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/6] drm/amdgpu: rework shadow handling during PD clear v2
@ 2019-02-26 12:46 Christian König
       [not found] ` <20190226124658.25334-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: Christian König @ 2019-02-26 12:46 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

This way we only deal with the real BO in here.

v2: use a do { ... } while loop instead

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 65 +++++++++++++++-----------
 1 file changed, 38 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 12d51d96491e..1613305610dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -788,39 +788,56 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 	if (r)
-		goto error;
+		return r;
 
 	r = amdgpu_ttm_alloc_gart(&bo->tbo);
 	if (r)
 		return r;
 
+	if (bo->shadow) {
+		r = ttm_bo_validate(&bo->shadow->tbo, &bo->shadow->placement,
+				    &ctx);
+		if (r)
+			return r;
+
+		r = amdgpu_ttm_alloc_gart(&bo->shadow->tbo);
+		if (r)
+			return r;
+
+	}
+
 	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
 	if (r)
-		goto error;
+		return r;
 
-	addr = amdgpu_bo_gpu_offset(bo);
-	if (ats_entries) {
-		uint64_t ats_value;
+	do {
+		addr = amdgpu_bo_gpu_offset(bo);
+		if (ats_entries) {
+			uint64_t ats_value;
 
-		ats_value = AMDGPU_PTE_DEFAULT_ATC;
-		if (level != AMDGPU_VM_PTB)
-			ats_value |= AMDGPU_PDE_PTE;
+			ats_value = AMDGPU_PTE_DEFAULT_ATC;
+			if (level != AMDGPU_VM_PTB)
+				ats_value |= AMDGPU_PDE_PTE;
 
-		amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
-				      ats_entries, 0, ats_value);
-		addr += ats_entries * 8;
-	}
+			amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
+					      ats_entries, 0, ats_value);
+			addr += ats_entries * 8;
+		}
 
-	if (entries) {
-		uint64_t value = 0;
+		if (entries) {
+			uint64_t value = 0;
 
-		/* Workaround for fault priority problem on GMC9 */
-		if (level == AMDGPU_VM_PTB && adev->asic_type >= CHIP_VEGA10)
-			value = AMDGPU_PTE_EXECUTABLE;
+			/* Workaround for fault priority problem on GMC9 */
+			if (level == AMDGPU_VM_PTB &&
+			    adev->asic_type >= CHIP_VEGA10)
+				value = AMDGPU_PTE_EXECUTABLE;
 
-		amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
-				      entries, 0, value);
-	}
+			amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
+					      entries, 0, value);
+		}
+
+		bo = bo->shadow;
+	} while (bo);
 
 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 
@@ -835,19 +852,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 	if (r)
 		goto error_free;
 
-	amdgpu_bo_fence(bo, fence, true);
+	amdgpu_bo_fence(vm->root.base.bo, fence, true);
 	dma_fence_put(fence);
 
-	if (bo->shadow)
-		return amdgpu_vm_clear_bo(adev, vm, bo->shadow,
-					  level, pte_support_ats);
-
 	return 0;
 
 error_free:
 	amdgpu_job_free(job);
-
-error:
 	return r;
 }
 
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 2/6] drm/amdgpu: let amdgpu_vm_clear_bo figure out ats status v2
       [not found] ` <20190226124658.25334-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2019-02-26 12:46   ` Christian König
       [not found]     ` <20190226124658.25334-2-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2019-02-26 12:46   ` [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand Christian König
                     ` (4 subsequent siblings)
  5 siblings, 1 reply; 26+ messages in thread
From: Christian König @ 2019-02-26 12:46 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Instead of providing it from outside figure out the ats status in the
function itself from the data structures.

v2: simplify finding the right level

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 51 ++++++++++++++------------
 1 file changed, 28 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 1613305610dd..362436f4e856 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -747,8 +747,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
  * @adev: amdgpu_device pointer
  * @vm: VM to clear BO from
  * @bo: BO to clear
- * @level: level this BO is at
- * @pte_support_ats: indicate ATS support from PTE
  *
  * Root PD needs to be reserved when calling this.
  *
@@ -756,10 +754,11 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
  * 0 on success, errno otherwise.
  */
 static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
-			      struct amdgpu_vm *vm, struct amdgpu_bo *bo,
-			      unsigned level, bool pte_support_ats)
+			      struct amdgpu_vm *vm,
+			      struct amdgpu_bo *bo)
 {
 	struct ttm_operation_ctx ctx = { true, false };
+	unsigned level = adev->vm_manager.root_level;
 	struct dma_fence *fence = NULL;
 	unsigned entries, ats_entries;
 	struct amdgpu_ring *ring;
@@ -768,17 +767,31 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 	int r;
 
 	entries = amdgpu_bo_size(bo) / 8;
+	if (vm->pte_support_ats) {
+		ats_entries = amdgpu_vm_level_shift(adev, level);
+		ats_entries += AMDGPU_GPU_PAGE_SHIFT;
+		ats_entries = AMDGPU_GMC_HOLE_START >> ats_entries;
 
-	if (pte_support_ats) {
-		if (level == adev->vm_manager.root_level) {
-			ats_entries = amdgpu_vm_level_shift(adev, level);
-			ats_entries += AMDGPU_GPU_PAGE_SHIFT;
-			ats_entries = AMDGPU_GMC_HOLE_START >> ats_entries;
+		if (!bo->parent) {
 			ats_entries = min(ats_entries, entries);
 			entries -= ats_entries;
 		} else {
-			ats_entries = entries;
-			entries = 0;
+			struct amdgpu_bo *ancestor = bo;
+			struct amdgpu_vm_pt *pt;
+
+			do {
+				++level;
+				ancestor = ancestor->parent;
+			} while (ancestor);
+
+			pt = container_of(ancestor->vm_bo, struct amdgpu_vm_pt,
+					  base);
+			if ((pt - vm->root.entries) >= ats_entries) {
+				ats_entries = 0;
+			} else {
+				ats_entries = entries;
+				entries = 0;
+			}
 		}
 	} else {
 		ats_entries = 0;
@@ -908,7 +921,6 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 {
 	struct amdgpu_vm_pt_cursor cursor;
 	struct amdgpu_bo *pt;
-	bool ats = false;
 	uint64_t eaddr;
 	int r;
 
@@ -918,9 +930,6 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 
 	eaddr = saddr + size - 1;
 
-	if (vm->pte_support_ats)
-		ats = saddr < AMDGPU_GMC_HOLE_START;
-
 	saddr /= AMDGPU_GPU_PAGE_SIZE;
 	eaddr /= AMDGPU_GPU_PAGE_SIZE;
 
@@ -969,7 +978,7 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 
 		amdgpu_vm_bo_base_init(&entry->base, vm, pt);
 
-		r = amdgpu_vm_clear_bo(adev, vm, pt, cursor.level, ats);
+		r = amdgpu_vm_clear_bo(adev, vm, pt);
 		if (r)
 			goto error_free_pt;
 	}
@@ -3044,9 +3053,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
 
-	r = amdgpu_vm_clear_bo(adev, vm, root,
-			       adev->vm_manager.root_level,
-			       vm->pte_support_ats);
+	r = amdgpu_vm_clear_bo(adev, vm, root);
 	if (r)
 		goto error_unreserve;
 
@@ -3141,9 +3148,8 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns
 	 * changing any other state, in case it fails.
 	 */
 	if (pte_support_ats != vm->pte_support_ats) {
-		r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
-			       adev->vm_manager.root_level,
-			       pte_support_ats);
+		vm->pte_support_ats = pte_support_ats;
+		r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo);
 		if (r)
 			goto free_idr;
 	}
@@ -3151,7 +3157,6 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns
 	/* Update VM state */
 	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
 				    AMDGPU_VM_USE_CPU_FOR_COMPUTE);
-	vm->pte_support_ats = pte_support_ats;
 	DRM_DEBUG_DRIVER("VM update mode is %s\n",
 			 vm->use_cpu_for_update ? "CPU" : "SDMA");
 	WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)),
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
       [not found] ` <20190226124658.25334-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2019-02-26 12:46   ` [PATCH 2/6] drm/amdgpu: let amdgpu_vm_clear_bo figure out ats status v2 Christian König
@ 2019-02-26 12:46   ` Christian König
       [not found]     ` <20190226124658.25334-3-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2019-02-26 12:46   ` [PATCH 4/6] drm/amdgpu: free " Christian König
                     ` (3 subsequent siblings)
  5 siblings, 1 reply; 26+ messages in thread
From: Christian König @ 2019-02-26 12:46 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Let's start to allocate VM PDs/PTs on demand instead of pre-allocating
them during mapping.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136 +++++-------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -
 5 files changed, 39 insertions(+), 129 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 31e3953dcb6e..088e9b6b765b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -410,15 +410,7 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
 	if (p_bo_va_entry)
 		*p_bo_va_entry = bo_va_entry;
 
-	/* Allocate new page tables if needed and validate
-	 * them.
-	 */
-	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
-	if (ret) {
-		pr_err("Failed to allocate pts, err=%d\n", ret);
-		goto err_alloc_pts;
-	}
-
+	/* Allocate and validate page tables if needed */
 	ret = vm_validate_pt_pd_bos(vm);
 	if (ret) {
 		pr_err("validate_pt_pd_bos() failed\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 7e22be7ca68a..54dd02a898b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		return -ENOMEM;
 	}
 
-	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
-				size);
-	if (r) {
-		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
-		amdgpu_vm_bo_rmv(adev, *bo_va);
-		ttm_eu_backoff_reservation(&ticket, &list);
-		return r;
-	}
-
 	r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
 			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
 			     AMDGPU_PTE_EXECUTABLE);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 555285e329ed..fcaaac30e84b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -625,11 +625,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 
 	switch (args->operation) {
 	case AMDGPU_VA_OP_MAP:
-		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
-					args->map_size);
-		if (r)
-			goto error_backoff;
-
 		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
 		r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
 				     args->offset_in_bo, args->map_size,
@@ -645,11 +640,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 						args->map_size);
 		break;
 	case AMDGPU_VA_OP_REPLACE:
-		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
-					args->map_size);
-		if (r)
-			goto error_backoff;
-
 		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
 		r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
 					     args->offset_in_bo, args->map_size,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 362436f4e856..dfad543fc000 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
 	}
 }
 
-/**
- * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
- *
- * @adev: amdgpu_device pointer
- * @vm: amdgpu_vm structure
- * @start: start addr of the walk
- * @cursor: state to initialize
- *
- * Start a walk and go directly to the leaf node.
- */
-static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,
-				    struct amdgpu_vm *vm, uint64_t start,
-				    struct amdgpu_vm_pt_cursor *cursor)
-{
-	amdgpu_vm_pt_start(adev, vm, start, cursor);
-	while (amdgpu_vm_pt_descendant(adev, cursor));
-}
-
-/**
- * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
- *
- * @adev: amdgpu_device pointer
- * @cursor: current state
- *
- * Walk the PD/PT tree to the next leaf node.
- */
-static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
-				   struct amdgpu_vm_pt_cursor *cursor)
-{
-	amdgpu_vm_pt_next(adev, cursor);
-	if (cursor->pfn != ~0ll)
-		while (amdgpu_vm_pt_descendant(adev, cursor));
-}
-
-/**
- * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the hierarchy
- */
-#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)		\
-	for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor));		\
-	     (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev), &(cursor)))
-
 /**
  * amdgpu_vm_pt_first_dfs - start a deep first search
  *
@@ -915,74 +874,51 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  * Returns:
  * 0 on success, errno otherwise.
  */
-int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
-			struct amdgpu_vm *vm,
-			uint64_t saddr, uint64_t size)
+static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
+			       struct amdgpu_vm *vm,
+			       struct amdgpu_vm_pt_cursor *cursor)
 {
-	struct amdgpu_vm_pt_cursor cursor;
+	struct amdgpu_vm_pt *entry = cursor->entry;
+	struct amdgpu_bo_param bp;
 	struct amdgpu_bo *pt;
-	uint64_t eaddr;
 	int r;
 
-	/* validate the parameters */
-	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
-		return -EINVAL;
+	if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
+		unsigned num_entries;
 
-	eaddr = saddr + size - 1;
-
-	saddr /= AMDGPU_GPU_PAGE_SIZE;
-	eaddr /= AMDGPU_GPU_PAGE_SIZE;
-
-	if (eaddr >= adev->vm_manager.max_pfn) {
-		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
-			eaddr, adev->vm_manager.max_pfn);
-		return -EINVAL;
+		num_entries = amdgpu_vm_num_entries(adev, cursor->level);
+		entry->entries = kvmalloc_array(num_entries,
+						sizeof(*entry->entries),
+						GFP_KERNEL | __GFP_ZERO);
+		if (!entry->entries)
+			return -ENOMEM;
 	}
 
-	for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {
-		struct amdgpu_vm_pt *entry = cursor.entry;
-		struct amdgpu_bo_param bp;
-
-		if (cursor.level < AMDGPU_VM_PTB) {
-			unsigned num_entries;
-
-			num_entries = amdgpu_vm_num_entries(adev, cursor.level);
-			entry->entries = kvmalloc_array(num_entries,
-							sizeof(*entry->entries),
-							GFP_KERNEL |
-							__GFP_ZERO);
-			if (!entry->entries)
-				return -ENOMEM;
-		}
-
-
-		if (entry->base.bo)
-			continue;
-
-		amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
-
-		r = amdgpu_bo_create(adev, &bp, &pt);
-		if (r)
-			return r;
-
-		if (vm->use_cpu_for_update) {
-			r = amdgpu_bo_kmap(pt, NULL);
-			if (r)
-				goto error_free_pt;
-		}
+	if (entry->base.bo)
+		return 0;
 
-		/* Keep a reference to the root directory to avoid
-		* freeing them up in the wrong order.
-		*/
-		pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
+	amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
 
-		amdgpu_vm_bo_base_init(&entry->base, vm, pt);
+	r = amdgpu_bo_create(adev, &bp, &pt);
+	if (r)
+		return r;
 
-		r = amdgpu_vm_clear_bo(adev, vm, pt);
+	if (vm->use_cpu_for_update) {
+		r = amdgpu_bo_kmap(pt, NULL);
 		if (r)
 			goto error_free_pt;
 	}
 
+	/* Keep a reference to the root directory to avoid
+	 * freeing them up in the wrong order.
+	 */
+	pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
+	amdgpu_vm_bo_base_init(&entry->base, vm, pt);
+
+	r = amdgpu_vm_clear_bo(adev, vm, pt);
+	if (r)
+		goto error_free_pt;
+
 	return 0;
 
 error_free_pt:
@@ -1627,6 +1563,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 	struct amdgpu_vm_pt_cursor cursor;
 	uint64_t frag_start = start, frag_end;
 	unsigned int frag;
+	int r;
 
 	/* figure out the initial fragment */
 	amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);
@@ -1634,12 +1571,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 	/* walk over the address space and update the PTs */
 	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
 	while (cursor.pfn < end) {
-		struct amdgpu_bo *pt = cursor.entry->base.bo;
 		unsigned shift, parent_shift, mask;
 		uint64_t incr, entry_end, pe_start;
+		struct amdgpu_bo *pt;
 
-		if (!pt)
-			return -ENOENT;
+		r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);
+		if (r)
+			return r;
+
+		pt = cursor.entry->base.bo;
 
 		/* The root level can't be a huge page */
 		if (cursor.level == adev->vm_manager.root_level) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 81ff8177f092..116605c038d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm);
 int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			      int (*callback)(void *p, struct amdgpu_bo *bo),
 			      void *param);
-int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
-			struct amdgpu_vm *vm,
-			uint64_t saddr, uint64_t size);
 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 				 struct amdgpu_vm *vm);
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 4/6] drm/amdgpu: free PDs/PTs on demand
       [not found] ` <20190226124658.25334-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2019-02-26 12:46   ` [PATCH 2/6] drm/amdgpu: let amdgpu_vm_clear_bo figure out ats status v2 Christian König
  2019-02-26 12:46   ` [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand Christian König
@ 2019-02-26 12:46   ` Christian König
  2019-02-26 12:46   ` [PATCH 5/6] drm/amdgpu: drop the huge page flag Christian König
                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 26+ messages in thread
From: Christian König @ 2019-02-26 12:46 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

When something is unmapped we now free the affected PDs/PTs again.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 71 +++++++++++++++++++-------
 1 file changed, 53 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index dfad543fc000..dad6dc31496a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -515,12 +515,31 @@ static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
  */
 static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
 				   struct amdgpu_vm *vm,
+				   struct amdgpu_vm_pt_cursor *start,
 				   struct amdgpu_vm_pt_cursor *cursor)
 {
-	amdgpu_vm_pt_start(adev, vm, 0, cursor);
+	if (start)
+		*cursor = *start;
+	else
+		amdgpu_vm_pt_start(adev, vm, 0, cursor);
 	while (amdgpu_vm_pt_descendant(adev, cursor));
 }
 
+/**
+ * amdgpu_vm_pt_continue_dfs - check if the deep first search should continue
+ *
+ * @start: starting point for the search
+ * @entry: current entry
+ *
+ * Returns:
+ * True when the search should continue, false otherwise.
+ */
+static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start,
+				      struct amdgpu_vm_pt *entry)
+{
+	return entry && (!start || entry != start->entry);
+}
+
 /**
  * amdgpu_vm_pt_next_dfs - get the next node for a deep first search
  *
@@ -546,11 +565,11 @@ static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
 /**
  * for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs
  */
-#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry)			\
-	for (amdgpu_vm_pt_first_dfs((adev), (vm), &(cursor)),			\
+#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)		\
+	for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)),		\
 	     (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
-	     (entry); (entry) = (cursor).entry,					\
-	     amdgpu_vm_pt_next_dfs((adev), &(cursor)))
+	     amdgpu_vm_pt_continue_dfs((start), (entry));			\
+	     (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor)))
 
 /**
  * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
@@ -927,32 +946,46 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 	return r;
 }
 
+/**
+ * amdgpu_vm_free_table - free one PD/PT
+ *
+ * @entry: PDE to free
+ */
+static void amdgpu_vm_free_table(struct amdgpu_vm_pt *entry)
+{
+	if (entry->base.bo) {
+		entry->base.bo->vm_bo = NULL;
+		list_del(&entry->base.vm_status);
+		amdgpu_bo_unref(&entry->base.bo->shadow);
+		amdgpu_bo_unref(&entry->base.bo);
+	}
+	kvfree(entry->entries);
+	entry->entries = NULL;
+}
+
 /**
  * amdgpu_vm_free_pts - free PD/PT levels
  *
  * @adev: amdgpu device structure
  * @vm: amdgpu vm structure
+ * @start: optional cursor where to start freeing PDs/PTs
  *
  * Free the page directory or page table level and all sub levels.
  */
 static void amdgpu_vm_free_pts(struct amdgpu_device *adev,
-			       struct amdgpu_vm *vm)
+			       struct amdgpu_vm *vm,
+			       struct amdgpu_vm_pt_cursor *start)
 {
 	struct amdgpu_vm_pt_cursor cursor;
 	struct amdgpu_vm_pt *entry;
 
-	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry) {
+	vm->bulk_moveable = false;
 
-		if (entry->base.bo) {
-			entry->base.bo->vm_bo = NULL;
-			list_del(&entry->base.vm_status);
-			amdgpu_bo_unref(&entry->base.bo->shadow);
-			amdgpu_bo_unref(&entry->base.bo);
-		}
-		kvfree(entry->entries);
-	}
+	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
+		amdgpu_vm_free_table(entry);
 
-	BUG_ON(vm->root.base.bo);
+	if (start)
+		amdgpu_vm_free_table(start->entry);
 }
 
 /**
@@ -1348,7 +1381,7 @@ static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev,
 	struct amdgpu_vm_pt_cursor cursor;
 	struct amdgpu_vm_pt *entry;
 
-	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry)
+	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry)
 		if (entry->base.bo && !entry->base.moved)
 			amdgpu_vm_bo_relocated(&entry->base);
 }
@@ -1656,6 +1689,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 			/* Mark all child entries as huge */
 			while (cursor.pfn < frag_start) {
 				cursor.entry->huge = true;
+				amdgpu_vm_free_pts(adev, params->vm, &cursor);
 				amdgpu_vm_pt_next(adev, &cursor);
 			}
 
@@ -3219,10 +3253,11 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	if (r) {
 		dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
 	} else {
-		amdgpu_vm_free_pts(adev, vm);
+		amdgpu_vm_free_pts(adev, vm, NULL);
 		amdgpu_bo_unreserve(root);
 	}
 	amdgpu_bo_unref(&root);
+	WARN_ON(vm->root.base.bo);
 	dma_fence_put(vm->last_update);
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
 		amdgpu_vmid_free_reserved(adev, vm, i);
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 5/6] drm/amdgpu: drop the huge page flag
       [not found] ` <20190226124658.25334-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (2 preceding siblings ...)
  2019-02-26 12:46   ` [PATCH 4/6] drm/amdgpu: free " Christian König
@ 2019-02-26 12:46   ` Christian König
  2019-02-26 12:46   ` [PATCH 6/6] drm/amdgpu: allow huge invalid mappings on GMC8 Christian König
  2019-02-27 11:49   ` [PATCH 1/6] drm/amdgpu: rework shadow handling during PD clear v2 Huang, Ray
  5 siblings, 0 replies; 26+ messages in thread
From: Christian König @ 2019-02-26 12:46 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Not needed any more since we now free PDs/PTs on demand.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 14 +-------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  1 -
 2 files changed, 1 insertion(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index dad6dc31496a..73e2f8e8a604 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1354,10 +1354,6 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
 	uint64_t pde, pt, flags;
 	unsigned level;
 
-	/* Don't update huge pages here */
-	if (entry->huge)
-		return;
-
 	for (level = 0, pbo = bo->parent; pbo; ++level)
 		pbo = pbo->parent;
 
@@ -1621,13 +1617,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 			continue;
 		}
 
-		/* If it isn't already handled it can't be a huge page */
-		if (cursor.entry->huge) {
-			/* Add the entry to the relocated list to update it. */
-			cursor.entry->huge = false;
-			amdgpu_vm_bo_relocated(&cursor.entry->base);
-		}
-
 		shift = amdgpu_vm_level_shift(adev, cursor.level);
 		parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
 		if (adev->asic_type < CHIP_VEGA10) {
@@ -1686,9 +1675,8 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		} while (frag_start < entry_end);
 
 		if (amdgpu_vm_pt_descendant(adev, &cursor)) {
-			/* Mark all child entries as huge */
+			/* Free all child entries */
 			while (cursor.pfn < frag_start) {
-				cursor.entry->huge = true;
 				amdgpu_vm_free_pts(adev, params->vm, &cursor);
 				amdgpu_vm_pt_next(adev, &cursor);
 			}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 116605c038d2..3c6537ef659c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -140,7 +140,6 @@ struct amdgpu_vm_bo_base {
 
 struct amdgpu_vm_pt {
 	struct amdgpu_vm_bo_base	base;
-	bool				huge;
 
 	/* array of page tables, one for each directory entry */
 	struct amdgpu_vm_pt		*entries;
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 6/6] drm/amdgpu: allow huge invalid mappings on GMC8
       [not found] ` <20190226124658.25334-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (3 preceding siblings ...)
  2019-02-26 12:46   ` [PATCH 5/6] drm/amdgpu: drop the huge page flag Christian König
@ 2019-02-26 12:46   ` Christian König
  2019-02-27 11:49   ` [PATCH 1/6] drm/amdgpu: rework shadow handling during PD clear v2 Huang, Ray
  5 siblings, 0 replies; 26+ messages in thread
From: Christian König @ 2019-02-26 12:46 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Only GMC9 supports true huge pages, but we can still free invalid mappings
on GMC8.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 73e2f8e8a604..f89b29cfb3e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1619,7 +1619,8 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 
 		shift = amdgpu_vm_level_shift(adev, cursor.level);
 		parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
-		if (adev->asic_type < CHIP_VEGA10) {
+		if (adev->asic_type < CHIP_VEGA10 &&
+		    (flags & AMDGPU_PTE_VALID)) {
 			/* No huge page support before GMC v9 */
 			if (cursor.level != AMDGPU_VM_PTB) {
 				if (!amdgpu_vm_pt_descendant(adev, &cursor))
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* RE: [PATCH 1/6] drm/amdgpu: rework shadow handling during PD clear v2
       [not found] ` <20190226124658.25334-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (4 preceding siblings ...)
  2019-02-26 12:46   ` [PATCH 6/6] drm/amdgpu: allow huge invalid mappings on GMC8 Christian König
@ 2019-02-27 11:49   ` Huang, Ray
  5 siblings, 0 replies; 26+ messages in thread
From: Huang, Ray @ 2019-02-27 11:49 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Christian König
> Sent: Tuesday, February 26, 2019 8:47 PM
> To: amd-gfx@lists.freedesktop.org
> Subject: [PATCH 1/6] drm/amdgpu: rework shadow handling during PD clear
> v2
> 
> This way we only deal with the real BO in here.
> 
> v2: use a do { ... } while loop instead
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

Acked-by: Huang Rui <ray.huang@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 65 +++++++++++++++------
> -----
>  1 file changed, 38 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 12d51d96491e..1613305610dd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -788,39 +788,56 @@ static int amdgpu_vm_clear_bo(struct
> amdgpu_device *adev,
> 
>  	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>  	if (r)
> -		goto error;
> +		return r;
> 
>  	r = amdgpu_ttm_alloc_gart(&bo->tbo);
>  	if (r)
>  		return r;
> 
> +	if (bo->shadow) {
> +		r = ttm_bo_validate(&bo->shadow->tbo, &bo->shadow-
> >placement,
> +				    &ctx);
> +		if (r)
> +			return r;
> +
> +		r = amdgpu_ttm_alloc_gart(&bo->shadow->tbo);
> +		if (r)
> +			return r;
> +
> +	}
> +
>  	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
>  	if (r)
> -		goto error;
> +		return r;
> 
> -	addr = amdgpu_bo_gpu_offset(bo);
> -	if (ats_entries) {
> -		uint64_t ats_value;
> +	do {
> +		addr = amdgpu_bo_gpu_offset(bo);
> +		if (ats_entries) {
> +			uint64_t ats_value;
> 
> -		ats_value = AMDGPU_PTE_DEFAULT_ATC;
> -		if (level != AMDGPU_VM_PTB)
> -			ats_value |= AMDGPU_PDE_PTE;
> +			ats_value = AMDGPU_PTE_DEFAULT_ATC;
> +			if (level != AMDGPU_VM_PTB)
> +				ats_value |= AMDGPU_PDE_PTE;
> 
> -		amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
> -				      ats_entries, 0, ats_value);
> -		addr += ats_entries * 8;
> -	}
> +			amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr,
> 0,
> +					      ats_entries, 0, ats_value);
> +			addr += ats_entries * 8;
> +		}
> 
> -	if (entries) {
> -		uint64_t value = 0;
> +		if (entries) {
> +			uint64_t value = 0;
> 
> -		/* Workaround for fault priority problem on GMC9 */
> -		if (level == AMDGPU_VM_PTB && adev->asic_type >=
> CHIP_VEGA10)
> -			value = AMDGPU_PTE_EXECUTABLE;
> +			/* Workaround for fault priority problem on GMC9 */
> +			if (level == AMDGPU_VM_PTB &&
> +			    adev->asic_type >= CHIP_VEGA10)
> +				value = AMDGPU_PTE_EXECUTABLE;
> 
> -		amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
> -				      entries, 0, value);
> -	}
> +			amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr,
> 0,
> +					      entries, 0, value);
> +		}
> +
> +		bo = bo->shadow;
> +	} while (bo);
> 
>  	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
> 
> @@ -835,19 +852,13 @@ static int amdgpu_vm_clear_bo(struct
> amdgpu_device *adev,
>  	if (r)
>  		goto error_free;
> 
> -	amdgpu_bo_fence(bo, fence, true);
> +	amdgpu_bo_fence(vm->root.base.bo, fence, true);
>  	dma_fence_put(fence);
> 
> -	if (bo->shadow)
> -		return amdgpu_vm_clear_bo(adev, vm, bo->shadow,
> -					  level, pte_support_ats);
> -
>  	return 0;
> 
>  error_free:
>  	amdgpu_job_free(job);
> -
> -error:
>  	return r;
>  }
> 
> --
> 2.17.1
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH 2/6] drm/amdgpu: let amdgpu_vm_clear_bo figure out ats status v2
       [not found]     ` <20190226124658.25334-2-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2019-02-27 19:25       ` Zeng, Oak
  0 siblings, 0 replies; 26+ messages in thread
From: Zeng, Oak @ 2019-02-27 19:25 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Turning some of this code into functions would make it easier for people to understand. At least for me, the code is not very easy to follow. See inline comments. However, the suggested new functions would only be used in this clear_bo function, so it is your call whether to do it or not. Anyway, this patch is Reviewed-by: Oak Zeng <Oak.Zeng@amd.com>

Regards,
Oak

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Christian König
Sent: Tuesday, February 26, 2019 7:47 AM
To: amd-gfx@lists.freedesktop.org
Subject: [PATCH 2/6] drm/amdgpu: let amdgpu_vm_clear_bo figure out ats status v2

Instead of providing it from outside, figure out the ATS status in the function itself from the data structures.

v2: simplify finding the right level

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 51 ++++++++++++++------------
 1 file changed, 28 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 1613305610dd..362436f4e856 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -747,8 +747,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
  * @adev: amdgpu_device pointer
  * @vm: VM to clear BO from
  * @bo: BO to clear
- * @level: level this BO is at
- * @pte_support_ats: indicate ATS support from PTE
  *
  * Root PD needs to be reserved when calling this.
  *
@@ -756,10 +754,11 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
  * 0 on success, errno otherwise.
  */
 static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
-			      struct amdgpu_vm *vm, struct amdgpu_bo *bo,
-			      unsigned level, bool pte_support_ats)
+			      struct amdgpu_vm *vm,
+			      struct amdgpu_bo *bo)
 {
 	struct ttm_operation_ctx ctx = { true, false };
+	unsigned level = adev->vm_manager.root_level;
 	struct dma_fence *fence = NULL;
 	unsigned entries, ats_entries;
 	struct amdgpu_ring *ring;
@@ -768,17 +767,31 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 	int r;
 
 	entries = amdgpu_bo_size(bo) / 8;
+	if (vm->pte_support_ats) {
+		ats_entries = amdgpu_vm_level_shift(adev, level);
+		ats_entries += AMDGPU_GPU_PAGE_SHIFT;
+		ats_entries = AMDGPU_GMC_HOLE_START >> ats_entries;
 [Oak] This can be made a function or macro: GET_MAX_ATS_ENTRIES(level)
-	if (pte_support_ats) {
-		if (level == adev->vm_manager.root_level) {
-			ats_entries = amdgpu_vm_level_shift(adev, level);
-			ats_entries += AMDGPU_GPU_PAGE_SHIFT;
-			ats_entries = AMDGPU_GMC_HOLE_START >> ats_entries;
+		if (!bo->parent) {
 			ats_entries = min(ats_entries, entries);
 			entries -= ats_entries;
 		} else {
-			ats_entries = entries;
-			entries = 0;
+			struct amdgpu_bo *ancestor = bo;
+			struct amdgpu_vm_pt *pt;
+
+			do {
+				++level;
+				ancestor = ancestor->parent;
+			} while (ancestor);
+
+			pt = container_of(ancestor->vm_bo, struct amdgpu_vm_pt,
+					  base);
[Oak]: The above 10 lines can be made into a function: amdgpu_vm_get_bo_level_and_top_level_pt(struct amdgpu_bo *bo, unsigned *level, struct amdgpu_vm_pt **pt)
+			if ((pt - vm->root.entries) >= ats_entries) {
+				ats_entries = 0;
+			} else {
+				ats_entries = entries;
+				entries = 0;
+			}
 		}
 	} else {
 		ats_entries = 0;
@@ -908,7 +921,6 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,  {
 	struct amdgpu_vm_pt_cursor cursor;
 	struct amdgpu_bo *pt;
-	bool ats = false;
 	uint64_t eaddr;
 	int r;
 
@@ -918,9 +930,6 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 
 	eaddr = saddr + size - 1;
 
-	if (vm->pte_support_ats)
-		ats = saddr < AMDGPU_GMC_HOLE_START;
-
 	saddr /= AMDGPU_GPU_PAGE_SIZE;
 	eaddr /= AMDGPU_GPU_PAGE_SIZE;
 
@@ -969,7 +978,7 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 
 		amdgpu_vm_bo_base_init(&entry->base, vm, pt);
 
-		r = amdgpu_vm_clear_bo(adev, vm, pt, cursor.level, ats);
+		r = amdgpu_vm_clear_bo(adev, vm, pt);
 		if (r)
 			goto error_free_pt;
 	}
@@ -3044,9 +3053,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
 
-	r = amdgpu_vm_clear_bo(adev, vm, root,
-			       adev->vm_manager.root_level,
-			       vm->pte_support_ats);
+	r = amdgpu_vm_clear_bo(adev, vm, root);
 	if (r)
 		goto error_unreserve;
 
@@ -3141,9 +3148,8 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns
 	 * changing any other state, in case it fails.
 	 */
 	if (pte_support_ats != vm->pte_support_ats) {
-		r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
-			       adev->vm_manager.root_level,
-			       pte_support_ats);
+		vm->pte_support_ats = pte_support_ats;
+		r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo);
 		if (r)
 			goto free_idr;
 	}
@@ -3151,7 +3157,6 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns
 	/* Update VM state */
 	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
 				    AMDGPU_VM_USE_CPU_FOR_COMPUTE);
-	vm->pte_support_ats = pte_support_ats;
 	DRM_DEBUG_DRIVER("VM update mode is %s\n",
 			 vm->use_cpu_for_update ? "CPU" : "SDMA");
 	WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)),
--
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
       [not found]     ` <20190226124658.25334-3-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2019-03-08 14:04       ` Russell, Kent
       [not found]         ` <BN6PR12MB1618B9E60AC6E9B323042F1D854D0-/b2+HYfkarRqaFUXYJa4HgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: Russell, Kent @ 2019-03-08 14:04 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Hi Christian,

This patch ended up causing a VM Fault in KFDTest. Reverting just this patch addressed the issue:
[   82.703503] amdgpu 0000:0c:00.0: GPU fault detected: 146 0x0000480c for process  pid 0 thread  pid 0
[   82.703512] amdgpu 0000:0c:00.0:   VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x00001000
[   82.703516] amdgpu 0000:0c:00.0:   VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C
[   82.703522] amdgpu 0000:0c:00.0: VM fault (0x0c, vmid 8, pasid 32769) at page 4096, read from 'TC0' (0x54433000) (72)
[   82.703585] Evicting PASID 32769 queues

I am looking into it, but if you have any insight that would be great in helping to resolve it quickly.

 Kent
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
> Christian König
> Sent: Tuesday, February 26, 2019 7:47 AM
> To: amd-gfx@lists.freedesktop.org
> Subject: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
> 
> Let's start to allocate VM PDs/PTs on demand instead of pre-allocating them
> during mapping.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136 +++++-------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -
>  5 files changed, 39 insertions(+), 129 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 31e3953dcb6e..088e9b6b765b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -410,15 +410,7 @@ static int add_bo_to_vm(struct amdgpu_device
> *adev, struct kgd_mem *mem,
>  	if (p_bo_va_entry)
>  		*p_bo_va_entry = bo_va_entry;
> 
> -	/* Allocate new page tables if needed and validate
> -	 * them.
> -	 */
> -	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
> -	if (ret) {
> -		pr_err("Failed to allocate pts, err=%d\n", ret);
> -		goto err_alloc_pts;
> -	}
> -
> +	/* Allocate validate page tables if needed */
>  	ret = vm_validate_pt_pd_bos(vm);
>  	if (ret) {
>  		pr_err("validate_pt_pd_bos() failed\n"); diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> index 7e22be7ca68a..54dd02a898b9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device
> *adev, struct amdgpu_vm *vm,
>  		return -ENOMEM;
>  	}
> 
> -	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
> -				size);
> -	if (r) {
> -		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n",
> r);
> -		amdgpu_vm_bo_rmv(adev, *bo_va);
> -		ttm_eu_backoff_reservation(&ticket, &list);
> -		return r;
> -	}
> -
>  	r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
>  			     AMDGPU_PTE_READABLE |
> AMDGPU_PTE_WRITEABLE |
>  			     AMDGPU_PTE_EXECUTABLE);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 555285e329ed..fcaaac30e84b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -625,11 +625,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
> void *data,
> 
>  	switch (args->operation) {
>  	case AMDGPU_VA_OP_MAP:
> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
> >va_address,
> -					args->map_size);
> -		if (r)
> -			goto error_backoff;
> -
>  		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
>  		r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
>  				     args->offset_in_bo, args->map_size, @@ -
> 645,11 +640,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void
> *data,
>  						args->map_size);
>  		break;
>  	case AMDGPU_VA_OP_REPLACE:
> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
> >va_address,
> -					args->map_size);
> -		if (r)
> -			goto error_backoff;
> -
>  		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
>  		r = amdgpu_vm_bo_replace_map(adev, bo_va, args-
> >va_address,
>  					     args->offset_in_bo, args-
> >map_size, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 362436f4e856..dfad543fc000 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct
> amdgpu_device *adev,
>  	}
>  }
> 
> -/**
> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
> - *
> - * @adev: amdgpu_device pointer
> - * @vm: amdgpu_vm structure
> - * @start: start addr of the walk
> - * @cursor: state to initialize
> - *
> - * Start a walk and go directly to the leaf node.
> - */
> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,
> -				    struct amdgpu_vm *vm, uint64_t start,
> -				    struct amdgpu_vm_pt_cursor *cursor)
> -{
> -	amdgpu_vm_pt_start(adev, vm, start, cursor);
> -	while (amdgpu_vm_pt_descendant(adev, cursor));
> -}
> -
> -/**
> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
> - *
> - * @adev: amdgpu_device pointer
> - * @cursor: current state
> - *
> - * Walk the PD/PT tree to the next leaf node.
> - */
> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
> -				   struct amdgpu_vm_pt_cursor *cursor)
> -{
> -	amdgpu_vm_pt_next(adev, cursor);
> -	if (cursor->pfn != ~0ll)
> -		while (amdgpu_vm_pt_descendant(adev, cursor));
> -}
> -
> -/**
> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the
> hierarchy
> - */
> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)
> 	\
> -	for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor));
> 		\
> -	     (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev),
> &(cursor)))
> -
>  /**
>   * amdgpu_vm_pt_first_dfs - start a deep first search
>   *
> @@ -915,74 +874,51 @@ static void amdgpu_vm_bo_param(struct
> amdgpu_device *adev, struct amdgpu_vm *vm,
>   * Returns:
>   * 0 on success, errno otherwise.
>   */
> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> -			struct amdgpu_vm *vm,
> -			uint64_t saddr, uint64_t size)
> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> +			       struct amdgpu_vm *vm,
> +			       struct amdgpu_vm_pt_cursor *cursor)
>  {
> -	struct amdgpu_vm_pt_cursor cursor;
> +	struct amdgpu_vm_pt *entry = cursor->entry;
> +	struct amdgpu_bo_param bp;
>  	struct amdgpu_bo *pt;
> -	uint64_t eaddr;
>  	int r;
> 
> -	/* validate the parameters */
> -	if (saddr & AMDGPU_GPU_PAGE_MASK || size &
> AMDGPU_GPU_PAGE_MASK)
> -		return -EINVAL;
> +	if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
> +		unsigned num_entries;
> 
> -	eaddr = saddr + size - 1;
> -
> -	saddr /= AMDGPU_GPU_PAGE_SIZE;
> -	eaddr /= AMDGPU_GPU_PAGE_SIZE;
> -
> -	if (eaddr >= adev->vm_manager.max_pfn) {
> -		dev_err(adev->dev, "va above limit (0x%08llX >=
> 0x%08llX)\n",
> -			eaddr, adev->vm_manager.max_pfn);
> -		return -EINVAL;
> +		num_entries = amdgpu_vm_num_entries(adev, cursor-
> >level);
> +		entry->entries = kvmalloc_array(num_entries,
> +						sizeof(*entry->entries),
> +						GFP_KERNEL | __GFP_ZERO);
> +		if (!entry->entries)
> +			return -ENOMEM;
>  	}
> 
> -	for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {
> -		struct amdgpu_vm_pt *entry = cursor.entry;
> -		struct amdgpu_bo_param bp;
> -
> -		if (cursor.level < AMDGPU_VM_PTB) {
> -			unsigned num_entries;
> -
> -			num_entries = amdgpu_vm_num_entries(adev,
> cursor.level);
> -			entry->entries = kvmalloc_array(num_entries,
> -							sizeof(*entry-
> >entries),
> -							GFP_KERNEL |
> -							__GFP_ZERO);
> -			if (!entry->entries)
> -				return -ENOMEM;
> -		}
> -
> -
> -		if (entry->base.bo)
> -			continue;
> -
> -		amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
> -
> -		r = amdgpu_bo_create(adev, &bp, &pt);
> -		if (r)
> -			return r;
> -
> -		if (vm->use_cpu_for_update) {
> -			r = amdgpu_bo_kmap(pt, NULL);
> -			if (r)
> -				goto error_free_pt;
> -		}
> +	if (entry->base.bo)
> +		return 0;
> 
> -		/* Keep a reference to the root directory to avoid
> -		* freeing them up in the wrong order.
> -		*/
> -		pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
> +	amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
> 
> -		amdgpu_vm_bo_base_init(&entry->base, vm, pt);
> +	r = amdgpu_bo_create(adev, &bp, &pt);
> +	if (r)
> +		return r;
> 
> -		r = amdgpu_vm_clear_bo(adev, vm, pt);
> +	if (vm->use_cpu_for_update) {
> +		r = amdgpu_bo_kmap(pt, NULL);
>  		if (r)
>  			goto error_free_pt;
>  	}
> 
> +	/* Keep a reference to the root directory to avoid
> +	 * freeing them up in the wrong order.
> +	 */
> +	pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
> +	amdgpu_vm_bo_base_init(&entry->base, vm, pt);
> +
> +	r = amdgpu_vm_clear_bo(adev, vm, pt);
> +	if (r)
> +		goto error_free_pt;
> +
>  	return 0;
> 
>  error_free_pt:
> @@ -1627,6 +1563,7 @@ static int amdgpu_vm_update_ptes(struct
> amdgpu_pte_update_params *params,
>  	struct amdgpu_vm_pt_cursor cursor;
>  	uint64_t frag_start = start, frag_end;
>  	unsigned int frag;
> +	int r;
> 
>  	/* figure out the initial fragment */
>  	amdgpu_vm_fragment(params, frag_start, end, flags, &frag,
> &frag_end); @@ -1634,12 +1571,15 @@ static int
> amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>  	/* walk over the address space and update the PTs */
>  	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
>  	while (cursor.pfn < end) {
> -		struct amdgpu_bo *pt = cursor.entry->base.bo;
>  		unsigned shift, parent_shift, mask;
>  		uint64_t incr, entry_end, pe_start;
> +		struct amdgpu_bo *pt;
> 
> -		if (!pt)
> -			return -ENOENT;
> +		r = amdgpu_vm_alloc_pts(params->adev, params->vm,
> &cursor);
> +		if (r)
> +			return r;
> +
> +		pt = cursor.entry->base.bo;
> 
>  		/* The root level can't be a huge page */
>  		if (cursor.level == adev->vm_manager.root_level) { diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 81ff8177f092..116605c038d2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm);
> int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct
> amdgpu_vm *vm,
>  			      int (*callback)(void *p, struct amdgpu_bo *bo),
>  			      void *param);
> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> -			struct amdgpu_vm *vm,
> -			uint64_t saddr, uint64_t size);
>  int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
> bool need_pipe_sync);  int amdgpu_vm_update_directories(struct
> amdgpu_device *adev,
>  				 struct amdgpu_vm *vm);
> --
> 2.17.1
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
       [not found]         ` <BN6PR12MB1618B9E60AC6E9B323042F1D854D0-/b2+HYfkarRqaFUXYJa4HgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2019-03-08 14:14           ` Christian König
       [not found]             ` <2bd27a3b-9f96-b2e4-5070-3413a14e9c7f-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: Christian König @ 2019-03-08 14:14 UTC (permalink / raw)
  To: Russell, Kent, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

My best guess is that we forget to update the PDs somewhere. What 
hardware is that on?

Felix already mentioned that this could be problematic for the KFD.

Maybe he has an idea,
Christian.

Am 08.03.19 um 15:04 schrieb Russell, Kent:
> Hi Christian,
>
> This patch ended up causing a VM Fault in KFDTest. Reverting just this patch addressed the issue:
> [   82.703503] amdgpu 0000:0c:00.0: GPU fault detected: 146 0x0000480c for process  pid 0 thread  pid 0
> [   82.703512] amdgpu 0000:0c:00.0:   VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x00001000
> [   82.703516] amdgpu 0000:0c:00.0:   VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C
> [   82.703522] amdgpu 0000:0c:00.0: VM fault (0x0c, vmid 8, pasid 32769) at page 4096, read from 'TC0' (0x54433000) (72)
> [   82.703585] Evicting PASID 32769 queues
>
> I am looking into it, but if you have any insight that would be great in helping to resolve it quickly.
>
>   Kent
>> -----Original Message-----
>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
>> Christian König
>> Sent: Tuesday, February 26, 2019 7:47 AM
>> To: amd-gfx@lists.freedesktop.org
>> Subject: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>
>> Let's start to allocate VM PDs/PTs on demand instead of pre-allocating them
>> during mapping.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>> ---
>>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136 +++++-------------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -
>>   5 files changed, 39 insertions(+), 129 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index 31e3953dcb6e..088e9b6b765b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -410,15 +410,7 @@ static int add_bo_to_vm(struct amdgpu_device
>> *adev, struct kgd_mem *mem,
>>   	if (p_bo_va_entry)
>>   		*p_bo_va_entry = bo_va_entry;
>>
>> -	/* Allocate new page tables if needed and validate
>> -	 * them.
>> -	 */
>> -	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
>> -	if (ret) {
>> -		pr_err("Failed to allocate pts, err=%d\n", ret);
>> -		goto err_alloc_pts;
>> -	}
>> -
>> +	/* Allocate validate page tables if needed */
>>   	ret = vm_validate_pt_pd_bos(vm);
>>   	if (ret) {
>>   		pr_err("validate_pt_pd_bos() failed\n"); diff --git
>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>> index 7e22be7ca68a..54dd02a898b9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device
>> *adev, struct amdgpu_vm *vm,
>>   		return -ENOMEM;
>>   	}
>>
>> -	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
>> -				size);
>> -	if (r) {
>> -		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n",
>> r);
>> -		amdgpu_vm_bo_rmv(adev, *bo_va);
>> -		ttm_eu_backoff_reservation(&ticket, &list);
>> -		return r;
>> -	}
>> -
>>   	r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
>>   			     AMDGPU_PTE_READABLE |
>> AMDGPU_PTE_WRITEABLE |
>>   			     AMDGPU_PTE_EXECUTABLE);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index 555285e329ed..fcaaac30e84b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -625,11 +625,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>> void *data,
>>
>>   	switch (args->operation) {
>>   	case AMDGPU_VA_OP_MAP:
>> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
>>> va_address,
>> -					args->map_size);
>> -		if (r)
>> -			goto error_backoff;
>> -
>>   		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
>>   		r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
>>   				     args->offset_in_bo, args->map_size, @@ -
>> 645,11 +640,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void
>> *data,
>>   						args->map_size);
>>   		break;
>>   	case AMDGPU_VA_OP_REPLACE:
>> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
>>> va_address,
>> -					args->map_size);
>> -		if (r)
>> -			goto error_backoff;
>> -
>>   		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
>>   		r = amdgpu_vm_bo_replace_map(adev, bo_va, args-
>>> va_address,
>>   					     args->offset_in_bo, args-
>>> map_size, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 362436f4e856..dfad543fc000 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct
>> amdgpu_device *adev,
>>   	}
>>   }
>>
>> -/**
>> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
>> - *
>> - * @adev: amdgpu_device pointer
>> - * @vm: amdgpu_vm structure
>> - * @start: start addr of the walk
>> - * @cursor: state to initialize
>> - *
>> - * Start a walk and go directly to the leaf node.
>> - */
>> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,
>> -				    struct amdgpu_vm *vm, uint64_t start,
>> -				    struct amdgpu_vm_pt_cursor *cursor)
>> -{
>> -	amdgpu_vm_pt_start(adev, vm, start, cursor);
>> -	while (amdgpu_vm_pt_descendant(adev, cursor));
>> -}
>> -
>> -/**
>> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
>> - *
>> - * @adev: amdgpu_device pointer
>> - * @cursor: current state
>> - *
>> - * Walk the PD/PT tree to the next leaf node.
>> - */
>> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
>> -				   struct amdgpu_vm_pt_cursor *cursor)
>> -{
>> -	amdgpu_vm_pt_next(adev, cursor);
>> -	if (cursor->pfn != ~0ll)
>> -		while (amdgpu_vm_pt_descendant(adev, cursor));
>> -}
>> -
>> -/**
>> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the
>> hierarchy
>> - */
>> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)
>> 	\
>> -	for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor));
>> 		\
>> -	     (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev),
>> &(cursor)))
>> -
>>   /**
>>    * amdgpu_vm_pt_first_dfs - start a deep first search
>>    *
>> @@ -915,74 +874,51 @@ static void amdgpu_vm_bo_param(struct
>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>    * Returns:
>>    * 0 on success, errno otherwise.
>>    */
>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>> -			struct amdgpu_vm *vm,
>> -			uint64_t saddr, uint64_t size)
>> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>> +			       struct amdgpu_vm *vm,
>> +			       struct amdgpu_vm_pt_cursor *cursor)
>>   {
>> -	struct amdgpu_vm_pt_cursor cursor;
>> +	struct amdgpu_vm_pt *entry = cursor->entry;
>> +	struct amdgpu_bo_param bp;
>>   	struct amdgpu_bo *pt;
>> -	uint64_t eaddr;
>>   	int r;
>>
>> -	/* validate the parameters */
>> -	if (saddr & AMDGPU_GPU_PAGE_MASK || size &
>> AMDGPU_GPU_PAGE_MASK)
>> -		return -EINVAL;
>> +	if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
>> +		unsigned num_entries;
>>
>> -	eaddr = saddr + size - 1;
>> -
>> -	saddr /= AMDGPU_GPU_PAGE_SIZE;
>> -	eaddr /= AMDGPU_GPU_PAGE_SIZE;
>> -
>> -	if (eaddr >= adev->vm_manager.max_pfn) {
>> -		dev_err(adev->dev, "va above limit (0x%08llX >=
>> 0x%08llX)\n",
>> -			eaddr, adev->vm_manager.max_pfn);
>> -		return -EINVAL;
>> +		num_entries = amdgpu_vm_num_entries(adev, cursor-
>>> level);
>> +		entry->entries = kvmalloc_array(num_entries,
>> +						sizeof(*entry->entries),
>> +						GFP_KERNEL | __GFP_ZERO);
>> +		if (!entry->entries)
>> +			return -ENOMEM;
>>   	}
>>
>> -	for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {
>> -		struct amdgpu_vm_pt *entry = cursor.entry;
>> -		struct amdgpu_bo_param bp;
>> -
>> -		if (cursor.level < AMDGPU_VM_PTB) {
>> -			unsigned num_entries;
>> -
>> -			num_entries = amdgpu_vm_num_entries(adev,
>> cursor.level);
>> -			entry->entries = kvmalloc_array(num_entries,
>> -							sizeof(*entry-
>>> entries),
>> -							GFP_KERNEL |
>> -							__GFP_ZERO);
>> -			if (!entry->entries)
>> -				return -ENOMEM;
>> -		}
>> -
>> -
>> -		if (entry->base.bo)
>> -			continue;
>> -
>> -		amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
>> -
>> -		r = amdgpu_bo_create(adev, &bp, &pt);
>> -		if (r)
>> -			return r;
>> -
>> -		if (vm->use_cpu_for_update) {
>> -			r = amdgpu_bo_kmap(pt, NULL);
>> -			if (r)
>> -				goto error_free_pt;
>> -		}
>> +	if (entry->base.bo)
>> +		return 0;
>>
>> -		/* Keep a reference to the root directory to avoid
>> -		* freeing them up in the wrong order.
>> -		*/
>> -		pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
>> +	amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
>>
>> -		amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>> +	r = amdgpu_bo_create(adev, &bp, &pt);
>> +	if (r)
>> +		return r;
>>
>> -		r = amdgpu_vm_clear_bo(adev, vm, pt);
>> +	if (vm->use_cpu_for_update) {
>> +		r = amdgpu_bo_kmap(pt, NULL);
>>   		if (r)
>>   			goto error_free_pt;
>>   	}
>>
>> +	/* Keep a reference to the root directory to avoid
>> +	 * freeing them up in the wrong order.
>> +	 */
>> +	pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
>> +	amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>> +
>> +	r = amdgpu_vm_clear_bo(adev, vm, pt);
>> +	if (r)
>> +		goto error_free_pt;
>> +
>>   	return 0;
>>
>>   error_free_pt:
>> @@ -1627,6 +1563,7 @@ static int amdgpu_vm_update_ptes(struct
>> amdgpu_pte_update_params *params,
>>   	struct amdgpu_vm_pt_cursor cursor;
>>   	uint64_t frag_start = start, frag_end;
>>   	unsigned int frag;
>> +	int r;
>>
>>   	/* figure out the initial fragment */
>>   	amdgpu_vm_fragment(params, frag_start, end, flags, &frag,
>> &frag_end); @@ -1634,12 +1571,15 @@ static int
>> amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>>   	/* walk over the address space and update the PTs */
>>   	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
>>   	while (cursor.pfn < end) {
>> -		struct amdgpu_bo *pt = cursor.entry->base.bo;
>>   		unsigned shift, parent_shift, mask;
>>   		uint64_t incr, entry_end, pe_start;
>> +		struct amdgpu_bo *pt;
>>
>> -		if (!pt)
>> -			return -ENOENT;
>> +		r = amdgpu_vm_alloc_pts(params->adev, params->vm,
>> &cursor);
>> +		if (r)
>> +			return r;
>> +
>> +		pt = cursor.entry->base.bo;
>>
>>   		/* The root level can't be a huge page */
>>   		if (cursor.level == adev->vm_manager.root_level) { diff --git
>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 81ff8177f092..116605c038d2 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm);
>> int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct
>> amdgpu_vm *vm,
>>   			      int (*callback)(void *p, struct amdgpu_bo *bo),
>>   			      void *param);
>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>> -			struct amdgpu_vm *vm,
>> -			uint64_t saddr, uint64_t size);
>>   int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
>> bool need_pipe_sync);  int amdgpu_vm_update_directories(struct
>> amdgpu_device *adev,
>>   				 struct amdgpu_vm *vm);
>> --
>> 2.17.1
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
       [not found]             ` <2bd27a3b-9f96-b2e4-5070-3413a14e9c7f-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-03-08 14:17               ` Russell, Kent
  2019-03-09  4:15               ` Kuehling, Felix
  1 sibling, 0 replies; 26+ messages in thread
From: Russell, Kent @ 2019-03-08 14:17 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

That dmesg was from Fiji, but it occurred on Vega10 as well. 

 Kent

> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Friday, March 08, 2019 9:14 AM
> To: Russell, Kent <Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
> 
> My best guess is that we forget somewhere to update the PDs. What
> hardware is that on?
> 
> Felix already mentioned that this could be problematic for the KFD.
> 
> Maybe he has an idea,
> Christian.
> 
> Am 08.03.19 um 15:04 schrieb Russell, Kent:
> > Hi Christian,
> >
> > This patch ended up causing a VM Fault in KFDTest. Reverting just this
> patch addressed the issue:
> > [   82.703503] amdgpu 0000:0c:00.0: GPU fault detected: 146 0x0000480c for
> process  pid 0 thread  pid 0
> > [   82.703512] amdgpu 0000:0c:00.0:
> VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x00001000
> > [   82.703516] amdgpu 0000:0c:00.0:
> VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C
> > [   82.703522] amdgpu 0000:0c:00.0: VM fault (0x0c, vmid 8, pasid 32769) at
> page 4096, read from 'TC0' (0x54433000) (72)
> > [   82.703585] Evicting PASID 32769 queues
> >
> > I am looking into it, but if you have any insight that would be great in
> helping to resolve it quickly.
> >
> >   Kent
> >> -----Original Message-----
> >> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
> >> Christian König
> >> Sent: Tuesday, February 26, 2019 7:47 AM
> >> To: amd-gfx@lists.freedesktop.org
> >> Subject: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
> >>
> >> Let's start to allocate VM PDs/PTs on demand instead of
> >> pre-allocating them during mapping.
> >>
> >> Signed-off-by: Christian König <christian.koenig@amd.com>
> >> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
> >> ---
> >>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10 +-
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136 +++++------------
> -
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -
> >>   5 files changed, 39 insertions(+), 129 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >> index 31e3953dcb6e..088e9b6b765b 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >> @@ -410,15 +410,7 @@ static int add_bo_to_vm(struct amdgpu_device
> >> *adev, struct kgd_mem *mem,
> >>   	if (p_bo_va_entry)
> >>   		*p_bo_va_entry = bo_va_entry;
> >>
> >> -	/* Allocate new page tables if needed and validate
> >> -	 * them.
> >> -	 */
> >> -	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
> >> -	if (ret) {
> >> -		pr_err("Failed to allocate pts, err=%d\n", ret);
> >> -		goto err_alloc_pts;
> >> -	}
> >> -
> >> +	/* Allocate validate page tables if needed */
> >>   	ret = vm_validate_pt_pd_bos(vm);
> >>   	if (ret) {
> >>   		pr_err("validate_pt_pd_bos() failed\n"); diff --git
> >> a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >> index 7e22be7ca68a..54dd02a898b9 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device
> >> *adev, struct amdgpu_vm *vm,
> >>   		return -ENOMEM;
> >>   	}
> >>
> >> -	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
> >> -				size);
> >> -	if (r) {
> >> -		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n",
> >> r);
> >> -		amdgpu_vm_bo_rmv(adev, *bo_va);
> >> -		ttm_eu_backoff_reservation(&ticket, &list);
> >> -		return r;
> >> -	}
> >> -
> >>   	r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
> >>   			     AMDGPU_PTE_READABLE |
> >> AMDGPU_PTE_WRITEABLE |
> >>   			     AMDGPU_PTE_EXECUTABLE);
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >> index 555285e329ed..fcaaac30e84b 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >> @@ -625,11 +625,6 @@ int amdgpu_gem_va_ioctl(struct drm_device
> *dev,
> >> void *data,
> >>
> >>   	switch (args->operation) {
> >>   	case AMDGPU_VA_OP_MAP:
> >> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
> >>> va_address,
> >> -					args->map_size);
> >> -		if (r)
> >> -			goto error_backoff;
> >> -
> >>   		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
> >>   		r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
> >>   				     args->offset_in_bo, args->map_size, @@ -
> >> 645,11 +640,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
> void
> >> *data,
> >>   						args->map_size);
> >>   		break;
> >>   	case AMDGPU_VA_OP_REPLACE:
> >> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
> >>> va_address,
> >> -					args->map_size);
> >> -		if (r)
> >> -			goto error_backoff;
> >> -
> >>   		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
> >>   		r = amdgpu_vm_bo_replace_map(adev, bo_va, args-
> >>> va_address,
> >>   					     args->offset_in_bo, args-
> >>> map_size, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> index 362436f4e856..dfad543fc000 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct
> >> amdgpu_device *adev,
> >>   	}
> >>   }
> >>
> >> -/**
> >> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
> >> - *
> >> - * @adev: amdgpu_device pointer
> >> - * @vm: amdgpu_vm structure
> >> - * @start: start addr of the walk
> >> - * @cursor: state to initialize
> >> - *
> >> - * Start a walk and go directly to the leaf node.
> >> - */
> >> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,
> >> -				    struct amdgpu_vm *vm, uint64_t start,
> >> -				    struct amdgpu_vm_pt_cursor *cursor)
> >> -{
> >> -	amdgpu_vm_pt_start(adev, vm, start, cursor);
> >> -	while (amdgpu_vm_pt_descendant(adev, cursor));
> >> -}
> >> -
> >> -/**
> >> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
> >> - *
> >> - * @adev: amdgpu_device pointer
> >> - * @cursor: current state
> >> - *
> >> - * Walk the PD/PT tree to the next leaf node.
> >> - */
> >> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
> >> -				   struct amdgpu_vm_pt_cursor *cursor)
> >> -{
> >> -	amdgpu_vm_pt_next(adev, cursor);
> >> -	if (cursor->pfn != ~0ll)
> >> -		while (amdgpu_vm_pt_descendant(adev, cursor));
> >> -}
> >> -
> >> -/**
> >> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the
> >> hierarchy
> >> - */
> >> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)
> >> 	\
> >> -	for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor));
> >> 		\
> >> -	     (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev),
> >> &(cursor)))
> >> -
> >>   /**
> >>    * amdgpu_vm_pt_first_dfs - start a deep first search
> >>    *
> >> @@ -915,74 +874,51 @@ static void amdgpu_vm_bo_param(struct
> >> amdgpu_device *adev, struct amdgpu_vm *vm,
> >>    * Returns:
> >>    * 0 on success, errno otherwise.
> >>    */
> >> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> >> -			struct amdgpu_vm *vm,
> >> -			uint64_t saddr, uint64_t size)
> >> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> >> +			       struct amdgpu_vm *vm,
> >> +			       struct amdgpu_vm_pt_cursor *cursor)
> >>   {
> >> -	struct amdgpu_vm_pt_cursor cursor;
> >> +	struct amdgpu_vm_pt *entry = cursor->entry;
> >> +	struct amdgpu_bo_param bp;
> >>   	struct amdgpu_bo *pt;
> >> -	uint64_t eaddr;
> >>   	int r;
> >>
> >> -	/* validate the parameters */
> >> -	if (saddr & AMDGPU_GPU_PAGE_MASK || size &
> >> AMDGPU_GPU_PAGE_MASK)
> >> -		return -EINVAL;
> >> +	if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
> >> +		unsigned num_entries;
> >>
> >> -	eaddr = saddr + size - 1;
> >> -
> >> -	saddr /= AMDGPU_GPU_PAGE_SIZE;
> >> -	eaddr /= AMDGPU_GPU_PAGE_SIZE;
> >> -
> >> -	if (eaddr >= adev->vm_manager.max_pfn) {
> >> -		dev_err(adev->dev, "va above limit (0x%08llX >=
> >> 0x%08llX)\n",
> >> -			eaddr, adev->vm_manager.max_pfn);
> >> -		return -EINVAL;
> >> +		num_entries = amdgpu_vm_num_entries(adev, cursor-
> >>> level);
> >> +		entry->entries = kvmalloc_array(num_entries,
> >> +						sizeof(*entry->entries),
> >> +						GFP_KERNEL | __GFP_ZERO);
> >> +		if (!entry->entries)
> >> +			return -ENOMEM;
> >>   	}
> >>
> >> -	for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {
> >> -		struct amdgpu_vm_pt *entry = cursor.entry;
> >> -		struct amdgpu_bo_param bp;
> >> -
> >> -		if (cursor.level < AMDGPU_VM_PTB) {
> >> -			unsigned num_entries;
> >> -
> >> -			num_entries = amdgpu_vm_num_entries(adev,
> >> cursor.level);
> >> -			entry->entries = kvmalloc_array(num_entries,
> >> -							sizeof(*entry-
> >>> entries),
> >> -							GFP_KERNEL |
> >> -							__GFP_ZERO);
> >> -			if (!entry->entries)
> >> -				return -ENOMEM;
> >> -		}
> >> -
> >> -
> >> -		if (entry->base.bo)
> >> -			continue;
> >> -
> >> -		amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
> >> -
> >> -		r = amdgpu_bo_create(adev, &bp, &pt);
> >> -		if (r)
> >> -			return r;
> >> -
> >> -		if (vm->use_cpu_for_update) {
> >> -			r = amdgpu_bo_kmap(pt, NULL);
> >> -			if (r)
> >> -				goto error_free_pt;
> >> -		}
> >> +	if (entry->base.bo)
> >> +		return 0;
> >>
> >> -		/* Keep a reference to the root directory to avoid
> >> -		* freeing them up in the wrong order.
> >> -		*/
> >> -		pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
> >> +	amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
> >>
> >> -		amdgpu_vm_bo_base_init(&entry->base, vm, pt);
> >> +	r = amdgpu_bo_create(adev, &bp, &pt);
> >> +	if (r)
> >> +		return r;
> >>
> >> -		r = amdgpu_vm_clear_bo(adev, vm, pt);
> >> +	if (vm->use_cpu_for_update) {
> >> +		r = amdgpu_bo_kmap(pt, NULL);
> >>   		if (r)
> >>   			goto error_free_pt;
> >>   	}
> >>
> >> +	/* Keep a reference to the root directory to avoid
> >> +	 * freeing them up in the wrong order.
> >> +	 */
> >> +	pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
> >> +	amdgpu_vm_bo_base_init(&entry->base, vm, pt);
> >> +
> >> +	r = amdgpu_vm_clear_bo(adev, vm, pt);
> >> +	if (r)
> >> +		goto error_free_pt;
> >> +
> >>   	return 0;
> >>
> >>   error_free_pt:
> >> @@ -1627,6 +1563,7 @@ static int amdgpu_vm_update_ptes(struct
> >> amdgpu_pte_update_params *params,
> >>   	struct amdgpu_vm_pt_cursor cursor;
> >>   	uint64_t frag_start = start, frag_end;
> >>   	unsigned int frag;
> >> +	int r;
> >>
> >>   	/* figure out the initial fragment */
> >>   	amdgpu_vm_fragment(params, frag_start, end, flags, &frag,
> >> &frag_end); @@ -1634,12 +1571,15 @@ static int
> >> amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
> >>   	/* walk over the address space and update the PTs */
> >>   	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
> >>   	while (cursor.pfn < end) {
> >> -		struct amdgpu_bo *pt = cursor.entry->base.bo;
> >>   		unsigned shift, parent_shift, mask;
> >>   		uint64_t incr, entry_end, pe_start;
> >> +		struct amdgpu_bo *pt;
> >>
> >> -		if (!pt)
> >> -			return -ENOENT;
> >> +		r = amdgpu_vm_alloc_pts(params->adev, params->vm,
> >> &cursor);
> >> +		if (r)
> >> +			return r;
> >> +
> >> +		pt = cursor.entry->base.bo;
> >>
> >>   		/* The root level can't be a huge page */
> >>   		if (cursor.level == adev->vm_manager.root_level) { diff --git
> >> a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >> index 81ff8177f092..116605c038d2 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm);
> int
> >> amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct
> >> amdgpu_vm *vm,
> >>   			      int (*callback)(void *p, struct amdgpu_bo *bo),
> >>   			      void *param);
> >> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> >> -			struct amdgpu_vm *vm,
> >> -			uint64_t saddr, uint64_t size);
> >>   int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job
> >> *job, bool need_pipe_sync);  int amdgpu_vm_update_directories(struct
> >> amdgpu_device *adev,
> >>   				 struct amdgpu_vm *vm);
> >> --
> >> 2.17.1
> >>
> >> _______________________________________________
> >> amd-gfx mailing list
> >> amd-gfx@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
       [not found]             ` <2bd27a3b-9f96-b2e4-5070-3413a14e9c7f-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  2019-03-08 14:17               ` Russell, Kent
@ 2019-03-09  4:15               ` Kuehling, Felix
       [not found]                 ` <BYAPR12MB3176F9AA995D5C7DFAD5ABC1924E0-ZGDeBxoHBPks/z/azo2A2AdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  1 sibling, 1 reply; 26+ messages in thread
From: Kuehling, Felix @ 2019-03-09  4:15 UTC (permalink / raw)
  To: Koenig, Christian, Russell, Kent,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

My concerns were related to eviction fence handling. It would manifest as unnecessary eviction callbacks into KFD that aren't caused by real evictions. I addressed that with a previous patch series that removed the need to remove eviction fences and add them back around page table updates in amdgpu_amdkfd_gpuvm.c.

I don't know what's going on here. I can probably take a look on Monday. I haven't considered what changed with respect to PD updates.

Kent, can we temporarily revert the offending change in amd-kfd-staging just to unblock the merge?

Christian, I think KFD is currently broken on amd-staging-drm-next. If we're serious about supporting KFD upstream, you may also want to consider reverting your change there for now. Also consider building the Thunk and kfdtest so you can do quick smoke tests locally whenever you make amdgpu_vm changes that can affect KFD. https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface

Regards,
  Felix

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Christian König
Sent: Friday, March 08, 2019 9:14 AM
To: Russell, Kent <Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand

My best guess is that we forget somewhere to update the PDs. What hardware is that on?

Felix already mentioned that this could be problematic for the KFD.

Maybe he has an idea,
Christian.

Am 08.03.19 um 15:04 schrieb Russell, Kent:
> Hi Christian,
>
> This patch ended up causing a VM Fault in KFDTest. Reverting just this patch addressed the issue:
> [   82.703503] amdgpu 0000:0c:00.0: GPU fault detected: 146 0x0000480c for process  pid 0 thread  pid 0
> [   82.703512] amdgpu 0000:0c:00.0:   VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x00001000
> [   82.703516] amdgpu 0000:0c:00.0:   VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C
> [   82.703522] amdgpu 0000:0c:00.0: VM fault (0x0c, vmid 8, pasid 32769) at page 4096, read from 'TC0' (0x54433000) (72)
> [   82.703585] Evicting PASID 32769 queues
>
> I am looking into it, but if you have any insight that would be great in helping to resolve it quickly.
>
>   Kent
>> -----Original Message-----
>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of 
>> Christian König
>> Sent: Tuesday, February 26, 2019 7:47 AM
>> To: amd-gfx@lists.freedesktop.org
>> Subject: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>
>> Let's start to allocate VM PDs/PTs on demand instead of 
>> pre-allocating them during mapping.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>> ---
>>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136 +++++-------------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -
>>   5 files changed, 39 insertions(+), 129 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index 31e3953dcb6e..088e9b6b765b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -410,15 +410,7 @@ static int add_bo_to_vm(struct amdgpu_device 
>> *adev, struct kgd_mem *mem,
>>   	if (p_bo_va_entry)
>>   		*p_bo_va_entry = bo_va_entry;
>>
>> -	/* Allocate new page tables if needed and validate
>> -	 * them.
>> -	 */
>> -	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
>> -	if (ret) {
>> -		pr_err("Failed to allocate pts, err=%d\n", ret);
>> -		goto err_alloc_pts;
>> -	}
>> -
>> +	/* Allocate validate page tables if needed */
>>   	ret = vm_validate_pt_pd_bos(vm);
>>   	if (ret) {
>>   		pr_err("validate_pt_pd_bos() failed\n"); diff --git 
>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>> index 7e22be7ca68a..54dd02a898b9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device 
>> *adev, struct amdgpu_vm *vm,
>>   		return -ENOMEM;
>>   	}
>>
>> -	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
>> -				size);
>> -	if (r) {
>> -		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n",
>> r);
>> -		amdgpu_vm_bo_rmv(adev, *bo_va);
>> -		ttm_eu_backoff_reservation(&ticket, &list);
>> -		return r;
>> -	}
>> -
>>   	r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
>>   			     AMDGPU_PTE_READABLE |
>> AMDGPU_PTE_WRITEABLE |
>>   			     AMDGPU_PTE_EXECUTABLE);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index 555285e329ed..fcaaac30e84b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -625,11 +625,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, 
>> void *data,
>>
>>   	switch (args->operation) {
>>   	case AMDGPU_VA_OP_MAP:
>> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
>>> va_address,
>> -					args->map_size);
>> -		if (r)
>> -			goto error_backoff;
>> -
>>   		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
>>   		r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
>>   				     args->offset_in_bo, args->map_size, @@ -
>> 645,11 +640,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void 
>> *data,
>>   						args->map_size);
>>   		break;
>>   	case AMDGPU_VA_OP_REPLACE:
>> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
>>> va_address,
>> -					args->map_size);
>> -		if (r)
>> -			goto error_backoff;
>> -
>>   		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
>>   		r = amdgpu_vm_bo_replace_map(adev, bo_va, args-
>>> va_address,
>>   					     args->offset_in_bo, args-
>>> map_size, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 362436f4e856..dfad543fc000 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct 
>> amdgpu_device *adev,
>>   	}
>>   }
>>
>> -/**
>> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
>> - *
>> - * @adev: amdgpu_device pointer
>> - * @vm: amdgpu_vm structure
>> - * @start: start addr of the walk
>> - * @cursor: state to initialize
>> - *
>> - * Start a walk and go directly to the leaf node.
>> - */
>> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,
>> -				    struct amdgpu_vm *vm, uint64_t start,
>> -				    struct amdgpu_vm_pt_cursor *cursor)
>> -{
>> -	amdgpu_vm_pt_start(adev, vm, start, cursor);
>> -	while (amdgpu_vm_pt_descendant(adev, cursor));
>> -}
>> -
>> -/**
>> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
>> - *
>> - * @adev: amdgpu_device pointer
>> - * @cursor: current state
>> - *
>> - * Walk the PD/PT tree to the next leaf node.
>> - */
>> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
>> -				   struct amdgpu_vm_pt_cursor *cursor)
>> -{
>> -	amdgpu_vm_pt_next(adev, cursor);
>> -	if (cursor->pfn != ~0ll)
>> -		while (amdgpu_vm_pt_descendant(adev, cursor));
>> -}
>> -
>> -/**
>> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the 
>> hierarchy
>> - */
>> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)
>> 	\
>> -	for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor));
>> 		\
>> -	     (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev),
>> &(cursor)))
>> -
>>   /**
>>    * amdgpu_vm_pt_first_dfs - start a deep first search
>>    *
>> @@ -915,74 +874,51 @@ static void amdgpu_vm_bo_param(struct 
>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>    * Returns:
>>    * 0 on success, errno otherwise.
>>    */
>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>> -			struct amdgpu_vm *vm,
>> -			uint64_t saddr, uint64_t size)
>> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>> +			       struct amdgpu_vm *vm,
>> +			       struct amdgpu_vm_pt_cursor *cursor)
>>   {
>> -	struct amdgpu_vm_pt_cursor cursor;
>> +	struct amdgpu_vm_pt *entry = cursor->entry;
>> +	struct amdgpu_bo_param bp;
>>   	struct amdgpu_bo *pt;
>> -	uint64_t eaddr;
>>   	int r;
>>
>> -	/* validate the parameters */
>> -	if (saddr & AMDGPU_GPU_PAGE_MASK || size &
>> AMDGPU_GPU_PAGE_MASK)
>> -		return -EINVAL;
>> +	if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
>> +		unsigned num_entries;
>>
>> -	eaddr = saddr + size - 1;
>> -
>> -	saddr /= AMDGPU_GPU_PAGE_SIZE;
>> -	eaddr /= AMDGPU_GPU_PAGE_SIZE;
>> -
>> -	if (eaddr >= adev->vm_manager.max_pfn) {
>> -		dev_err(adev->dev, "va above limit (0x%08llX >=
>> 0x%08llX)\n",
>> -			eaddr, adev->vm_manager.max_pfn);
>> -		return -EINVAL;
>> +		num_entries = amdgpu_vm_num_entries(adev, cursor-
>>> level);
>> +		entry->entries = kvmalloc_array(num_entries,
>> +						sizeof(*entry->entries),
>> +						GFP_KERNEL | __GFP_ZERO);
>> +		if (!entry->entries)
>> +			return -ENOMEM;
>>   	}
>>
>> -	for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {
>> -		struct amdgpu_vm_pt *entry = cursor.entry;
>> -		struct amdgpu_bo_param bp;
>> -
>> -		if (cursor.level < AMDGPU_VM_PTB) {
>> -			unsigned num_entries;
>> -
>> -			num_entries = amdgpu_vm_num_entries(adev,
>> cursor.level);
>> -			entry->entries = kvmalloc_array(num_entries,
>> -							sizeof(*entry-
>>> entries),
>> -							GFP_KERNEL |
>> -							__GFP_ZERO);
>> -			if (!entry->entries)
>> -				return -ENOMEM;
>> -		}
>> -
>> -
>> -		if (entry->base.bo)
>> -			continue;
>> -
>> -		amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
>> -
>> -		r = amdgpu_bo_create(adev, &bp, &pt);
>> -		if (r)
>> -			return r;
>> -
>> -		if (vm->use_cpu_for_update) {
>> -			r = amdgpu_bo_kmap(pt, NULL);
>> -			if (r)
>> -				goto error_free_pt;
>> -		}
>> +	if (entry->base.bo)
>> +		return 0;
>>
>> -		/* Keep a reference to the root directory to avoid
>> -		* freeing them up in the wrong order.
>> -		*/
>> -		pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
>> +	amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
>>
>> -		amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>> +	r = amdgpu_bo_create(adev, &bp, &pt);
>> +	if (r)
>> +		return r;
>>
>> -		r = amdgpu_vm_clear_bo(adev, vm, pt);
>> +	if (vm->use_cpu_for_update) {
>> +		r = amdgpu_bo_kmap(pt, NULL);
>>   		if (r)
>>   			goto error_free_pt;
>>   	}
>>
>> +	/* Keep a reference to the root directory to avoid
>> +	 * freeing them up in the wrong order.
>> +	 */
>> +	pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
>> +	amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>> +
>> +	r = amdgpu_vm_clear_bo(adev, vm, pt);
>> +	if (r)
>> +		goto error_free_pt;
>> +
>>   	return 0;
>>
>>   error_free_pt:
>> @@ -1627,6 +1563,7 @@ static int amdgpu_vm_update_ptes(struct 
>> amdgpu_pte_update_params *params,
>>   	struct amdgpu_vm_pt_cursor cursor;
>>   	uint64_t frag_start = start, frag_end;
>>   	unsigned int frag;
>> +	int r;
>>
>>   	/* figure out the initial fragment */
>>   	amdgpu_vm_fragment(params, frag_start, end, flags, &frag, 
>> &frag_end); @@ -1634,12 +1571,15 @@ static int 
>> amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>>   	/* walk over the address space and update the PTs */
>>   	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
>>   	while (cursor.pfn < end) {
>> -		struct amdgpu_bo *pt = cursor.entry->base.bo;
>>   		unsigned shift, parent_shift, mask;
>>   		uint64_t incr, entry_end, pe_start;
>> +		struct amdgpu_bo *pt;
>>
>> -		if (!pt)
>> -			return -ENOENT;
>> +		r = amdgpu_vm_alloc_pts(params->adev, params->vm,
>> &cursor);
>> +		if (r)
>> +			return r;
>> +
>> +		pt = cursor.entry->base.bo;
>>
>>   		/* The root level can't be a huge page */
>>   		if (cursor.level == adev->vm_manager.root_level) { diff --git 
>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 81ff8177f092..116605c038d2 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm); int 
>> amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct 
>> amdgpu_vm *vm,
>>   			      int (*callback)(void *p, struct amdgpu_bo *bo),
>>   			      void *param);
>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>> -			struct amdgpu_vm *vm,
>> -			uint64_t saddr, uint64_t size);
>>   int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job 
>> *job, bool need_pipe_sync);  int amdgpu_vm_update_directories(struct
>> amdgpu_device *adev,
>>   				 struct amdgpu_vm *vm);
>> --
>> 2.17.1
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
       [not found]                 ` <BYAPR12MB3176F9AA995D5C7DFAD5ABC1924E0-ZGDeBxoHBPks/z/azo2A2AdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2019-03-11  9:49                   ` Russell, Kent
       [not found]                     ` <BN6PR12MB16181059CE65EF869EB044A985480-/b2+HYfkarRqaFUXYJa4HgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: Russell, Kent @ 2019-03-11  9:49 UTC (permalink / raw)
  To: Kuehling, Felix, Koenig, Christian,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From what I've been able to dig through, the VM Fault seems to occur right after a doorbell mmap, but that's as far as I got. I can try to revert it in today's merge and see how things go.

 Kent

> -----Original Message-----
> From: Kuehling, Felix
> Sent: Friday, March 08, 2019 11:16 PM
> To: Koenig, Christian <Christian.Koenig@amd.com>; Russell, Kent
> <Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
> 
> My concerns were related to eviction fence handling. It would manifest as
> unnecessary eviction callbacks into KFD that aren't caused by real evictions. I
> addressed that with a previous patch series that removed the need to
> remove eviction fences and add them back around page table updates in
> amdgpu_amdkfd_gpuvm.c.
> 
> I don't know what's going on here. I can probably take a look on Monday. I
> haven't considered what changed with respect to PD updates.
> 
> Kent, can we temporarily revert the offending change in amd-kfd-staging
> just to unblock the merge?
> 
> Christian, I think KFD is currently broken on amd-staging-drm-next. If we're
> serious about supporting KFD upstream, you may also want to consider
> reverting your change there for now. Also consider building the Thunk and
> kfdtest so you can do quick smoke tests locally whenever you make
> amdgpu_vm changes that can affect KFD.
> https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface
> 
> Regards,
>   Felix
> 
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
> Christian König
> Sent: Friday, March 08, 2019 9:14 AM
> To: Russell, Kent <Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
> 
> My best guess is that we forget somewhere to update the PDs. What
> hardware is that on?
> 
> Felix already mentioned that this could be problematic for the KFD.
> 
> Maybe he has an idea,
> Christian.
> 
> Am 08.03.19 um 15:04 schrieb Russell, Kent:
> > Hi Christian,
> >
> > This patch ended up causing a VM Fault in KFDTest. Reverting just this
> patch addressed the issue:
> > [   82.703503] amdgpu 0000:0c:00.0: GPU fault detected: 146 0x0000480c for
> process  pid 0 thread  pid 0
> > [   82.703512] amdgpu 0000:0c:00.0:
> VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x00001000
> > [   82.703516] amdgpu 0000:0c:00.0:
> VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C
> > [   82.703522] amdgpu 0000:0c:00.0: VM fault (0x0c, vmid 8, pasid 32769) at
> page 4096, read from 'TC0' (0x54433000) (72)
> > [   82.703585] Evicting PASID 32769 queues
> >
> > I am looking into it, but if you have any insight that would be great in
> helping to resolve it quickly.
> >
> >   Kent
> >> -----Original Message-----
> >> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
> >> Christian König
> >> Sent: Tuesday, February 26, 2019 7:47 AM
> >> To: amd-gfx@lists.freedesktop.org
> >> Subject: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
> >>
> >> Let's start to allocate VM PDs/PTs on demand instead of
> >> pre-allocating them during mapping.
> >>
> >> Signed-off-by: Christian König <christian.koenig@amd.com>
> >> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
> >> ---
> >>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10 +-
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136 +++++------------
> -
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -
> >>   5 files changed, 39 insertions(+), 129 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >> index 31e3953dcb6e..088e9b6b765b 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >> @@ -410,15 +410,7 @@ static int add_bo_to_vm(struct amdgpu_device
> >> *adev, struct kgd_mem *mem,
> >>   	if (p_bo_va_entry)
> >>   		*p_bo_va_entry = bo_va_entry;
> >>
> >> -	/* Allocate new page tables if needed and validate
> >> -	 * them.
> >> -	 */
> >> -	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
> >> -	if (ret) {
> >> -		pr_err("Failed to allocate pts, err=%d\n", ret);
> >> -		goto err_alloc_pts;
> >> -	}
> >> -
> >> +	/* Allocate validate page tables if needed */
> >>   	ret = vm_validate_pt_pd_bos(vm);
> >>   	if (ret) {
> >>   		pr_err("validate_pt_pd_bos() failed\n"); diff --git
> >> a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >> index 7e22be7ca68a..54dd02a898b9 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device
> >> *adev, struct amdgpu_vm *vm,
> >>   		return -ENOMEM;
> >>   	}
> >>
> >> -	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
> >> -				size);
> >> -	if (r) {
> >> -		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n",
> >> r);
> >> -		amdgpu_vm_bo_rmv(adev, *bo_va);
> >> -		ttm_eu_backoff_reservation(&ticket, &list);
> >> -		return r;
> >> -	}
> >> -
> >>   	r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
> >>   			     AMDGPU_PTE_READABLE |
> >> AMDGPU_PTE_WRITEABLE |
> >>   			     AMDGPU_PTE_EXECUTABLE);
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >> index 555285e329ed..fcaaac30e84b 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >> @@ -625,11 +625,6 @@ int amdgpu_gem_va_ioctl(struct drm_device
> *dev,
> >> void *data,
> >>
> >>   	switch (args->operation) {
> >>   	case AMDGPU_VA_OP_MAP:
> >> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
> >>> va_address,
> >> -					args->map_size);
> >> -		if (r)
> >> -			goto error_backoff;
> >> -
> >>   		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
> >>   		r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
> >>   				     args->offset_in_bo, args->map_size, @@ -
> >> 645,11 +640,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
> void
> >> *data,
> >>   						args->map_size);
> >>   		break;
> >>   	case AMDGPU_VA_OP_REPLACE:
> >> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
> >>> va_address,
> >> -					args->map_size);
> >> -		if (r)
> >> -			goto error_backoff;
> >> -
> >>   		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
> >>   		r = amdgpu_vm_bo_replace_map(adev, bo_va, args-
> >>> va_address,
> >>   					     args->offset_in_bo, args-
> >>> map_size, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> index 362436f4e856..dfad543fc000 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct
> >> amdgpu_device *adev,
> >>   	}
> >>   }
> >>
> >> -/**
> >> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
> >> - *
> >> - * @adev: amdgpu_device pointer
> >> - * @vm: amdgpu_vm structure
> >> - * @start: start addr of the walk
> >> - * @cursor: state to initialize
> >> - *
> >> - * Start a walk and go directly to the leaf node.
> >> - */
> >> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,
> >> -				    struct amdgpu_vm *vm, uint64_t start,
> >> -				    struct amdgpu_vm_pt_cursor *cursor)
> >> -{
> >> -	amdgpu_vm_pt_start(adev, vm, start, cursor);
> >> -	while (amdgpu_vm_pt_descendant(adev, cursor));
> >> -}
> >> -
> >> -/**
> >> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
> >> - *
> >> - * @adev: amdgpu_device pointer
> >> - * @cursor: current state
> >> - *
> >> - * Walk the PD/PT tree to the next leaf node.
> >> - */
> >> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
> >> -				   struct amdgpu_vm_pt_cursor *cursor)
> >> -{
> >> -	amdgpu_vm_pt_next(adev, cursor);
> >> -	if (cursor->pfn != ~0ll)
> >> -		while (amdgpu_vm_pt_descendant(adev, cursor));
> >> -}
> >> -
> >> -/**
> >> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the
> >> hierarchy
> >> - */
> >> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)
> >> 	\
> >> -	for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor));
> >> 		\
> >> -	     (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev),
> >> &(cursor)))
> >> -
> >>   /**
> >>    * amdgpu_vm_pt_first_dfs - start a deep first search
> >>    *
> >> @@ -915,74 +874,51 @@ static void amdgpu_vm_bo_param(struct
> >> amdgpu_device *adev, struct amdgpu_vm *vm,
> >>    * Returns:
> >>    * 0 on success, errno otherwise.
> >>    */
> >> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> >> -			struct amdgpu_vm *vm,
> >> -			uint64_t saddr, uint64_t size)
> >> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> >> +			       struct amdgpu_vm *vm,
> >> +			       struct amdgpu_vm_pt_cursor *cursor)
> >>   {
> >> -	struct amdgpu_vm_pt_cursor cursor;
> >> +	struct amdgpu_vm_pt *entry = cursor->entry;
> >> +	struct amdgpu_bo_param bp;
> >>   	struct amdgpu_bo *pt;
> >> -	uint64_t eaddr;
> >>   	int r;
> >>
> >> -	/* validate the parameters */
> >> -	if (saddr & AMDGPU_GPU_PAGE_MASK || size &
> >> AMDGPU_GPU_PAGE_MASK)
> >> -		return -EINVAL;
> >> +	if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
> >> +		unsigned num_entries;
> >>
> >> -	eaddr = saddr + size - 1;
> >> -
> >> -	saddr /= AMDGPU_GPU_PAGE_SIZE;
> >> -	eaddr /= AMDGPU_GPU_PAGE_SIZE;
> >> -
> >> -	if (eaddr >= adev->vm_manager.max_pfn) {
> >> -		dev_err(adev->dev, "va above limit (0x%08llX >=
> >> 0x%08llX)\n",
> >> -			eaddr, adev->vm_manager.max_pfn);
> >> -		return -EINVAL;
> >> +		num_entries = amdgpu_vm_num_entries(adev, cursor-
> >>> level);
> >> +		entry->entries = kvmalloc_array(num_entries,
> >> +						sizeof(*entry->entries),
> >> +						GFP_KERNEL | __GFP_ZERO);
> >> +		if (!entry->entries)
> >> +			return -ENOMEM;
> >>   	}
> >>
> >> -	for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {
> >> -		struct amdgpu_vm_pt *entry = cursor.entry;
> >> -		struct amdgpu_bo_param bp;
> >> -
> >> -		if (cursor.level < AMDGPU_VM_PTB) {
> >> -			unsigned num_entries;
> >> -
> >> -			num_entries = amdgpu_vm_num_entries(adev,
> >> cursor.level);
> >> -			entry->entries = kvmalloc_array(num_entries,
> >> -							sizeof(*entry-
> >>> entries),
> >> -							GFP_KERNEL |
> >> -							__GFP_ZERO);
> >> -			if (!entry->entries)
> >> -				return -ENOMEM;
> >> -		}
> >> -
> >> -
> >> -		if (entry->base.bo)
> >> -			continue;
> >> -
> >> -		amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
> >> -
> >> -		r = amdgpu_bo_create(adev, &bp, &pt);
> >> -		if (r)
> >> -			return r;
> >> -
> >> -		if (vm->use_cpu_for_update) {
> >> -			r = amdgpu_bo_kmap(pt, NULL);
> >> -			if (r)
> >> -				goto error_free_pt;
> >> -		}
> >> +	if (entry->base.bo)
> >> +		return 0;
> >>
> >> -		/* Keep a reference to the root directory to avoid
> >> -		* freeing them up in the wrong order.
> >> -		*/
> >> -		pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
> >> +	amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
> >>
> >> -		amdgpu_vm_bo_base_init(&entry->base, vm, pt);
> >> +	r = amdgpu_bo_create(adev, &bp, &pt);
> >> +	if (r)
> >> +		return r;
> >>
> >> -		r = amdgpu_vm_clear_bo(adev, vm, pt);
> >> +	if (vm->use_cpu_for_update) {
> >> +		r = amdgpu_bo_kmap(pt, NULL);
> >>   		if (r)
> >>   			goto error_free_pt;
> >>   	}
> >>
> >> +	/* Keep a reference to the root directory to avoid
> >> +	 * freeing them up in the wrong order.
> >> +	 */
> >> +	pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
> >> +	amdgpu_vm_bo_base_init(&entry->base, vm, pt);
> >> +
> >> +	r = amdgpu_vm_clear_bo(adev, vm, pt);
> >> +	if (r)
> >> +		goto error_free_pt;
> >> +
> >>   	return 0;
> >>
> >>   error_free_pt:
> >> @@ -1627,6 +1563,7 @@ static int amdgpu_vm_update_ptes(struct
> >> amdgpu_pte_update_params *params,
> >>   	struct amdgpu_vm_pt_cursor cursor;
> >>   	uint64_t frag_start = start, frag_end;
> >>   	unsigned int frag;
> >> +	int r;
> >>
> >>   	/* figure out the initial fragment */
> >>   	amdgpu_vm_fragment(params, frag_start, end, flags, &frag,
> >> &frag_end); @@ -1634,12 +1571,15 @@ static int
> >> amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
> >>   	/* walk over the address space and update the PTs */
> >>   	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
> >>   	while (cursor.pfn < end) {
> >> -		struct amdgpu_bo *pt = cursor.entry->base.bo;
> >>   		unsigned shift, parent_shift, mask;
> >>   		uint64_t incr, entry_end, pe_start;
> >> +		struct amdgpu_bo *pt;
> >>
> >> -		if (!pt)
> >> -			return -ENOENT;
> >> +		r = amdgpu_vm_alloc_pts(params->adev, params->vm,
> >> &cursor);
> >> +		if (r)
> >> +			return r;
> >> +
> >> +		pt = cursor.entry->base.bo;
> >>
> >>   		/* The root level can't be a huge page */
> >>   		if (cursor.level == adev->vm_manager.root_level) { diff --git
> >> a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >> index 81ff8177f092..116605c038d2 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm);
> int
> >> amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct
> >> amdgpu_vm *vm,
> >>   			      int (*callback)(void *p, struct amdgpu_bo *bo),
> >>   			      void *param);
> >> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> >> -			struct amdgpu_vm *vm,
> >> -			uint64_t saddr, uint64_t size);
> >>   int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job
> >> *job, bool need_pipe_sync);  int amdgpu_vm_update_directories(struct
> >> amdgpu_device *adev,
> >>   				 struct amdgpu_vm *vm);
> >> --
> >> 2.17.1
> >>
> >> _______________________________________________
> >> amd-gfx mailing list
> >> amd-gfx@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
       [not found]                     ` <BN6PR12MB16181059CE65EF869EB044A985480-/b2+HYfkarRqaFUXYJa4HgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2019-03-11 16:55                       ` Christian König
       [not found]                         ` <b0383667-53ef-31c5-5a73-270bc90ab8c8-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: Christian König @ 2019-03-11 16:55 UTC (permalink / raw)
  To: Russell, Kent, Kuehling, Felix, Koenig, Christian,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Hi guys,

well it's most likely some missing handling in the KFD, so I'm rather 
reluctant to revert the change immediately.

Problem is that I don't have time right now to look into it immediately. 
So Kent can you continue to take a look?

Sounds like it's crashing immediately, so it should be something obvious.

Christian.

Am 11.03.19 um 10:49 schrieb Russell, Kent:
>  From what I've been able to dig through, the VM Fault seems to occur right after a doorbell mmap, but that's as far as I got. I can try to revert it in today's merge and see how things go.
>
>   Kent
>
>> -----Original Message-----
>> From: Kuehling, Felix
>> Sent: Friday, March 08, 2019 11:16 PM
>> To: Koenig, Christian <Christian.Koenig@amd.com>; Russell, Kent
>> <Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org
>> Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>
>> My concerns were related to eviction fence handling. It would manifest as
>> unnecessary eviction callbacks into KFD that aren't caused by real evictions. I
>> addressed that with a previous patch series that removed the need to
>> remove eviction fences and add them back around page table updates in
>> amdgpu_amdkfd_gpuvm.c.
>>
>> I don't know what's going on here. I can probably take a look on Monday. I
>> haven't considered what changed with respect to PD updates.
>>
>> Kent, can we temporarily revert the offending change in amd-kfd-staging
>> just to unblock the merge?
>>
>> Christian, I think KFD is currently broken on amd-staging-drm-next. If we're
>> serious about supporting KFD upstream, you may also want to consider
>> reverting your change there for now. Also consider building the Thunk and
>> kfdtest so you can do quick smoke tests locally whenever you make
>> amdgpu_vm changes that can affect KFD.
>> https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface
>>
>> Regards,
>>    Felix
>>
>> -----Original Message-----
>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
>> Christian König
>> Sent: Friday, March 08, 2019 9:14 AM
>> To: Russell, Kent <Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>
>> My best guess is that we forget somewhere to update the PDs. What
>> hardware is that on?
>>
>> Felix already mentioned that this could be problematic for the KFD.
>>
>> Maybe he has an idea,
>> Christian.
>>
>> Am 08.03.19 um 15:04 schrieb Russell, Kent:
>>> Hi Christian,
>>>
>>> This patch ended up causing a VM Fault in KFDTest. Reverting just this
>> patch addressed the issue:
>>> [   82.703503] amdgpu 0000:0c:00.0: GPU fault detected: 146 0x0000480c for
>> process  pid 0 thread  pid 0
>>> [   82.703512] amdgpu 0000:0c:00.0:
>> VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x00001000
>>> [   82.703516] amdgpu 0000:0c:00.0:
>> VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C
>>> [   82.703522] amdgpu 0000:0c:00.0: VM fault (0x0c, vmid 8, pasid 32769) at
>> page 4096, read from 'TC0' (0x54433000) (72)
>>> [   82.703585] Evicting PASID 32769 queues
>>>
>>> I am looking into it, but if you have any insight that would be great in
>> helping to resolve it quickly.
>>>    Kent
>>>> -----Original Message-----
>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
>>>> Christian König
>>>> Sent: Tuesday, February 26, 2019 7:47 AM
>>>> To: amd-gfx@lists.freedesktop.org
>>>> Subject: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>>>
>>>> Let's start to allocate VM PDs/PTs on demand instead of
>>>> pre-allocating them during mapping.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>> ---
>>>>    .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10 +-
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136 +++++------------
>> -
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -
>>>>    5 files changed, 39 insertions(+), 129 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>> index 31e3953dcb6e..088e9b6b765b 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>> @@ -410,15 +410,7 @@ static int add_bo_to_vm(struct amdgpu_device
>>>> *adev, struct kgd_mem *mem,
>>>>    	if (p_bo_va_entry)
>>>>    		*p_bo_va_entry = bo_va_entry;
>>>>
>>>> -	/* Allocate new page tables if needed and validate
>>>> -	 * them.
>>>> -	 */
>>>> -	ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
>>>> -	if (ret) {
>>>> -		pr_err("Failed to allocate pts, err=%d\n", ret);
>>>> -		goto err_alloc_pts;
>>>> -	}
>>>> -
>>>> +	/* Allocate validate page tables if needed */
>>>>    	ret = vm_validate_pt_pd_bos(vm);
>>>>    	if (ret) {
>>>>    		pr_err("validate_pt_pd_bos() failed\n"); diff --git
>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>> index 7e22be7ca68a..54dd02a898b9 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device
>>>> *adev, struct amdgpu_vm *vm,
>>>>    		return -ENOMEM;
>>>>    	}
>>>>
>>>> -	r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
>>>> -				size);
>>>> -	if (r) {
>>>> -		DRM_ERROR("failed to allocate pts for static CSA, err=%d\n",
>>>> r);
>>>> -		amdgpu_vm_bo_rmv(adev, *bo_va);
>>>> -		ttm_eu_backoff_reservation(&ticket, &list);
>>>> -		return r;
>>>> -	}
>>>> -
>>>>    	r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
>>>>    			     AMDGPU_PTE_READABLE |
>>>> AMDGPU_PTE_WRITEABLE |
>>>>    			     AMDGPU_PTE_EXECUTABLE);
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>> index 555285e329ed..fcaaac30e84b 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>> @@ -625,11 +625,6 @@ int amdgpu_gem_va_ioctl(struct drm_device
>> *dev,
>>>> void *data,
>>>>
>>>>    	switch (args->operation) {
>>>>    	case AMDGPU_VA_OP_MAP:
>>>> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
>>>>> va_address,
>>>> -					args->map_size);
>>>> -		if (r)
>>>> -			goto error_backoff;
>>>> -
>>>>    		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
>>>>    		r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
>>>>    				     args->offset_in_bo, args->map_size, @@ -
>>>> 645,11 +640,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>> void
>>>> *data,
>>>>    						args->map_size);
>>>>    		break;
>>>>    	case AMDGPU_VA_OP_REPLACE:
>>>> -		r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
>>>>> va_address,
>>>> -					args->map_size);
>>>> -		if (r)
>>>> -			goto error_backoff;
>>>> -
>>>>    		va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
>>>>    		r = amdgpu_vm_bo_replace_map(adev, bo_va, args-
>>>>> va_address,
>>>>    					     args->offset_in_bo, args-
>>>>> map_size, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index 362436f4e856..dfad543fc000 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct
>>>> amdgpu_device *adev,
>>>>    	}
>>>>    }
>>>>
>>>> -/**
>>>> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
>>>> - *
>>>> - * @adev: amdgpu_device pointer
>>>> - * @vm: amdgpu_vm structure
>>>> - * @start: start addr of the walk
>>>> - * @cursor: state to initialize
>>>> - *
>>>> - * Start a walk and go directly to the leaf node.
>>>> - */
>>>> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,
>>>> -				    struct amdgpu_vm *vm, uint64_t start,
>>>> -				    struct amdgpu_vm_pt_cursor *cursor)
>>>> -{
>>>> -	amdgpu_vm_pt_start(adev, vm, start, cursor);
>>>> -	while (amdgpu_vm_pt_descendant(adev, cursor));
>>>> -}
>>>> -
>>>> -/**
>>>> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
>>>> - *
>>>> - * @adev: amdgpu_device pointer
>>>> - * @cursor: current state
>>>> - *
>>>> - * Walk the PD/PT tree to the next leaf node.
>>>> - */
>>>> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
>>>> -				   struct amdgpu_vm_pt_cursor *cursor)
>>>> -{
>>>> -	amdgpu_vm_pt_next(adev, cursor);
>>>> -	if (cursor->pfn != ~0ll)
>>>> -		while (amdgpu_vm_pt_descendant(adev, cursor));
>>>> -}
>>>> -
>>>> -/**
>>>> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the
>>>> hierarchy
>>>> - */
>>>> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)
>>>> 	\
>>>> -	for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor));
>>>> 		\
>>>> -	     (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev),
>>>> &(cursor)))
>>>> -
>>>>    /**
>>>>     * amdgpu_vm_pt_first_dfs - start a deep first search
>>>>     *
>>>> @@ -915,74 +874,51 @@ static void amdgpu_vm_bo_param(struct
>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>     * Returns:
>>>>     * 0 on success, errno otherwise.
>>>>     */
>>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>> -			struct amdgpu_vm *vm,
>>>> -			uint64_t saddr, uint64_t size)
>>>> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>> +			       struct amdgpu_vm *vm,
>>>> +			       struct amdgpu_vm_pt_cursor *cursor)
>>>>    {
>>>> -	struct amdgpu_vm_pt_cursor cursor;
>>>> +	struct amdgpu_vm_pt *entry = cursor->entry;
>>>> +	struct amdgpu_bo_param bp;
>>>>    	struct amdgpu_bo *pt;
>>>> -	uint64_t eaddr;
>>>>    	int r;
>>>>
>>>> -	/* validate the parameters */
>>>> -	if (saddr & AMDGPU_GPU_PAGE_MASK || size &
>>>> AMDGPU_GPU_PAGE_MASK)
>>>> -		return -EINVAL;
>>>> +	if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
>>>> +		unsigned num_entries;
>>>>
>>>> -	eaddr = saddr + size - 1;
>>>> -
>>>> -	saddr /= AMDGPU_GPU_PAGE_SIZE;
>>>> -	eaddr /= AMDGPU_GPU_PAGE_SIZE;
>>>> -
>>>> -	if (eaddr >= adev->vm_manager.max_pfn) {
>>>> -		dev_err(adev->dev, "va above limit (0x%08llX >=
>>>> 0x%08llX)\n",
>>>> -			eaddr, adev->vm_manager.max_pfn);
>>>> -		return -EINVAL;
>>>> +		num_entries = amdgpu_vm_num_entries(adev, cursor-
>>>>> level);
>>>> +		entry->entries = kvmalloc_array(num_entries,
>>>> +						sizeof(*entry->entries),
>>>> +						GFP_KERNEL | __GFP_ZERO);
>>>> +		if (!entry->entries)
>>>> +			return -ENOMEM;
>>>>    	}
>>>>
>>>> -	for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {
>>>> -		struct amdgpu_vm_pt *entry = cursor.entry;
>>>> -		struct amdgpu_bo_param bp;
>>>> -
>>>> -		if (cursor.level < AMDGPU_VM_PTB) {
>>>> -			unsigned num_entries;
>>>> -
>>>> -			num_entries = amdgpu_vm_num_entries(adev,
>>>> cursor.level);
>>>> -			entry->entries = kvmalloc_array(num_entries,
>>>> -							sizeof(*entry-
>>>>> entries),
>>>> -							GFP_KERNEL |
>>>> -							__GFP_ZERO);
>>>> -			if (!entry->entries)
>>>> -				return -ENOMEM;
>>>> -		}
>>>> -
>>>> -
>>>> -		if (entry->base.bo)
>>>> -			continue;
>>>> -
>>>> -		amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
>>>> -
>>>> -		r = amdgpu_bo_create(adev, &bp, &pt);
>>>> -		if (r)
>>>> -			return r;
>>>> -
>>>> -		if (vm->use_cpu_for_update) {
>>>> -			r = amdgpu_bo_kmap(pt, NULL);
>>>> -			if (r)
>>>> -				goto error_free_pt;
>>>> -		}
>>>> +	if (entry->base.bo)
>>>> +		return 0;
>>>>
>>>> -		/* Keep a reference to the root directory to avoid
>>>> -		* freeing them up in the wrong order.
>>>> -		*/
>>>> -		pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
>>>> +	amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
>>>>
>>>> -		amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>>>> +	r = amdgpu_bo_create(adev, &bp, &pt);
>>>> +	if (r)
>>>> +		return r;
>>>>
>>>> -		r = amdgpu_vm_clear_bo(adev, vm, pt);
>>>> +	if (vm->use_cpu_for_update) {
>>>> +		r = amdgpu_bo_kmap(pt, NULL);
>>>>    		if (r)
>>>>    			goto error_free_pt;
>>>>    	}
>>>>
>>>> +	/* Keep a reference to the root directory to avoid
>>>> +	 * freeing them up in the wrong order.
>>>> +	 */
>>>> +	pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
>>>> +	amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>>>> +
>>>> +	r = amdgpu_vm_clear_bo(adev, vm, pt);
>>>> +	if (r)
>>>> +		goto error_free_pt;
>>>> +
>>>>    	return 0;
>>>>
>>>>    error_free_pt:
>>>> @@ -1627,6 +1563,7 @@ static int amdgpu_vm_update_ptes(struct
>>>> amdgpu_pte_update_params *params,
>>>>    	struct amdgpu_vm_pt_cursor cursor;
>>>>    	uint64_t frag_start = start, frag_end;
>>>>    	unsigned int frag;
>>>> +	int r;
>>>>
>>>>    	/* figure out the initial fragment */
>>>>    	amdgpu_vm_fragment(params, frag_start, end, flags, &frag,
>>>> &frag_end); @@ -1634,12 +1571,15 @@ static int
>>>> amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>>>>    	/* walk over the address space and update the PTs */
>>>>    	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
>>>>    	while (cursor.pfn < end) {
>>>> -		struct amdgpu_bo *pt = cursor.entry->base.bo;
>>>>    		unsigned shift, parent_shift, mask;
>>>>    		uint64_t incr, entry_end, pe_start;
>>>> +		struct amdgpu_bo *pt;
>>>>
>>>> -		if (!pt)
>>>> -			return -ENOENT;
>>>> +		r = amdgpu_vm_alloc_pts(params->adev, params->vm,
>>>> &cursor);
>>>> +		if (r)
>>>> +			return r;
>>>> +
>>>> +		pt = cursor.entry->base.bo;
>>>>
>>>>    		/* The root level can't be a huge page */
>>>>    		if (cursor.level == adev->vm_manager.root_level) { diff --git
>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> index 81ff8177f092..116605c038d2 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm);
>> int
>>>> amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct
>>>> amdgpu_vm *vm,
>>>>    			      int (*callback)(void *p, struct amdgpu_bo *bo),
>>>>    			      void *param);
>>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>> -			struct amdgpu_vm *vm,
>>>> -			uint64_t saddr, uint64_t size);
>>>>    int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job
>>>> *job, bool need_pipe_sync);  int amdgpu_vm_update_directories(struct
>>>> amdgpu_device *adev,
>>>>    				 struct amdgpu_vm *vm);
>>>> --
>>>> 2.17.1
>>>>
>>>> _______________________________________________
>>>> amd-gfx mailing list
>>>> amd-gfx@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
       [not found]                         ` <b0383667-53ef-31c5-5a73-270bc90ab8c8-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-03-12 13:13                           ` Christian König
       [not found]                             ` <6b1cfc95-fde8-e25f-121e-cbb03592ec3a-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  2019-03-12 19:02                           ` Kuehling, Felix
  1 sibling, 1 reply; 26+ messages in thread
From: Christian König @ 2019-03-12 13:13 UTC (permalink / raw)
  To: Russell, Kent, Kuehling, Felix, Koenig, Christian,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Hi guys,

so I found a few minutes today to compile kfdtest.

Problem is that during the compile I get a lot of this:
> CMakeFiles/kfdtest.dir/src/BaseQueue.cpp.o: In Funktion 
> »BaseQueue::Create(unsigned int, unsigned int, unsigned long*)«:
> /usr/src/ROCT-Thunk-Interface/tests/kfdtest/src/BaseQueue.cpp:57: 
> Warnung: undefinierter Verweis auf »hsaKmtCreateQueue«

Any idea?

Christian.

Am 11.03.19 um 17:55 schrieb Christian König:
> Hi guys,
>
> well it's most likely some missing handling in the KFD, so I'm rather 
> reluctant to revert the change immediately.
>
> Problem is that I don't have time right now to look into it 
> immediately. So Kent can you continue to take a look?
>
> Sounds like its crashing immediately, so it should be something obvious.
>
> Christian.
>
> Am 11.03.19 um 10:49 schrieb Russell, Kent:
>>  From what I've been able to dig through, the VM Fault seems to occur 
>> right after a doorbell mmap, but that's as far as I got. I can try to 
>> revert it in today's merge and see how things go.
>>
>>   Kent
>>
>>> -----Original Message-----
>>> From: Kuehling, Felix
>>> Sent: Friday, March 08, 2019 11:16 PM
>>> To: Koenig, Christian <Christian.Koenig@amd.com>; Russell, Kent
>>> <Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org
>>> Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>>
>>> My concerns were related to eviction fence handing. It would 
>>> manifest by
>>> unnecessary eviction callbacks into KFD that aren't cause by real 
>>> evictions. I
>>> addressed that with a previous patch series that removed the need to
>>> remove eviction fences and add them back around page table updates in
>>> amdgpu_amdkfd_gpuvm.c.
>>>
>>> I don't know what's going on here. I can probably take a look on 
>>> Monday. I
>>> haven't considered what changed with respect to PD updates.
>>>
>>> Kent, can we temporarily revert the offending change in amd-kfd-staging
>>> just to unblock the merge?
>>>
>>> Christian, I think KFD is currently broken on amd-staging-drm-next. 
>>> If we're
>>> serious about supporting KFD upstream, you may also want to consider
>>> reverting your change there for now. Also consider building the 
>>> Thunk and
>>> kfdtest so you can do quick smoke tests locally whenever you make
>>> amdgpu_vm changes that can affect KFD.
>>> https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface
>>>
>>> Regards,
>>>    Felix
>>>
>>> -----Original Message-----
>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
>>> Christian König
>>> Sent: Friday, March 08, 2019 9:14 AM
>>> To: Russell, Kent <Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org
>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>>
>>> My best guess is that we forget somewhere to update the PDs. What
>>> hardware is that on?
>>>
>>> Felix already mentioned that this could be problematic for the KFD.
>>>
>>> Maybe he has an idea,
>>> Christian.
>>>
>>> Am 08.03.19 um 15:04 schrieb Russell, Kent:
>>>> Hi Christian,
>>>>
>>>> This patch ended up causing a VM Fault in KFDTest. Reverting just this
>>> patch addressed the issue:
>>>> [   82.703503] amdgpu 0000:0c:00.0: GPU fault detected: 146 
>>>> 0x0000480c for
>>> process  pid 0 thread  pid 0
>>>> [   82.703512] amdgpu 0000:0c:00.0:
>>> VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x00001000
>>>> [   82.703516] amdgpu 0000:0c:00.0:
>>> VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C
>>>> [   82.703522] amdgpu 0000:0c:00.0: VM fault (0x0c, vmid 8, pasid 
>>>> 32769) at
>>> page 4096, read from 'TC0' (0x54433000) (72)
>>>> [   82.703585] Evicting PASID 32769 queues
>>>>
>>>> I am looking into it, but if you have any insight that would be 
>>>> great in
>>> helping to resolve it quickly.
>>>>    Kent
>>>>> -----Original Message-----
>>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
>>>>> Christian König
>>>>> Sent: Tuesday, February 26, 2019 7:47 AM
>>>>> To: amd-gfx@lists.freedesktop.org
>>>>> Subject: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>>>>
>>>>> Let's start to allocate VM PDs/PTs on demand instead of
>>>>> pre-allocating them during mapping.
>>>>>
>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>>> ---
>>>>>    .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10 +-
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136 
>>>>> +++++------------
>>> -
>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -
>>>>>    5 files changed, 39 insertions(+), 129 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> index 31e3953dcb6e..088e9b6b765b 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> @@ -410,15 +410,7 @@ static int add_bo_to_vm(struct amdgpu_device
>>>>> *adev, struct kgd_mem *mem,
>>>>>        if (p_bo_va_entry)
>>>>>            *p_bo_va_entry = bo_va_entry;
>>>>>
>>>>> -    /* Allocate new page tables if needed and validate
>>>>> -     * them.
>>>>> -     */
>>>>> -    ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
>>>>> -    if (ret) {
>>>>> -        pr_err("Failed to allocate pts, err=%d\n", ret);
>>>>> -        goto err_alloc_pts;
>>>>> -    }
>>>>> -
>>>>> +    /* Allocate validate page tables if needed */
>>>>>        ret = vm_validate_pt_pd_bos(vm);
>>>>>        if (ret) {
>>>>>            pr_err("validate_pt_pd_bos() failed\n"); diff --git
>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>> index 7e22be7ca68a..54dd02a898b9 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device
>>>>> *adev, struct amdgpu_vm *vm,
>>>>>            return -ENOMEM;
>>>>>        }
>>>>>
>>>>> -    r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
>>>>> -                size);
>>>>> -    if (r) {
>>>>> -        DRM_ERROR("failed to allocate pts for static CSA, err=%d\n",
>>>>> r);
>>>>> -        amdgpu_vm_bo_rmv(adev, *bo_va);
>>>>> -        ttm_eu_backoff_reservation(&ticket, &list);
>>>>> -        return r;
>>>>> -    }
>>>>> -
>>>>>        r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
>>>>>                     AMDGPU_PTE_READABLE |
>>>>> AMDGPU_PTE_WRITEABLE |
>>>>>                     AMDGPU_PTE_EXECUTABLE);
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>> index 555285e329ed..fcaaac30e84b 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>> @@ -625,11 +625,6 @@ int amdgpu_gem_va_ioctl(struct drm_device
>>> *dev,
>>>>> void *data,
>>>>>
>>>>>        switch (args->operation) {
>>>>>        case AMDGPU_VA_OP_MAP:
>>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
>>>>>> va_address,
>>>>> -                    args->map_size);
>>>>> -        if (r)
>>>>> -            goto error_backoff;
>>>>> -
>>>>>            va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
>>>>>            r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
>>>>>                         args->offset_in_bo, args->map_size, @@ -
>>>>> 645,11 +640,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>> void
>>>>> *data,
>>>>>                            args->map_size);
>>>>>            break;
>>>>>        case AMDGPU_VA_OP_REPLACE:
>>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
>>>>>> va_address,
>>>>> -                    args->map_size);
>>>>> -        if (r)
>>>>> -            goto error_backoff;
>>>>> -
>>>>>            va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
>>>>>            r = amdgpu_vm_bo_replace_map(adev, bo_va, args-
>>>>>> va_address,
>>>>>                             args->offset_in_bo, args-
>>>>>> map_size, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> index 362436f4e856..dfad543fc000 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct
>>>>> amdgpu_device *adev,
>>>>>        }
>>>>>    }
>>>>>
>>>>> -/**
>>>>> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
>>>>> - *
>>>>> - * @adev: amdgpu_device pointer
>>>>> - * @vm: amdgpu_vm structure
>>>>> - * @start: start addr of the walk
>>>>> - * @cursor: state to initialize
>>>>> - *
>>>>> - * Start a walk and go directly to the leaf node.
>>>>> - */
>>>>> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,
>>>>> -                    struct amdgpu_vm *vm, uint64_t start,
>>>>> -                    struct amdgpu_vm_pt_cursor *cursor)
>>>>> -{
>>>>> -    amdgpu_vm_pt_start(adev, vm, start, cursor);
>>>>> -    while (amdgpu_vm_pt_descendant(adev, cursor));
>>>>> -}
>>>>> -
>>>>> -/**
>>>>> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
>>>>> - *
>>>>> - * @adev: amdgpu_device pointer
>>>>> - * @cursor: current state
>>>>> - *
>>>>> - * Walk the PD/PT tree to the next leaf node.
>>>>> - */
>>>>> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
>>>>> -                   struct amdgpu_vm_pt_cursor *cursor)
>>>>> -{
>>>>> -    amdgpu_vm_pt_next(adev, cursor);
>>>>> -    if (cursor->pfn != ~0ll)
>>>>> -        while (amdgpu_vm_pt_descendant(adev, cursor));
>>>>> -}
>>>>> -
>>>>> -/**
>>>>> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the
>>>>> hierarchy
>>>>> - */
>>>>> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)
>>>>>     \
>>>>> -    for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor));
>>>>>         \
>>>>> -         (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev),
>>>>> &(cursor)))
>>>>> -
>>>>>    /**
>>>>>     * amdgpu_vm_pt_first_dfs - start a deep first search
>>>>>     *
>>>>> @@ -915,74 +874,51 @@ static void amdgpu_vm_bo_param(struct
>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>     * Returns:
>>>>>     * 0 on success, errno otherwise.
>>>>>     */
>>>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>>> -            struct amdgpu_vm *vm,
>>>>> -            uint64_t saddr, uint64_t size)
>>>>> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>>> +                   struct amdgpu_vm *vm,
>>>>> +                   struct amdgpu_vm_pt_cursor *cursor)
>>>>>    {
>>>>> -    struct amdgpu_vm_pt_cursor cursor;
>>>>> +    struct amdgpu_vm_pt *entry = cursor->entry;
>>>>> +    struct amdgpu_bo_param bp;
>>>>>        struct amdgpu_bo *pt;
>>>>> -    uint64_t eaddr;
>>>>>        int r;
>>>>>
>>>>> -    /* validate the parameters */
>>>>> -    if (saddr & AMDGPU_GPU_PAGE_MASK || size &
>>>>> AMDGPU_GPU_PAGE_MASK)
>>>>> -        return -EINVAL;
>>>>> +    if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
>>>>> +        unsigned num_entries;
>>>>>
>>>>> -    eaddr = saddr + size - 1;
>>>>> -
>>>>> -    saddr /= AMDGPU_GPU_PAGE_SIZE;
>>>>> -    eaddr /= AMDGPU_GPU_PAGE_SIZE;
>>>>> -
>>>>> -    if (eaddr >= adev->vm_manager.max_pfn) {
>>>>> -        dev_err(adev->dev, "va above limit (0x%08llX >=
>>>>> 0x%08llX)\n",
>>>>> -            eaddr, adev->vm_manager.max_pfn);
>>>>> -        return -EINVAL;
>>>>> +        num_entries = amdgpu_vm_num_entries(adev, cursor-
>>>>>> level);
>>>>> +        entry->entries = kvmalloc_array(num_entries,
>>>>> +                        sizeof(*entry->entries),
>>>>> +                        GFP_KERNEL | __GFP_ZERO);
>>>>> +        if (!entry->entries)
>>>>> +            return -ENOMEM;
>>>>>        }
>>>>>
>>>>> -    for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {
>>>>> -        struct amdgpu_vm_pt *entry = cursor.entry;
>>>>> -        struct amdgpu_bo_param bp;
>>>>> -
>>>>> -        if (cursor.level < AMDGPU_VM_PTB) {
>>>>> -            unsigned num_entries;
>>>>> -
>>>>> -            num_entries = amdgpu_vm_num_entries(adev,
>>>>> cursor.level);
>>>>> -            entry->entries = kvmalloc_array(num_entries,
>>>>> -                            sizeof(*entry-
>>>>>> entries),
>>>>> -                            GFP_KERNEL |
>>>>> -                            __GFP_ZERO);
>>>>> -            if (!entry->entries)
>>>>> -                return -ENOMEM;
>>>>> -        }
>>>>> -
>>>>> -
>>>>> -        if (entry->base.bo)
>>>>> -            continue;
>>>>> -
>>>>> -        amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
>>>>> -
>>>>> -        r = amdgpu_bo_create(adev, &bp, &pt);
>>>>> -        if (r)
>>>>> -            return r;
>>>>> -
>>>>> -        if (vm->use_cpu_for_update) {
>>>>> -            r = amdgpu_bo_kmap(pt, NULL);
>>>>> -            if (r)
>>>>> -                goto error_free_pt;
>>>>> -        }
>>>>> +    if (entry->base.bo)
>>>>> +        return 0;
>>>>>
>>>>> -        /* Keep a reference to the root directory to avoid
>>>>> -        * freeing them up in the wrong order.
>>>>> -        */
>>>>> -        pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
>>>>> +    amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
>>>>>
>>>>> -        amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>>>>> +    r = amdgpu_bo_create(adev, &bp, &pt);
>>>>> +    if (r)
>>>>> +        return r;
>>>>>
>>>>> -        r = amdgpu_vm_clear_bo(adev, vm, pt);
>>>>> +    if (vm->use_cpu_for_update) {
>>>>> +        r = amdgpu_bo_kmap(pt, NULL);
>>>>>            if (r)
>>>>>                goto error_free_pt;
>>>>>        }
>>>>>
>>>>> +    /* Keep a reference to the root directory to avoid
>>>>> +     * freeing them up in the wrong order.
>>>>> +     */
>>>>> +    pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
>>>>> +    amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>>>>> +
>>>>> +    r = amdgpu_vm_clear_bo(adev, vm, pt);
>>>>> +    if (r)
>>>>> +        goto error_free_pt;
>>>>> +
>>>>>        return 0;
>>>>>
>>>>>    error_free_pt:
>>>>> @@ -1627,6 +1563,7 @@ static int amdgpu_vm_update_ptes(struct
>>>>> amdgpu_pte_update_params *params,
>>>>>        struct amdgpu_vm_pt_cursor cursor;
>>>>>        uint64_t frag_start = start, frag_end;
>>>>>        unsigned int frag;
>>>>> +    int r;
>>>>>
>>>>>        /* figure out the initial fragment */
>>>>>        amdgpu_vm_fragment(params, frag_start, end, flags, &frag,
>>>>> &frag_end); @@ -1634,12 +1571,15 @@ static int
>>>>> amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>>>>>        /* walk over the address space and update the PTs */
>>>>>        amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
>>>>>        while (cursor.pfn < end) {
>>>>> -        struct amdgpu_bo *pt = cursor.entry->base.bo;
>>>>>            unsigned shift, parent_shift, mask;
>>>>>            uint64_t incr, entry_end, pe_start;
>>>>> +        struct amdgpu_bo *pt;
>>>>>
>>>>> -        if (!pt)
>>>>> -            return -ENOENT;
>>>>> +        r = amdgpu_vm_alloc_pts(params->adev, params->vm,
>>>>> &cursor);
>>>>> +        if (r)
>>>>> +            return r;
>>>>> +
>>>>> +        pt = cursor.entry->base.bo;
>>>>>
>>>>>            /* The root level can't be a huge page */
>>>>>            if (cursor.level == adev->vm_manager.root_level) { diff 
>>>>> --git
>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> index 81ff8177f092..116605c038d2 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm);
>>> int
>>>>> amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct
>>>>> amdgpu_vm *vm,
>>>>>                      int (*callback)(void *p, struct amdgpu_bo *bo),
>>>>>                      void *param);
>>>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>>> -            struct amdgpu_vm *vm,
>>>>> -            uint64_t saddr, uint64_t size);
>>>>>    int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job
>>>>> *job, bool need_pipe_sync);  int amdgpu_vm_update_directories(struct
>>>>> amdgpu_device *adev,
>>>>>                     struct amdgpu_vm *vm);
>>>>> -- 
>>>>> 2.17.1
>>>>>
>>>>> _______________________________________________
>>>>> amd-gfx mailing list
>>>>> amd-gfx@lists.freedesktop.org
>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
       [not found]                             ` <6b1cfc95-fde8-e25f-121e-cbb03592ec3a-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-03-12 14:47                               ` Russell, Kent
       [not found]                                 ` <CY4PR12MB16224445596C88A29C3AE63D85490-rpdhrqHFk05g4+I42y6h/AdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: Russell, Kent @ 2019-03-12 14:47 UTC (permalink / raw)
  To: Koenig, Christian, Kuehling, Felix,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

The README.txt file inside the tests/kfdtest folder has instructions on how to do it if you don't have the libhsakmt package installed on your system:

export LIBHSAKMT_PATH=/*your local libhsakmt folder*/
With that, the headers and libraries are searched under
LIBHSAKMT_PATH/include and LIBHSAKMT_PATH/lib respectively.

So if you export LIBHSAKMT_PATH as the root ROCT folder (the one containing include, src, tests, etc.), that should cover it.

 Kent


> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Tuesday, March 12, 2019 9:13 AM
> To: Russell, Kent <Kent.Russell@amd.com>; Kuehling, Felix
> <Felix.Kuehling@amd.com>; Koenig, Christian
> <Christian.Koenig@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
> 
> Hi guys,
> 
> so found a few minutes today to compile kfdtest.
> 
> Problem is that during the compile I get a lots of this:
> > CMakeFiles/kfdtest.dir/src/BaseQueue.cpp.o: In Funktion
> > »BaseQueue::Create(unsigned int, unsigned int, unsigned long*)«:
> > /usr/src/ROCT-Thunk-Interface/tests/kfdtest/src/BaseQueue.cpp:57:
> > Warnung: undefinierter Verweis auf »hsaKmtCreateQueue«
> 
> Any idea?
> 
> Christian.
> 
> Am 11.03.19 um 17:55 schrieb Christian König:
> > Hi guys,
> >
> > well it's most likely some missing handling in the KFD, so I'm rather
> > reluctant to revert the change immediately.
> >
> > Problem is that I don't have time right now to look into it
> > immediately. So Kent can you continue to take a look?
> >
> > Sounds like its crashing immediately, so it should be something obvious.
> >
> > Christian.
> >
> > Am 11.03.19 um 10:49 schrieb Russell, Kent:
> >>  From what I've been able to dig through, the VM Fault seems to occur
> >> right after a doorbell mmap, but that's as far as I got. I can try to
> >> revert it in today's merge and see how things go.
> >>
> >>   Kent
> >>
> >>> -----Original Message-----
> >>> From: Kuehling, Felix
> >>> Sent: Friday, March 08, 2019 11:16 PM
> >>> To: Koenig, Christian <Christian.Koenig@amd.com>; Russell, Kent
> >>> <Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org
> >>> Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
> >>>
> >>> My concerns were related to eviction fence handing. It would
> >>> manifest by unnecessary eviction callbacks into KFD that aren't
> >>> cause by real evictions. I addressed that with a previous patch
> >>> series that removed the need to remove eviction fences and add them
> >>> back around page table updates in amdgpu_amdkfd_gpuvm.c.
> >>>
> >>> I don't know what's going on here. I can probably take a look on
> >>> Monday. I haven't considered what changed with respect to PD
> >>> updates.
> >>>
> >>> Kent, can we temporarily revert the offending change in
> >>> amd-kfd-staging just to unblock the merge?
> >>>
> >>> Christian, I think KFD is currently broken on amd-staging-drm-next.
> >>> If we're
> >>> serious about supporting KFD upstream, you may also want to consider
> >>> reverting your change there for now. Also consider building the
> >>> Thunk and kfdtest so you can do quick smoke tests locally whenever
> >>> you make amdgpu_vm changes that can affect KFD.
> >>> https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface
> >>>
> >>> Regards,
> >>>    Felix
> >>>
> >>> -----Original Message-----
> >>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
> >>> Christian König
> >>> Sent: Friday, March 08, 2019 9:14 AM
> >>> To: Russell, Kent <Kent.Russell@amd.com>;
> >>> amd-gfx@lists.freedesktop.org
> >>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
> >>>
> >>> My best guess is that we forget somewhere to update the PDs. What
> >>> hardware is that on?
> >>>
> >>> Felix already mentioned that this could be problematic for the KFD.
> >>>
> >>> Maybe he has an idea,
> >>> Christian.
> >>>
> >>> Am 08.03.19 um 15:04 schrieb Russell, Kent:
> >>>> Hi Christian,
> >>>>
> >>>> This patch ended up causing a VM Fault in KFDTest. Reverting just
> >>>> this
> >>> patch addressed the issue:
> >>>> [   82.703503] amdgpu 0000:0c:00.0: GPU fault detected: 146
> >>>> 0x0000480c for
> >>> process  pid 0 thread  pid 0
> >>>> [   82.703512] amdgpu 0000:0c:00.0:
> >>> VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x00001000
> >>>> [   82.703516] amdgpu 0000:0c:00.0:
> >>> VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C
> >>>> [   82.703522] amdgpu 0000:0c:00.0: VM fault (0x0c, vmid 8, pasid
> >>>> 32769) at
> >>> page 4096, read from 'TC0' (0x54433000) (72)
> >>>> [   82.703585] Evicting PASID 32769 queues
> >>>>
> >>>> I am looking into it, but if you have any insight that would be
> >>>> great in
> >>> helping to resolve it quickly.
> >>>>    Kent
> >>>>> -----Original Message-----
> >>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf
> Of
> >>>>> Christian König
> >>>>> Sent: Tuesday, February 26, 2019 7:47 AM
> >>>>> To: amd-gfx@lists.freedesktop.org
> >>>>> Subject: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
> >>>>>
> >>>>> Let's start to allocate VM PDs/PTs on demand instead of
> >>>>> pre-allocating them during mapping.
> >>>>>
> >>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
> >>>>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
> >>>>> ---
> >>>>>    .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10 +-
> >>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --
> >>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --
> >>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136
> >>>>> +++++------------
> >>> -
> >>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -
> >>>>>    5 files changed, 39 insertions(+), 129 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >>>>> index 31e3953dcb6e..088e9b6b765b 100644
> >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >>>>> @@ -410,15 +410,7 @@ static int add_bo_to_vm(struct
> amdgpu_device
> >>>>> *adev, struct kgd_mem *mem,
> >>>>>        if (p_bo_va_entry)
> >>>>>            *p_bo_va_entry = bo_va_entry;
> >>>>>
> >>>>> -    /* Allocate new page tables if needed and validate
> >>>>> -     * them.
> >>>>> -     */
> >>>>> -    ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
> >>>>> -    if (ret) {
> >>>>> -        pr_err("Failed to allocate pts, err=%d\n", ret);
> >>>>> -        goto err_alloc_pts;
> >>>>> -    }
> >>>>> -
> >>>>> +    /* Allocate validate page tables if needed */
> >>>>>        ret = vm_validate_pt_pd_bos(vm);
> >>>>>        if (ret) {
> >>>>>            pr_err("validate_pt_pd_bos() failed\n"); diff --git
> >>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >>>>> index 7e22be7ca68a..54dd02a898b9 100644
> >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >>>>> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct
> amdgpu_device
> >>>>> *adev, struct amdgpu_vm *vm,
> >>>>>            return -ENOMEM;
> >>>>>        }
> >>>>>
> >>>>> -    r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
> >>>>> -                size);
> >>>>> -    if (r) {
> >>>>> -        DRM_ERROR("failed to allocate pts for static CSA,
> >>>>> err=%d\n", r);
> >>>>> -        amdgpu_vm_bo_rmv(adev, *bo_va);
> >>>>> -        ttm_eu_backoff_reservation(&ticket, &list);
> >>>>> -        return r;
> >>>>> -    }
> >>>>> -
> >>>>>        r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
> >>>>>                     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
> >>>>>                     AMDGPU_PTE_EXECUTABLE); diff --git
> >>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >>>>> index 555285e329ed..fcaaac30e84b 100644
> >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >>>>> @@ -625,11 +625,6 @@ int amdgpu_gem_va_ioctl(struct drm_device
> >>> *dev,
> >>>>> void *data,
> >>>>>
> >>>>>        switch (args->operation) {
> >>>>>        case AMDGPU_VA_OP_MAP:
> >>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
> >>>>>> va_address,
> >>>>> -                    args->map_size);
> >>>>> -        if (r)
> >>>>> -            goto error_backoff;
> >>>>> -
> >>>>>            va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
> >>>>>            r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
> >>>>>                         args->offset_in_bo, args->map_size, @@ -
> >>>>> 645,11 +640,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
> >>> void
> >>>>> *data,
> >>>>>                            args->map_size);
> >>>>>            break;
> >>>>>        case AMDGPU_VA_OP_REPLACE:
> >>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
> >>>>>> va_address,
> >>>>> -                    args->map_size);
> >>>>> -        if (r)
> >>>>> -            goto error_backoff;
> >>>>> -
> >>>>>            va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
> >>>>>            r = amdgpu_vm_bo_replace_map(adev, bo_va, args-
> >>>>>> va_address,
> >>>>>                             args->offset_in_bo, args-
> >>>>>> map_size, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>>>> index 362436f4e856..dfad543fc000 100644
> >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>>>> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct
> >>>>> amdgpu_device *adev,
> >>>>>        }
> >>>>>    }
> >>>>>
> >>>>> -/**
> >>>>> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
> >>>>> - *
> >>>>> - * @adev: amdgpu_device pointer
> >>>>> - * @vm: amdgpu_vm structure
> >>>>> - * @start: start addr of the walk
> >>>>> - * @cursor: state to initialize
> >>>>> - *
> >>>>> - * Start a walk and go directly to the leaf node.
> >>>>> - */
> >>>>> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,
> >>>>> -                    struct amdgpu_vm *vm, uint64_t start,
> >>>>> -                    struct amdgpu_vm_pt_cursor *cursor) -{
> >>>>> -    amdgpu_vm_pt_start(adev, vm, start, cursor);
> >>>>> -    while (amdgpu_vm_pt_descendant(adev, cursor)); -}
> >>>>> -
> >>>>> -/**
> >>>>> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
> >>>>> - *
> >>>>> - * @adev: amdgpu_device pointer
> >>>>> - * @cursor: current state
> >>>>> - *
> >>>>> - * Walk the PD/PT tree to the next leaf node.
> >>>>> - */
> >>>>> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
> >>>>> -                   struct amdgpu_vm_pt_cursor *cursor) -{
> >>>>> -    amdgpu_vm_pt_next(adev, cursor);
> >>>>> -    if (cursor->pfn != ~0ll)
> >>>>> -        while (amdgpu_vm_pt_descendant(adev, cursor)); -}
> >>>>> -
> >>>>> -/**
> >>>>> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the
> >>>>> hierarchy
> >>>>> - */
> >>>>> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)
> >>>>>     \
> >>>>> -    for (amdgpu_vm_pt_first_leaf((adev), (vm), (start),
> >>>>> &(cursor));
> >>>>>         \
> >>>>> -         (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev),
> >>>>> &(cursor)))
> >>>>> -
> >>>>>    /**
> >>>>>     * amdgpu_vm_pt_first_dfs - start a deep first search
> >>>>>     *
> >>>>> @@ -915,74 +874,51 @@ static void amdgpu_vm_bo_param(struct
> >>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
> >>>>>     * Returns:
> >>>>>     * 0 on success, errno otherwise.
> >>>>>     */
> >>>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> >>>>> -            struct amdgpu_vm *vm,
> >>>>> -            uint64_t saddr, uint64_t size)
> >>>>> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> >>>>> +                   struct amdgpu_vm *vm,
> >>>>> +                   struct amdgpu_vm_pt_cursor *cursor)
> >>>>>    {
> >>>>> -    struct amdgpu_vm_pt_cursor cursor;
> >>>>> +    struct amdgpu_vm_pt *entry = cursor->entry;
> >>>>> +    struct amdgpu_bo_param bp;
> >>>>>        struct amdgpu_bo *pt;
> >>>>> -    uint64_t eaddr;
> >>>>>        int r;
> >>>>>
> >>>>> -    /* validate the parameters */
> >>>>> -    if (saddr & AMDGPU_GPU_PAGE_MASK || size &
> >>>>> AMDGPU_GPU_PAGE_MASK)
> >>>>> -        return -EINVAL;
> >>>>> +    if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
> >>>>> +        unsigned num_entries;
> >>>>>
> >>>>> -    eaddr = saddr + size - 1;
> >>>>> -
> >>>>> -    saddr /= AMDGPU_GPU_PAGE_SIZE;
> >>>>> -    eaddr /= AMDGPU_GPU_PAGE_SIZE;
> >>>>> -
> >>>>> -    if (eaddr >= adev->vm_manager.max_pfn) {
> >>>>> -        dev_err(adev->dev, "va above limit (0x%08llX >=
> >>>>> 0x%08llX)\n",
> >>>>> -            eaddr, adev->vm_manager.max_pfn);
> >>>>> -        return -EINVAL;
> >>>>> +        num_entries = amdgpu_vm_num_entries(adev, cursor-
> >>>>>> level);
> >>>>> +        entry->entries = kvmalloc_array(num_entries,
> >>>>> +                        sizeof(*entry->entries),
> >>>>> +                        GFP_KERNEL | __GFP_ZERO);
> >>>>> +        if (!entry->entries)
> >>>>> +            return -ENOMEM;
> >>>>>        }
> >>>>>
> >>>>> -    for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {
> >>>>> -        struct amdgpu_vm_pt *entry = cursor.entry;
> >>>>> -        struct amdgpu_bo_param bp;
> >>>>> -
> >>>>> -        if (cursor.level < AMDGPU_VM_PTB) {
> >>>>> -            unsigned num_entries;
> >>>>> -
> >>>>> -            num_entries = amdgpu_vm_num_entries(adev,
> >>>>> cursor.level);
> >>>>> -            entry->entries = kvmalloc_array(num_entries,
> >>>>> -                            sizeof(*entry-
> >>>>>> entries),
> >>>>> -                            GFP_KERNEL |
> >>>>> -                            __GFP_ZERO);
> >>>>> -            if (!entry->entries)
> >>>>> -                return -ENOMEM;
> >>>>> -        }
> >>>>> -
> >>>>> -
> >>>>> -        if (entry->base.bo)
> >>>>> -            continue;
> >>>>> -
> >>>>> -        amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
> >>>>> -
> >>>>> -        r = amdgpu_bo_create(adev, &bp, &pt);
> >>>>> -        if (r)
> >>>>> -            return r;
> >>>>> -
> >>>>> -        if (vm->use_cpu_for_update) {
> >>>>> -            r = amdgpu_bo_kmap(pt, NULL);
> >>>>> -            if (r)
> >>>>> -                goto error_free_pt;
> >>>>> -        }
> >>>>> +    if (entry->base.bo)
> >>>>> +        return 0;
> >>>>>
> >>>>> -        /* Keep a reference to the root directory to avoid
> >>>>> -        * freeing them up in the wrong order.
> >>>>> -        */
> >>>>> -        pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
> >>>>> +    amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
> >>>>>
> >>>>> -        amdgpu_vm_bo_base_init(&entry->base, vm, pt);
> >>>>> +    r = amdgpu_bo_create(adev, &bp, &pt);
> >>>>> +    if (r)
> >>>>> +        return r;
> >>>>>
> >>>>> -        r = amdgpu_vm_clear_bo(adev, vm, pt);
> >>>>> +    if (vm->use_cpu_for_update) {
> >>>>> +        r = amdgpu_bo_kmap(pt, NULL);
> >>>>>            if (r)
> >>>>>                goto error_free_pt;
> >>>>>        }
> >>>>>
> >>>>> +    /* Keep a reference to the root directory to avoid
> >>>>> +     * freeing them up in the wrong order.
> >>>>> +     */
> >>>>> +    pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
> >>>>> +    amdgpu_vm_bo_base_init(&entry->base, vm, pt);
> >>>>> +
> >>>>> +    r = amdgpu_vm_clear_bo(adev, vm, pt);
> >>>>> +    if (r)
> >>>>> +        goto error_free_pt;
> >>>>> +
> >>>>>        return 0;
> >>>>>
> >>>>>    error_free_pt:
> >>>>> @@ -1627,6 +1563,7 @@ static int amdgpu_vm_update_ptes(struct
> >>>>> amdgpu_pte_update_params *params,
> >>>>>        struct amdgpu_vm_pt_cursor cursor;
> >>>>>        uint64_t frag_start = start, frag_end;
> >>>>>        unsigned int frag;
> >>>>> +    int r;
> >>>>>
> >>>>>        /* figure out the initial fragment */
> >>>>>        amdgpu_vm_fragment(params, frag_start, end, flags, &frag,
> >>>>> &frag_end); @@ -1634,12 +1571,15 @@ static int
> >>>>> amdgpu_vm_update_ptes(struct amdgpu_pte_update_params
> *params,
> >>>>>        /* walk over the address space and update the PTs */
> >>>>>        amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
> >>>>>        while (cursor.pfn < end) {
> >>>>> -        struct amdgpu_bo *pt = cursor.entry->base.bo;
> >>>>>            unsigned shift, parent_shift, mask;
> >>>>>            uint64_t incr, entry_end, pe_start;
> >>>>> +        struct amdgpu_bo *pt;
> >>>>>
> >>>>> -        if (!pt)
> >>>>> -            return -ENOENT;
> >>>>> +        r = amdgpu_vm_alloc_pts(params->adev, params->vm,
> >>>>> &cursor);
> >>>>> +        if (r)
> >>>>> +            return r;
> >>>>> +
> >>>>> +        pt = cursor.entry->base.bo;
> >>>>>
> >>>>>            /* The root level can't be a huge page */
> >>>>>            if (cursor.level == adev->vm_manager.root_level) { diff
> >>>>> --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >>>>> index 81ff8177f092..116605c038d2 100644
> >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >>>>> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm
> *vm);
> >>> int
> >>>>> amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct
> >>>>> amdgpu_vm *vm,
> >>>>>                      int (*callback)(void *p, struct amdgpu_bo
> >>>>> *bo),
> >>>>>                      void *param); -int amdgpu_vm_alloc_pts(struct
> >>>>> amdgpu_device *adev,
> >>>>> -            struct amdgpu_vm *vm,
> >>>>> -            uint64_t saddr, uint64_t size);
> >>>>>    int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job
> >>>>> *job, bool need_pipe_sync);  int
> >>>>> amdgpu_vm_update_directories(struct
> >>>>> amdgpu_device *adev,
> >>>>>                     struct amdgpu_vm *vm);
> >>>>> --
> >>>>> 2.17.1
> >>>>>
> >>>>> _______________________________________________
> >>>>> amd-gfx mailing list
> >>>>> amd-gfx@lists.freedesktop.org
> >>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> >>> _______________________________________________
> >>> amd-gfx mailing list
> >>> amd-gfx@lists.freedesktop.org
> >>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> >> _______________________________________________
> >> amd-gfx mailing list
> >> amd-gfx@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> >

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
       [not found]                                 ` <CY4PR12MB16224445596C88A29C3AE63D85490-rpdhrqHFk05g4+I42y6h/AdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2019-03-12 14:49                                   ` Koenig, Christian
       [not found]                                     ` <5b125e82-e106-bc60-b8a2-37161aac4260-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: Koenig, Christian @ 2019-03-12 14:49 UTC (permalink / raw)
  To: Russell, Kent, Kuehling, Felix, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Yeah, the problem is I do have the libhsakmt installed.

Going to give it a try to specify the directory directly.

Christian.

Am 12.03.19 um 15:47 schrieb Russell, Kent:
> The README.txt file inside the tests/kfdtest folder has instructions on how to do it if you don't have the libhsakmt package installed on your system:
>
> export LIBHSAKMT_PATH=/*your local libhsakmt folder*/
> With that, the headers and libraries are searched under
> LIBHSAKMT_PATH/include and LIBHSAKMT_PATH/lib respectively.
>
> So if you try export LIBHSAKMT_PATH as the root ROCT folder (the one containing include, src, tests, etc), then that should cover it.
>
>   Kent
>
>
>> -----Original Message-----
>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>> Sent: Tuesday, March 12, 2019 9:13 AM
>> To: Russell, Kent <Kent.Russell@amd.com>; Kuehling, Felix
>> <Felix.Kuehling@amd.com>; Koenig, Christian
>> <Christian.Koenig@amd.com>; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>
>> Hi guys,
>>
>> so found a few minutes today to compile kfdtest.
>>
>> Problem is that during the compile I get a lot of this:
>>> CMakeFiles/kfdtest.dir/src/BaseQueue.cpp.o: In Funktion
>>> »BaseQueue::Create(unsigned int, unsigned int, unsigned long*)«:
>>> /usr/src/ROCT-Thunk-Interface/tests/kfdtest/src/BaseQueue.cpp:57:
>>> Warnung: undefinierter Verweis auf »hsaKmtCreateQueue«
>> Any idea?
>>
>> Christian.
>>
>> Am 11.03.19 um 17:55 schrieb Christian König:
>>> Hi guys,
>>>
>>> well it's most likely some missing handling in the KFD, so I'm rather
>>> reluctant to revert the change immediately.
>>>
>>> Problem is that I don't have time right now to look into it
>>> immediately. So Kent can you continue to take a look?
>>>
>>> Sounds like it's crashing immediately, so it should be something obvious.
>>>
>>> Christian.
>>>
>>> Am 11.03.19 um 10:49 schrieb Russell, Kent:
>>>>   From what I've been able to dig through, the VM Fault seems to occur
>>>> right after a doorbell mmap, but that's as far as I got. I can try to
>>>> revert it in today's merge and see how things go.
>>>>
>>>>    Kent
>>>>
>>>>> -----Original Message-----
>>>>> From: Kuehling, Felix
>>>>> Sent: Friday, March 08, 2019 11:16 PM
>>>>> To: Koenig, Christian <Christian.Koenig@amd.com>; Russell, Kent
>>>>> <Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org
>>>>> Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>>>>
>>>>> My concerns were related to eviction fence handling. It would
>>>>> manifest by unnecessary eviction callbacks into KFD that aren't
>>>>> caused by real evictions. I addressed that with a previous patch
>>>>> series that removed the need to remove eviction fences and add them
>>>>> back around page table updates in amdgpu_amdkfd_gpuvm.c.
>>>>>
>>>>> I don't know what's going on here. I can probably take a look on
>>>>> Monday. I haven't considered what changed with respect to PD
>>>>> updates.
>>>>>
>>>>> Kent, can we temporarily revert the offending change in
>>>>> amd-kfd-staging just to unblock the merge?
>>>>>
>>>>> Christian, I think KFD is currently broken on amd-staging-drm-next.
>>>>> If we're
>>>>> serious about supporting KFD upstream, you may also want to consider
>>>>> reverting your change there for now. Also consider building the
>>>>> Thunk and kfdtest so you can do quick smoke tests locally whenever
>>>>> you make amdgpu_vm changes that can affect KFD.
>>>>> https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface
>>>>>
>>>>> Regards,
>>>>>     Felix
>>>>>
>>>>> -----Original Message-----
>>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
>>>>> Christian König
>>>>> Sent: Friday, March 08, 2019 9:14 AM
>>>>> To: Russell, Kent <Kent.Russell@amd.com>;
>>>>> amd-gfx@lists.freedesktop.org
>>>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>>>>
>>>>> My best guess is that we forget somewhere to update the PDs. What
>>>>> hardware is that on?
>>>>>
>>>>> Felix already mentioned that this could be problematic for the KFD.
>>>>>
>>>>> Maybe he has an idea,
>>>>> Christian.
>>>>>
>>>>> Am 08.03.19 um 15:04 schrieb Russell, Kent:
>>>>>> Hi Christian,
>>>>>>
>>>>>> This patch ended up causing a VM Fault in KFDTest. Reverting just
>>>>>> this
>>>>> patch addressed the issue:
>>>>>> [   82.703503] amdgpu 0000:0c:00.0: GPU fault detected: 146
>>>>>> 0x0000480c for
>>>>> process  pid 0 thread  pid 0
>>>>>> [   82.703512] amdgpu 0000:0c:00.0:
>>>>> VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x00001000
>>>>>> [   82.703516] amdgpu 0000:0c:00.0:
>>>>> VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C
>>>>>> [   82.703522] amdgpu 0000:0c:00.0: VM fault (0x0c, vmid 8, pasid
>>>>>> 32769) at
>>>>> page 4096, read from 'TC0' (0x54433000) (72)
>>>>>> [   82.703585] Evicting PASID 32769 queues
>>>>>>
>>>>>> I am looking into it, but if you have any insight that would be
>>>>>> great in
>>>>> helping to resolve it quickly.
>>>>>>     Kent
>>>>>>> -----Original Message-----
>>>>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf
>> Of
>>>>>>> Christian König
>>>>>>> Sent: Tuesday, February 26, 2019 7:47 AM
>>>>>>> To: amd-gfx@lists.freedesktop.org
>>>>>>> Subject: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>>>>>>
>>>>>>> Let's start to allocate VM PDs/PTs on demand instead of
>>>>>>> pre-allocating them during mapping.
>>>>>>>
>>>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>>>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>>>>> ---
>>>>>>>     .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10 +-
>>>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --
>>>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --
>>>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136
>>>>>>> +++++------------
>>>>> -
>>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -
>>>>>>>     5 files changed, 39 insertions(+), 129 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>>> index 31e3953dcb6e..088e9b6b765b 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>>> @@ -410,15 +410,7 @@ static int add_bo_to_vm(struct
>> amdgpu_device
>>>>>>> *adev, struct kgd_mem *mem,
>>>>>>>         if (p_bo_va_entry)
>>>>>>>             *p_bo_va_entry = bo_va_entry;
>>>>>>>
>>>>>>> -    /* Allocate new page tables if needed and validate
>>>>>>> -     * them.
>>>>>>> -     */
>>>>>>> -    ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
>>>>>>> -    if (ret) {
>>>>>>> -        pr_err("Failed to allocate pts, err=%d\n", ret);
>>>>>>> -        goto err_alloc_pts;
>>>>>>> -    }
>>>>>>> -
>>>>>>> +    /* Allocate validate page tables if needed */
>>>>>>>         ret = vm_validate_pt_pd_bos(vm);
>>>>>>>         if (ret) {
>>>>>>>             pr_err("validate_pt_pd_bos() failed\n"); diff --git
>>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>>> index 7e22be7ca68a..54dd02a898b9 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>>> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct
>> amdgpu_device
>>>>>>> *adev, struct amdgpu_vm *vm,
>>>>>>>             return -ENOMEM;
>>>>>>>         }
>>>>>>>
>>>>>>> -    r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
>>>>>>> -                size);
>>>>>>> -    if (r) {
>>>>>>> -        DRM_ERROR("failed to allocate pts for static CSA,
>>>>>>> err=%d\n", r);
>>>>>>> -        amdgpu_vm_bo_rmv(adev, *bo_va);
>>>>>>> -        ttm_eu_backoff_reservation(&ticket, &list);
>>>>>>> -        return r;
>>>>>>> -    }
>>>>>>> -
>>>>>>>         r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
>>>>>>>                      AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
>>>>>>>                      AMDGPU_PTE_EXECUTABLE); diff --git
>>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>>> index 555285e329ed..fcaaac30e84b 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>>> @@ -625,11 +625,6 @@ int amdgpu_gem_va_ioctl(struct drm_device
>>>>> *dev,
>>>>>>> void *data,
>>>>>>>
>>>>>>>         switch (args->operation) {
>>>>>>>         case AMDGPU_VA_OP_MAP:
>>>>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
>>>>>>>> va_address,
>>>>>>> -                    args->map_size);
>>>>>>> -        if (r)
>>>>>>> -            goto error_backoff;
>>>>>>> -
>>>>>>>             va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
>>>>>>>             r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
>>>>>>>                          args->offset_in_bo, args->map_size, @@ -
>>>>>>> 645,11 +640,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>>>> void
>>>>>>> *data,
>>>>>>>                             args->map_size);
>>>>>>>             break;
>>>>>>>         case AMDGPU_VA_OP_REPLACE:
>>>>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
>>>>>>>> va_address,
>>>>>>> -                    args->map_size);
>>>>>>> -        if (r)
>>>>>>> -            goto error_backoff;
>>>>>>> -
>>>>>>>             va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
>>>>>>>             r = amdgpu_vm_bo_replace_map(adev, bo_va, args-
>>>>>>>> va_address,
>>>>>>>                              args->offset_in_bo, args-
>>>>>>>> map_size, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>> index 362436f4e856..dfad543fc000 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct
>>>>>>> amdgpu_device *adev,
>>>>>>>         }
>>>>>>>     }
>>>>>>>
>>>>>>> -/**
>>>>>>> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
>>>>>>> - *
>>>>>>> - * @adev: amdgpu_device pointer
>>>>>>> - * @vm: amdgpu_vm structure
>>>>>>> - * @start: start addr of the walk
>>>>>>> - * @cursor: state to initialize
>>>>>>> - *
>>>>>>> - * Start a walk and go directly to the leaf node.
>>>>>>> - */
>>>>>>> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,
>>>>>>> -                    struct amdgpu_vm *vm, uint64_t start,
>>>>>>> -                    struct amdgpu_vm_pt_cursor *cursor) -{
>>>>>>> -    amdgpu_vm_pt_start(adev, vm, start, cursor);
>>>>>>> -    while (amdgpu_vm_pt_descendant(adev, cursor)); -}
>>>>>>> -
>>>>>>> -/**
>>>>>>> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
>>>>>>> - *
>>>>>>> - * @adev: amdgpu_device pointer
>>>>>>> - * @cursor: current state
>>>>>>> - *
>>>>>>> - * Walk the PD/PT tree to the next leaf node.
>>>>>>> - */
>>>>>>> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
>>>>>>> -                   struct amdgpu_vm_pt_cursor *cursor) -{
>>>>>>> -    amdgpu_vm_pt_next(adev, cursor);
>>>>>>> -    if (cursor->pfn != ~0ll)
>>>>>>> -        while (amdgpu_vm_pt_descendant(adev, cursor)); -}
>>>>>>> -
>>>>>>> -/**
>>>>>>> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the
>>>>>>> hierarchy
>>>>>>> - */
>>>>>>> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)
>>>>>>>      \
>>>>>>> -    for (amdgpu_vm_pt_first_leaf((adev), (vm), (start),
>>>>>>> &(cursor));
>>>>>>>          \
>>>>>>> -         (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev),
>>>>>>> &(cursor)))
>>>>>>> -
>>>>>>>     /**
>>>>>>>      * amdgpu_vm_pt_first_dfs - start a deep first search
>>>>>>>      *
>>>>>>> @@ -915,74 +874,51 @@ static void amdgpu_vm_bo_param(struct
>>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>>>      * Returns:
>>>>>>>      * 0 on success, errno otherwise.
>>>>>>>      */
>>>>>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>>>>> -            struct amdgpu_vm *vm,
>>>>>>> -            uint64_t saddr, uint64_t size)
>>>>>>> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>>>>> +                   struct amdgpu_vm *vm,
>>>>>>> +                   struct amdgpu_vm_pt_cursor *cursor)
>>>>>>>     {
>>>>>>> -    struct amdgpu_vm_pt_cursor cursor;
>>>>>>> +    struct amdgpu_vm_pt *entry = cursor->entry;
>>>>>>> +    struct amdgpu_bo_param bp;
>>>>>>>         struct amdgpu_bo *pt;
>>>>>>> -    uint64_t eaddr;
>>>>>>>         int r;
>>>>>>>
>>>>>>> -    /* validate the parameters */
>>>>>>> -    if (saddr & AMDGPU_GPU_PAGE_MASK || size &
>>>>>>> AMDGPU_GPU_PAGE_MASK)
>>>>>>> -        return -EINVAL;
>>>>>>> +    if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
>>>>>>> +        unsigned num_entries;
>>>>>>>
>>>>>>> -    eaddr = saddr + size - 1;
>>>>>>> -
>>>>>>> -    saddr /= AMDGPU_GPU_PAGE_SIZE;
>>>>>>> -    eaddr /= AMDGPU_GPU_PAGE_SIZE;
>>>>>>> -
>>>>>>> -    if (eaddr >= adev->vm_manager.max_pfn) {
>>>>>>> -        dev_err(adev->dev, "va above limit (0x%08llX >=
>>>>>>> 0x%08llX)\n",
>>>>>>> -            eaddr, adev->vm_manager.max_pfn);
>>>>>>> -        return -EINVAL;
>>>>>>> +        num_entries = amdgpu_vm_num_entries(adev, cursor-
>>>>>>>> level);
>>>>>>> +        entry->entries = kvmalloc_array(num_entries,
>>>>>>> +                        sizeof(*entry->entries),
>>>>>>> +                        GFP_KERNEL | __GFP_ZERO);
>>>>>>> +        if (!entry->entries)
>>>>>>> +            return -ENOMEM;
>>>>>>>         }
>>>>>>>
>>>>>>> -    for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {
>>>>>>> -        struct amdgpu_vm_pt *entry = cursor.entry;
>>>>>>> -        struct amdgpu_bo_param bp;
>>>>>>> -
>>>>>>> -        if (cursor.level < AMDGPU_VM_PTB) {
>>>>>>> -            unsigned num_entries;
>>>>>>> -
>>>>>>> -            num_entries = amdgpu_vm_num_entries(adev,
>>>>>>> cursor.level);
>>>>>>> -            entry->entries = kvmalloc_array(num_entries,
>>>>>>> -                            sizeof(*entry-
>>>>>>>> entries),
>>>>>>> -                            GFP_KERNEL |
>>>>>>> -                            __GFP_ZERO);
>>>>>>> -            if (!entry->entries)
>>>>>>> -                return -ENOMEM;
>>>>>>> -        }
>>>>>>> -
>>>>>>> -
>>>>>>> -        if (entry->base.bo)
>>>>>>> -            continue;
>>>>>>> -
>>>>>>> -        amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
>>>>>>> -
>>>>>>> -        r = amdgpu_bo_create(adev, &bp, &pt);
>>>>>>> -        if (r)
>>>>>>> -            return r;
>>>>>>> -
>>>>>>> -        if (vm->use_cpu_for_update) {
>>>>>>> -            r = amdgpu_bo_kmap(pt, NULL);
>>>>>>> -            if (r)
>>>>>>> -                goto error_free_pt;
>>>>>>> -        }
>>>>>>> +    if (entry->base.bo)
>>>>>>> +        return 0;
>>>>>>>
>>>>>>> -        /* Keep a reference to the root directory to avoid
>>>>>>> -        * freeing them up in the wrong order.
>>>>>>> -        */
>>>>>>> -        pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
>>>>>>> +    amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
>>>>>>>
>>>>>>> -        amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>>>>>>> +    r = amdgpu_bo_create(adev, &bp, &pt);
>>>>>>> +    if (r)
>>>>>>> +        return r;
>>>>>>>
>>>>>>> -        r = amdgpu_vm_clear_bo(adev, vm, pt);
>>>>>>> +    if (vm->use_cpu_for_update) {
>>>>>>> +        r = amdgpu_bo_kmap(pt, NULL);
>>>>>>>             if (r)
>>>>>>>                 goto error_free_pt;
>>>>>>>         }
>>>>>>>
>>>>>>> +    /* Keep a reference to the root directory to avoid
>>>>>>> +     * freeing them up in the wrong order.
>>>>>>> +     */
>>>>>>> +    pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
>>>>>>> +    amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>>>>>>> +
>>>>>>> +    r = amdgpu_vm_clear_bo(adev, vm, pt);
>>>>>>> +    if (r)
>>>>>>> +        goto error_free_pt;
>>>>>>> +
>>>>>>>         return 0;
>>>>>>>
>>>>>>>     error_free_pt:
>>>>>>> @@ -1627,6 +1563,7 @@ static int amdgpu_vm_update_ptes(struct
>>>>>>> amdgpu_pte_update_params *params,
>>>>>>>         struct amdgpu_vm_pt_cursor cursor;
>>>>>>>         uint64_t frag_start = start, frag_end;
>>>>>>>         unsigned int frag;
>>>>>>> +    int r;
>>>>>>>
>>>>>>>         /* figure out the initial fragment */
>>>>>>>         amdgpu_vm_fragment(params, frag_start, end, flags, &frag,
>>>>>>> &frag_end); @@ -1634,12 +1571,15 @@ static int
>>>>>>> amdgpu_vm_update_ptes(struct amdgpu_pte_update_params
>> *params,
>>>>>>>         /* walk over the address space and update the PTs */
>>>>>>>         amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
>>>>>>>         while (cursor.pfn < end) {
>>>>>>> -        struct amdgpu_bo *pt = cursor.entry->base.bo;
>>>>>>>             unsigned shift, parent_shift, mask;
>>>>>>>             uint64_t incr, entry_end, pe_start;
>>>>>>> +        struct amdgpu_bo *pt;
>>>>>>>
>>>>>>> -        if (!pt)
>>>>>>> -            return -ENOENT;
>>>>>>> +        r = amdgpu_vm_alloc_pts(params->adev, params->vm,
>>>>>>> &cursor);
>>>>>>> +        if (r)
>>>>>>> +            return r;
>>>>>>> +
>>>>>>> +        pt = cursor.entry->base.bo;
>>>>>>>
>>>>>>>             /* The root level can't be a huge page */
>>>>>>>             if (cursor.level == adev->vm_manager.root_level) { diff
>>>>>>> --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>>> index 81ff8177f092..116605c038d2 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>>> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm
>> *vm);
>>>>> int
>>>>>>> amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct
>>>>>>> amdgpu_vm *vm,
>>>>>>>                       int (*callback)(void *p, struct amdgpu_bo
>>>>>>> *bo),
>>>>>>>                       void *param); -int amdgpu_vm_alloc_pts(struct
>>>>>>> amdgpu_device *adev,
>>>>>>> -            struct amdgpu_vm *vm,
>>>>>>> -            uint64_t saddr, uint64_t size);
>>>>>>>     int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job
>>>>>>> *job, bool need_pipe_sync);  int
>>>>>>> amdgpu_vm_update_directories(struct
>>>>>>> amdgpu_device *adev,
>>>>>>>                      struct amdgpu_vm *vm);
>>>>>>> --
>>>>>>> 2.17.1
>>>>>>>
>>>>>>> _______________________________________________
>>>>>>> amd-gfx mailing list
>>>>>>> amd-gfx@lists.freedesktop.org
>>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>>> _______________________________________________
>>>>> amd-gfx mailing list
>>>>> amd-gfx@lists.freedesktop.org
>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>> _______________________________________________
>>>> amd-gfx mailing list
>>>> amd-gfx@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
       [not found]                                     ` <5b125e82-e106-bc60-b8a2-37161aac4260-5C7GfCeVMHo@public.gmane.org>
@ 2019-03-12 15:01                                       ` Russell, Kent
       [not found]                                         ` <CY4PR12MB162238C544AF46475778E18985490-rpdhrqHFk05g4+I42y6h/AdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: Russell, Kent @ 2019-03-12 15:01 UTC (permalink / raw)
  To: Koenig, Christian, Kuehling, Felix,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Oh right, I remember that issue. I had that happen to me once, where my installed libhsakmt didn't match up with the latest source code, so I ended up having to remove the libhsakmt package and point it to the folders instead.

 Kent

> -----Original Message-----
> From: Koenig, Christian
> Sent: Tuesday, March 12, 2019 10:49 AM
> To: Russell, Kent <Kent.Russell@amd.com>; Kuehling, Felix
> <Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
> 
> Yeah, the problem is I do have the libhsakmt installed.
> 
> Going to give it a try to specify the directory directly.
> 
> Christian.
> 
> Am 12.03.19 um 15:47 schrieb Russell, Kent:
> > The README.txt file inside the tests/kfdtest folder has instructions on how
> to do it if you don't have the libhsakmt package installed on your system:
> >
> > export LIBHSAKMT_PATH=/*your local libhsakmt folder*/ With that, the
> > headers and libraries are searched under LIBHSAKMT_PATH/include and
> > LIBHSAKMT_PATH/lib respectively.
> >
> > So if you try export LIBHSAKMT_PATH as the root ROCT folder (the one
> containing include, src, tests, etc), then that should cover it.
> >
> >   Kent
> >
> >
> >> -----Original Message-----
> >> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> >> Sent: Tuesday, March 12, 2019 9:13 AM
> >> To: Russell, Kent <Kent.Russell@amd.com>; Kuehling, Felix
> >> <Felix.Kuehling@amd.com>; Koenig, Christian
> >> <Christian.Koenig@amd.com>; amd-gfx@lists.freedesktop.org
> >> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
> >>
> >> Hi guys,
> >>
> >> so found a few minutes today to compile kfdtest.
> >>
> >> Problem is that during the compile I get a lot of this:
> >>> CMakeFiles/kfdtest.dir/src/BaseQueue.cpp.o: In Funktion
> >>> »BaseQueue::Create(unsigned int, unsigned int, unsigned long*)«:
> >>> /usr/src/ROCT-Thunk-Interface/tests/kfdtest/src/BaseQueue.cpp:57:
> >>> Warnung: undefinierter Verweis auf »hsaKmtCreateQueue«
> >> Any idea?
> >>
> >> Christian.
> >>
> >> Am 11.03.19 um 17:55 schrieb Christian König:
> >>> Hi guys,
> >>>
> >>> well it's most likely some missing handling in the KFD, so I'm
> >>> rather reluctant to revert the change immediately.
> >>>
> >>> Problem is that I don't have time right now to look into it
> >>> immediately. So Kent can you continue to take a look?
> >>>
> >>> Sounds like it's crashing immediately, so it should be something obvious.
> >>>
> >>> Christian.
> >>>
> >>> Am 11.03.19 um 10:49 schrieb Russell, Kent:
> >>>>   From what I've been able to dig through, the VM Fault seems to
> >>>> occur right after a doorbell mmap, but that's as far as I got. I
> >>>> can try to revert it in today's merge and see how things go.
> >>>>
> >>>>    Kent
> >>>>
> >>>>> -----Original Message-----
> >>>>> From: Kuehling, Felix
> >>>>> Sent: Friday, March 08, 2019 11:16 PM
> >>>>> To: Koenig, Christian <Christian.Koenig@amd.com>; Russell, Kent
> >>>>> <Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org
> >>>>> Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
> demand
> >>>>>
> >>>>> My concerns were related to eviction fence handling. It would
> >>>>> manifest by unnecessary eviction callbacks into KFD that aren't
> >>>>> caused by real evictions. I addressed that with a previous patch
> >>>>> series that removed the need to remove eviction fences and add
> >>>>> them back around page table updates in amdgpu_amdkfd_gpuvm.c.
> >>>>>
> >>>>> I don't know what's going on here. I can probably take a look on
> >>>>> Monday. I haven't considered what changed with respect to PD
> >>>>> updates.
> >>>>>
> >>>>> Kent, can we temporarily revert the offending change in
> >>>>> amd-kfd-staging just to unblock the merge?
> >>>>>
> >>>>> Christian, I think KFD is currently broken on amd-staging-drm-next.
> >>>>> If we're
> >>>>> serious about supporting KFD upstream, you may also want to
> >>>>> consider reverting your change there for now. Also consider
> >>>>> building the Thunk and kfdtest so you can do quick smoke tests
> >>>>> locally whenever you make amdgpu_vm changes that can affect KFD.
> >>>>> https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface
> >>>>>
> >>>>> Regards,
> >>>>>     Felix
> >>>>>
> >>>>> -----Original Message-----
> >>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf
> Of
> >>>>> Christian König
> >>>>> Sent: Friday, March 08, 2019 9:14 AM
> >>>>> To: Russell, Kent <Kent.Russell@amd.com>;
> >>>>> amd-gfx@lists.freedesktop.org
> >>>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
> demand
> >>>>>
> >>>>> My best guess is that we forget somewhere to update the PDs. What
> >>>>> hardware is that on?
> >>>>>
> >>>>> Felix already mentioned that this could be problematic for the KFD.
> >>>>>
> >>>>> Maybe he has an idea,
> >>>>> Christian.
> >>>>>
> >>>>> Am 08.03.19 um 15:04 schrieb Russell, Kent:
> >>>>>> Hi Christian,
> >>>>>>
> >>>>>> This patch ended up causing a VM Fault in KFDTest. Reverting just
> >>>>>> this
> >>>>> patch addressed the issue:
> >>>>>> [   82.703503] amdgpu 0000:0c:00.0: GPU fault detected: 146
> >>>>>> 0x0000480c for
> >>>>> process  pid 0 thread  pid 0
> >>>>>> [   82.703512] amdgpu 0000:0c:00.0:
> >>>>> VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x00001000
> >>>>>> [   82.703516] amdgpu 0000:0c:00.0:
> >>>>> VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C
> >>>>>> [   82.703522] amdgpu 0000:0c:00.0: VM fault (0x0c, vmid 8, pasid
> >>>>>> 32769) at
> >>>>> page 4096, read from 'TC0' (0x54433000) (72)
> >>>>>> [   82.703585] Evicting PASID 32769 queues
> >>>>>>
> >>>>>> I am looking into it, but if you have any insight that would be
> >>>>>> great in
> >>>>> helping to resolve it quickly.
> >>>>>>     Kent
> >>>>>>> -----Original Message-----
> >>>>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On
> Behalf
> >> Of
> >>>>>>> Christian König
> >>>>>>> Sent: Tuesday, February 26, 2019 7:47 AM
> >>>>>>> To: amd-gfx@lists.freedesktop.org
> >>>>>>> Subject: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
> demand
> >>>>>>>
> >>>>>>> Let's start to allocate VM PDs/PTs on demand instead of
> >>>>>>> pre-allocating them during mapping.
> >>>>>>>
> >>>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
> >>>>>>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
> >>>>>>> ---
> >>>>>>>     .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10 +-
> >>>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --
> >>>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --
> >>>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136
> >>>>>>> +++++------------
> >>>>> -
> >>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -
> >>>>>>>     5 files changed, 39 insertions(+), 129 deletions(-)
> >>>>>>>
> >>>>>>> diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >>>>>>> index 31e3953dcb6e..088e9b6b765b 100644
> >>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >>>>>>> @@ -410,15 +410,7 @@ static int add_bo_to_vm(struct
> >> amdgpu_device
> >>>>>>> *adev, struct kgd_mem *mem,
> >>>>>>>         if (p_bo_va_entry)
> >>>>>>>             *p_bo_va_entry = bo_va_entry;
> >>>>>>>
> >>>>>>> -    /* Allocate new page tables if needed and validate
> >>>>>>> -     * them.
> >>>>>>> -     */
> >>>>>>> -    ret = amdgpu_vm_alloc_pts(adev, vm, va,
> >>>>>>> amdgpu_bo_size(bo));
> >>>>>>> -    if (ret) {
> >>>>>>> -        pr_err("Failed to allocate pts, err=%d\n", ret);
> >>>>>>> -        goto err_alloc_pts;
> >>>>>>> -    }
> >>>>>>> -
> >>>>>>> +    /* Allocate validate page tables if needed */
> >>>>>>>         ret = vm_validate_pt_pd_bos(vm);
> >>>>>>>         if (ret) {
> >>>>>>>             pr_err("validate_pt_pd_bos() failed\n"); diff --git
> >>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >>>>>>> index 7e22be7ca68a..54dd02a898b9 100644
> >>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >>>>>>> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct
> >> amdgpu_device
> >>>>>>> *adev, struct amdgpu_vm *vm,
> >>>>>>>             return -ENOMEM;
> >>>>>>>         }
> >>>>>>>
> >>>>>>> -    r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
> >>>>>>> -                size);
> >>>>>>> -    if (r) {
> >>>>>>> -        DRM_ERROR("failed to allocate pts for static CSA,
> >>>>>>> err=%d\n", r);
> >>>>>>> -        amdgpu_vm_bo_rmv(adev, *bo_va);
> >>>>>>> -        ttm_eu_backoff_reservation(&ticket, &list);
> >>>>>>> -        return r;
> >>>>>>> -    }
> >>>>>>> -
> >>>>>>>         r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
> >>>>>>>                      AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE
> >>>>>>> |
> >>>>>>>                      AMDGPU_PTE_EXECUTABLE); diff --git
> >>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >>>>>>> index 555285e329ed..fcaaac30e84b 100644
> >>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >>>>>>> @@ -625,11 +625,6 @@ int amdgpu_gem_va_ioctl(struct
> drm_device
> >>>>> *dev,
> >>>>>>> void *data,
> >>>>>>>
> >>>>>>>         switch (args->operation) {
> >>>>>>>         case AMDGPU_VA_OP_MAP:
> >>>>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
> >>>>>>>> va_address,
> >>>>>>> -                    args->map_size);
> >>>>>>> -        if (r)
> >>>>>>> -            goto error_backoff;
> >>>>>>> -
> >>>>>>>             va_flags = amdgpu_gmc_get_pte_flags(adev,
> >>>>>>> args->flags);
> >>>>>>>             r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
> >>>>>>>                          args->offset_in_bo, args->map_size, @@
> >>>>>>> -
> >>>>>>> 645,11 +640,6 @@ int amdgpu_gem_va_ioctl(struct drm_device
> *dev,
> >>>>> void
> >>>>>>> *data,
> >>>>>>>                             args->map_size);
> >>>>>>>             break;
> >>>>>>>         case AMDGPU_VA_OP_REPLACE:
> >>>>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
> >>>>>>>> va_address,
> >>>>>>> -                    args->map_size);
> >>>>>>> -        if (r)
> >>>>>>> -            goto error_backoff;
> >>>>>>> -
> >>>>>>>             va_flags = amdgpu_gmc_get_pte_flags(adev,
> >>>>>>> args->flags);
> >>>>>>>             r = amdgpu_vm_bo_replace_map(adev, bo_va, args-
> >>>>>>>> va_address,
> >>>>>>>                              args->offset_in_bo, args-
> >>>>>>>> map_size, diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>>>>>> index 362436f4e856..dfad543fc000 100644
> >>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>>>>>> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct
> >>>>>>> amdgpu_device *adev,
> >>>>>>>         }
> >>>>>>>     }
> >>>>>>>
> >>>>>>> -/**
> >>>>>>> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
> >>>>>>> - *
> >>>>>>> - * @adev: amdgpu_device pointer
> >>>>>>> - * @vm: amdgpu_vm structure
> >>>>>>> - * @start: start addr of the walk
> >>>>>>> - * @cursor: state to initialize
> >>>>>>> - *
> >>>>>>> - * Start a walk and go directly to the leaf node.
> >>>>>>> - */
> >>>>>>> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device
> *adev,
> >>>>>>> -                    struct amdgpu_vm *vm, uint64_t start,
> >>>>>>> -                    struct amdgpu_vm_pt_cursor *cursor) -{
> >>>>>>> -    amdgpu_vm_pt_start(adev, vm, start, cursor);
> >>>>>>> -    while (amdgpu_vm_pt_descendant(adev, cursor)); -}
> >>>>>>> -
> >>>>>>> -/**
> >>>>>>> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
> >>>>>>> - *
> >>>>>>> - * @adev: amdgpu_device pointer
> >>>>>>> - * @cursor: current state
> >>>>>>> - *
> >>>>>>> - * Walk the PD/PT tree to the next leaf node.
> >>>>>>> - */
> >>>>>>> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device
> *adev,
> >>>>>>> -                   struct amdgpu_vm_pt_cursor *cursor) -{
> >>>>>>> -    amdgpu_vm_pt_next(adev, cursor);
> >>>>>>> -    if (cursor->pfn != ~0ll)
> >>>>>>> -        while (amdgpu_vm_pt_descendant(adev, cursor)); -}
> >>>>>>> -
> >>>>>>> -/**
> >>>>>>> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in
> >>>>>>> the hierarchy
> >>>>>>> - */
> >>>>>>> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end,
> >>>>>>> cursor)
> >>>>>>>      \
> >>>>>>> -    for (amdgpu_vm_pt_first_leaf((adev), (vm), (start),
> >>>>>>> &(cursor));
> >>>>>>>          \
> >>>>>>> -         (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev),
> >>>>>>> &(cursor)))
> >>>>>>> -
> >>>>>>>     /**
> >>>>>>>      * amdgpu_vm_pt_first_dfs - start a deep first search
> >>>>>>>      *
> >>>>>>> @@ -915,74 +874,51 @@ static void amdgpu_vm_bo_param(struct
> >>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
> >>>>>>>      * Returns:
> >>>>>>>      * 0 on success, errno otherwise.
> >>>>>>>      */
> >>>>>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> >>>>>>> -            struct amdgpu_vm *vm,
> >>>>>>> -            uint64_t saddr, uint64_t size)
> >>>>>>> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> >>>>>>> +                   struct amdgpu_vm *vm,
> >>>>>>> +                   struct amdgpu_vm_pt_cursor *cursor)
> >>>>>>>     {
> >>>>>>> -    struct amdgpu_vm_pt_cursor cursor;
> >>>>>>> +    struct amdgpu_vm_pt *entry = cursor->entry;
> >>>>>>> +    struct amdgpu_bo_param bp;
> >>>>>>>         struct amdgpu_bo *pt;
> >>>>>>> -    uint64_t eaddr;
> >>>>>>>         int r;
> >>>>>>>
> >>>>>>> -    /* validate the parameters */
> >>>>>>> -    if (saddr & AMDGPU_GPU_PAGE_MASK || size &
> >>>>>>> AMDGPU_GPU_PAGE_MASK)
> >>>>>>> -        return -EINVAL;
> >>>>>>> +    if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
> >>>>>>> +        unsigned num_entries;
> >>>>>>>
> >>>>>>> -    eaddr = saddr + size - 1;
> >>>>>>> -
> >>>>>>> -    saddr /= AMDGPU_GPU_PAGE_SIZE;
> >>>>>>> -    eaddr /= AMDGPU_GPU_PAGE_SIZE;
> >>>>>>> -
> >>>>>>> -    if (eaddr >= adev->vm_manager.max_pfn) {
> >>>>>>> -        dev_err(adev->dev, "va above limit (0x%08llX >=
> >>>>>>> 0x%08llX)\n",
> >>>>>>> -            eaddr, adev->vm_manager.max_pfn);
> >>>>>>> -        return -EINVAL;
> >>>>>>> +        num_entries = amdgpu_vm_num_entries(adev, cursor-
> >>>>>>>> level);
> >>>>>>> +        entry->entries = kvmalloc_array(num_entries,
> >>>>>>> +                        sizeof(*entry->entries),
> >>>>>>> +                        GFP_KERNEL | __GFP_ZERO);
> >>>>>>> +        if (!entry->entries)
> >>>>>>> +            return -ENOMEM;
> >>>>>>>         }
> >>>>>>>
> >>>>>>> -    for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor)
> >>>>>>> {
> >>>>>>> -        struct amdgpu_vm_pt *entry = cursor.entry;
> >>>>>>> -        struct amdgpu_bo_param bp;
> >>>>>>> -
> >>>>>>> -        if (cursor.level < AMDGPU_VM_PTB) {
> >>>>>>> -            unsigned num_entries;
> >>>>>>> -
> >>>>>>> -            num_entries = amdgpu_vm_num_entries(adev,
> >>>>>>> cursor.level);
> >>>>>>> -            entry->entries = kvmalloc_array(num_entries,
> >>>>>>> -                            sizeof(*entry-
> >>>>>>>> entries),
> >>>>>>> -                            GFP_KERNEL |
> >>>>>>> -                            __GFP_ZERO);
> >>>>>>> -            if (!entry->entries)
> >>>>>>> -                return -ENOMEM;
> >>>>>>> -        }
> >>>>>>> -
> >>>>>>> -
> >>>>>>> -        if (entry->base.bo)
> >>>>>>> -            continue;
> >>>>>>> -
> >>>>>>> -        amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
> >>>>>>> -
> >>>>>>> -        r = amdgpu_bo_create(adev, &bp, &pt);
> >>>>>>> -        if (r)
> >>>>>>> -            return r;
> >>>>>>> -
> >>>>>>> -        if (vm->use_cpu_for_update) {
> >>>>>>> -            r = amdgpu_bo_kmap(pt, NULL);
> >>>>>>> -            if (r)
> >>>>>>> -                goto error_free_pt;
> >>>>>>> -        }
> >>>>>>> +    if (entry->base.bo)
> >>>>>>> +        return 0;
> >>>>>>>
> >>>>>>> -        /* Keep a reference to the root directory to avoid
> >>>>>>> -        * freeing them up in the wrong order.
> >>>>>>> -        */
> >>>>>>> -        pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
> >>>>>>> +    amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
> >>>>>>>
> >>>>>>> -        amdgpu_vm_bo_base_init(&entry->base, vm, pt);
> >>>>>>> +    r = amdgpu_bo_create(adev, &bp, &pt);
> >>>>>>> +    if (r)
> >>>>>>> +        return r;
> >>>>>>>
> >>>>>>> -        r = amdgpu_vm_clear_bo(adev, vm, pt);
> >>>>>>> +    if (vm->use_cpu_for_update) {
> >>>>>>> +        r = amdgpu_bo_kmap(pt, NULL);
> >>>>>>>             if (r)
> >>>>>>>                 goto error_free_pt;
> >>>>>>>         }
> >>>>>>>
> >>>>>>> +    /* Keep a reference to the root directory to avoid
> >>>>>>> +     * freeing them up in the wrong order.
> >>>>>>> +     */
> >>>>>>> +    pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
> >>>>>>> +    amdgpu_vm_bo_base_init(&entry->base, vm, pt);
> >>>>>>> +
> >>>>>>> +    r = amdgpu_vm_clear_bo(adev, vm, pt);
> >>>>>>> +    if (r)
> >>>>>>> +        goto error_free_pt;
> >>>>>>> +
> >>>>>>>         return 0;
> >>>>>>>
> >>>>>>>     error_free_pt:
> >>>>>>> @@ -1627,6 +1563,7 @@ static int
> amdgpu_vm_update_ptes(struct
> >>>>>>> amdgpu_pte_update_params *params,
> >>>>>>>         struct amdgpu_vm_pt_cursor cursor;
> >>>>>>>         uint64_t frag_start = start, frag_end;
> >>>>>>>         unsigned int frag;
> >>>>>>> +    int r;
> >>>>>>>
> >>>>>>>         /* figure out the initial fragment */
> >>>>>>>         amdgpu_vm_fragment(params, frag_start, end, flags,
> >>>>>>> &frag, &frag_end); @@ -1634,12 +1571,15 @@ static int
> >>>>>>> amdgpu_vm_update_ptes(struct amdgpu_pte_update_params
> >> *params,
> >>>>>>>         /* walk over the address space and update the PTs */
> >>>>>>>         amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
> >>>>>>>         while (cursor.pfn < end) {
> >>>>>>> -        struct amdgpu_bo *pt = cursor.entry->base.bo;
> >>>>>>>             unsigned shift, parent_shift, mask;
> >>>>>>>             uint64_t incr, entry_end, pe_start;
> >>>>>>> +        struct amdgpu_bo *pt;
> >>>>>>>
> >>>>>>> -        if (!pt)
> >>>>>>> -            return -ENOENT;
> >>>>>>> +        r = amdgpu_vm_alloc_pts(params->adev, params->vm,
> >>>>>>> &cursor);
> >>>>>>> +        if (r)
> >>>>>>> +            return r;
> >>>>>>> +
> >>>>>>> +        pt = cursor.entry->base.bo;
> >>>>>>>
> >>>>>>>             /* The root level can't be a huge page */
> >>>>>>>             if (cursor.level == adev->vm_manager.root_level) {
> >>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >>>>>>> index 81ff8177f092..116605c038d2 100644
> >>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >>>>>>> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm
> >> *vm);
> >>>>> int
> >>>>>>> amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct
> >>>>>>> amdgpu_vm *vm,
> >>>>>>>                       int (*callback)(void *p, struct amdgpu_bo
> >>>>>>> *bo),
> >>>>>>>                       void *param); -int
> >>>>>>> amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> >>>>>>> -            struct amdgpu_vm *vm,
> >>>>>>> -            uint64_t saddr, uint64_t size);
> >>>>>>>     int amdgpu_vm_flush(struct amdgpu_ring *ring, struct
> >>>>>>> amdgpu_job *job, bool need_pipe_sync);  int
> >>>>>>> amdgpu_vm_update_directories(struct
> >>>>>>> amdgpu_device *adev,
> >>>>>>>                      struct amdgpu_vm *vm);
> >>>>>>> --
> >>>>>>> 2.17.1
> >>>>>>>
> >>>>>>> _______________________________________________
> >>>>>>> amd-gfx mailing list
> >>>>>>> amd-gfx@lists.freedesktop.org
> >>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> >>>>> _______________________________________________
> >>>>> amd-gfx mailing list
> >>>>> amd-gfx@lists.freedesktop.org
> >>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> >>>> _______________________________________________
> >>>> amd-gfx mailing list
> >>>> amd-gfx@lists.freedesktop.org
> >>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
       [not found]                                         ` <CY4PR12MB162238C544AF46475778E18985490-rpdhrqHFk05g4+I42y6h/AdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2019-03-12 15:09                                           ` Christian König
       [not found]                                             ` <307878c8-73eb-598f-f40a-7cf285b0b60b-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: Christian König @ 2019-03-12 15:09 UTC (permalink / raw)
  To: Russell, Kent, Koenig, Christian, Kuehling, Felix,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Yeah, same problem here.

I removed libhsakmt package and installed it manually and now it seems 
to work.

Doing some testing now, but at least offhand I can't seem to reproduce 
the VM fault on a Vega10.

Christian.

Am 12.03.19 um 16:01 schrieb Russell, Kent:
> Oh right, I remember that issue. I had that happen to me once, where my installed libhsakmt didn't match up with the latest source code, so I ended up having to remove the libhsakmt package and pointing it to the folders instead.
>
>   Kent
>
>> -----Original Message-----
>> From: Koenig, Christian
>> Sent: Tuesday, March 12, 2019 10:49 AM
>> To: Russell, Kent <Kent.Russell@amd.com>; Kuehling, Felix
>> <Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>
>> Yeah, the problem is I do have the libhsakmt installed.
>>
>> Going to give it a try to specify the directory directly.
>>
>> Christian.
>>
>> Am 12.03.19 um 15:47 schrieb Russell, Kent:
>>> The README.txt file inside the tests/kfdtest folder has instructions on how
>> to do it if you don't have the libhsakmt package installed on your system:
>>> export LIBHSAKMT_PATH=/*your local libhsakmt folder*/ With that, the
>>> headers and libraries are searched under LIBHSAKMT_PATH/include and
>>> LIBHSAKMT_PATH/lib respectively.
>>>
>>> So if you try export LIBHSAKMT_PATH as the root ROCT folder (the one
>> containing include, src, tests, etc), then that should cover it.
>>>    Kent
>>>
>>>
>>>> -----Original Message-----
>>>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>>>> Sent: Tuesday, March 12, 2019 9:13 AM
>>>> To: Russell, Kent <Kent.Russell@amd.com>; Kuehling, Felix
>>>> <Felix.Kuehling@amd.com>; Koenig, Christian
>>>> <Christian.Koenig@amd.com>; amd-gfx@lists.freedesktop.org
>>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>>>
>>>> Hi guys,
>>>>
>>>> so found a few minutes today to compile kfdtest.
>>>>
>>>> Problem is that during the compile I get lots of this:
>>>>> CMakeFiles/kfdtest.dir/src/BaseQueue.cpp.o: In Funktion
>>>>> »BaseQueue::Create(unsigned int, unsigned int, unsigned long*)«:
>>>>> /usr/src/ROCT-Thunk-Interface/tests/kfdtest/src/BaseQueue.cpp:57:
>>>>> Warnung: undefinierter Verweis auf »hsaKmtCreateQueue«
>>>> Any idea?
>>>>
>>>> Christian.
>>>>
>>>> Am 11.03.19 um 17:55 schrieb Christian König:
>>>>> Hi guys,
>>>>>
>>>>> well it's most likely some missing handling in the KFD, so I'm
>>>>> rather reluctant to revert the change immediately.
>>>>>
>>>>> Problem is that I don't have time right now to look into it
>>>>> immediately. So Kent can you continue to take a look?
>>>>>
>>>>> Sounds like it's crashing immediately, so it should be something obvious.
>>>>>
>>>>> Christian.
>>>>>
>>>>> Am 11.03.19 um 10:49 schrieb Russell, Kent:
>>>>>>    From what I've been able to dig through, the VM Fault seems to
>>>>>> occur right after a doorbell mmap, but that's as far as I got. I
>>>>>> can try to revert it in today's merge and see how things go.
>>>>>>
>>>>>>     Kent
>>>>>>
>>>>>>> -----Original Message-----
>>>>>>> From: Kuehling, Felix
>>>>>>> Sent: Friday, March 08, 2019 11:16 PM
>>>>>>> To: Koenig, Christian <Christian.Koenig@amd.com>; Russell, Kent
>>>>>>> <Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org
>>>>>>> Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
>> demand
>>>>>>> My concerns were related to eviction fence handling. It would
>>>>>>> manifest by unnecessary eviction callbacks into KFD that aren't
>>>>>>> caused by real evictions. I addressed that with a previous patch
>>>>>>> series that removed the need to remove eviction fences and add
>>>>>>> them back around page table updates in amdgpu_amdkfd_gpuvm.c.
>>>>>>>
>>>>>>> I don't know what's going on here. I can probably take a look on
>>>>>>> Monday. I haven't considered what changed with respect to PD
>>>>>>> updates.
>>>>>>>
>>>>>>> Kent, can we temporarily revert the offending change in
>>>>>>> amd-kfd-staging just to unblock the merge?
>>>>>>>
>>>>>>> Christian, I think KFD is currently broken on amd-staging-drm-next.
>>>>>>> If we're
>>>>>>> serious about supporting KFD upstream, you may also want to
>>>>>>> consider reverting your change there for now. Also consider
>>>>>>> building the Thunk and kfdtest so you can do quick smoke tests
>>>>>>> locally whenever you make amdgpu_vm changes that can affect KFD.
>>>>>>> https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface
>>>>>>>
>>>>>>> Regards,
>>>>>>>      Felix
>>>>>>>
>>>>>>> -----Original Message-----
>>>>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf
>> Of
>>>>>>> Christian König
>>>>>>> Sent: Friday, March 08, 2019 9:14 AM
>>>>>>> To: Russell, Kent <Kent.Russell@amd.com>;
>>>>>>> amd-gfx@lists.freedesktop.org
>>>>>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
>> demand
>>>>>>> My best guess is that we forget somewhere to update the PDs. What
>>>>>>> hardware is that on?
>>>>>>>
>>>>>>> Felix already mentioned that this could be problematic for the KFD.
>>>>>>>
>>>>>>> Maybe he has an idea,
>>>>>>> Christian.
>>>>>>>
>>>>>>> Am 08.03.19 um 15:04 schrieb Russell, Kent:
>>>>>>>> Hi Christian,
>>>>>>>>
>>>>>>>> This patch ended up causing a VM Fault in KFDTest. Reverting just
>>>>>>>> this
>>>>>>> patch addressed the issue:
>>>>>>>> [   82.703503] amdgpu 0000:0c:00.0: GPU fault detected: 146
>>>>>>>> 0x0000480c for
>>>>>>> process  pid 0 thread  pid 0
>>>>>>>> [   82.703512] amdgpu 0000:0c:00.0:
>>>>>>> VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x00001000
>>>>>>>> [   82.703516] amdgpu 0000:0c:00.0:
>>>>>>> VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C
>>>>>>>> [   82.703522] amdgpu 0000:0c:00.0: VM fault (0x0c, vmid 8, pasid
>>>>>>>> 32769) at
>>>>>>> page 4096, read from 'TC0' (0x54433000) (72)
>>>>>>>> [   82.703585] Evicting PASID 32769 queues
>>>>>>>>
>>>>>>>> I am looking into it, but if you have any insight that would be
>>>>>>>> great in
>>>>>>> helping to resolve it quickly.
>>>>>>>>      Kent
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On
>> Behalf
>>>> Of
>>>>>>>>> Christian König
>>>>>>>>> Sent: Tuesday, February 26, 2019 7:47 AM
>>>>>>>>> To: amd-gfx@lists.freedesktop.org
>>>>>>>>> Subject: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
>> demand
>>>>>>>>> Let's start to allocate VM PDs/PTs on demand instead of
>>>>>>>>> pre-allocating them during mapping.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>>>>>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>>>>>>> ---
>>>>>>>>>      .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10 +-
>>>>>>>>>      drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --
>>>>>>>>>      drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --
>>>>>>>>>      drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136
>>>>>>>>> +++++------------
>>>>>>> -
>>>>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -
>>>>>>>>>      5 files changed, 39 insertions(+), 129 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git
>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>>>>> index 31e3953dcb6e..088e9b6b765b 100644
>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>>>>> @@ -410,15 +410,7 @@ static int add_bo_to_vm(struct
>>>> amdgpu_device
>>>>>>>>> *adev, struct kgd_mem *mem,
>>>>>>>>>          if (p_bo_va_entry)
>>>>>>>>>              *p_bo_va_entry = bo_va_entry;
>>>>>>>>>
>>>>>>>>> -    /* Allocate new page tables if needed and validate
>>>>>>>>> -     * them.
>>>>>>>>> -     */
>>>>>>>>> -    ret = amdgpu_vm_alloc_pts(adev, vm, va,
>>>>>>>>> amdgpu_bo_size(bo));
>>>>>>>>> -    if (ret) {
>>>>>>>>> -        pr_err("Failed to allocate pts, err=%d\n", ret);
>>>>>>>>> -        goto err_alloc_pts;
>>>>>>>>> -    }
>>>>>>>>> -
>>>>>>>>> +    /* Allocate validate page tables if needed */
>>>>>>>>>          ret = vm_validate_pt_pd_bos(vm);
>>>>>>>>>          if (ret) {
>>>>>>>>>              pr_err("validate_pt_pd_bos() failed\n"); diff --git
>>>>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>>>>> index 7e22be7ca68a..54dd02a898b9 100644
>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>>>>> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct
>>>> amdgpu_device
>>>>>>>>> *adev, struct amdgpu_vm *vm,
>>>>>>>>>              return -ENOMEM;
>>>>>>>>>          }
>>>>>>>>>
>>>>>>>>> -    r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
>>>>>>>>> -                size);
>>>>>>>>> -    if (r) {
>>>>>>>>> -        DRM_ERROR("failed to allocate pts for static CSA,
>>>>>>>>> err=%d\n", r);
>>>>>>>>> -        amdgpu_vm_bo_rmv(adev, *bo_va);
>>>>>>>>> -        ttm_eu_backoff_reservation(&ticket, &list);
>>>>>>>>> -        return r;
>>>>>>>>> -    }
>>>>>>>>> -
>>>>>>>>>          r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
>>>>>>>>>                       AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE
>>>>>>>>> |
>>>>>>>>>                       AMDGPU_PTE_EXECUTABLE); diff --git
>>>>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>>>>> index 555285e329ed..fcaaac30e84b 100644
>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>>>>> @@ -625,11 +625,6 @@ int amdgpu_gem_va_ioctl(struct
>> drm_device
>>>>>>> *dev,
>>>>>>>>> void *data,
>>>>>>>>>
>>>>>>>>>          switch (args->operation) {
>>>>>>>>>          case AMDGPU_VA_OP_MAP:
>>>>>>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
>>>>>>>>>> va_address,
>>>>>>>>> -                    args->map_size);
>>>>>>>>> -        if (r)
>>>>>>>>> -            goto error_backoff;
>>>>>>>>> -
>>>>>>>>>              va_flags = amdgpu_gmc_get_pte_flags(adev,
>>>>>>>>> args->flags);
>>>>>>>>>              r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
>>>>>>>>>                           args->offset_in_bo, args->map_size, @@
>>>>>>>>> -
>>>>>>>>> 645,11 +640,6 @@ int amdgpu_gem_va_ioctl(struct drm_device
>> *dev,
>>>>>>> void
>>>>>>>>> *data,
>>>>>>>>>                              args->map_size);
>>>>>>>>>              break;
>>>>>>>>>          case AMDGPU_VA_OP_REPLACE:
>>>>>>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
>>>>>>>>>> va_address,
>>>>>>>>> -                    args->map_size);
>>>>>>>>> -        if (r)
>>>>>>>>> -            goto error_backoff;
>>>>>>>>> -
>>>>>>>>>              va_flags = amdgpu_gmc_get_pte_flags(adev,
>>>>>>>>> args->flags);
>>>>>>>>>              r = amdgpu_vm_bo_replace_map(adev, bo_va, args-
>>>>>>>>>> va_address,
>>>>>>>>>                               args->offset_in_bo, args-
>>>>>>>>>> map_size, diff --git
>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>>>> index 362436f4e856..dfad543fc000 100644
>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>>>> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct
>>>>>>>>> amdgpu_device *adev,
>>>>>>>>>          }
>>>>>>>>>      }
>>>>>>>>>
>>>>>>>>> -/**
>>>>>>>>> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
>>>>>>>>> - *
>>>>>>>>> - * @adev: amdgpu_device pointer
>>>>>>>>> - * @vm: amdgpu_vm structure
>>>>>>>>> - * @start: start addr of the walk
>>>>>>>>> - * @cursor: state to initialize
>>>>>>>>> - *
>>>>>>>>> - * Start a walk and go directly to the leaf node.
>>>>>>>>> - */
>>>>>>>>> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device
>> *adev,
>>>>>>>>> -                    struct amdgpu_vm *vm, uint64_t start,
>>>>>>>>> -                    struct amdgpu_vm_pt_cursor *cursor) -{
>>>>>>>>> -    amdgpu_vm_pt_start(adev, vm, start, cursor);
>>>>>>>>> -    while (amdgpu_vm_pt_descendant(adev, cursor)); -}
>>>>>>>>> -
>>>>>>>>> -/**
>>>>>>>>> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
>>>>>>>>> - *
>>>>>>>>> - * @adev: amdgpu_device pointer
>>>>>>>>> - * @cursor: current state
>>>>>>>>> - *
>>>>>>>>> - * Walk the PD/PT tree to the next leaf node.
>>>>>>>>> - */
>>>>>>>>> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device
>> *adev,
>>>>>>>>> -                   struct amdgpu_vm_pt_cursor *cursor) -{
>>>>>>>>> -    amdgpu_vm_pt_next(adev, cursor);
>>>>>>>>> -    if (cursor->pfn != ~0ll)
>>>>>>>>> -        while (amdgpu_vm_pt_descendant(adev, cursor)); -}
>>>>>>>>> -
>>>>>>>>> -/**
>>>>>>>>> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in
>>>>>>>>> the hierarchy
>>>>>>>>> - */
>>>>>>>>> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end,
>>>>>>>>> cursor)
>>>>>>>>>       \
>>>>>>>>> -    for (amdgpu_vm_pt_first_leaf((adev), (vm), (start),
>>>>>>>>> &(cursor));
>>>>>>>>>           \
>>>>>>>>> -         (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev),
>>>>>>>>> &(cursor)))
>>>>>>>>> -
>>>>>>>>>      /**
>>>>>>>>>       * amdgpu_vm_pt_first_dfs - start a deep first search
>>>>>>>>>       *
>>>>>>>>> @@ -915,74 +874,51 @@ static void amdgpu_vm_bo_param(struct
>>>>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>>>>>       * Returns:
>>>>>>>>>       * 0 on success, errno otherwise.
>>>>>>>>>       */
>>>>>>>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>>>>>>> -            struct amdgpu_vm *vm,
>>>>>>>>> -            uint64_t saddr, uint64_t size)
>>>>>>>>> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>>>>>>> +                   struct amdgpu_vm *vm,
>>>>>>>>> +                   struct amdgpu_vm_pt_cursor *cursor)
>>>>>>>>>      {
>>>>>>>>> -    struct amdgpu_vm_pt_cursor cursor;
>>>>>>>>> +    struct amdgpu_vm_pt *entry = cursor->entry;
>>>>>>>>> +    struct amdgpu_bo_param bp;
>>>>>>>>>          struct amdgpu_bo *pt;
>>>>>>>>> -    uint64_t eaddr;
>>>>>>>>>          int r;
>>>>>>>>>
>>>>>>>>> -    /* validate the parameters */
>>>>>>>>> -    if (saddr & AMDGPU_GPU_PAGE_MASK || size &
>>>>>>>>> AMDGPU_GPU_PAGE_MASK)
>>>>>>>>> -        return -EINVAL;
>>>>>>>>> +    if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
>>>>>>>>> +        unsigned num_entries;
>>>>>>>>>
>>>>>>>>> -    eaddr = saddr + size - 1;
>>>>>>>>> -
>>>>>>>>> -    saddr /= AMDGPU_GPU_PAGE_SIZE;
>>>>>>>>> -    eaddr /= AMDGPU_GPU_PAGE_SIZE;
>>>>>>>>> -
>>>>>>>>> -    if (eaddr >= adev->vm_manager.max_pfn) {
>>>>>>>>> -        dev_err(adev->dev, "va above limit (0x%08llX >=
>>>>>>>>> 0x%08llX)\n",
>>>>>>>>> -            eaddr, adev->vm_manager.max_pfn);
>>>>>>>>> -        return -EINVAL;
>>>>>>>>> +        num_entries = amdgpu_vm_num_entries(adev, cursor-
>>>>>>>>>> level);
>>>>>>>>> +        entry->entries = kvmalloc_array(num_entries,
>>>>>>>>> +                        sizeof(*entry->entries),
>>>>>>>>> +                        GFP_KERNEL | __GFP_ZERO);
>>>>>>>>> +        if (!entry->entries)
>>>>>>>>> +            return -ENOMEM;
>>>>>>>>>          }
>>>>>>>>>
>>>>>>>>> -    for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor)
>>>>>>>>> {
>>>>>>>>> -        struct amdgpu_vm_pt *entry = cursor.entry;
>>>>>>>>> -        struct amdgpu_bo_param bp;
>>>>>>>>> -
>>>>>>>>> -        if (cursor.level < AMDGPU_VM_PTB) {
>>>>>>>>> -            unsigned num_entries;
>>>>>>>>> -
>>>>>>>>> -            num_entries = amdgpu_vm_num_entries(adev,
>>>>>>>>> cursor.level);
>>>>>>>>> -            entry->entries = kvmalloc_array(num_entries,
>>>>>>>>> -                            sizeof(*entry-
>>>>>>>>>> entries),
>>>>>>>>> -                            GFP_KERNEL |
>>>>>>>>> -                            __GFP_ZERO);
>>>>>>>>> -            if (!entry->entries)
>>>>>>>>> -                return -ENOMEM;
>>>>>>>>> -        }
>>>>>>>>> -
>>>>>>>>> -
>>>>>>>>> -        if (entry->base.bo)
>>>>>>>>> -            continue;
>>>>>>>>> -
>>>>>>>>> -        amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
>>>>>>>>> -
>>>>>>>>> -        r = amdgpu_bo_create(adev, &bp, &pt);
>>>>>>>>> -        if (r)
>>>>>>>>> -            return r;
>>>>>>>>> -
>>>>>>>>> -        if (vm->use_cpu_for_update) {
>>>>>>>>> -            r = amdgpu_bo_kmap(pt, NULL);
>>>>>>>>> -            if (r)
>>>>>>>>> -                goto error_free_pt;
>>>>>>>>> -        }
>>>>>>>>> +    if (entry->base.bo)
>>>>>>>>> +        return 0;
>>>>>>>>>
>>>>>>>>> -        /* Keep a reference to the root directory to avoid
>>>>>>>>> -        * freeing them up in the wrong order.
>>>>>>>>> -        */
>>>>>>>>> -        pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
>>>>>>>>> +    amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
>>>>>>>>>
>>>>>>>>> -        amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>>>>>>>>> +    r = amdgpu_bo_create(adev, &bp, &pt);
>>>>>>>>> +    if (r)
>>>>>>>>> +        return r;
>>>>>>>>>
>>>>>>>>> -        r = amdgpu_vm_clear_bo(adev, vm, pt);
>>>>>>>>> +    if (vm->use_cpu_for_update) {
>>>>>>>>> +        r = amdgpu_bo_kmap(pt, NULL);
>>>>>>>>>              if (r)
>>>>>>>>>                  goto error_free_pt;
>>>>>>>>>          }
>>>>>>>>>
>>>>>>>>> +    /* Keep a reference to the root directory to avoid
>>>>>>>>> +     * freeing them up in the wrong order.
>>>>>>>>> +     */
>>>>>>>>> +    pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
>>>>>>>>> +    amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>>>>>>>>> +
>>>>>>>>> +    r = amdgpu_vm_clear_bo(adev, vm, pt);
>>>>>>>>> +    if (r)
>>>>>>>>> +        goto error_free_pt;
>>>>>>>>> +
>>>>>>>>>          return 0;
>>>>>>>>>
>>>>>>>>>      error_free_pt:
>>>>>>>>> @@ -1627,6 +1563,7 @@ static int
>> amdgpu_vm_update_ptes(struct
>>>>>>>>> amdgpu_pte_update_params *params,
>>>>>>>>>          struct amdgpu_vm_pt_cursor cursor;
>>>>>>>>>          uint64_t frag_start = start, frag_end;
>>>>>>>>>          unsigned int frag;
>>>>>>>>> +    int r;
>>>>>>>>>
>>>>>>>>>          /* figure out the initial fragment */
>>>>>>>>>          amdgpu_vm_fragment(params, frag_start, end, flags,
>>>>>>>>> &frag, &frag_end); @@ -1634,12 +1571,15 @@ static int
>>>>>>>>> amdgpu_vm_update_ptes(struct amdgpu_pte_update_params
>>>> *params,
>>>>>>>>>          /* walk over the address space and update the PTs */
>>>>>>>>>          amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
>>>>>>>>>          while (cursor.pfn < end) {
>>>>>>>>> -        struct amdgpu_bo *pt = cursor.entry->base.bo;
>>>>>>>>>              unsigned shift, parent_shift, mask;
>>>>>>>>>              uint64_t incr, entry_end, pe_start;
>>>>>>>>> +        struct amdgpu_bo *pt;
>>>>>>>>>
>>>>>>>>> -        if (!pt)
>>>>>>>>> -            return -ENOENT;
>>>>>>>>> +        r = amdgpu_vm_alloc_pts(params->adev, params->vm,
>>>>>>>>> &cursor);
>>>>>>>>> +        if (r)
>>>>>>>>> +            return r;
>>>>>>>>> +
>>>>>>>>> +        pt = cursor.entry->base.bo;
>>>>>>>>>
>>>>>>>>>              /* The root level can't be a huge page */
>>>>>>>>>              if (cursor.level == adev->vm_manager.root_level) {
>>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>>>>> index 81ff8177f092..116605c038d2 100644
>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>>>>> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm
>>>> *vm);
>>>>>>> int
>>>>>>>>> amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct
>>>>>>>>> amdgpu_vm *vm,
>>>>>>>>>                        int (*callback)(void *p, struct amdgpu_bo
>>>>>>>>> *bo),
>>>>>>>>>                        void *param); -int
>>>>>>>>> amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>>>>>>> -            struct amdgpu_vm *vm,
>>>>>>>>> -            uint64_t saddr, uint64_t size);
>>>>>>>>>      int amdgpu_vm_flush(struct amdgpu_ring *ring, struct
>>>>>>>>> amdgpu_job *job, bool need_pipe_sync);  int
>>>>>>>>> amdgpu_vm_update_directories(struct
>>>>>>>>> amdgpu_device *adev,
>>>>>>>>>                       struct amdgpu_vm *vm);
>>>>>>>>> --
>>>>>>>>> 2.17.1
>>>>>>>>>
>>>>>>>>> _______________________________________________
>>>>>>>>> amd-gfx mailing list
>>>>>>>>> amd-gfx@lists.freedesktop.org
>>>>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>>>>> _______________________________________________
>>>>>>> amd-gfx mailing list
>>>>>>> amd-gfx@lists.freedesktop.org
>>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>>>> _______________________________________________
>>>>>> amd-gfx mailing list
>>>>>> amd-gfx@lists.freedesktop.org
>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
       [not found]                                             ` <307878c8-73eb-598f-f40a-7cf285b0b60b-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-03-12 15:12                                               ` Russell, Kent
       [not found]                                                 ` <CY4PR12MB1622A73D027D2AC53AF2A2B985490-rpdhrqHFk05g4+I42y6h/AdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: Russell, Kent @ 2019-03-12 15:12 UTC (permalink / raw)
  To: Koenig, Christian, Kuehling, Felix,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Peculiar, I hit it immediately when I ran it. Can you try using --gtest_filter=KFDCWSRTest.BasicTest? That one hung every time for me.

 Kent

> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Tuesday, March 12, 2019 11:09 AM
> To: Russell, Kent <Kent.Russell@amd.com>; Koenig, Christian
> <Christian.Koenig@amd.com>; Kuehling, Felix <Felix.Kuehling@amd.com>;
> amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
> 
> Yeah, same problem here.
> 
> I removed libhsakmt package and installed it manually and now it seems to
> work.
> 
> Doing some testing now, but at least offhand I can't seem to reproduce the
> VM fault on a Vega10.
> 
> Christian.
> 
> Am 12.03.19 um 16:01 schrieb Russell, Kent:
> > Oh right, I remember that issue. I had that happen to me once, where my
> installed libhsakmt didn't match up with the latest source code, so I ended up
> having to remove the libhsakmt package and pointing it to the folders
> instead.
> >
> >   Kent
> >
> >> -----Original Message-----
> >> From: Koenig, Christian
> >> Sent: Tuesday, March 12, 2019 10:49 AM
> >> To: Russell, Kent <Kent.Russell@amd.com>; Kuehling, Felix
> >> <Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org
> >> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
> >>
> >> Yeah, the problem is I do have the libhsakmt installed.
> >>
> >> Going to give it a try to specify the directory directly.
> >>
> >> Christian.
> >>
> >> Am 12.03.19 um 15:47 schrieb Russell, Kent:
> >>> The README.txt file inside the tests/kfdtest folder has instructions
> >>> on how
> >> to do it if you don't have the libhsakmt package installed on your system:
> >>> export LIBHSAKMT_PATH=/*your local libhsakmt folder*/ With that, the
> >>> headers and libraries are searched under LIBHSAKMT_PATH/include and
> >>> LIBHSAKMT_PATH/lib respectively.
> >>>
> >>> So if you try export LIBHSAKMT_PATH as the root ROCT folder (the one
> >> containing include, src, tests, etc), then that should cover it.
> >>>    Kent
> >>>
> >>>
> >>>> -----Original Message-----
> >>>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> >>>> Sent: Tuesday, March 12, 2019 9:13 AM
> >>>> To: Russell, Kent <Kent.Russell@amd.com>; Kuehling, Felix
> >>>> <Felix.Kuehling@amd.com>; Koenig, Christian
> >>>> <Christian.Koenig@amd.com>; amd-gfx@lists.freedesktop.org
> >>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
> demand
> >>>>
> >>>> Hi guys,
> >>>>
> >>>> so found a few minutes today to compile kfdtest.
> >>>>
> >>>> Problem is that during the compile I get a lot of this:
> >>>>> CMakeFiles/kfdtest.dir/src/BaseQueue.cpp.o: In Funktion
> >>>>> »BaseQueue::Create(unsigned int, unsigned int, unsigned long*)«:
> >>>>> /usr/src/ROCT-Thunk-Interface/tests/kfdtest/src/BaseQueue.cpp:57:
> >>>>> Warnung: undefinierter Verweis auf »hsaKmtCreateQueue«
> >>>> Any idea?
> >>>>
> >>>> Christian.
> >>>>
> >>>> Am 11.03.19 um 17:55 schrieb Christian König:
> >>>>> Hi guys,
> >>>>>
> >>>>> well it's most likely some missing handling in the KFD, so I'm
> >>>>> rather reluctant to revert the change immediately.
> >>>>>
> >>>>> Problem is that I don't have time right now to look into it
> >>>>> immediately. So Kent can you continue to take a look?
> >>>>>
> >>>>> Sounds like its crashing immediately, so it should be something
> obvious.
> >>>>>
> >>>>> Christian.
> >>>>>
> >>>>> Am 11.03.19 um 10:49 schrieb Russell, Kent:
> >>>>>>    From what I've been able to dig through, the VM Fault seems to
> >>>>>> occur right after a doorbell mmap, but that's as far as I got. I
> >>>>>> can try to revert it in today's merge and see how things go.
> >>>>>>
> >>>>>>     Kent
> >>>>>>
> >>>>>>> -----Original Message-----
> >>>>>>> From: Kuehling, Felix
> >>>>>>> Sent: Friday, March 08, 2019 11:16 PM
> >>>>>>> To: Koenig, Christian <Christian.Koenig@amd.com>; Russell, Kent
> >>>>>>> <Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org
> >>>>>>> Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
> >> demand
> >>>>>>> My concerns were related to eviction fence handing. It would
> >>>>>>> manifest by unnecessary eviction callbacks into KFD that aren't
> >>>>>>> cause by real evictions. I addressed that with a previous patch
> >>>>>>> series that removed the need to remove eviction fences and add
> >>>>>>> them back around page table updates in
> amdgpu_amdkfd_gpuvm.c.
> >>>>>>>
> >>>>>>> I don't know what's going on here. I can probably take a look on
> >>>>>>> Monday. I haven't considered what changed with respect to PD
> >>>>>>> updates.
> >>>>>>>
> >>>>>>> Kent, can we temporarily revert the offending change in
> >>>>>>> amd-kfd-staging just to unblock the merge?
> >>>>>>>
> >>>>>>> Christian, I think KFD is currently broken on amd-staging-drm-next.
> >>>>>>> If we're
> >>>>>>> serious about supporting KFD upstream, you may also want to
> >>>>>>> consider reverting your change there for now. Also consider
> >>>>>>> building the Thunk and kfdtest so you can do quick smoke tests
> >>>>>>> locally whenever you make amdgpu_vm changes that can affect
> KFD.
> >>>>>>> https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface
> >>>>>>>
> >>>>>>> Regards,
> >>>>>>>      Felix
> >>>>>>>
> >>>>>>> -----Original Message-----
> >>>>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On
> Behalf
> >> Of
> >>>>>>> Christian König
> >>>>>>> Sent: Friday, March 08, 2019 9:14 AM
> >>>>>>> To: Russell, Kent <Kent.Russell@amd.com>;
> >>>>>>> amd-gfx@lists.freedesktop.org
> >>>>>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
> >> demand
> >>>>>>> My best guess is that we forget somewhere to update the PDs.
> >>>>>>> What hardware is that on?
> >>>>>>>
> >>>>>>> Felix already mentioned that this could be problematic for the KFD.
> >>>>>>>
> >>>>>>> Maybe he has an idea,
> >>>>>>> Christian.
> >>>>>>>
> >>>>>>> Am 08.03.19 um 15:04 schrieb Russell, Kent:
> >>>>>>>> Hi Christian,
> >>>>>>>>
> >>>>>>>> This patch ended up causing a VM Fault in KFDTest. Reverting
> >>>>>>>> just this
> >>>>>>> patch addressed the issue:
> >>>>>>>> [   82.703503] amdgpu 0000:0c:00.0: GPU fault detected: 146
> >>>>>>>> 0x0000480c for
> >>>>>>> process  pid 0 thread  pid 0
> >>>>>>>> [   82.703512] amdgpu 0000:0c:00.0:
> >>>>>>> VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x00001000
> >>>>>>>> [   82.703516] amdgpu 0000:0c:00.0:
> >>>>>>> VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C
> >>>>>>>> [   82.703522] amdgpu 0000:0c:00.0: VM fault (0x0c, vmid 8,
> >>>>>>>> pasid
> >>>>>>>> 32769) at
> >>>>>>> page 4096, read from 'TC0' (0x54433000) (72)
> >>>>>>>> [   82.703585] Evicting PASID 32769 queues
> >>>>>>>>
> >>>>>>>> I am looking into it, but if you have any insight that would be
> >>>>>>>> great in
> >>>>>>> helping to resolve it quickly.
> >>>>>>>>      Kent
> >>>>>>>>> -----Original Message-----
> >>>>>>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On
> >> Behalf
> >>>> Of
> >>>>>>>>> Christian König
> >>>>>>>>> Sent: Tuesday, February 26, 2019 7:47 AM
> >>>>>>>>> To: amd-gfx@lists.freedesktop.org
> >>>>>>>>> Subject: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
> >> demand
> >>>>>>>>> Let's start to allocate VM PDs/PTs on demand instead of
> >>>>>>>>> pre-allocating them during mapping.
> >>>>>>>>>
> >>>>>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
> >>>>>>>>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
> >>>>>>>>> ---
> >>>>>>>>>      .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10
> +-
> >>>>>>>>>      drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --
> >>>>>>>>>      drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --
> >>>>>>>>>      drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136
> >>>>>>>>> +++++------------
> >>>>>>> -
> >>>>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -
> >>>>>>>>>      5 files changed, 39 insertions(+), 129 deletions(-)
> >>>>>>>>>
> >>>>>>>>> diff --git
> >> a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >>>>>>>>> index 31e3953dcb6e..088e9b6b765b 100644
> >>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >>>>>>>>> +++
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >>>>>>>>> @@ -410,15 +410,7 @@ static int add_bo_to_vm(struct
> >>>> amdgpu_device
> >>>>>>>>> *adev, struct kgd_mem *mem,
> >>>>>>>>>          if (p_bo_va_entry)
> >>>>>>>>>              *p_bo_va_entry = bo_va_entry;
> >>>>>>>>>
> >>>>>>>>> -    /* Allocate new page tables if needed and validate
> >>>>>>>>> -     * them.
> >>>>>>>>> -     */
> >>>>>>>>> -    ret = amdgpu_vm_alloc_pts(adev, vm, va,
> >>>>>>>>> amdgpu_bo_size(bo));
> >>>>>>>>> -    if (ret) {
> >>>>>>>>> -        pr_err("Failed to allocate pts, err=%d\n", ret);
> >>>>>>>>> -        goto err_alloc_pts;
> >>>>>>>>> -    }
> >>>>>>>>> -
> >>>>>>>>> +    /* Allocate validate page tables if needed */
> >>>>>>>>>          ret = vm_validate_pt_pd_bos(vm);
> >>>>>>>>>          if (ret) {
> >>>>>>>>>              pr_err("validate_pt_pd_bos() failed\n"); diff
> >>>>>>>>> --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >>>>>>>>> index 7e22be7ca68a..54dd02a898b9 100644
> >>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >>>>>>>>> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct
> >>>> amdgpu_device
> >>>>>>>>> *adev, struct amdgpu_vm *vm,
> >>>>>>>>>              return -ENOMEM;
> >>>>>>>>>          }
> >>>>>>>>>
> >>>>>>>>> -    r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm,
> >>>>>>>>> csa_addr,
> >>>>>>>>> -                size);
> >>>>>>>>> -    if (r) {
> >>>>>>>>> -        DRM_ERROR("failed to allocate pts for static CSA,
> >>>>>>>>> err=%d\n", r);
> >>>>>>>>> -        amdgpu_vm_bo_rmv(adev, *bo_va);
> >>>>>>>>> -        ttm_eu_backoff_reservation(&ticket, &list);
> >>>>>>>>> -        return r;
> >>>>>>>>> -    }
> >>>>>>>>> -
> >>>>>>>>>          r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
> >>>>>>>>>                       AMDGPU_PTE_READABLE |
> >>>>>>>>> AMDGPU_PTE_WRITEABLE
> >>>>>>>>> |
> >>>>>>>>>                       AMDGPU_PTE_EXECUTABLE); diff --git
> >>>>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >>>>>>>>> index 555285e329ed..fcaaac30e84b 100644
> >>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >>>>>>>>> @@ -625,11 +625,6 @@ int amdgpu_gem_va_ioctl(struct
> >> drm_device
> >>>>>>> *dev,
> >>>>>>>>> void *data,
> >>>>>>>>>
> >>>>>>>>>          switch (args->operation) {
> >>>>>>>>>          case AMDGPU_VA_OP_MAP:
> >>>>>>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
> >>>>>>>>>> va_address,
> >>>>>>>>> -                    args->map_size);
> >>>>>>>>> -        if (r)
> >>>>>>>>> -            goto error_backoff;
> >>>>>>>>> -
> >>>>>>>>>              va_flags = amdgpu_gmc_get_pte_flags(adev,
> >>>>>>>>> args->flags);
> >>>>>>>>>              r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
> >>>>>>>>>                           args->offset_in_bo, args->map_size,
> >>>>>>>>> @@
> >>>>>>>>> -
> >>>>>>>>> 645,11 +640,6 @@ int amdgpu_gem_va_ioctl(struct drm_device
> >> *dev,
> >>>>>>> void
> >>>>>>>>> *data,
> >>>>>>>>>                              args->map_size);
> >>>>>>>>>              break;
> >>>>>>>>>          case AMDGPU_VA_OP_REPLACE:
> >>>>>>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
> >>>>>>>>>> va_address,
> >>>>>>>>> -                    args->map_size);
> >>>>>>>>> -        if (r)
> >>>>>>>>> -            goto error_backoff;
> >>>>>>>>> -
> >>>>>>>>>              va_flags = amdgpu_gmc_get_pte_flags(adev,
> >>>>>>>>> args->flags);
> >>>>>>>>>              r = amdgpu_vm_bo_replace_map(adev, bo_va, args-
> >>>>>>>>>> va_address,
> >>>>>>>>>                               args->offset_in_bo, args-
> >>>>>>>>>> map_size, diff --git
> >> a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>>>>>>>> index 362436f4e856..dfad543fc000 100644
> >>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>>>>>>>> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct
> >>>>>>>>> amdgpu_device *adev,
> >>>>>>>>>          }
> >>>>>>>>>      }
> >>>>>>>>>
> >>>>>>>>> -/**
> >>>>>>>>> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
> >>>>>>>>> - *
> >>>>>>>>> - * @adev: amdgpu_device pointer
> >>>>>>>>> - * @vm: amdgpu_vm structure
> >>>>>>>>> - * @start: start addr of the walk
> >>>>>>>>> - * @cursor: state to initialize
> >>>>>>>>> - *
> >>>>>>>>> - * Start a walk and go directly to the leaf node.
> >>>>>>>>> - */
> >>>>>>>>> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device
> >> *adev,
> >>>>>>>>> -                    struct amdgpu_vm *vm, uint64_t start,
> >>>>>>>>> -                    struct amdgpu_vm_pt_cursor *cursor) -{
> >>>>>>>>> -    amdgpu_vm_pt_start(adev, vm, start, cursor);
> >>>>>>>>> -    while (amdgpu_vm_pt_descendant(adev, cursor)); -}
> >>>>>>>>> -
> >>>>>>>>> -/**
> >>>>>>>>> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
> >>>>>>>>> - *
> >>>>>>>>> - * @adev: amdgpu_device pointer
> >>>>>>>>> - * @cursor: current state
> >>>>>>>>> - *
> >>>>>>>>> - * Walk the PD/PT tree to the next leaf node.
> >>>>>>>>> - */
> >>>>>>>>> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device
> >> *adev,
> >>>>>>>>> -                   struct amdgpu_vm_pt_cursor *cursor) -{
> >>>>>>>>> -    amdgpu_vm_pt_next(adev, cursor);
> >>>>>>>>> -    if (cursor->pfn != ~0ll)
> >>>>>>>>> -        while (amdgpu_vm_pt_descendant(adev, cursor)); -}
> >>>>>>>>> -
> >>>>>>>>> -/**
> >>>>>>>>> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in
> >>>>>>>>> the hierarchy
> >>>>>>>>> - */
> >>>>>>>>> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end,
> >>>>>>>>> cursor)
> >>>>>>>>>       \
> >>>>>>>>> -    for (amdgpu_vm_pt_first_leaf((adev), (vm), (start),
> >>>>>>>>> &(cursor));
> >>>>>>>>>           \
> >>>>>>>>> -         (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev),
> >>>>>>>>> &(cursor)))
> >>>>>>>>> -
> >>>>>>>>>      /**
> >>>>>>>>>       * amdgpu_vm_pt_first_dfs - start a deep first search
> >>>>>>>>>       *
> >>>>>>>>> @@ -915,74 +874,51 @@ static void
> amdgpu_vm_bo_param(struct
> >>>>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
> >>>>>>>>>       * Returns:
> >>>>>>>>>       * 0 on success, errno otherwise.
> >>>>>>>>>       */
> >>>>>>>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> >>>>>>>>> -            struct amdgpu_vm *vm,
> >>>>>>>>> -            uint64_t saddr, uint64_t size)
> >>>>>>>>> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> >>>>>>>>> +                   struct amdgpu_vm *vm,
> >>>>>>>>> +                   struct amdgpu_vm_pt_cursor *cursor)
> >>>>>>>>>      {
> >>>>>>>>> -    struct amdgpu_vm_pt_cursor cursor;
> >>>>>>>>> +    struct amdgpu_vm_pt *entry = cursor->entry;
> >>>>>>>>> +    struct amdgpu_bo_param bp;
> >>>>>>>>>          struct amdgpu_bo *pt;
> >>>>>>>>> -    uint64_t eaddr;
> >>>>>>>>>          int r;
> >>>>>>>>>
> >>>>>>>>> -    /* validate the parameters */
> >>>>>>>>> -    if (saddr & AMDGPU_GPU_PAGE_MASK || size &
> >>>>>>>>> AMDGPU_GPU_PAGE_MASK)
> >>>>>>>>> -        return -EINVAL;
> >>>>>>>>> +    if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
> >>>>>>>>> +        unsigned num_entries;
> >>>>>>>>>
> >>>>>>>>> -    eaddr = saddr + size - 1;
> >>>>>>>>> -
> >>>>>>>>> -    saddr /= AMDGPU_GPU_PAGE_SIZE;
> >>>>>>>>> -    eaddr /= AMDGPU_GPU_PAGE_SIZE;
> >>>>>>>>> -
> >>>>>>>>> -    if (eaddr >= adev->vm_manager.max_pfn) {
> >>>>>>>>> -        dev_err(adev->dev, "va above limit (0x%08llX >=
> >>>>>>>>> 0x%08llX)\n",
> >>>>>>>>> -            eaddr, adev->vm_manager.max_pfn);
> >>>>>>>>> -        return -EINVAL;
> >>>>>>>>> +        num_entries = amdgpu_vm_num_entries(adev, cursor-
> >>>>>>>>>> level);
> >>>>>>>>> +        entry->entries = kvmalloc_array(num_entries,
> >>>>>>>>> +                        sizeof(*entry->entries),
> >>>>>>>>> +                        GFP_KERNEL | __GFP_ZERO);
> >>>>>>>>> +        if (!entry->entries)
> >>>>>>>>> +            return -ENOMEM;
> >>>>>>>>>          }
> >>>>>>>>>
> >>>>>>>>> -    for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr,
> >>>>>>>>> cursor) {
> >>>>>>>>> -        struct amdgpu_vm_pt *entry = cursor.entry;
> >>>>>>>>> -        struct amdgpu_bo_param bp;
> >>>>>>>>> -
> >>>>>>>>> -        if (cursor.level < AMDGPU_VM_PTB) {
> >>>>>>>>> -            unsigned num_entries;
> >>>>>>>>> -
> >>>>>>>>> -            num_entries = amdgpu_vm_num_entries(adev,
> >>>>>>>>> cursor.level);
> >>>>>>>>> -            entry->entries = kvmalloc_array(num_entries,
> >>>>>>>>> -                            sizeof(*entry-
> >>>>>>>>>> entries),
> >>>>>>>>> -                            GFP_KERNEL |
> >>>>>>>>> -                            __GFP_ZERO);
> >>>>>>>>> -            if (!entry->entries)
> >>>>>>>>> -                return -ENOMEM;
> >>>>>>>>> -        }
> >>>>>>>>> -
> >>>>>>>>> -
> >>>>>>>>> -        if (entry->base.bo)
> >>>>>>>>> -            continue;
> >>>>>>>>> -
> >>>>>>>>> -        amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
> >>>>>>>>> -
> >>>>>>>>> -        r = amdgpu_bo_create(adev, &bp, &pt);
> >>>>>>>>> -        if (r)
> >>>>>>>>> -            return r;
> >>>>>>>>> -
> >>>>>>>>> -        if (vm->use_cpu_for_update) {
> >>>>>>>>> -            r = amdgpu_bo_kmap(pt, NULL);
> >>>>>>>>> -            if (r)
> >>>>>>>>> -                goto error_free_pt;
> >>>>>>>>> -        }
> >>>>>>>>> +    if (entry->base.bo)
> >>>>>>>>> +        return 0;
> >>>>>>>>>
> >>>>>>>>> -        /* Keep a reference to the root directory to avoid
> >>>>>>>>> -        * freeing them up in the wrong order.
> >>>>>>>>> -        */
> >>>>>>>>> -        pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
> >>>>>>>>> +    amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
> >>>>>>>>>
> >>>>>>>>> -        amdgpu_vm_bo_base_init(&entry->base, vm, pt);
> >>>>>>>>> +    r = amdgpu_bo_create(adev, &bp, &pt);
> >>>>>>>>> +    if (r)
> >>>>>>>>> +        return r;
> >>>>>>>>>
> >>>>>>>>> -        r = amdgpu_vm_clear_bo(adev, vm, pt);
> >>>>>>>>> +    if (vm->use_cpu_for_update) {
> >>>>>>>>> +        r = amdgpu_bo_kmap(pt, NULL);
> >>>>>>>>>              if (r)
> >>>>>>>>>                  goto error_free_pt;
> >>>>>>>>>          }
> >>>>>>>>>
> >>>>>>>>> +    /* Keep a reference to the root directory to avoid
> >>>>>>>>> +     * freeing them up in the wrong order.
> >>>>>>>>> +     */
> >>>>>>>>> +    pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
> >>>>>>>>> +    amdgpu_vm_bo_base_init(&entry->base, vm, pt);
> >>>>>>>>> +
> >>>>>>>>> +    r = amdgpu_vm_clear_bo(adev, vm, pt);
> >>>>>>>>> +    if (r)
> >>>>>>>>> +        goto error_free_pt;
> >>>>>>>>> +
> >>>>>>>>>          return 0;
> >>>>>>>>>
> >>>>>>>>>      error_free_pt:
> >>>>>>>>> @@ -1627,6 +1563,7 @@ static int
> >> amdgpu_vm_update_ptes(struct
> >>>>>>>>> amdgpu_pte_update_params *params,
> >>>>>>>>>          struct amdgpu_vm_pt_cursor cursor;
> >>>>>>>>>          uint64_t frag_start = start, frag_end;
> >>>>>>>>>          unsigned int frag;
> >>>>>>>>> +    int r;
> >>>>>>>>>
> >>>>>>>>>          /* figure out the initial fragment */
> >>>>>>>>>          amdgpu_vm_fragment(params, frag_start, end, flags,
> >>>>>>>>> &frag, &frag_end); @@ -1634,12 +1571,15 @@ static int
> >>>>>>>>> amdgpu_vm_update_ptes(struct amdgpu_pte_update_params
> >>>> *params,
> >>>>>>>>>          /* walk over the address space and update the PTs */
> >>>>>>>>>          amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
> >>>>>>>>>          while (cursor.pfn < end) {
> >>>>>>>>> -        struct amdgpu_bo *pt = cursor.entry->base.bo;
> >>>>>>>>>              unsigned shift, parent_shift, mask;
> >>>>>>>>>              uint64_t incr, entry_end, pe_start;
> >>>>>>>>> +        struct amdgpu_bo *pt;
> >>>>>>>>>
> >>>>>>>>> -        if (!pt)
> >>>>>>>>> -            return -ENOENT;
> >>>>>>>>> +        r = amdgpu_vm_alloc_pts(params->adev, params->vm,
> >>>>>>>>> &cursor);
> >>>>>>>>> +        if (r)
> >>>>>>>>> +            return r;
> >>>>>>>>> +
> >>>>>>>>> +        pt = cursor.entry->base.bo;
> >>>>>>>>>
> >>>>>>>>>              /* The root level can't be a huge page */
> >>>>>>>>>              if (cursor.level == adev->vm_manager.root_level)
> >>>>>>>>> { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >>>>>>>>> index 81ff8177f092..116605c038d2 100644
> >>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >>>>>>>>> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct
> amdgpu_vm
> >>>> *vm);
> >>>>>>> int
> >>>>>>>>> amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev,
> struct
> >>>>>>>>> amdgpu_vm *vm,
> >>>>>>>>>                        int (*callback)(void *p, struct
> >>>>>>>>> amdgpu_bo *bo),
> >>>>>>>>>                        void *param); -int
> >>>>>>>>> amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> >>>>>>>>> -            struct amdgpu_vm *vm,
> >>>>>>>>> -            uint64_t saddr, uint64_t size);
> >>>>>>>>>      int amdgpu_vm_flush(struct amdgpu_ring *ring, struct
> >>>>>>>>> amdgpu_job *job, bool need_pipe_sync);  int
> >>>>>>>>> amdgpu_vm_update_directories(struct
> >>>>>>>>> amdgpu_device *adev,
> >>>>>>>>>                       struct amdgpu_vm *vm);
> >>>>>>>>> --
> >>>>>>>>> 2.17.1
> >>>>>>>>>
> >>>>>>>>> _______________________________________________
> >>>>>>>>> amd-gfx mailing list
> >>>>>>>>> amd-gfx@lists.freedesktop.org
> >>>>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> >>>>>>> _______________________________________________
> >>>>>>> amd-gfx mailing list
> >>>>>>> amd-gfx@lists.freedesktop.org
> >>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> >>>>>> _______________________________________________
> >>>>>> amd-gfx mailing list
> >>>>>> amd-gfx@lists.freedesktop.org
> >>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> > _______________________________________________
> > amd-gfx mailing list
> > amd-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
       [not found]                         ` <b0383667-53ef-31c5-5a73-270bc90ab8c8-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  2019-03-12 13:13                           ` Christian König
@ 2019-03-12 19:02                           ` Kuehling, Felix
       [not found]                             ` <9ad4e7aa-94e4-1d81-6da0-ce9050c4ca0a-5C7GfCeVMHo@public.gmane.org>
  1 sibling, 1 reply; 26+ messages in thread
From: Kuehling, Felix @ 2019-03-12 19:02 UTC (permalink / raw)
  To: Koenig, Christian, Russell, Kent,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

I find that it's related to CPU page table updates. If I force page 
table updates with SDMA, I don't get the VM fault.

Regards,
   Felix

On 2019-03-11 12:55 p.m., Christian König wrote:
> Hi guys,
>
> well it's most likely some missing handling in the KFD, so I'm rather 
> reluctant to revert the change immediately.
>
> Problem is that I don't have time right now to look into it 
> immediately. So Kent can you continue to take a look?
>
> Sounds like it's crashing immediately, so it should be something obvious.
>
> Christian.
>
> Am 11.03.19 um 10:49 schrieb Russell, Kent:
>>  From what I've been able to dig through, the VM Fault seems to occur 
>> right after a doorbell mmap, but that's as far as I got. I can try to 
>> revert it in today's merge and see how things go.
>>
>>   Kent
>>
>>> -----Original Message-----
>>> From: Kuehling, Felix
>>> Sent: Friday, March 08, 2019 11:16 PM
>>> To: Koenig, Christian <Christian.Koenig@amd.com>; Russell, Kent
>>> <Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org
>>> Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>>
>>> My concerns were related to eviction fence handling. It would 
>>> manifest as
>>> unnecessary eviction callbacks into KFD that aren't caused by real 
>>> evictions. I
>>> addressed that with a previous patch series that removed the need to
>>> remove eviction fences and add them back around page table updates in
>>> amdgpu_amdkfd_gpuvm.c.
>>>
>>> I don't know what's going on here. I can probably take a look on 
>>> Monday. I
>>> haven't considered what changed with respect to PD updates.
>>>
>>> Kent, can we temporarily revert the offending change in amd-kfd-staging
>>> just to unblock the merge?
>>>
>>> Christian, I think KFD is currently broken on amd-staging-drm-next. 
>>> If we're
>>> serious about supporting KFD upstream, you may also want to consider
>>> reverting your change there for now. Also consider building the 
>>> Thunk and
>>> kfdtest so you can do quick smoke tests locally whenever you make
>>> amdgpu_vm changes that can affect KFD.
>>> https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface
>>>
>>> Regards,
>>>    Felix
>>>
>>> -----Original Message-----
>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
>>> Christian König
>>> Sent: Friday, March 08, 2019 9:14 AM
>>> To: Russell, Kent <Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org
>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>>
>>> My best guess is that we forget somewhere to update the PDs. What
>>> hardware is that on?
>>>
>>> Felix already mentioned that this could be problematic for the KFD.
>>>
>>> Maybe he has an idea,
>>> Christian.
>>>
>>> Am 08.03.19 um 15:04 schrieb Russell, Kent:
>>>> Hi Christian,
>>>>
>>>> This patch ended up causing a VM Fault in KFDTest. Reverting just this
>>> patch addressed the issue:
>>>> [   82.703503] amdgpu 0000:0c:00.0: GPU fault detected: 146 
>>>> 0x0000480c for
>>> process  pid 0 thread  pid 0
>>>> [   82.703512] amdgpu 0000:0c:00.0:
>>> VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x00001000
>>>> [   82.703516] amdgpu 0000:0c:00.0:
>>> VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C
>>>> [   82.703522] amdgpu 0000:0c:00.0: VM fault (0x0c, vmid 8, pasid 
>>>> 32769) at
>>> page 4096, read from 'TC0' (0x54433000) (72)
>>>> [   82.703585] Evicting PASID 32769 queues
>>>>
>>>> I am looking into it, but if you have any insight that would be 
>>>> great in
>>> helping to resolve it quickly.
>>>>    Kent
>>>>> -----Original Message-----
>>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
>>>>> Christian König
>>>>> Sent: Tuesday, February 26, 2019 7:47 AM
>>>>> To: amd-gfx@lists.freedesktop.org
>>>>> Subject: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>>>>
>>>>> Let's start to allocate VM PDs/PTs on demand instead of
>>>>> pre-allocating them during mapping.
>>>>>
>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>>> ---
>>>>>    .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10 +-
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136 
>>>>> +++++------------
>>> -
>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -
>>>>>    5 files changed, 39 insertions(+), 129 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> index 31e3953dcb6e..088e9b6b765b 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> @@ -410,15 +410,7 @@ static int add_bo_to_vm(struct amdgpu_device
>>>>> *adev, struct kgd_mem *mem,
>>>>>        if (p_bo_va_entry)
>>>>>            *p_bo_va_entry = bo_va_entry;
>>>>>
>>>>> -    /* Allocate new page tables if needed and validate
>>>>> -     * them.
>>>>> -     */
>>>>> -    ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
>>>>> -    if (ret) {
>>>>> -        pr_err("Failed to allocate pts, err=%d\n", ret);
>>>>> -        goto err_alloc_pts;
>>>>> -    }
>>>>> -
>>>>> +    /* Allocate validate page tables if needed */
>>>>>        ret = vm_validate_pt_pd_bos(vm);
>>>>>        if (ret) {
>>>>>            pr_err("validate_pt_pd_bos() failed\n"); diff --git
>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>> index 7e22be7ca68a..54dd02a898b9 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device
>>>>> *adev, struct amdgpu_vm *vm,
>>>>>            return -ENOMEM;
>>>>>        }
>>>>>
>>>>> -    r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
>>>>> -                size);
>>>>> -    if (r) {
>>>>> -        DRM_ERROR("failed to allocate pts for static CSA, err=%d\n",
>>>>> r);
>>>>> -        amdgpu_vm_bo_rmv(adev, *bo_va);
>>>>> -        ttm_eu_backoff_reservation(&ticket, &list);
>>>>> -        return r;
>>>>> -    }
>>>>> -
>>>>>        r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
>>>>>                     AMDGPU_PTE_READABLE |
>>>>> AMDGPU_PTE_WRITEABLE |
>>>>>                     AMDGPU_PTE_EXECUTABLE);
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>> index 555285e329ed..fcaaac30e84b 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>> @@ -625,11 +625,6 @@ int amdgpu_gem_va_ioctl(struct drm_device
>>> *dev,
>>>>> void *data,
>>>>>
>>>>>        switch (args->operation) {
>>>>>        case AMDGPU_VA_OP_MAP:
>>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
>>>>>> va_address,
>>>>> -                    args->map_size);
>>>>> -        if (r)
>>>>> -            goto error_backoff;
>>>>> -
>>>>>            va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
>>>>>            r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
>>>>>                         args->offset_in_bo, args->map_size, @@ -
>>>>> 645,11 +640,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>> void
>>>>> *data,
>>>>>                            args->map_size);
>>>>>            break;
>>>>>        case AMDGPU_VA_OP_REPLACE:
>>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
>>>>>> va_address,
>>>>> -                    args->map_size);
>>>>> -        if (r)
>>>>> -            goto error_backoff;
>>>>> -
>>>>>            va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
>>>>>            r = amdgpu_vm_bo_replace_map(adev, bo_va, args-
>>>>>> va_address,
>>>>>                             args->offset_in_bo, args-
>>>>>> map_size, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> index 362436f4e856..dfad543fc000 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct
>>>>> amdgpu_device *adev,
>>>>>        }
>>>>>    }
>>>>>
>>>>> -/**
>>>>> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
>>>>> - *
>>>>> - * @adev: amdgpu_device pointer
>>>>> - * @vm: amdgpu_vm structure
>>>>> - * @start: start addr of the walk
>>>>> - * @cursor: state to initialize
>>>>> - *
>>>>> - * Start a walk and go directly to the leaf node.
>>>>> - */
>>>>> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,
>>>>> -                    struct amdgpu_vm *vm, uint64_t start,
>>>>> -                    struct amdgpu_vm_pt_cursor *cursor)
>>>>> -{
>>>>> -    amdgpu_vm_pt_start(adev, vm, start, cursor);
>>>>> -    while (amdgpu_vm_pt_descendant(adev, cursor));
>>>>> -}
>>>>> -
>>>>> -/**
>>>>> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
>>>>> - *
>>>>> - * @adev: amdgpu_device pointer
>>>>> - * @cursor: current state
>>>>> - *
>>>>> - * Walk the PD/PT tree to the next leaf node.
>>>>> - */
>>>>> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
>>>>> -                   struct amdgpu_vm_pt_cursor *cursor)
>>>>> -{
>>>>> -    amdgpu_vm_pt_next(adev, cursor);
>>>>> -    if (cursor->pfn != ~0ll)
>>>>> -        while (amdgpu_vm_pt_descendant(adev, cursor));
>>>>> -}
>>>>> -
>>>>> -/**
>>>>> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the
>>>>> hierarchy
>>>>> - */
>>>>> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)
>>>>>     \
>>>>> -    for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor));
>>>>>         \
>>>>> -         (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev),
>>>>> &(cursor)))
>>>>> -
>>>>>    /**
>>>>>     * amdgpu_vm_pt_first_dfs - start a deep first search
>>>>>     *
>>>>> @@ -915,74 +874,51 @@ static void amdgpu_vm_bo_param(struct
>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>     * Returns:
>>>>>     * 0 on success, errno otherwise.
>>>>>     */
>>>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>>> -            struct amdgpu_vm *vm,
>>>>> -            uint64_t saddr, uint64_t size)
>>>>> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>>> +                   struct amdgpu_vm *vm,
>>>>> +                   struct amdgpu_vm_pt_cursor *cursor)
>>>>>    {
>>>>> -    struct amdgpu_vm_pt_cursor cursor;
>>>>> +    struct amdgpu_vm_pt *entry = cursor->entry;
>>>>> +    struct amdgpu_bo_param bp;
>>>>>        struct amdgpu_bo *pt;
>>>>> -    uint64_t eaddr;
>>>>>        int r;
>>>>>
>>>>> -    /* validate the parameters */
>>>>> -    if (saddr & AMDGPU_GPU_PAGE_MASK || size &
>>>>> AMDGPU_GPU_PAGE_MASK)
>>>>> -        return -EINVAL;
>>>>> +    if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
>>>>> +        unsigned num_entries;
>>>>>
>>>>> -    eaddr = saddr + size - 1;
>>>>> -
>>>>> -    saddr /= AMDGPU_GPU_PAGE_SIZE;
>>>>> -    eaddr /= AMDGPU_GPU_PAGE_SIZE;
>>>>> -
>>>>> -    if (eaddr >= adev->vm_manager.max_pfn) {
>>>>> -        dev_err(adev->dev, "va above limit (0x%08llX >=
>>>>> 0x%08llX)\n",
>>>>> -            eaddr, adev->vm_manager.max_pfn);
>>>>> -        return -EINVAL;
>>>>> +        num_entries = amdgpu_vm_num_entries(adev, cursor-
>>>>>> level);
>>>>> +        entry->entries = kvmalloc_array(num_entries,
>>>>> +                        sizeof(*entry->entries),
>>>>> +                        GFP_KERNEL | __GFP_ZERO);
>>>>> +        if (!entry->entries)
>>>>> +            return -ENOMEM;
>>>>>        }
>>>>>
>>>>> -    for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {
>>>>> -        struct amdgpu_vm_pt *entry = cursor.entry;
>>>>> -        struct amdgpu_bo_param bp;
>>>>> -
>>>>> -        if (cursor.level < AMDGPU_VM_PTB) {
>>>>> -            unsigned num_entries;
>>>>> -
>>>>> -            num_entries = amdgpu_vm_num_entries(adev,
>>>>> cursor.level);
>>>>> -            entry->entries = kvmalloc_array(num_entries,
>>>>> -                            sizeof(*entry-
>>>>>> entries),
>>>>> -                            GFP_KERNEL |
>>>>> -                            __GFP_ZERO);
>>>>> -            if (!entry->entries)
>>>>> -                return -ENOMEM;
>>>>> -        }
>>>>> -
>>>>> -
>>>>> -        if (entry->base.bo)
>>>>> -            continue;
>>>>> -
>>>>> -        amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
>>>>> -
>>>>> -        r = amdgpu_bo_create(adev, &bp, &pt);
>>>>> -        if (r)
>>>>> -            return r;
>>>>> -
>>>>> -        if (vm->use_cpu_for_update) {
>>>>> -            r = amdgpu_bo_kmap(pt, NULL);
>>>>> -            if (r)
>>>>> -                goto error_free_pt;
>>>>> -        }
>>>>> +    if (entry->base.bo)
>>>>> +        return 0;
>>>>>
>>>>> -        /* Keep a reference to the root directory to avoid
>>>>> -        * freeing them up in the wrong order.
>>>>> -        */
>>>>> -        pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
>>>>> +    amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
>>>>>
>>>>> -        amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>>>>> +    r = amdgpu_bo_create(adev, &bp, &pt);
>>>>> +    if (r)
>>>>> +        return r;
>>>>>
>>>>> -        r = amdgpu_vm_clear_bo(adev, vm, pt);
>>>>> +    if (vm->use_cpu_for_update) {
>>>>> +        r = amdgpu_bo_kmap(pt, NULL);
>>>>>            if (r)
>>>>>                goto error_free_pt;
>>>>>        }
>>>>>
>>>>> +    /* Keep a reference to the root directory to avoid
>>>>> +     * freeing them up in the wrong order.
>>>>> +     */
>>>>> +    pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
>>>>> +    amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>>>>> +
>>>>> +    r = amdgpu_vm_clear_bo(adev, vm, pt);
>>>>> +    if (r)
>>>>> +        goto error_free_pt;
>>>>> +
>>>>>        return 0;
>>>>>
>>>>>    error_free_pt:
>>>>> @@ -1627,6 +1563,7 @@ static int amdgpu_vm_update_ptes(struct
>>>>> amdgpu_pte_update_params *params,
>>>>>        struct amdgpu_vm_pt_cursor cursor;
>>>>>        uint64_t frag_start = start, frag_end;
>>>>>        unsigned int frag;
>>>>> +    int r;
>>>>>
>>>>>        /* figure out the initial fragment */
>>>>>        amdgpu_vm_fragment(params, frag_start, end, flags, &frag,
>>>>> &frag_end); @@ -1634,12 +1571,15 @@ static int
>>>>> amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>>>>>        /* walk over the address space and update the PTs */
>>>>>        amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
>>>>>        while (cursor.pfn < end) {
>>>>> -        struct amdgpu_bo *pt = cursor.entry->base.bo;
>>>>>            unsigned shift, parent_shift, mask;
>>>>>            uint64_t incr, entry_end, pe_start;
>>>>> +        struct amdgpu_bo *pt;
>>>>>
>>>>> -        if (!pt)
>>>>> -            return -ENOENT;
>>>>> +        r = amdgpu_vm_alloc_pts(params->adev, params->vm,
>>>>> &cursor);
>>>>> +        if (r)
>>>>> +            return r;
>>>>> +
>>>>> +        pt = cursor.entry->base.bo;
>>>>>
>>>>>            /* The root level can't be a huge page */
>>>>>            if (cursor.level == adev->vm_manager.root_level) { diff 
>>>>> --git
>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> index 81ff8177f092..116605c038d2 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm);
>>> int
>>>>> amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct
>>>>> amdgpu_vm *vm,
>>>>>                      int (*callback)(void *p, struct amdgpu_bo *bo),
>>>>>                      void *param);
>>>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>>> -            struct amdgpu_vm *vm,
>>>>> -            uint64_t saddr, uint64_t size);
>>>>>    int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job
>>>>> *job, bool need_pipe_sync);  int amdgpu_vm_update_directories(struct
>>>>> amdgpu_device *adev,
>>>>>                     struct amdgpu_vm *vm);
>>>>> -- 
>>>>> 2.17.1
>>>>>
>>>>> _______________________________________________
>>>>> amd-gfx mailing list
>>>>> amd-gfx@lists.freedesktop.org
>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
       [not found]                             ` <9ad4e7aa-94e4-1d81-6da0-ce9050c4ca0a-5C7GfCeVMHo@public.gmane.org>
@ 2019-03-12 20:25                               ` Kuehling, Felix
  0 siblings, 0 replies; 26+ messages in thread
From: Kuehling, Felix @ 2019-03-12 20:25 UTC (permalink / raw)
  To: Koenig, Christian, Russell, Kent,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

The root cause is that we don't wait after calling amdgpu_vm_clear_bo in 
amdgpu_vm_alloc_pts.

Waiting for the page table BOs to be idle for CPU page table updates is 
done in amdgpu_vm_bo_update_mapping. That is now *before* the page 
tables are actually allocated and cleared in amdgpu_vm_update_ptes.

We'll need to move the waiting for page tables to be idle into 
amdgpu_vm_alloc_pts or amdgpu_vm_update_ptes.

Regards,
   Felix

On 2019-03-12 3:02 p.m., Felix Kuehling wrote:
> I find that it's related to CPU page table updates. If I force page 
> table updates with SDMA, I don't get the VM fault.
>
> Regards,
>   Felix
>
> On 2019-03-11 12:55 p.m., Christian König wrote:
>> Hi guys,
>>
>> well it's most likely some missing handling in the KFD, so I'm rather 
>> reluctant to revert the change immediately.
>>
>> Problem is that I don't have time right now to look into it 
>> immediately. So Kent can you continue to take a look?
>>
>> Sounds like it's crashing immediately, so it should be something obvious.
>>
>> Christian.
>>
>> Am 11.03.19 um 10:49 schrieb Russell, Kent:
>>>  From what I've been able to dig through, the VM Fault seems to 
>>> occur right after a doorbell mmap, but that's as far as I got. I can 
>>> try to revert it in today's merge and see how things go.
>>>
>>>   Kent
>>>
>>>> -----Original Message-----
>>>> From: Kuehling, Felix
>>>> Sent: Friday, March 08, 2019 11:16 PM
>>>> To: Koenig, Christian <Christian.Koenig@amd.com>; Russell, Kent
>>>> <Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org
>>>> Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>>>
>>>> My concerns were related to eviction fence handling. It would 
>>>> manifest as
>>>> unnecessary eviction callbacks into KFD that aren't caused by real 
>>>> evictions. I
>>>> addressed that with a previous patch series that removed the need to
>>>> remove eviction fences and add them back around page table updates in
>>>> amdgpu_amdkfd_gpuvm.c.
>>>>
>>>> I don't know what's going on here. I can probably take a look on 
>>>> Monday. I
>>>> haven't considered what changed with respect to PD updates.
>>>>
>>>> Kent, can we temporarily revert the offending change in 
>>>> amd-kfd-staging
>>>> just to unblock the merge?
>>>>
>>>> Christian, I think KFD is currently broken on amd-staging-drm-next. 
>>>> If we're
>>>> serious about supporting KFD upstream, you may also want to consider
>>>> reverting your change there for now. Also consider building the 
>>>> Thunk and
>>>> kfdtest so you can do quick smoke tests locally whenever you make
>>>> amdgpu_vm changes that can affect KFD.
>>>> https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface
>>>>
>>>> Regards,
>>>>    Felix
>>>>
>>>> -----Original Message-----
>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
>>>> Christian König
>>>> Sent: Friday, March 08, 2019 9:14 AM
>>>> To: Russell, Kent <Kent.Russell@amd.com>; 
>>>> amd-gfx@lists.freedesktop.org
>>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>>>
>>>> My best guess is that we forget somewhere to update the PDs. What
>>>> hardware is that on?
>>>>
>>>> Felix already mentioned that this could be problematic for the KFD.
>>>>
>>>> Maybe he has an idea,
>>>> Christian.
>>>>
>>>> Am 08.03.19 um 15:04 schrieb Russell, Kent:
>>>>> Hi Christian,
>>>>>
>>>>> This patch ended up causing a VM Fault in KFDTest. Reverting just 
>>>>> this
>>>> patch addressed the issue:
>>>>> [   82.703503] amdgpu 0000:0c:00.0: GPU fault detected: 146 
>>>>> 0x0000480c for
>>>> process  pid 0 thread  pid 0
>>>>> [   82.703512] amdgpu 0000:0c:00.0:
>>>> VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x00001000
>>>>> [   82.703516] amdgpu 0000:0c:00.0:
>>>> VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C
>>>>> [   82.703522] amdgpu 0000:0c:00.0: VM fault (0x0c, vmid 8, pasid 
>>>>> 32769) at
>>>> page 4096, read from 'TC0' (0x54433000) (72)
>>>>> [   82.703585] Evicting PASID 32769 queues
>>>>>
>>>>> I am looking into it, but if you have any insight that would be 
>>>>> great in
>>>> helping to resolve it quickly.
>>>>>    Kent
>>>>>> -----Original Message-----
>>>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
>>>>>> Christian König
>>>>>> Sent: Tuesday, February 26, 2019 7:47 AM
>>>>>> To: amd-gfx@lists.freedesktop.org
>>>>>> Subject: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>>>>>
>>>>>> Let's start to allocate VM PDs/PTs on demand instead of
>>>>>> pre-allocating them during mapping.
>>>>>>
>>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>>>> ---
>>>>>>    .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10 +-
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --
>>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136 
>>>>>> +++++------------
>>>> -
>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |   3 -
>>>>>>    5 files changed, 39 insertions(+), 129 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>> index 31e3953dcb6e..088e9b6b765b 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>> @@ -410,15 +410,7 @@ static int add_bo_to_vm(struct amdgpu_device
>>>>>> *adev, struct kgd_mem *mem,
>>>>>>        if (p_bo_va_entry)
>>>>>>            *p_bo_va_entry = bo_va_entry;
>>>>>>
>>>>>> -    /* Allocate new page tables if needed and validate
>>>>>> -     * them.
>>>>>> -     */
>>>>>> -    ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
>>>>>> -    if (ret) {
>>>>>> -        pr_err("Failed to allocate pts, err=%d\n", ret);
>>>>>> -        goto err_alloc_pts;
>>>>>> -    }
>>>>>> -
>>>>>> +    /* Allocate validate page tables if needed */
>>>>>>        ret = vm_validate_pt_pd_bos(vm);
>>>>>>        if (ret) {
>>>>>>            pr_err("validate_pt_pd_bos() failed\n"); diff --git
>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>> index 7e22be7ca68a..54dd02a898b9 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device
>>>>>> *adev, struct amdgpu_vm *vm,
>>>>>>            return -ENOMEM;
>>>>>>        }
>>>>>>
>>>>>> -    r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
>>>>>> -                size);
>>>>>> -    if (r) {
>>>>>> -        DRM_ERROR("failed to allocate pts for static CSA, 
>>>>>> err=%d\n",
>>>>>> r);
>>>>>> -        amdgpu_vm_bo_rmv(adev, *bo_va);
>>>>>> -        ttm_eu_backoff_reservation(&ticket, &list);
>>>>>> -        return r;
>>>>>> -    }
>>>>>> -
>>>>>>        r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
>>>>>>                     AMDGPU_PTE_READABLE |
>>>>>> AMDGPU_PTE_WRITEABLE |
>>>>>>                     AMDGPU_PTE_EXECUTABLE);
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>> index 555285e329ed..fcaaac30e84b 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>> @@ -625,11 +625,6 @@ int amdgpu_gem_va_ioctl(struct drm_device
>>>> *dev,
>>>>>> void *data,
>>>>>>
>>>>>>        switch (args->operation) {
>>>>>>        case AMDGPU_VA_OP_MAP:
>>>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
>>>>>>> va_address,
>>>>>> -                    args->map_size);
>>>>>> -        if (r)
>>>>>> -            goto error_backoff;
>>>>>> -
>>>>>>            va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
>>>>>>            r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
>>>>>>                         args->offset_in_bo, args->map_size, @@ -
>>>>>> 645,11 +640,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>>> void
>>>>>> *data,
>>>>>>                            args->map_size);
>>>>>>            break;
>>>>>>        case AMDGPU_VA_OP_REPLACE:
>>>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
>>>>>>> va_address,
>>>>>> -                    args->map_size);
>>>>>> -        if (r)
>>>>>> -            goto error_backoff;
>>>>>> -
>>>>>>            va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
>>>>>>            r = amdgpu_vm_bo_replace_map(adev, bo_va, args-
>>>>>>> va_address,
>>>>>>                             args->offset_in_bo, args-
>>>>>>> map_size, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> index 362436f4e856..dfad543fc000 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct
>>>>>> amdgpu_device *adev,
>>>>>>        }
>>>>>>    }
>>>>>>
>>>>>> -/**
>>>>>> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
>>>>>> - *
>>>>>> - * @adev: amdgpu_device pointer
>>>>>> - * @vm: amdgpu_vm structure
>>>>>> - * @start: start addr of the walk
>>>>>> - * @cursor: state to initialize
>>>>>> - *
>>>>>> - * Start a walk and go directly to the leaf node.
>>>>>> - */
>>>>>> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,
>>>>>> -                    struct amdgpu_vm *vm, uint64_t start,
>>>>>> -                    struct amdgpu_vm_pt_cursor *cursor)
>>>>>> -{
>>>>>> -    amdgpu_vm_pt_start(adev, vm, start, cursor);
>>>>>> -    while (amdgpu_vm_pt_descendant(adev, cursor));
>>>>>> -}
>>>>>> -
>>>>>> -/**
>>>>>> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
>>>>>> - *
>>>>>> - * @adev: amdgpu_device pointer
>>>>>> - * @cursor: current state
>>>>>> - *
>>>>>> - * Walk the PD/PT tree to the next leaf node.
>>>>>> - */
>>>>>> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
>>>>>> -                   struct amdgpu_vm_pt_cursor *cursor)
>>>>>> -{
>>>>>> -    amdgpu_vm_pt_next(adev, cursor);
>>>>>> -    if (cursor->pfn != ~0ll)
>>>>>> -        while (amdgpu_vm_pt_descendant(adev, cursor));
>>>>>> -}
>>>>>> -
>>>>>> -/**
>>>>>> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the
>>>>>> hierarchy
>>>>>> - */
>>>>>> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)
>>>>>>     \
>>>>>> -    for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor));
>>>>>>         \
>>>>>> -         (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev),
>>>>>> &(cursor)))
>>>>>> -
>>>>>>    /**
>>>>>>     * amdgpu_vm_pt_first_dfs - start a deep first search
>>>>>>     *
>>>>>> @@ -915,74 +874,51 @@ static void amdgpu_vm_bo_param(struct
>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>>     * Returns:
>>>>>>     * 0 on success, errno otherwise.
>>>>>>     */
>>>>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>>>> -            struct amdgpu_vm *vm,
>>>>>> -            uint64_t saddr, uint64_t size)
>>>>>> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>>>> +                   struct amdgpu_vm *vm,
>>>>>> +                   struct amdgpu_vm_pt_cursor *cursor)
>>>>>>    {
>>>>>> -    struct amdgpu_vm_pt_cursor cursor;
>>>>>> +    struct amdgpu_vm_pt *entry = cursor->entry;
>>>>>> +    struct amdgpu_bo_param bp;
>>>>>>        struct amdgpu_bo *pt;
>>>>>> -    uint64_t eaddr;
>>>>>>        int r;
>>>>>>
>>>>>> -    /* validate the parameters */
>>>>>> -    if (saddr & AMDGPU_GPU_PAGE_MASK || size &
>>>>>> AMDGPU_GPU_PAGE_MASK)
>>>>>> -        return -EINVAL;
>>>>>> +    if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
>>>>>> +        unsigned num_entries;
>>>>>>
>>>>>> -    eaddr = saddr + size - 1;
>>>>>> -
>>>>>> -    saddr /= AMDGPU_GPU_PAGE_SIZE;
>>>>>> -    eaddr /= AMDGPU_GPU_PAGE_SIZE;
>>>>>> -
>>>>>> -    if (eaddr >= adev->vm_manager.max_pfn) {
>>>>>> -        dev_err(adev->dev, "va above limit (0x%08llX >=
>>>>>> 0x%08llX)\n",
>>>>>> -            eaddr, adev->vm_manager.max_pfn);
>>>>>> -        return -EINVAL;
>>>>>> +        num_entries = amdgpu_vm_num_entries(adev, cursor-
>>>>>>> level);
>>>>>> +        entry->entries = kvmalloc_array(num_entries,
>>>>>> +                        sizeof(*entry->entries),
>>>>>> +                        GFP_KERNEL | __GFP_ZERO);
>>>>>> +        if (!entry->entries)
>>>>>> +            return -ENOMEM;
>>>>>>        }
>>>>>>
>>>>>> -    for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {
>>>>>> -        struct amdgpu_vm_pt *entry = cursor.entry;
>>>>>> -        struct amdgpu_bo_param bp;
>>>>>> -
>>>>>> -        if (cursor.level < AMDGPU_VM_PTB) {
>>>>>> -            unsigned num_entries;
>>>>>> -
>>>>>> -            num_entries = amdgpu_vm_num_entries(adev,
>>>>>> cursor.level);
>>>>>> -            entry->entries = kvmalloc_array(num_entries,
>>>>>> -                            sizeof(*entry-
>>>>>>> entries),
>>>>>> -                            GFP_KERNEL |
>>>>>> -                            __GFP_ZERO);
>>>>>> -            if (!entry->entries)
>>>>>> -                return -ENOMEM;
>>>>>> -        }
>>>>>> -
>>>>>> -
>>>>>> -        if (entry->base.bo)
>>>>>> -            continue;
>>>>>> -
>>>>>> -        amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
>>>>>> -
>>>>>> -        r = amdgpu_bo_create(adev, &bp, &pt);
>>>>>> -        if (r)
>>>>>> -            return r;
>>>>>> -
>>>>>> -        if (vm->use_cpu_for_update) {
>>>>>> -            r = amdgpu_bo_kmap(pt, NULL);
>>>>>> -            if (r)
>>>>>> -                goto error_free_pt;
>>>>>> -        }
>>>>>> +    if (entry->base.bo)
>>>>>> +        return 0;
>>>>>>
>>>>>> -        /* Keep a reference to the root directory to avoid
>>>>>> -        * freeing them up in the wrong order.
>>>>>> -        */
>>>>>> -        pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
>>>>>> +    amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
>>>>>>
>>>>>> -        amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>>>>>> +    r = amdgpu_bo_create(adev, &bp, &pt);
>>>>>> +    if (r)
>>>>>> +        return r;
>>>>>>
>>>>>> -        r = amdgpu_vm_clear_bo(adev, vm, pt);
>>>>>> +    if (vm->use_cpu_for_update) {
>>>>>> +        r = amdgpu_bo_kmap(pt, NULL);
>>>>>>            if (r)
>>>>>>                goto error_free_pt;
>>>>>>        }
>>>>>>
>>>>>> +    /* Keep a reference to the root directory to avoid
>>>>>> +     * freeing them up in the wrong order.
>>>>>> +     */
>>>>>> +    pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
>>>>>> +    amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>>>>>> +
>>>>>> +    r = amdgpu_vm_clear_bo(adev, vm, pt);
>>>>>> +    if (r)
>>>>>> +        goto error_free_pt;
>>>>>> +
>>>>>>        return 0;
>>>>>>
>>>>>>    error_free_pt:
>>>>>> @@ -1627,6 +1563,7 @@ static int amdgpu_vm_update_ptes(struct
>>>>>> amdgpu_pte_update_params *params,
>>>>>>        struct amdgpu_vm_pt_cursor cursor;
>>>>>>        uint64_t frag_start = start, frag_end;
>>>>>>        unsigned int frag;
>>>>>> +    int r;
>>>>>>
>>>>>>        /* figure out the initial fragment */
>>>>>>        amdgpu_vm_fragment(params, frag_start, end, flags, &frag,
>>>>>> &frag_end); @@ -1634,12 +1571,15 @@ static int
>>>>>> amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>>>>>>        /* walk over the address space and update the PTs */
>>>>>>        amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
>>>>>>        while (cursor.pfn < end) {
>>>>>> -        struct amdgpu_bo *pt = cursor.entry->base.bo;
>>>>>>            unsigned shift, parent_shift, mask;
>>>>>>            uint64_t incr, entry_end, pe_start;
>>>>>> +        struct amdgpu_bo *pt;
>>>>>>
>>>>>> -        if (!pt)
>>>>>> -            return -ENOENT;
>>>>>> +        r = amdgpu_vm_alloc_pts(params->adev, params->vm,
>>>>>> &cursor);
>>>>>> +        if (r)
>>>>>> +            return r;
>>>>>> +
>>>>>> +        pt = cursor.entry->base.bo;
>>>>>>
>>>>>>            /* The root level can't be a huge page */
>>>>>>            if (cursor.level == adev->vm_manager.root_level) { 
>>>>>> diff --git
>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> index 81ff8177f092..116605c038d2 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm);
>>>> int
>>>>>> amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct
>>>>>> amdgpu_vm *vm,
>>>>>>                      int (*callback)(void *p, struct amdgpu_bo *bo),
>>>>>>                      void *param);
>>>>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>>>> -            struct amdgpu_vm *vm,
>>>>>> -            uint64_t saddr, uint64_t size);
>>>>>>    int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job
>>>>>> *job, bool need_pipe_sync);  int amdgpu_vm_update_directories(struct
>>>>>> amdgpu_device *adev,
>>>>>>                     struct amdgpu_vm *vm);
>>>>>> -- 
>>>>>> 2.17.1
>>>>>>
>>>>>> _______________________________________________
>>>>>> amd-gfx mailing list
>>>>>> amd-gfx@lists.freedesktop.org
>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>> _______________________________________________
>>>> amd-gfx mailing list
>>>> amd-gfx@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
       [not found]                                                 ` <CY4PR12MB1622A73D027D2AC53AF2A2B985490-rpdhrqHFk05g4+I42y6h/AdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2019-03-12 21:13                                                   ` Yang, Philip
       [not found]                                                     ` <01deb600-d1b9-8875-ee73-7796702792d5-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: Yang, Philip @ 2019-03-12 21:13 UTC (permalink / raw)
  To: Russell, Kent, Koenig, Christian, Kuehling, Felix,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

A VM fault happens in about 1 out of 10 runs of KFDCWSRTest.BasicTest for
me. I am using SDMA for page table updates; I have not tried CPU page
table updates.

Philip

On 2019-03-12 11:12 a.m., Russell, Kent wrote:
> Peculiar, I hit it immediately when I ran it. Can you try using --gtest_filter=KFDCWSRTest.BasicTest? That one hung every time for me.
> 
>   Kent
> 
>> -----Original Message-----
>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>> Sent: Tuesday, March 12, 2019 11:09 AM
>> To: Russell, Kent <Kent.Russell@amd.com>; Koenig, Christian
>> <Christian.Koenig@amd.com>; Kuehling, Felix <Felix.Kuehling@amd.com>;
>> amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>
>> Yeah, same problem here.
>>
>> I removed libhsakmt package and installed it manually and now it seems to
>> work.
>>
>> Doing some testing now, but at least offhand I can't seem to reproduce the
>> VM fault on a Vega10.
>>
>> Christian.
>>
>> Am 12.03.19 um 16:01 schrieb Russell, Kent:
>>> Oh right, I remember that issue. I had that happen to me once, where my
>> installed libhsakmt didn't match up with the latest source code, so I ended up
>> having to remove the libhsakmt package and pointing it to the folders
>> instead.
>>>
>>>    Kent
>>>
>>>> -----Original Message-----
>>>> From: Koenig, Christian
>>>> Sent: Tuesday, March 12, 2019 10:49 AM
>>>> To: Russell, Kent <Kent.Russell@amd.com>; Kuehling, Felix
>>>> <Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org
>>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>>>
>>>> Yeah, the problem is I do have the libhsakmt installed.
>>>>
>>>> Going to give it a try to specify the directory directly.
>>>>
>>>> Christian.
>>>>
>>>> Am 12.03.19 um 15:47 schrieb Russell, Kent:
>>>>> The README.txt file inside the tests/kfdtest folder has instructions
>>>>> on how
>>>> to do it if you don't have the libhsakmt package installed on your system:
>>>>> export LIBHSAKMT_PATH=/*your local libhsakmt folder*/ With that, the
>>>>> headers and libraries are searched under LIBHSAKMT_PATH/include and
>>>>> LIBHSAKMT_PATH/lib respectively.
>>>>>
>>>>> So if you try export LIBHSAKMT_PATH as the root ROCT folder (the one
>>>> containing include, src, tests, etc), then that should cover it.
>>>>>     Kent
>>>>>
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>>>>>> Sent: Tuesday, March 12, 2019 9:13 AM
>>>>>> To: Russell, Kent <Kent.Russell@amd.com>; Kuehling, Felix
>>>>>> <Felix.Kuehling@amd.com>; Koenig, Christian
>>>>>> <Christian.Koenig@amd.com>; amd-gfx@lists.freedesktop.org
>>>>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
>> demand
>>>>>>
>>>>>> Hi guys,
>>>>>>
>>>>>> so found a few minutes today to compile kfdtest.
>>>>>>
>>>>>> Problem is that during the compile I get a lot of this:
>>>>>>> CMakeFiles/kfdtest.dir/src/BaseQueue.cpp.o: In Funktion
>>>>>>> »BaseQueue::Create(unsigned int, unsigned int, unsigned long*)«:
>>>>>>> /usr/src/ROCT-Thunk-Interface/tests/kfdtest/src/BaseQueue.cpp:57:
>>>>>>> Warnung: undefinierter Verweis auf »hsaKmtCreateQueue«
>>>>>> Any idea?
>>>>>>
>>>>>> Christian.
>>>>>>
>>>>>> Am 11.03.19 um 17:55 schrieb Christian König:
>>>>>>> Hi guys,
>>>>>>>
>>>>>>> well it's most likely some missing handling in the KFD, so I'm
>>>>>>> rather reluctant to revert the change immediately.
>>>>>>>
>>>>>>> Problem is that I don't have time right now to look into it
>>>>>>> immediately. So Kent can you continue to take a look?
>>>>>>>
>>>>>>> Sounds like it's crashing immediately, so it should be something
>>>>>>> obvious.
>>>>>>>
>>>>>>> Christian.
>>>>>>>
>>>>>>> Am 11.03.19 um 10:49 schrieb Russell, Kent:
>>>>>>>>     From what I've been able to dig through, the VM Fault seems to
>>>>>>>> occur right after a doorbell mmap, but that's as far as I got. I
>>>>>>>> can try to revert it in today's merge and see how things go.
>>>>>>>>
>>>>>>>>      Kent
>>>>>>>>
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: Kuehling, Felix
>>>>>>>>> Sent: Friday, March 08, 2019 11:16 PM
>>>>>>>>> To: Koenig, Christian <Christian.Koenig@amd.com>; Russell, Kent
>>>>>>>>> <Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org
>>>>>>>>> Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
>>>> demand
>>>>>>>>> My concerns were related to eviction fence handling. It would
>>>>>>>>> manifest as unnecessary eviction callbacks into KFD that aren't
>>>>>>>>> caused by real evictions. I addressed that with a previous patch
>>>>>>>>> series that removed the need to remove eviction fences and add
>>>>>>>>> them back around page table updates in
>> amdgpu_amdkfd_gpuvm.c.
>>>>>>>>>
>>>>>>>>> I don't know what's going on here. I can probably take a look on
>>>>>>>>> Monday. I haven't considered what changed with respect to PD
>>>>>>>>> updates.
>>>>>>>>>
>>>>>>>>> Kent, can we temporarily revert the offending change in
>>>>>>>>> amd-kfd-staging just to unblock the merge?
>>>>>>>>>
>>>>>>>>> Christian, I think KFD is currently broken on amd-staging-drm-next.
>>>>>>>>> If we're
>>>>>>>>> serious about supporting KFD upstream, you may also want to
>>>>>>>>> consider reverting your change there for now. Also consider
>>>>>>>>> building the Thunk and kfdtest so you can do quick smoke tests
>>>>>>>>> locally whenever you make amdgpu_vm changes that can affect
>> KFD.
>>>>>>>>> https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface
>>>>>>>>>
>>>>>>>>> Regards,
>>>>>>>>>       Felix
>>>>>>>>>
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On
>> Behalf
>>>> Of
>>>>>>>>> Christian König
>>>>>>>>> Sent: Friday, March 08, 2019 9:14 AM
>>>>>>>>> To: Russell, Kent <Kent.Russell@amd.com>;
>>>>>>>>> amd-gfx@lists.freedesktop.org
>>>>>>>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
>>>> demand
>>>>>>>>> My best guess is that we forget somewhere to update the PDs.
>>>>>>>>> What hardware is that on?
>>>>>>>>>
>>>>>>>>> Felix already mentioned that this could be problematic for the KFD.
>>>>>>>>>
>>>>>>>>> Maybe he has an idea,
>>>>>>>>> Christian.
>>>>>>>>>
>>>>>>>>> Am 08.03.19 um 15:04 schrieb Russell, Kent:
>>>>>>>>>> Hi Christian,
>>>>>>>>>>
>>>>>>>>>> This patch ended up causing a VM Fault in KFDTest. Reverting
>>>>>>>>>> just this
>>>>>>>>> patch addressed the issue:
>>>>>>>>>> [   82.703503] amdgpu 0000:0c:00.0: GPU fault detected: 146
>>>>>>>>>> 0x0000480c for
>>>>>>>>> process  pid 0 thread  pid 0
>>>>>>>>>> [   82.703512] amdgpu 0000:0c:00.0:
>>>>>>>>> VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x00001000
>>>>>>>>>> [   82.703516] amdgpu 0000:0c:00.0:
>>>>>>>>> VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C
>>>>>>>>>> [   82.703522] amdgpu 0000:0c:00.0: VM fault (0x0c, vmid 8,
>>>>>>>>>> pasid
>>>>>>>>>> 32769) at
>>>>>>>>> page 4096, read from 'TC0' (0x54433000) (72)
>>>>>>>>>> [   82.703585] Evicting PASID 32769 queues
>>>>>>>>>>
>>>>>>>>>> I am looking into it, but if you have any insight that would be
>>>>>>>>>> great in
>>>>>>>>> helping to resolve it quickly.
>>>>>>>>>>       Kent
>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On
>>>> Behalf
>>>>>> Of
>>>>>>>>>>> Christian König
>>>>>>>>>>> Sent: Tuesday, February 26, 2019 7:47 AM
>>>>>>>>>>> To: amd-gfx@lists.freedesktop.org
>>>>>>>>>>> Subject: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
>>>> demand
>>>>>>>>>>> Let's start to allocate VM PDs/PTs on demand instead of
>>>>>>>>>>> pre-allocating them during mapping.
>>>>>>>>>>>
>>>>>>>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>>>>>>>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>>>>>>>>> ---
>>>>>>>>>>>       .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10
>> +-
>>>>>>>>>>>       drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --
>>>>>>>>>>>       drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --
>>>>>>>>>>>       drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136
>>>>>>>>>>> +++++------------
>>>>>>>>> -
>>>>>>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -
>>>>>>>>>>>       5 files changed, 39 insertions(+), 129 deletions(-)
>>>>>>>>>>>
>>>>>>>>>>> diff --git
>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>>>>>>> index 31e3953dcb6e..088e9b6b765b 100644
>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>>>>>>> +++
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>>>>>>> @@ -410,15 +410,7 @@ static int add_bo_to_vm(struct
>>>>>> amdgpu_device
>>>>>>>>>>> *adev, struct kgd_mem *mem,
>>>>>>>>>>>           if (p_bo_va_entry)
>>>>>>>>>>>               *p_bo_va_entry = bo_va_entry;
>>>>>>>>>>>
>>>>>>>>>>> -    /* Allocate new page tables if needed and validate
>>>>>>>>>>> -     * them.
>>>>>>>>>>> -     */
>>>>>>>>>>> -    ret = amdgpu_vm_alloc_pts(adev, vm, va,
>>>>>>>>>>> amdgpu_bo_size(bo));
>>>>>>>>>>> -    if (ret) {
>>>>>>>>>>> -        pr_err("Failed to allocate pts, err=%d\n", ret);
>>>>>>>>>>> -        goto err_alloc_pts;
>>>>>>>>>>> -    }
>>>>>>>>>>> -
>>>>>>>>>>> +    /* Allocate validate page tables if needed */
>>>>>>>>>>>           ret = vm_validate_pt_pd_bos(vm);
>>>>>>>>>>>           if (ret) {
>>>>>>>>>>>               pr_err("validate_pt_pd_bos() failed\n"); diff
>>>>>>>>>>> --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>>>>>>> index 7e22be7ca68a..54dd02a898b9 100644
>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>>>>>>> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct
>>>>>> amdgpu_device
>>>>>>>>>>> *adev, struct amdgpu_vm *vm,
>>>>>>>>>>>               return -ENOMEM;
>>>>>>>>>>>           }
>>>>>>>>>>>
>>>>>>>>>>> -    r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm,
>>>>>>>>>>> csa_addr,
>>>>>>>>>>> -                size);
>>>>>>>>>>> -    if (r) {
>>>>>>>>>>> -        DRM_ERROR("failed to allocate pts for static CSA,
>>>>>>>>>>> err=%d\n", r);
>>>>>>>>>>> -        amdgpu_vm_bo_rmv(adev, *bo_va);
>>>>>>>>>>> -        ttm_eu_backoff_reservation(&ticket, &list);
>>>>>>>>>>> -        return r;
>>>>>>>>>>> -    }
>>>>>>>>>>> -
>>>>>>>>>>>           r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
>>>>>>>>>>>                        AMDGPU_PTE_READABLE |
>>>>>>>>>>> AMDGPU_PTE_WRITEABLE
>>>>>>>>>>> |
>>>>>>>>>>>                        AMDGPU_PTE_EXECUTABLE); diff --git
>>>>>>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>>>>>>> index 555285e329ed..fcaaac30e84b 100644
>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>>>>>>> @@ -625,11 +625,6 @@ int amdgpu_gem_va_ioctl(struct
>>>> drm_device
>>>>>>>>> *dev,
>>>>>>>>>>> void *data,
>>>>>>>>>>>
>>>>>>>>>>>           switch (args->operation) {
>>>>>>>>>>>           case AMDGPU_VA_OP_MAP:
>>>>>>>>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
>>>>>>>>>>>> va_address,
>>>>>>>>>>> -                    args->map_size);
>>>>>>>>>>> -        if (r)
>>>>>>>>>>> -            goto error_backoff;
>>>>>>>>>>> -
>>>>>>>>>>>               va_flags = amdgpu_gmc_get_pte_flags(adev,
>>>>>>>>>>> args->flags);
>>>>>>>>>>>               r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
>>>>>>>>>>>                            args->offset_in_bo, args->map_size,
>>>>>>>>>>> @@
>>>>>>>>>>> -
>>>>>>>>>>> 645,11 +640,6 @@ int amdgpu_gem_va_ioctl(struct drm_device
>>>> *dev,
>>>>>>>>> void
>>>>>>>>>>> *data,
>>>>>>>>>>>                               args->map_size);
>>>>>>>>>>>               break;
>>>>>>>>>>>           case AMDGPU_VA_OP_REPLACE:
>>>>>>>>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
>>>>>>>>>>>> va_address,
>>>>>>>>>>> -                    args->map_size);
>>>>>>>>>>> -        if (r)
>>>>>>>>>>> -            goto error_backoff;
>>>>>>>>>>> -
>>>>>>>>>>>               va_flags = amdgpu_gmc_get_pte_flags(adev,
>>>>>>>>>>> args->flags);
>>>>>>>>>>>               r = amdgpu_vm_bo_replace_map(adev, bo_va, args-
>>>>>>>>>>>> va_address,
>>>>>>>>>>>                                args->offset_in_bo, args-
>>>>>>>>>>>> map_size, diff --git
>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>>>>>> index 362436f4e856..dfad543fc000 100644
>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>>>>>> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct
>>>>>>>>>>> amdgpu_device *adev,
>>>>>>>>>>>           }
>>>>>>>>>>>       }
>>>>>>>>>>>
>>>>>>>>>>> -/**
>>>>>>>>>>> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
>>>>>>>>>>> - *
>>>>>>>>>>> - * @adev: amdgpu_device pointer
>>>>>>>>>>> - * @vm: amdgpu_vm structure
>>>>>>>>>>> - * @start: start addr of the walk
>>>>>>>>>>> - * @cursor: state to initialize
>>>>>>>>>>> - *
>>>>>>>>>>> - * Start a walk and go directly to the leaf node.
>>>>>>>>>>> - */
>>>>>>>>>>> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device
>>>> *adev,
>>>>>>>>>>> -                    struct amdgpu_vm *vm, uint64_t start,
>>>>>>>>>>> -                    struct amdgpu_vm_pt_cursor *cursor) -{
>>>>>>>>>>> -    amdgpu_vm_pt_start(adev, vm, start, cursor);
>>>>>>>>>>> -    while (amdgpu_vm_pt_descendant(adev, cursor)); -}
>>>>>>>>>>> -
>>>>>>>>>>> -/**
>>>>>>>>>>> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
>>>>>>>>>>> - *
>>>>>>>>>>> - * @adev: amdgpu_device pointer
>>>>>>>>>>> - * @cursor: current state
>>>>>>>>>>> - *
>>>>>>>>>>> - * Walk the PD/PT tree to the next leaf node.
>>>>>>>>>>> - */
>>>>>>>>>>> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device
>>>> *adev,
>>>>>>>>>>> -                   struct amdgpu_vm_pt_cursor *cursor) -{
>>>>>>>>>>> -    amdgpu_vm_pt_next(adev, cursor);
>>>>>>>>>>> -    if (cursor->pfn != ~0ll)
>>>>>>>>>>> -        while (amdgpu_vm_pt_descendant(adev, cursor)); -}
>>>>>>>>>>> -
>>>>>>>>>>> -/**
>>>>>>>>>>> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in
>>>>>>>>>>> the hierarchy
>>>>>>>>>>> - */
>>>>>>>>>>> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end,
>>>>>>>>>>> cursor)
>>>>>>>>>>>        \
>>>>>>>>>>> -    for (amdgpu_vm_pt_first_leaf((adev), (vm), (start),
>>>>>>>>>>> &(cursor));
>>>>>>>>>>>            \
>>>>>>>>>>> -         (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev),
>>>>>>>>>>> &(cursor)))
>>>>>>>>>>> -
>>>>>>>>>>>       /**
>>>>>>>>>>>        * amdgpu_vm_pt_first_dfs - start a deep first search
>>>>>>>>>>>        *
>>>>>>>>>>> @@ -915,74 +874,51 @@ static void
>> amdgpu_vm_bo_param(struct
>>>>>>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>>>>>>>        * Returns:
>>>>>>>>>>>        * 0 on success, errno otherwise.
>>>>>>>>>>>        */
>>>>>>>>>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>>>>>>>>> -            struct amdgpu_vm *vm,
>>>>>>>>>>> -            uint64_t saddr, uint64_t size)
>>>>>>>>>>> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>>>>>>>>> +                   struct amdgpu_vm *vm,
>>>>>>>>>>> +                   struct amdgpu_vm_pt_cursor *cursor)
>>>>>>>>>>>       {
>>>>>>>>>>> -    struct amdgpu_vm_pt_cursor cursor;
>>>>>>>>>>> +    struct amdgpu_vm_pt *entry = cursor->entry;
>>>>>>>>>>> +    struct amdgpu_bo_param bp;
>>>>>>>>>>>           struct amdgpu_bo *pt;
>>>>>>>>>>> -    uint64_t eaddr;
>>>>>>>>>>>           int r;
>>>>>>>>>>>
>>>>>>>>>>> -    /* validate the parameters */
>>>>>>>>>>> -    if (saddr & AMDGPU_GPU_PAGE_MASK || size &
>>>>>>>>>>> AMDGPU_GPU_PAGE_MASK)
>>>>>>>>>>> -        return -EINVAL;
>>>>>>>>>>> +    if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
>>>>>>>>>>> +        unsigned num_entries;
>>>>>>>>>>>
>>>>>>>>>>> -    eaddr = saddr + size - 1;
>>>>>>>>>>> -
>>>>>>>>>>> -    saddr /= AMDGPU_GPU_PAGE_SIZE;
>>>>>>>>>>> -    eaddr /= AMDGPU_GPU_PAGE_SIZE;
>>>>>>>>>>> -
>>>>>>>>>>> -    if (eaddr >= adev->vm_manager.max_pfn) {
>>>>>>>>>>> -        dev_err(adev->dev, "va above limit (0x%08llX >=
>>>>>>>>>>> 0x%08llX)\n",
>>>>>>>>>>> -            eaddr, adev->vm_manager.max_pfn);
>>>>>>>>>>> -        return -EINVAL;
>>>>>>>>>>> +        num_entries = amdgpu_vm_num_entries(adev, cursor-
>>>>>>>>>>>> level);
>>>>>>>>>>> +        entry->entries = kvmalloc_array(num_entries,
>>>>>>>>>>> +                        sizeof(*entry->entries),
>>>>>>>>>>> +                        GFP_KERNEL | __GFP_ZERO);
>>>>>>>>>>> +        if (!entry->entries)
>>>>>>>>>>> +            return -ENOMEM;
>>>>>>>>>>>           }
>>>>>>>>>>>
>>>>>>>>>>> -    for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr,
>>>>>>>>>>> cursor) {
>>>>>>>>>>> -        struct amdgpu_vm_pt *entry = cursor.entry;
>>>>>>>>>>> -        struct amdgpu_bo_param bp;
>>>>>>>>>>> -
>>>>>>>>>>> -        if (cursor.level < AMDGPU_VM_PTB) {
>>>>>>>>>>> -            unsigned num_entries;
>>>>>>>>>>> -
>>>>>>>>>>> -            num_entries = amdgpu_vm_num_entries(adev,
>>>>>>>>>>> cursor.level);
>>>>>>>>>>> -            entry->entries = kvmalloc_array(num_entries,
>>>>>>>>>>> -                            sizeof(*entry-
>>>>>>>>>>>> entries),
>>>>>>>>>>> -                            GFP_KERNEL |
>>>>>>>>>>> -                            __GFP_ZERO);
>>>>>>>>>>> -            if (!entry->entries)
>>>>>>>>>>> -                return -ENOMEM;
>>>>>>>>>>> -        }
>>>>>>>>>>> -
>>>>>>>>>>> -
>>>>>>>>>>> -        if (entry->base.bo)
>>>>>>>>>>> -            continue;
>>>>>>>>>>> -
>>>>>>>>>>> -        amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
>>>>>>>>>>> -
>>>>>>>>>>> -        r = amdgpu_bo_create(adev, &bp, &pt);
>>>>>>>>>>> -        if (r)
>>>>>>>>>>> -            return r;
>>>>>>>>>>> -
>>>>>>>>>>> -        if (vm->use_cpu_for_update) {
>>>>>>>>>>> -            r = amdgpu_bo_kmap(pt, NULL);
>>>>>>>>>>> -            if (r)
>>>>>>>>>>> -                goto error_free_pt;
>>>>>>>>>>> -        }
>>>>>>>>>>> +    if (entry->base.bo)
>>>>>>>>>>> +        return 0;
>>>>>>>>>>>
>>>>>>>>>>> -        /* Keep a reference to the root directory to avoid
>>>>>>>>>>> -        * freeing them up in the wrong order.
>>>>>>>>>>> -        */
>>>>>>>>>>> -        pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
>>>>>>>>>>> +    amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
>>>>>>>>>>>
>>>>>>>>>>> -        amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>>>>>>>>>>> +    r = amdgpu_bo_create(adev, &bp, &pt);
>>>>>>>>>>> +    if (r)
>>>>>>>>>>> +        return r;
>>>>>>>>>>>
>>>>>>>>>>> -        r = amdgpu_vm_clear_bo(adev, vm, pt);
>>>>>>>>>>> +    if (vm->use_cpu_for_update) {
>>>>>>>>>>> +        r = amdgpu_bo_kmap(pt, NULL);
>>>>>>>>>>>               if (r)
>>>>>>>>>>>                   goto error_free_pt;
>>>>>>>>>>>           }
>>>>>>>>>>>
>>>>>>>>>>> +    /* Keep a reference to the root directory to avoid
>>>>>>>>>>> +     * freeing them up in the wrong order.
>>>>>>>>>>> +     */
>>>>>>>>>>> +    pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
>>>>>>>>>>> +    amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>>>>>>>>>>> +
>>>>>>>>>>> +    r = amdgpu_vm_clear_bo(adev, vm, pt);
>>>>>>>>>>> +    if (r)
>>>>>>>>>>> +        goto error_free_pt;
>>>>>>>>>>> +
>>>>>>>>>>>           return 0;
>>>>>>>>>>>
>>>>>>>>>>>       error_free_pt:
>>>>>>>>>>> @@ -1627,6 +1563,7 @@ static int
>>>> amdgpu_vm_update_ptes(struct
>>>>>>>>>>> amdgpu_pte_update_params *params,
>>>>>>>>>>>           struct amdgpu_vm_pt_cursor cursor;
>>>>>>>>>>>           uint64_t frag_start = start, frag_end;
>>>>>>>>>>>           unsigned int frag;
>>>>>>>>>>> +    int r;
>>>>>>>>>>>
>>>>>>>>>>>           /* figure out the initial fragment */
>>>>>>>>>>>           amdgpu_vm_fragment(params, frag_start, end, flags,
>>>>>>>>>>> &frag, &frag_end); @@ -1634,12 +1571,15 @@ static int
>>>>>>>>>>> amdgpu_vm_update_ptes(struct amdgpu_pte_update_params
>>>>>> *params,
>>>>>>>>>>>           /* walk over the address space and update the PTs */
>>>>>>>>>>>           amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
>>>>>>>>>>>           while (cursor.pfn < end) {
>>>>>>>>>>> -        struct amdgpu_bo *pt = cursor.entry->base.bo;
>>>>>>>>>>>               unsigned shift, parent_shift, mask;
>>>>>>>>>>>               uint64_t incr, entry_end, pe_start;
>>>>>>>>>>> +        struct amdgpu_bo *pt;
>>>>>>>>>>>
>>>>>>>>>>> -        if (!pt)
>>>>>>>>>>> -            return -ENOENT;
>>>>>>>>>>> +        r = amdgpu_vm_alloc_pts(params->adev, params->vm,
>>>>>>>>>>> &cursor);
>>>>>>>>>>> +        if (r)
>>>>>>>>>>> +            return r;
>>>>>>>>>>> +
>>>>>>>>>>> +        pt = cursor.entry->base.bo;
>>>>>>>>>>>
>>>>>>>>>>>               /* The root level can't be a huge page */
>>>>>>>>>>>               if (cursor.level == adev->vm_manager.root_level)
>>>>>>>>>>> { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>>>>>>> index 81ff8177f092..116605c038d2 100644
>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>>>>>>> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct
>> amdgpu_vm
>>>>>> *vm);
>>>>>>>>> int
>>>>>>>>>>> amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev,
>> struct
>>>>>>>>>>> amdgpu_vm *vm,
>>>>>>>>>>>                         int (*callback)(void *p, struct
>>>>>>>>>>> amdgpu_bo *bo),
>>>>>>>>>>>                         void *param); -int
>>>>>>>>>>> amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>>>>>>>>> -            struct amdgpu_vm *vm,
>>>>>>>>>>> -            uint64_t saddr, uint64_t size);
>>>>>>>>>>>       int amdgpu_vm_flush(struct amdgpu_ring *ring, struct
>>>>>>>>>>> amdgpu_job *job, bool need_pipe_sync);  int
>>>>>>>>>>> amdgpu_vm_update_directories(struct
>>>>>>>>>>> amdgpu_device *adev,
>>>>>>>>>>>                        struct amdgpu_vm *vm);
>>>>>>>>>>> --
>>>>>>>>>>> 2.17.1
>>>>>>>>>>>
>>>>>>>>>>> _______________________________________________
>>>>>>>>>>> amd-gfx mailing list
>>>>>>>>>>> amd-gfx@lists.freedesktop.org
>>>>>>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>>>>>>> _______________________________________________
>>>>>>>>> amd-gfx mailing list
>>>>>>>>> amd-gfx@lists.freedesktop.org
>>>>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>>>>>> _______________________________________________
>>>>>>>> amd-gfx mailing list
>>>>>>>> amd-gfx@lists.freedesktop.org
>>>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
       [not found]                                                     ` <01deb600-d1b9-8875-ee73-7796702792d5-5C7GfCeVMHo@public.gmane.org>
@ 2019-03-12 21:19                                                       ` Kuehling, Felix
       [not found]                                                         ` <baf94765-e2a2-6e2f-7e6d-69575a2cdbd2-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: Kuehling, Felix @ 2019-03-12 21:19 UTC (permalink / raw)
  To: Yang, Philip, Russell, Kent, Koenig, Christian,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

I'm also still seeing VM faults in the eviction test even with my fix, 
and even with SDMA page table updates. There is still something else 
going wrong. :/

Thanks,
   Felix

On 2019-03-12 5:13 p.m., Yang, Philip wrote:
> A VM fault happens in about 1 out of 10 runs of KFDCWSRTest.BasicTest for
> me. I am using SDMA for page table updates; I have not tried CPU page
> table updates.
>
> Philip
>
> On 2019-03-12 11:12 a.m., Russell, Kent wrote:
>> Peculiar, I hit it immediately when I ran it. Can you try using --gtest_filter=KFDCWSRTest.BasicTest? That one hung every time for me.
>>
>>    Kent
>>
>>> -----Original Message-----
>>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>>> Sent: Tuesday, March 12, 2019 11:09 AM
>>> To: Russell, Kent <Kent.Russell@amd.com>; Koenig, Christian
>>> <Christian.Koenig@amd.com>; Kuehling, Felix <Felix.Kuehling@amd.com>;
>>> amd-gfx@lists.freedesktop.org
>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>>
>>> Yeah, same problem here.
>>>
>>> I removed libhsakmt package and installed it manually and now it seems to
>>> work.
>>>
>>> Doing some testing now, but at least offhand I can't seem to reproduce the
>>> VM fault on a Vega10.
>>>
>>> Christian.
>>>
>>> Am 12.03.19 um 16:01 schrieb Russell, Kent:
>>>> Oh right, I remember that issue. I had that happen to me once, where my
>>> installed libhsakmt didn't match up with the latest source code, so I ended up
>>> having to remove the libhsakmt package and pointing it to the folders
>>> instead.
>>>>     Kent
>>>>
>>>>> -----Original Message-----
>>>>> From: Koenig, Christian
>>>>> Sent: Tuesday, March 12, 2019 10:49 AM
>>>>> To: Russell, Kent <Kent.Russell@amd.com>; Kuehling, Felix
>>>>> <Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org
>>>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>>>>
>>>>> Yeah, the problem is I do have the libhsakmt installed.
>>>>>
>>>>> Going to give it a try to specify the directory directly.
>>>>>
>>>>> Christian.
>>>>>
>>>>> Am 12.03.19 um 15:47 schrieb Russell, Kent:
>>>>>> The README.txt file inside the tests/kfdtest folder has instructions
>>>>>> on how
>>>>> to do it if you don't have the libhsakmt package installed on your system:
>>>>>> export LIBHSAKMT_PATH=/*your local libhsakmt folder*/ With that, the
>>>>>> headers and libraries are searched under LIBHSAKMT_PATH/include and
>>>>>> LIBHSAKMT_PATH/lib respectively.
>>>>>>
>>>>>> So if you try export LIBHSAKMT_PATH as the root ROCT folder (the one
>>>>> containing include, src, tests, etc), then that should cover it.
>>>>>>      Kent
>>>>>>
>>>>>>
>>>>>>> -----Original Message-----
>>>>>>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>>>>>>> Sent: Tuesday, March 12, 2019 9:13 AM
>>>>>>> To: Russell, Kent <Kent.Russell@amd.com>; Kuehling, Felix
>>>>>>> <Felix.Kuehling@amd.com>; Koenig, Christian
>>>>>>> <Christian.Koenig@amd.com>; amd-gfx@lists.freedesktop.org
>>>>>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
>>> demand
>>>>>>> Hi guys,
>>>>>>>
>>>>>>> so found a few minutes today to compile kfdtest.
>>>>>>>
>>>>>>> Problem is that during the compile I get a lot of this:
>>>>>>>> CMakeFiles/kfdtest.dir/src/BaseQueue.cpp.o: In Funktion
>>>>>>>> »BaseQueue::Create(unsigned int, unsigned int, unsigned long*)«:
>>>>>>>> /usr/src/ROCT-Thunk-Interface/tests/kfdtest/src/BaseQueue.cpp:57:
>>>>>>>> Warnung: undefinierter Verweis auf »hsaKmtCreateQueue«
>>>>>>> Any idea?
>>>>>>>
>>>>>>> Christian.
>>>>>>>
>>>>>>> Am 11.03.19 um 17:55 schrieb Christian König:
>>>>>>>> Hi guys,
>>>>>>>>
>>>>>>>> well it's most likely some missing handling in the KFD, so I'm
>>>>>>>> rather reluctant to revert the change immediately.
>>>>>>>>
>>>>>>>> Problem is that I don't have time right now to look into it
>>>>>>>> immediately. So Kent can you continue to take a look?
>>>>>>>>
>>>>>>>> Sounds like it's crashing immediately, so it should be something
>>>>>>>> obvious.
>>>>>>>> Christian.
>>>>>>>>
>>>>>>>> Am 11.03.19 um 10:49 schrieb Russell, Kent:
>>>>>>>>>      From what I've been able to dig through, the VM Fault seems to
>>>>>>>>> occur right after a doorbell mmap, but that's as far as I got. I
>>>>>>>>> can try to revert it in today's merge and see how things go.
>>>>>>>>>
>>>>>>>>>       Kent
>>>>>>>>>
>>>>>>>>>> -----Original Message-----
>>>>>>>>>> From: Kuehling, Felix
>>>>>>>>>> Sent: Friday, March 08, 2019 11:16 PM
>>>>>>>>>> To: Koenig, Christian <Christian.Koenig@amd.com>; Russell, Kent
>>>>>>>>>> <Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org
>>>>>>>>>> Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
>>>>> demand
>>>>>>>>>> My concerns were related to eviction fence handling. It would
>>>>>>>>>> manifest as unnecessary eviction callbacks into KFD that aren't
>>>>>>>>>> caused by real evictions. I addressed that with a previous patch
>>>>>>>>>> series that removed the need to remove eviction fences and add
>>>>>>>>>> them back around page table updates in
>>> amdgpu_amdkfd_gpuvm.c.
>>>>>>>>>> I don't know what's going on here. I can probably take a look on
>>>>>>>>>> Monday. I haven't considered what changed with respect to PD
>>>>>>>>>> updates.
>>>>>>>>>>
>>>>>>>>>> Kent, can we temporarily revert the offending change in
>>>>>>>>>> amd-kfd-staging just to unblock the merge?
>>>>>>>>>>
>>>>>>>>>> Christian, I think KFD is currently broken on amd-staging-drm-next.
>>>>>>>>>> If we're
>>>>>>>>>> serious about supporting KFD upstream, you may also want to
>>>>>>>>>> consider reverting your change there for now. Also consider
>>>>>>>>>> building the Thunk and kfdtest so you can do quick smoke tests
>>>>>>>>>> locally whenever you make amdgpu_vm changes that can affect
>>> KFD.
>>>>>>>>>> https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface
>>>>>>>>>>
>>>>>>>>>> Regards,
>>>>>>>>>>        Felix
>>>>>>>>>>
>>>>>>>>>> -----Original Message-----
>>>>>>>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On
>>> Behalf
>>>>> Of
>>>>>>>>>> Christian König
>>>>>>>>>> Sent: Friday, March 08, 2019 9:14 AM
>>>>>>>>>> To: Russell, Kent <Kent.Russell@amd.com>;
>>>>>>>>>> amd-gfx@lists.freedesktop.org
>>>>>>>>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
>>>>> demand
>>>>>>>>>> My best guess is that we forget somewhere to update the PDs.
>>>>>>>>>> What hardware is that on?
>>>>>>>>>>
>>>>>>>>>> Felix already mentioned that this could be problematic for the KFD.
>>>>>>>>>>
>>>>>>>>>> Maybe he has an idea,
>>>>>>>>>> Christian.
>>>>>>>>>>
>>>>>>>>>> Am 08.03.19 um 15:04 schrieb Russell, Kent:
>>>>>>>>>>> Hi Christian,
>>>>>>>>>>>
>>>>>>>>>>> This patch ended up causing a VM Fault in KFDTest. Reverting
>>>>>>>>>>> just this
>>>>>>>>>> patch addressed the issue:
>>>>>>>>>>> [   82.703503] amdgpu 0000:0c:00.0: GPU fault detected: 146
>>>>>>>>>>> 0x0000480c for
>>>>>>>>>> process  pid 0 thread  pid 0
>>>>>>>>>>> [   82.703512] amdgpu 0000:0c:00.0:
>>>>>>>>>> VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x00001000
>>>>>>>>>>> [   82.703516] amdgpu 0000:0c:00.0:
>>>>>>>>>> VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C
>>>>>>>>>>> [   82.703522] amdgpu 0000:0c:00.0: VM fault (0x0c, vmid 8,
>>>>>>>>>>> pasid
>>>>>>>>>>> 32769) at
>>>>>>>>>> page 4096, read from 'TC0' (0x54433000) (72)
>>>>>>>>>>> [   82.703585] Evicting PASID 32769 queues
>>>>>>>>>>>
>>>>>>>>>>> I am looking into it, but if you have any insight that would be
>>>>>>>>>>> great in
>>>>>>>>>> helping to resolve it quickly.
>>>>>>>>>>>        Kent
>>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On
>>>>> Behalf
>>>>>>> Of
>>>>>>>>>>>> Christian König
>>>>>>>>>>>> Sent: Tuesday, February 26, 2019 7:47 AM
>>>>>>>>>>>> To: amd-gfx@lists.freedesktop.org
>>>>>>>>>>>> Subject: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
>>>>> demand
>>>>>>>>>>>> Let's start to allocate VM PDs/PTs on demand instead of
>>>>>>>>>>>> pre-allocating them during mapping.
>>>>>>>>>>>>
>>>>>>>>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>>>>>>>>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>>>>>>>>>> ---
>>>>>>>>>>>>        .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10
>>> +-
>>>>>>>>>>>>        drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c       |   9 --
>>>>>>>>>>>>        drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  10 --
>>>>>>>>>>>>        drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 136
>>>>>>>>>>>> +++++------------
>>>>>>>>>> -
>>>>>>>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |   3 -
>>>>>>>>>>>>        5 files changed, 39 insertions(+), 129 deletions(-)
>>>>>>>>>>>>
>>>>>>>>>>>> diff --git
>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>>>>>>>> index 31e3953dcb6e..088e9b6b765b 100644
>>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>>>>>>>> +++
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>>>>>>>> @@ -410,15 +410,7 @@ static int add_bo_to_vm(struct
>>>>>>> amdgpu_device
>>>>>>>>>>>> *adev, struct kgd_mem *mem,
>>>>>>>>>>>>            if (p_bo_va_entry)
>>>>>>>>>>>>                *p_bo_va_entry = bo_va_entry;
>>>>>>>>>>>>
>>>>>>>>>>>> -    /* Allocate new page tables if needed and validate
>>>>>>>>>>>> -     * them.
>>>>>>>>>>>> -     */
>>>>>>>>>>>> -    ret = amdgpu_vm_alloc_pts(adev, vm, va,
>>>>>>>>>>>> amdgpu_bo_size(bo));
>>>>>>>>>>>> -    if (ret) {
>>>>>>>>>>>> -        pr_err("Failed to allocate pts, err=%d\n", ret);
>>>>>>>>>>>> -        goto err_alloc_pts;
>>>>>>>>>>>> -    }
>>>>>>>>>>>> -
>>>>>>>>>>>> +    /* Allocate validate page tables if needed */
>>>>>>>>>>>>            ret = vm_validate_pt_pd_bos(vm);
>>>>>>>>>>>>            if (ret) {
>>>>>>>>>>>>                pr_err("validate_pt_pd_bos() failed\n"); diff
>>>>>>>>>>>> --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>>>>>>>> index 7e22be7ca68a..54dd02a898b9 100644
>>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>>>>>>>> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct
>>>>>>> amdgpu_device
>>>>>>>>>>>> *adev, struct amdgpu_vm *vm,
>>>>>>>>>>>>                return -ENOMEM;
>>>>>>>>>>>>            }
>>>>>>>>>>>>
>>>>>>>>>>>> -    r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm,
>>>>>>>>>>>> csa_addr,
>>>>>>>>>>>> -                size);
>>>>>>>>>>>> -    if (r) {
>>>>>>>>>>>> -        DRM_ERROR("failed to allocate pts for static CSA,
>>>>>>>>>>>> err=%d\n", r);
>>>>>>>>>>>> -        amdgpu_vm_bo_rmv(adev, *bo_va);
>>>>>>>>>>>> -        ttm_eu_backoff_reservation(&ticket, &list);
>>>>>>>>>>>> -        return r;
>>>>>>>>>>>> -    }
>>>>>>>>>>>> -
>>>>>>>>>>>>            r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
>>>>>>>>>>>>                         AMDGPU_PTE_READABLE |
>>>>>>>>>>>> AMDGPU_PTE_WRITEABLE
>>>>>>>>>>>> |
>>>>>>>>>>>>                         AMDGPU_PTE_EXECUTABLE); diff --git
>>>>>>>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>>>>>>>> index 555285e329ed..fcaaac30e84b 100644
>>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>>>>>>>> @@ -625,11 +625,6 @@ int amdgpu_gem_va_ioctl(struct
>>>>> drm_device
>>>>>>>>>> *dev,
>>>>>>>>>>>> void *data,
>>>>>>>>>>>>
>>>>>>>>>>>>            switch (args->operation) {
>>>>>>>>>>>>            case AMDGPU_VA_OP_MAP:
>>>>>>>>>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
>>>>>>>>>>>>> va_address,
>>>>>>>>>>>> -                    args->map_size);
>>>>>>>>>>>> -        if (r)
>>>>>>>>>>>> -            goto error_backoff;
>>>>>>>>>>>> -
>>>>>>>>>>>>                va_flags = amdgpu_gmc_get_pte_flags(adev,
>>>>>>>>>>>> args->flags);
>>>>>>>>>>>>                r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
>>>>>>>>>>>>                             args->offset_in_bo, args->map_size,
>>>>>>>>>>>> @@
>>>>>>>>>>>> -
>>>>>>>>>>>> 645,11 +640,6 @@ int amdgpu_gem_va_ioctl(struct drm_device
>>>>> *dev,
>>>>>>>>>> void
>>>>>>>>>>>> *data,
>>>>>>>>>>>>                                args->map_size);
>>>>>>>>>>>>                break;
>>>>>>>>>>>>            case AMDGPU_VA_OP_REPLACE:
>>>>>>>>>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
>>>>>>>>>>>>> va_address,
>>>>>>>>>>>> -                    args->map_size);
>>>>>>>>>>>> -        if (r)
>>>>>>>>>>>> -            goto error_backoff;
>>>>>>>>>>>> -
>>>>>>>>>>>>                va_flags = amdgpu_gmc_get_pte_flags(adev,
>>>>>>>>>>>> args->flags);
>>>>>>>>>>>>                r = amdgpu_vm_bo_replace_map(adev, bo_va, args-
>>>>>>>>>>>>> va_address,
>>>>>>>>>>>>                                 args->offset_in_bo, args-
>>>>>>>>>>>>> map_size, diff --git
>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>>>>>>> index 362436f4e856..dfad543fc000 100644
>>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>>>>>>> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct
>>>>>>>>>>>> amdgpu_device *adev,
>>>>>>>>>>>>            }
>>>>>>>>>>>>        }
>>>>>>>>>>>>
>>>>>>>>>>>> -/**
>>>>>>>>>>>> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
>>>>>>>>>>>> - *
>>>>>>>>>>>> - * @adev: amdgpu_device pointer
>>>>>>>>>>>> - * @vm: amdgpu_vm structure
>>>>>>>>>>>> - * @start: start addr of the walk
>>>>>>>>>>>> - * @cursor: state to initialize
>>>>>>>>>>>> - *
>>>>>>>>>>>> - * Start a walk and go directly to the leaf node.
>>>>>>>>>>>> - */
>>>>>>>>>>>> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device
>>>>> *adev,
>>>>>>>>>>>> -                    struct amdgpu_vm *vm, uint64_t start,
>>>>>>>>>>>> -                    struct amdgpu_vm_pt_cursor *cursor) -{
>>>>>>>>>>>> -    amdgpu_vm_pt_start(adev, vm, start, cursor);
>>>>>>>>>>>> -    while (amdgpu_vm_pt_descendant(adev, cursor)); -}
>>>>>>>>>>>> -
>>>>>>>>>>>> -/**
>>>>>>>>>>>> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
>>>>>>>>>>>> - *
>>>>>>>>>>>> - * @adev: amdgpu_device pointer
>>>>>>>>>>>> - * @cursor: current state
>>>>>>>>>>>> - *
>>>>>>>>>>>> - * Walk the PD/PT tree to the next leaf node.
>>>>>>>>>>>> - */
>>>>>>>>>>>> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device
>>>>> *adev,
>>>>>>>>>>>> -                   struct amdgpu_vm_pt_cursor *cursor) -{
>>>>>>>>>>>> -    amdgpu_vm_pt_next(adev, cursor);
>>>>>>>>>>>> -    if (cursor->pfn != ~0ll)
>>>>>>>>>>>> -        while (amdgpu_vm_pt_descendant(adev, cursor)); -}
>>>>>>>>>>>> -
>>>>>>>>>>>> -/**
>>>>>>>>>>>> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in
>>>>>>>>>>>> the hierarchy
>>>>>>>>>>>> - */
>>>>>>>>>>>> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end,
>>>>>>>>>>>> cursor)
>>>>>>>>>>>>         \
>>>>>>>>>>>> -    for (amdgpu_vm_pt_first_leaf((adev), (vm), (start),
>>>>>>>>>>>> &(cursor));
>>>>>>>>>>>>             \
>>>>>>>>>>>> -         (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev),
>>>>>>>>>>>> &(cursor)))
>>>>>>>>>>>> -
>>>>>>>>>>>>        /**
>>>>>>>>>>>>         * amdgpu_vm_pt_first_dfs - start a deep first search
>>>>>>>>>>>>         *
>>>>>>>>>>>> @@ -915,74 +874,51 @@ static void
>>> amdgpu_vm_bo_param(struct
>>>>>>>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>>>>>>>>         * Returns:
>>>>>>>>>>>>         * 0 on success, errno otherwise.
>>>>>>>>>>>>         */
>>>>>>>>>>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>>>>>>>>>> -            struct amdgpu_vm *vm,
>>>>>>>>>>>> -            uint64_t saddr, uint64_t size)
>>>>>>>>>>>> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>>>>>>>>>> +                   struct amdgpu_vm *vm,
>>>>>>>>>>>> +                   struct amdgpu_vm_pt_cursor *cursor)
>>>>>>>>>>>>        {
>>>>>>>>>>>> -    struct amdgpu_vm_pt_cursor cursor;
>>>>>>>>>>>> +    struct amdgpu_vm_pt *entry = cursor->entry;
>>>>>>>>>>>> +    struct amdgpu_bo_param bp;
>>>>>>>>>>>>            struct amdgpu_bo *pt;
>>>>>>>>>>>> -    uint64_t eaddr;
>>>>>>>>>>>>            int r;
>>>>>>>>>>>>
>>>>>>>>>>>> -    /* validate the parameters */
>>>>>>>>>>>> -    if (saddr & AMDGPU_GPU_PAGE_MASK || size &
>>>>>>>>>>>> AMDGPU_GPU_PAGE_MASK)
>>>>>>>>>>>> -        return -EINVAL;
>>>>>>>>>>>> +    if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
>>>>>>>>>>>> +        unsigned num_entries;
>>>>>>>>>>>>
>>>>>>>>>>>> -    eaddr = saddr + size - 1;
>>>>>>>>>>>> -
>>>>>>>>>>>> -    saddr /= AMDGPU_GPU_PAGE_SIZE;
>>>>>>>>>>>> -    eaddr /= AMDGPU_GPU_PAGE_SIZE;
>>>>>>>>>>>> -
>>>>>>>>>>>> -    if (eaddr >= adev->vm_manager.max_pfn) {
>>>>>>>>>>>> -        dev_err(adev->dev, "va above limit (0x%08llX >=
>>>>>>>>>>>> 0x%08llX)\n",
>>>>>>>>>>>> -            eaddr, adev->vm_manager.max_pfn);
>>>>>>>>>>>> -        return -EINVAL;
>>>>>>>>>>>> +        num_entries = amdgpu_vm_num_entries(adev, cursor-
>>>>>>>>>>>>> level);
>>>>>>>>>>>> +        entry->entries = kvmalloc_array(num_entries,
>>>>>>>>>>>> +                        sizeof(*entry->entries),
>>>>>>>>>>>> +                        GFP_KERNEL | __GFP_ZERO);
>>>>>>>>>>>> +        if (!entry->entries)
>>>>>>>>>>>> +            return -ENOMEM;
>>>>>>>>>>>>            }
>>>>>>>>>>>>
>>>>>>>>>>>> -    for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr,
>>>>>>>>>>>> cursor) {
>>>>>>>>>>>> -        struct amdgpu_vm_pt *entry = cursor.entry;
>>>>>>>>>>>> -        struct amdgpu_bo_param bp;
>>>>>>>>>>>> -
>>>>>>>>>>>> -        if (cursor.level < AMDGPU_VM_PTB) {
>>>>>>>>>>>> -            unsigned num_entries;
>>>>>>>>>>>> -
>>>>>>>>>>>> -            num_entries = amdgpu_vm_num_entries(adev,
>>>>>>>>>>>> cursor.level);
>>>>>>>>>>>> -            entry->entries = kvmalloc_array(num_entries,
>>>>>>>>>>>> -                            sizeof(*entry-
>>>>>>>>>>>>> entries),
>>>>>>>>>>>> -                            GFP_KERNEL |
>>>>>>>>>>>> -                            __GFP_ZERO);
>>>>>>>>>>>> -            if (!entry->entries)
>>>>>>>>>>>> -                return -ENOMEM;
>>>>>>>>>>>> -        }
>>>>>>>>>>>> -
>>>>>>>>>>>> -
>>>>>>>>>>>> -        if (entry->base.bo)
>>>>>>>>>>>> -            continue;
>>>>>>>>>>>> -
>>>>>>>>>>>> -        amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
>>>>>>>>>>>> -
>>>>>>>>>>>> -        r = amdgpu_bo_create(adev, &bp, &pt);
>>>>>>>>>>>> -        if (r)
>>>>>>>>>>>> -            return r;
>>>>>>>>>>>> -
>>>>>>>>>>>> -        if (vm->use_cpu_for_update) {
>>>>>>>>>>>> -            r = amdgpu_bo_kmap(pt, NULL);
>>>>>>>>>>>> -            if (r)
>>>>>>>>>>>> -                goto error_free_pt;
>>>>>>>>>>>> -        }
>>>>>>>>>>>> +    if (entry->base.bo)
>>>>>>>>>>>> +        return 0;
>>>>>>>>>>>>
>>>>>>>>>>>> -        /* Keep a reference to the root directory to avoid
>>>>>>>>>>>> -        * freeing them up in the wrong order.
>>>>>>>>>>>> -        */
>>>>>>>>>>>> -        pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
>>>>>>>>>>>> +    amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
>>>>>>>>>>>>
>>>>>>>>>>>> -        amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>>>>>>>>>>>> +    r = amdgpu_bo_create(adev, &bp, &pt);
>>>>>>>>>>>> +    if (r)
>>>>>>>>>>>> +        return r;
>>>>>>>>>>>>
>>>>>>>>>>>> -        r = amdgpu_vm_clear_bo(adev, vm, pt);
>>>>>>>>>>>> +    if (vm->use_cpu_for_update) {
>>>>>>>>>>>> +        r = amdgpu_bo_kmap(pt, NULL);
>>>>>>>>>>>>                if (r)
>>>>>>>>>>>>                    goto error_free_pt;
>>>>>>>>>>>>            }
>>>>>>>>>>>>
>>>>>>>>>>>> +    /* Keep a reference to the root directory to avoid
>>>>>>>>>>>> +     * freeing them up in the wrong order.
>>>>>>>>>>>> +     */
>>>>>>>>>>>> +    pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
>>>>>>>>>>>> +    amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>>>>>>>>>>>> +
>>>>>>>>>>>> +    r = amdgpu_vm_clear_bo(adev, vm, pt);
>>>>>>>>>>>> +    if (r)
>>>>>>>>>>>> +        goto error_free_pt;
>>>>>>>>>>>> +
>>>>>>>>>>>>            return 0;
>>>>>>>>>>>>
>>>>>>>>>>>>        error_free_pt:
>>>>>>>>>>>> @@ -1627,6 +1563,7 @@ static int
>>>>> amdgpu_vm_update_ptes(struct
>>>>>>>>>>>> amdgpu_pte_update_params *params,
>>>>>>>>>>>>            struct amdgpu_vm_pt_cursor cursor;
>>>>>>>>>>>>            uint64_t frag_start = start, frag_end;
>>>>>>>>>>>>            unsigned int frag;
>>>>>>>>>>>> +    int r;
>>>>>>>>>>>>
>>>>>>>>>>>>            /* figure out the initial fragment */
>>>>>>>>>>>>            amdgpu_vm_fragment(params, frag_start, end, flags,
>>>>>>>>>>>> &frag, &frag_end); @@ -1634,12 +1571,15 @@ static int
>>>>>>>>>>>> amdgpu_vm_update_ptes(struct amdgpu_pte_update_params
>>>>>>> *params,
>>>>>>>>>>>>            /* walk over the address space and update the PTs */
>>>>>>>>>>>>            amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
>>>>>>>>>>>>            while (cursor.pfn < end) {
>>>>>>>>>>>> -        struct amdgpu_bo *pt = cursor.entry->base.bo;
>>>>>>>>>>>>                unsigned shift, parent_shift, mask;
>>>>>>>>>>>>                uint64_t incr, entry_end, pe_start;
>>>>>>>>>>>> +        struct amdgpu_bo *pt;
>>>>>>>>>>>>
>>>>>>>>>>>> -        if (!pt)
>>>>>>>>>>>> -            return -ENOENT;
>>>>>>>>>>>> +        r = amdgpu_vm_alloc_pts(params->adev, params->vm,
>>>>>>>>>>>> &cursor);
>>>>>>>>>>>> +        if (r)
>>>>>>>>>>>> +            return r;
>>>>>>>>>>>> +
>>>>>>>>>>>> +        pt = cursor.entry->base.bo;
>>>>>>>>>>>>
>>>>>>>>>>>>                /* The root level can't be a huge page */
>>>>>>>>>>>>                if (cursor.level == adev->vm_manager.root_level)
>>>>>>>>>>>> { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>>>>>>>> index 81ff8177f092..116605c038d2 100644
>>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>>>>>>>> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct
>>> amdgpu_vm
>>>>>>> *vm);
>>>>>>>>>> int
>>>>>>>>>>>> amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev,
>>> struct
>>>>>>>>>>>> amdgpu_vm *vm,
>>>>>>>>>>>>                          int (*callback)(void *p, struct
>>>>>>>>>>>> amdgpu_bo *bo),
>>>>>>>>>>>>                          void *param); -int
>>>>>>>>>>>> amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>>>>>>>>>> -            struct amdgpu_vm *vm,
>>>>>>>>>>>> -            uint64_t saddr, uint64_t size);
>>>>>>>>>>>>        int amdgpu_vm_flush(struct amdgpu_ring *ring, struct
>>>>>>>>>>>> amdgpu_job *job, bool need_pipe_sync);  int
>>>>>>>>>>>> amdgpu_vm_update_directories(struct
>>>>>>>>>>>> amdgpu_device *adev,
>>>>>>>>>>>>                         struct amdgpu_vm *vm);
>>>>>>>>>>>> --
>>>>>>>>>>>> 2.17.1
>>>>>>>>>>>>
>>>>>>>>>>>> _______________________________________________
>>>>>>>>>>>> amd-gfx mailing list
>>>>>>>>>>>> amd-gfx@lists.freedesktop.org
>>>>>>>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>>>>>>>> _______________________________________________
>>>>>>>>>> amd-gfx mailing list
>>>>>>>>>> amd-gfx@lists.freedesktop.org
>>>>>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>>>>>>> _______________________________________________
>>>>>>>>> amd-gfx mailing list
>>>>>>>>> amd-gfx@lists.freedesktop.org
>>>>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>> _______________________________________________
>>>> amd-gfx mailing list
>>>> amd-gfx@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
       [not found]                                                         ` <baf94765-e2a2-6e2f-7e6d-69575a2cdbd2-5C7GfCeVMHo@public.gmane.org>
@ 2019-03-12 23:30                                                           ` Kuehling, Felix
       [not found]                                                             ` <3e185bb0-ec43-4784-2130-084d46167605-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: Kuehling, Felix @ 2019-03-12 23:30 UTC (permalink / raw)
  To: Yang, Philip, Russell, Kent, Koenig, Christian,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Never mind. I must have messed up my build. I can't reproduce the 
problem any more. The patch I sent out is still needed and valid. AFAICT 
it should be all that's needed to fix GPUVM for KFD.

I have not seen any faults with KFDCWSRTest.BasicTest on my system with 
Fiji or Vega10 with that patch applied.

Regards,
   Felix

On 2019-03-12 5:19 p.m., Felix Kuehling wrote:
> I'm also still seeing VM faults in the eviction test even with my fix, 
> and even with SDMA page table updates. There is still something else 
> going wrong. :/
>
> Thanks,
>   Felix
>
> On 2019-03-12 5:13 p.m., Yang, Philip wrote:
>> A VM fault happens about 1 time in 10 with KFDCWSRTest.BasicTest for me. I am
>> using SDMA for page table updates. I haven't tried CPU page table updates.
>>
>> Philip
>>
>> On 2019-03-12 11:12 a.m., Russell, Kent wrote:
>>> Peculiar, I hit it immediately when I ran it. Can you try using
>>> --gtest_filter=KFDCWSRTest.BasicTest . That one hung every time for me.
>>>
>>>    Kent
>>>
>>>> -----Original Message-----
>>>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>>>> Sent: Tuesday, March 12, 2019 11:09 AM
>>>> To: Russell, Kent <Kent.Russell@amd.com>; Koenig, Christian
>>>> <Christian.Koenig@amd.com>; Kuehling, Felix <Felix.Kuehling@amd.com>;
>>>> amd-gfx@lists.freedesktop.org
>>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>>>
>>>> Yeah, same problem here.
>>>>
>>>> I removed libhsakmt package and installed it manually and now it 
>>>> seems to
>>>> work.
>>>>
>>>> Doing some testing now, but at least offhand I can't seem to 
>>>> reproduce the
>>>> VM fault on a Vega10.
>>>>
>>>> Christian.
>>>>
>>>> Am 12.03.19 um 16:01 schrieb Russell, Kent:
>>>>> Oh right, I remember that issue. I had that happen to me once, 
>>>>> where my
>>>> installed libhsakmt didn't match up with the latest source code, so 
>>>> I ended up
>>>> having to remove the libhsakmt package and point it to the folders
>>>> instead.
>>>>>     Kent
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: Koenig, Christian
>>>>>> Sent: Tuesday, March 12, 2019 10:49 AM
>>>>>> To: Russell, Kent <Kent.Russell@amd.com>; Kuehling, Felix
>>>>>> <Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org
>>>>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
>>>>>>
>>>>>> Yeah, the problem is I do have the libhsakmt installed.
>>>>>>
>>>>>> Going to give it a try to specify the directory directly.
>>>>>>
>>>>>> Christian.
>>>>>>
>>>>>> Am 12.03.19 um 15:47 schrieb Russell, Kent:
>>>>>>> The README.txt file inside the tests/kfdtest folder has 
>>>>>>> instructions
>>>>>>> on how
>>>>>> to do it if you don't have the libhsakmt package installed on 
>>>>>> your system:
>>>>>>> export LIBHSAKMT_PATH=/*your local libhsakmt folder*/ With that, 
>>>>>>> the
>>>>>>> headers and libraries are searched under LIBHSAKMT_PATH/include and
>>>>>>> LIBHSAKMT_PATH/lib respectively.
>>>>>>>
>>>>>>> So if you try export LIBHSAKMT_PATH as the root ROCT folder (the 
>>>>>>> one
>>>>>> containing include, src, tests, etc), then that should cover it.
>>>>>>>      Kent
>>>>>>>
>>>>>>>
>>>>>>>> -----Original Message-----
>>>>>>>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>>>>>>>> Sent: Tuesday, March 12, 2019 9:13 AM
>>>>>>>> To: Russell, Kent <Kent.Russell@amd.com>; Kuehling, Felix
>>>>>>>> <Felix.Kuehling@amd.com>; Koenig, Christian
>>>>>>>> <Christian.Koenig@amd.com>; amd-gfx@lists.freedesktop.org
>>>>>>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
>>>> demand
>>>>>>>> Hi guys,
>>>>>>>>
>>>>>>>> so found a few minutes today to compile kfdtest.
>>>>>>>>
>>>>>>>> Problem is that during the compile I get a lot of this:
>>>>>>>>> CMakeFiles/kfdtest.dir/src/BaseQueue.cpp.o: In Funktion
>>>>>>>>> »BaseQueue::Create(unsigned int, unsigned int, unsigned long*)«:
>>>>>>>>> /usr/src/ROCT-Thunk-Interface/tests/kfdtest/src/BaseQueue.cpp:57:
>>>>>>>>> Warnung: undefinierter Verweis auf »hsaKmtCreateQueue«
>>>>>>>> Any idea?
>>>>>>>>
>>>>>>>> Christian.
>>>>>>>>
>>>>>>>> Am 11.03.19 um 17:55 schrieb Christian König:
>>>>>>>>> Hi guys,
>>>>>>>>>
>>>>>>>>> well it's most likely some missing handling in the KFD, so I'm
>>>>>>>>> rather reluctant to revert the change immediately.
>>>>>>>>>
>>>>>>>>> Problem is that I don't have time right now to look into it
>>>>>>>>> immediately. So Kent can you continue to take a look?
>>>>>>>>>
>>>>>>>>> Sounds like it's crashing immediately, so it should be something
>>>> obvious.
>>>>>>>>> Christian.
>>>>>>>>>
>>>>>>>>> Am 11.03.19 um 10:49 schrieb Russell, Kent:
>>>>>>>>>>      From what I've been able to dig through, the VM Fault 
>>>>>>>>>> seems to
>>>>>>>>>> occur right after a doorbell mmap, but that's as far as I got. I
>>>>>>>>>> can try to revert it in today's merge and see how things go.
>>>>>>>>>>
>>>>>>>>>>       Kent
>>>>>>>>>>
>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>> From: Kuehling, Felix
>>>>>>>>>>> Sent: Friday, March 08, 2019 11:16 PM
>>>>>>>>>>> To: Koenig, Christian <Christian.Koenig@amd.com>; Russell, Kent
>>>>>>>>>>> <Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org
>>>>>>>>>>> Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
>>>>>> demand
>>>>>>>>>>> My concerns were related to eviction fence handling. It would
>>>>>>>>>>> manifest as unnecessary eviction callbacks into KFD that aren't
>>>>>>>>>>> caused by real evictions. I addressed that with a previous patch
>>>>>>>>>>> series that removed the need to remove eviction fences and add
>>>>>>>>>>> them back around page table updates in
>>>> amdgpu_amdkfd_gpuvm.c.
>>>>>>>>>>> I don't know what's going on here. I can probably take a 
>>>>>>>>>>> look on
>>>>>>>>>>> Monday. I haven't considered what changed with respect to PD
>>>>>>>>>>> updates.
>>>>>>>>>>>
>>>>>>>>>>> Kent, can we temporarily revert the offending change in
>>>>>>>>>>> amd-kfd-staging just to unblock the merge?
>>>>>>>>>>>
>>>>>>>>>>> Christian, I think KFD is currently broken on 
>>>>>>>>>>> amd-staging-drm-next.
>>>>>>>>>>> If we're
>>>>>>>>>>> serious about supporting KFD upstream, you may also want to
>>>>>>>>>>> consider reverting your change there for now. Also consider
>>>>>>>>>>> building the Thunk and kfdtest so you can do quick smoke tests
>>>>>>>>>>> locally whenever you make amdgpu_vm changes that can affect
>>>> KFD.
>>>>>>>>>>> https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface
>>>>>>>>>>>
>>>>>>>>>>> Regards,
>>>>>>>>>>>        Felix
>>>>>>>>>>>
>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On
>>>> Behalf
>>>>>> Of
>>>>>>>>>>> Christian König
>>>>>>>>>>> Sent: Friday, March 08, 2019 9:14 AM
>>>>>>>>>>> To: Russell, Kent <Kent.Russell@amd.com>;
>>>>>>>>>>> amd-gfx@lists.freedesktop.org
>>>>>>>>>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
>>>>>> demand
>>>>>>>>>>> My best guess is that we forget somewhere to update the PDs.
>>>>>>>>>>> What hardware is that on?
>>>>>>>>>>>
>>>>>>>>>>> Felix already mentioned that this could be problematic for 
>>>>>>>>>>> the KFD.
>>>>>>>>>>>
>>>>>>>>>>> Maybe he has an idea,
>>>>>>>>>>> Christian.
>>>>>>>>>>>
>>>>>>>>>>> Am 08.03.19 um 15:04 schrieb Russell, Kent:
>>>>>>>>>>>> Hi Christian,
>>>>>>>>>>>>
>>>>>>>>>>>> This patch ended up causing a VM Fault in KFDTest. Reverting
>>>>>>>>>>>> just this
>>>>>>>>>>> patch addressed the issue:
>>>>>>>>>>>> [   82.703503] amdgpu 0000:0c:00.0: GPU fault detected: 146
>>>>>>>>>>>> 0x0000480c for
>>>>>>>>>>> process  pid 0 thread  pid 0
>>>>>>>>>>>> [   82.703512] amdgpu 0000:0c:00.0:
>>>>>>>>>>> VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x00001000
>>>>>>>>>>>> [   82.703516] amdgpu 0000:0c:00.0:
>>>>>>>>>>> VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C
>>>>>>>>>>>> [   82.703522] amdgpu 0000:0c:00.0: VM fault (0x0c, vmid 8,
>>>>>>>>>>>> pasid
>>>>>>>>>>>> 32769) at
>>>>>>>>>>> page 4096, read from 'TC0' (0x54433000) (72)
>>>>>>>>>>>> [   82.703585] Evicting PASID 32769 queues
>>>>>>>>>>>>
>>>>>>>>>>>> I am looking into it, but if you have any insight that 
>>>>>>>>>>>> would be
>>>>>>>>>>>> great in
>>>>>>>>>>> helping to resolve it quickly.
>>>>>>>>>>>>        Kent
>>>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On
>>>>>> Behalf
>>>>>>>> Of
>>>>>>>>>>>>> Christian König
>>>>>>>>>>>>> Sent: Tuesday, February 26, 2019 7:47 AM
>>>>>>>>>>>>> To: amd-gfx@lists.freedesktop.org
>>>>>>>>>>>>> Subject: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
>>>>>> demand
>>>>>>>>>>>>> Let's start to allocate VM PDs/PTs on demand instead of
>>>>>>>>>>>>> pre-allocating them during mapping.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>>>>>>>>>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>>>>>>>>>>> ---
>>>>>>>>>>>>> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  10
>>>> +-
>>>>>>>>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c |   9 --
>>>>>>>>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c |  10 --
>>>>>>>>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 136
>>>>>>>>>>>>> +++++------------
>>>>>>>>>>> -
>>>>>>>>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |   3 -
>>>>>>>>>>>>>        5 files changed, 39 insertions(+), 129 deletions(-)
>>>>>>>>>>>>>
>>>>>>>>>>>>> diff --git
>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>>>>>>>>> index 31e3953dcb6e..088e9b6b765b 100644
>>>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>>>>>>>>> +++
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>>>>>>>>> @@ -410,15 +410,7 @@ static int add_bo_to_vm(struct
>>>>>>>> amdgpu_device
>>>>>>>>>>>>> *adev, struct kgd_mem *mem,
>>>>>>>>>>>>>            if (p_bo_va_entry)
>>>>>>>>>>>>>                *p_bo_va_entry = bo_va_entry;
>>>>>>>>>>>>>
>>>>>>>>>>>>> -    /* Allocate new page tables if needed and validate
>>>>>>>>>>>>> -     * them.
>>>>>>>>>>>>> -     */
>>>>>>>>>>>>> -    ret = amdgpu_vm_alloc_pts(adev, vm, va,
>>>>>>>>>>>>> amdgpu_bo_size(bo));
>>>>>>>>>>>>> -    if (ret) {
>>>>>>>>>>>>> -        pr_err("Failed to allocate pts, err=%d\n", ret);
>>>>>>>>>>>>> -        goto err_alloc_pts;
>>>>>>>>>>>>> -    }
>>>>>>>>>>>>> -
>>>>>>>>>>>>> +    /* Allocate validate page tables if needed */
>>>>>>>>>>>>>            ret = vm_validate_pt_pd_bos(vm);
>>>>>>>>>>>>>            if (ret) {
>>>>>>>>>>>>> pr_err("validate_pt_pd_bos() failed\n"); diff
>>>>>>>>>>>>> --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>>>>>>>>> index 7e22be7ca68a..54dd02a898b9 100644
>>>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>>>>>>>>>>>> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct
>>>>>>>> amdgpu_device
>>>>>>>>>>>>> *adev, struct amdgpu_vm *vm,
>>>>>>>>>>>>>                return -ENOMEM;
>>>>>>>>>>>>>            }
>>>>>>>>>>>>>
>>>>>>>>>>>>> -    r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm,
>>>>>>>>>>>>> csa_addr,
>>>>>>>>>>>>> -                size);
>>>>>>>>>>>>> -    if (r) {
>>>>>>>>>>>>> -        DRM_ERROR("failed to allocate pts for static CSA,
>>>>>>>>>>>>> err=%d\n", r);
>>>>>>>>>>>>> -        amdgpu_vm_bo_rmv(adev, *bo_va);
>>>>>>>>>>>>> - ttm_eu_backoff_reservation(&ticket, &list);
>>>>>>>>>>>>> -        return r;
>>>>>>>>>>>>> -    }
>>>>>>>>>>>>> -
>>>>>>>>>>>>>            r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, 
>>>>>>>>>>>>> size,
>>>>>>>>>>>>> AMDGPU_PTE_READABLE |
>>>>>>>>>>>>> AMDGPU_PTE_WRITEABLE
>>>>>>>>>>>>> |
>>>>>>>>>>>>> AMDGPU_PTE_EXECUTABLE); diff --git
>>>>>>>>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>>>>>>>>> index 555285e329ed..fcaaac30e84b 100644
>>>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>>>>>>>>>> @@ -625,11 +625,6 @@ int amdgpu_gem_va_ioctl(struct
>>>>>> drm_device
>>>>>>>>>>> *dev,
>>>>>>>>>>>>> void *data,
>>>>>>>>>>>>>
>>>>>>>>>>>>>            switch (args->operation) {
>>>>>>>>>>>>>            case AMDGPU_VA_OP_MAP:
>>>>>>>>>>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
>>>>>>>>>>>>>> va_address,
>>>>>>>>>>>>> -                    args->map_size);
>>>>>>>>>>>>> -        if (r)
>>>>>>>>>>>>> -            goto error_backoff;
>>>>>>>>>>>>> -
>>>>>>>>>>>>>                va_flags = amdgpu_gmc_get_pte_flags(adev,
>>>>>>>>>>>>> args->flags);
>>>>>>>>>>>>>                r = amdgpu_vm_bo_map(adev, bo_va, 
>>>>>>>>>>>>> args->va_address,
>>>>>>>>>>>>> args->offset_in_bo, args->map_size,
>>>>>>>>>>>>> @@
>>>>>>>>>>>>> -
>>>>>>>>>>>>> 645,11 +640,6 @@ int amdgpu_gem_va_ioctl(struct drm_device
>>>>>> *dev,
>>>>>>>>>>> void
>>>>>>>>>>>>> *data,
>>>>>>>>>>>>> args->map_size);
>>>>>>>>>>>>>                break;
>>>>>>>>>>>>>            case AMDGPU_VA_OP_REPLACE:
>>>>>>>>>>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args-
>>>>>>>>>>>>>> va_address,
>>>>>>>>>>>>> -                    args->map_size);
>>>>>>>>>>>>> -        if (r)
>>>>>>>>>>>>> -            goto error_backoff;
>>>>>>>>>>>>> -
>>>>>>>>>>>>>                va_flags = amdgpu_gmc_get_pte_flags(adev,
>>>>>>>>>>>>> args->flags);
>>>>>>>>>>>>>                r = amdgpu_vm_bo_replace_map(adev, bo_va, 
>>>>>>>>>>>>> args-
>>>>>>>>>>>>>> va_address,
>>>>>>>>>>>>> args->offset_in_bo, args-
>>>>>>>>>>>>>> map_size, diff --git
>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>>>>>>>> index 362436f4e856..dfad543fc000 100644
>>>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>>>>>>>> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct
>>>>>>>>>>>>> amdgpu_device *adev,
>>>>>>>>>>>>>            }
>>>>>>>>>>>>>        }
>>>>>>>>>>>>>
>>>>>>>>>>>>> -/**
>>>>>>>>>>>>> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
>>>>>>>>>>>>> - *
>>>>>>>>>>>>> - * @adev: amdgpu_device pointer
>>>>>>>>>>>>> - * @vm: amdgpu_vm structure
>>>>>>>>>>>>> - * @start: start addr of the walk
>>>>>>>>>>>>> - * @cursor: state to initialize
>>>>>>>>>>>>> - *
>>>>>>>>>>>>> - * Start a walk and go directly to the leaf node.
>>>>>>>>>>>>> - */
>>>>>>>>>>>>> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device
>>>>>> *adev,
>>>>>>>>>>>>> - struct amdgpu_vm *vm, uint64_t start,
>>>>>>>>>>>>> -                    struct amdgpu_vm_pt_cursor *cursor) -{
>>>>>>>>>>>>> -    amdgpu_vm_pt_start(adev, vm, start, cursor);
>>>>>>>>>>>>> -    while (amdgpu_vm_pt_descendant(adev, cursor)); -}
>>>>>>>>>>>>> -
>>>>>>>>>>>>> -/**
>>>>>>>>>>>>> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
>>>>>>>>>>>>> - *
>>>>>>>>>>>>> - * @adev: amdgpu_device pointer
>>>>>>>>>>>>> - * @cursor: current state
>>>>>>>>>>>>> - *
>>>>>>>>>>>>> - * Walk the PD/PT tree to the next leaf node.
>>>>>>>>>>>>> - */
>>>>>>>>>>>>> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device
>>>>>> *adev,
>>>>>>>>>>>>> - struct amdgpu_vm_pt_cursor *cursor) -{
>>>>>>>>>>>>> -    amdgpu_vm_pt_next(adev, cursor);
>>>>>>>>>>>>> -    if (cursor->pfn != ~0ll)
>>>>>>>>>>>>> -        while (amdgpu_vm_pt_descendant(adev, cursor)); -}
>>>>>>>>>>>>> -
>>>>>>>>>>>>> -/**
>>>>>>>>>>>>> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf 
>>>>>>>>>>>>> PDs/PTs in
>>>>>>>>>>>>> the hierarchy
>>>>>>>>>>>>> - */
>>>>>>>>>>>>> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end,
>>>>>>>>>>>>> cursor)
>>>>>>>>>>>>>         \
>>>>>>>>>>>>> -    for (amdgpu_vm_pt_first_leaf((adev), (vm), (start),
>>>>>>>>>>>>> &(cursor));
>>>>>>>>>>>>>             \
>>>>>>>>>>>>> -         (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev),
>>>>>>>>>>>>> &(cursor)))
>>>>>>>>>>>>> -
>>>>>>>>>>>>>        /**
>>>>>>>>>>>>>         * amdgpu_vm_pt_first_dfs - start a deep first search
>>>>>>>>>>>>>         *
>>>>>>>>>>>>> @@ -915,74 +874,51 @@ static void
>>>> amdgpu_vm_bo_param(struct
>>>>>>>>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>>>>>>>>>         * Returns:
>>>>>>>>>>>>>         * 0 on success, errno otherwise.
>>>>>>>>>>>>>         */
>>>>>>>>>>>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>>>>>>>>>>> -            struct amdgpu_vm *vm,
>>>>>>>>>>>>> -            uint64_t saddr, uint64_t size)
>>>>>>>>>>>>> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>>>>>>>>>>> +                   struct amdgpu_vm *vm,
>>>>>>>>>>>>> +                   struct amdgpu_vm_pt_cursor *cursor)
>>>>>>>>>>>>>        {
>>>>>>>>>>>>> -    struct amdgpu_vm_pt_cursor cursor;
>>>>>>>>>>>>> +    struct amdgpu_vm_pt *entry = cursor->entry;
>>>>>>>>>>>>> +    struct amdgpu_bo_param bp;
>>>>>>>>>>>>>            struct amdgpu_bo *pt;
>>>>>>>>>>>>> -    uint64_t eaddr;
>>>>>>>>>>>>>            int r;
>>>>>>>>>>>>>
>>>>>>>>>>>>> -    /* validate the parameters */
>>>>>>>>>>>>> -    if (saddr & AMDGPU_GPU_PAGE_MASK || size &
>>>>>>>>>>>>> AMDGPU_GPU_PAGE_MASK)
>>>>>>>>>>>>> -        return -EINVAL;
>>>>>>>>>>>>> +    if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
>>>>>>>>>>>>> +        unsigned num_entries;
>>>>>>>>>>>>>
>>>>>>>>>>>>> -    eaddr = saddr + size - 1;
>>>>>>>>>>>>> -
>>>>>>>>>>>>> -    saddr /= AMDGPU_GPU_PAGE_SIZE;
>>>>>>>>>>>>> -    eaddr /= AMDGPU_GPU_PAGE_SIZE;
>>>>>>>>>>>>> -
>>>>>>>>>>>>> -    if (eaddr >= adev->vm_manager.max_pfn) {
>>>>>>>>>>>>> -        dev_err(adev->dev, "va above limit (0x%08llX >=
>>>>>>>>>>>>> 0x%08llX)\n",
>>>>>>>>>>>>> -            eaddr, adev->vm_manager.max_pfn);
>>>>>>>>>>>>> -        return -EINVAL;
>>>>>>>>>>>>> +        num_entries = amdgpu_vm_num_entries(adev, cursor-
>>>>>>>>>>>>>> level);
>>>>>>>>>>>>> +        entry->entries = kvmalloc_array(num_entries,
>>>>>>>>>>>>> + sizeof(*entry->entries),
>>>>>>>>>>>>> +                        GFP_KERNEL | __GFP_ZERO);
>>>>>>>>>>>>> +        if (!entry->entries)
>>>>>>>>>>>>> +            return -ENOMEM;
>>>>>>>>>>>>>            }
>>>>>>>>>>>>>
>>>>>>>>>>>>> -    for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr,
>>>>>>>>>>>>> cursor) {
>>>>>>>>>>>>> -        struct amdgpu_vm_pt *entry = cursor.entry;
>>>>>>>>>>>>> -        struct amdgpu_bo_param bp;
>>>>>>>>>>>>> -
>>>>>>>>>>>>> -        if (cursor.level < AMDGPU_VM_PTB) {
>>>>>>>>>>>>> -            unsigned num_entries;
>>>>>>>>>>>>> -
>>>>>>>>>>>>> -            num_entries = amdgpu_vm_num_entries(adev,
>>>>>>>>>>>>> cursor.level);
>>>>>>>>>>>>> -            entry->entries = kvmalloc_array(num_entries,
>>>>>>>>>>>>> - sizeof(*entry-
>>>>>>>>>>>>>> entries),
>>>>>>>>>>>>> -                            GFP_KERNEL |
>>>>>>>>>>>>> -                            __GFP_ZERO);
>>>>>>>>>>>>> -            if (!entry->entries)
>>>>>>>>>>>>> -                return -ENOMEM;
>>>>>>>>>>>>> -        }
>>>>>>>>>>>>> -
>>>>>>>>>>>>> -
>>>>>>>>>>>>> -        if (entry->base.bo)
>>>>>>>>>>>>> -            continue;
>>>>>>>>>>>>> -
>>>>>>>>>>>>> -        amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
>>>>>>>>>>>>> -
>>>>>>>>>>>>> -        r = amdgpu_bo_create(adev, &bp, &pt);
>>>>>>>>>>>>> -        if (r)
>>>>>>>>>>>>> -            return r;
>>>>>>>>>>>>> -
>>>>>>>>>>>>> -        if (vm->use_cpu_for_update) {
>>>>>>>>>>>>> -            r = amdgpu_bo_kmap(pt, NULL);
>>>>>>>>>>>>> -            if (r)
>>>>>>>>>>>>> -                goto error_free_pt;
>>>>>>>>>>>>> -        }
>>>>>>>>>>>>> +    if (entry->base.bo)
>>>>>>>>>>>>> +        return 0;
>>>>>>>>>>>>>
>>>>>>>>>>>>> -        /* Keep a reference to the root directory to avoid
>>>>>>>>>>>>> -        * freeing them up in the wrong order.
>>>>>>>>>>>>> -        */
>>>>>>>>>>>>> -        pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
>>>>>>>>>>>>> +    amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
>>>>>>>>>>>>>
>>>>>>>>>>>>> - amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>>>>>>>>>>>>> +    r = amdgpu_bo_create(adev, &bp, &pt);
>>>>>>>>>>>>> +    if (r)
>>>>>>>>>>>>> +        return r;
>>>>>>>>>>>>>
>>>>>>>>>>>>> -        r = amdgpu_vm_clear_bo(adev, vm, pt);
>>>>>>>>>>>>> +    if (vm->use_cpu_for_update) {
>>>>>>>>>>>>> +        r = amdgpu_bo_kmap(pt, NULL);
>>>>>>>>>>>>>                if (r)
>>>>>>>>>>>>>                    goto error_free_pt;
>>>>>>>>>>>>>            }
>>>>>>>>>>>>>
>>>>>>>>>>>>> +    /* Keep a reference to the root directory to avoid
>>>>>>>>>>>>> +     * freeing them up in the wrong order.
>>>>>>>>>>>>> +     */
>>>>>>>>>>>>> +    pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
>>>>>>>>>>>>> + amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    r = amdgpu_vm_clear_bo(adev, vm, pt);
>>>>>>>>>>>>> +    if (r)
>>>>>>>>>>>>> +        goto error_free_pt;
>>>>>>>>>>>>> +
>>>>>>>>>>>>>            return 0;
>>>>>>>>>>>>>
>>>>>>>>>>>>>        error_free_pt:
>>>>>>>>>>>>> @@ -1627,6 +1563,7 @@ static int
>>>>>> amdgpu_vm_update_ptes(struct
>>>>>>>>>>>>> amdgpu_pte_update_params *params,
>>>>>>>>>>>>>            struct amdgpu_vm_pt_cursor cursor;
>>>>>>>>>>>>>            uint64_t frag_start = start, frag_end;
>>>>>>>>>>>>>            unsigned int frag;
>>>>>>>>>>>>> +    int r;
>>>>>>>>>>>>>
>>>>>>>>>>>>>            /* figure out the initial fragment */
>>>>>>>>>>>>>            amdgpu_vm_fragment(params, frag_start, end, flags,
>>>>>>>>>>>>> &frag, &frag_end); @@ -1634,12 +1571,15 @@ static int
>>>>>>>>>>>>> amdgpu_vm_update_ptes(struct amdgpu_pte_update_params
>>>>>>>> *params,
>>>>>>>>>>>>>            /* walk over the address space and update the 
>>>>>>>>>>>>> PTs */
>>>>>>>>>>>>>            amdgpu_vm_pt_start(adev, params->vm, start, 
>>>>>>>>>>>>> &cursor);
>>>>>>>>>>>>>            while (cursor.pfn < end) {
>>>>>>>>>>>>> -        struct amdgpu_bo *pt = cursor.entry->base.bo;
>>>>>>>>>>>>>                unsigned shift, parent_shift, mask;
>>>>>>>>>>>>>                uint64_t incr, entry_end, pe_start;
>>>>>>>>>>>>> +        struct amdgpu_bo *pt;
>>>>>>>>>>>>>
>>>>>>>>>>>>> -        if (!pt)
>>>>>>>>>>>>> -            return -ENOENT;
>>>>>>>>>>>>> +        r = amdgpu_vm_alloc_pts(params->adev, params->vm,
>>>>>>>>>>>>> &cursor);
>>>>>>>>>>>>> +        if (r)
>>>>>>>>>>>>> +            return r;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +        pt = cursor.entry->base.bo;
>>>>>>>>>>>>>
>>>>>>>>>>>>>                /* The root level can't be a huge page */
>>>>>>>>>>>>>                if (cursor.level == 
>>>>>>>>>>>>> adev->vm_manager.root_level)
>>>>>>>>>>>>> { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>>>>>>>>> index 81ff8177f092..116605c038d2 100644
>>>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>>>>>>>>> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct
>>>> amdgpu_vm
>>>>>>>> *vm);
>>>>>>>>>>> int
>>>>>>>>>>>>> amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev,
>>>> struct
>>>>>>>>>>>>> amdgpu_vm *vm,
>>>>>>>>>>>>>                          int (*callback)(void *p, struct
>>>>>>>>>>>>> amdgpu_bo *bo),
>>>>>>>>>>>>>                          void *param); -int
>>>>>>>>>>>>> amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>>>>>>>>>>>>> -            struct amdgpu_vm *vm,
>>>>>>>>>>>>> -            uint64_t saddr, uint64_t size);
>>>>>>>>>>>>>        int amdgpu_vm_flush(struct amdgpu_ring *ring, struct
>>>>>>>>>>>>> amdgpu_job *job, bool need_pipe_sync); int
>>>>>>>>>>>>> amdgpu_vm_update_directories(struct
>>>>>>>>>>>>> amdgpu_device *adev,
>>>>>>>>>>>>>                         struct amdgpu_vm *vm);
>>>>>>>>>>>>> -- 
>>>>>>>>>>>>> 2.17.1
>>>>>>>>>>>>>
>>>>>>>>>>>>> _______________________________________________
>>>>>>>>>>>>> amd-gfx mailing list
>>>>>>>>>>>>> amd-gfx@lists.freedesktop.org
>>>>>>>>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>>>>>>>>> _______________________________________________
>>>>>>>>>>> amd-gfx mailing list
>>>>>>>>>>> amd-gfx@lists.freedesktop.org
>>>>>>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>>>>>>>> _______________________________________________
>>>>>>>>>> amd-gfx mailing list
>>>>>>>>>> amd-gfx@lists.freedesktop.org
>>>>>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>>> _______________________________________________
>>>>> amd-gfx mailing list
>>>>> amd-gfx@lists.freedesktop.org
>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
       [not found]                                                             ` <3e185bb0-ec43-4784-2130-084d46167605-5C7GfCeVMHo@public.gmane.org>
@ 2019-03-13 10:52                                                               ` Russell, Kent
  0 siblings, 0 replies; 26+ messages in thread
From: Russell, Kent @ 2019-03-13 10:52 UTC (permalink / raw)
  To: Kuehling, Felix, Yang, Philip, Koenig, Christian,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Fantastic, I'm integrating it now. Fingers crossed!

 Kent

> -----Original Message-----
> From: Kuehling, Felix
> Sent: Tuesday, March 12, 2019 7:31 PM
> To: Yang, Philip <Philip.Yang@amd.com>; Russell, Kent
> <Kent.Russell@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>;
> amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand
> 
> Never mind. I must have messed up my build. I can't reproduce the problem
> any more. The patch I sent out is still needed and valid. AFAICT it should be all
> that's needed to fix GPUVM for KFD.
> 
> I have not seen any faults with KFDCWSRTest.BasicTest on my system with
> Fiji or Vega10 with that patch applied.
> 
> Regards,
>    Felix
> 
> On 2019-03-12 5:19 p.m., Felix Kuehling wrote:
> > I'm also still seeing VM faults in the eviction test even with my fix,
> > and even with SDMA page table updates. There is still something else
> > going wrong. :/
> >
> > Thanks,
> >   Felix
> >
> > On 2019-03-12 5:13 p.m., Yang, Philip wrote:
> >> vm fault happens about 1/10 for KFDCWSRTest.BasicTest for me. I am
> >> using SDMA for page table update. I haven't tried CPU page table updates.
> >>
> >> Philip
> >>
> >> On 2019-03-12 11:12 a.m., Russell, Kent wrote:
> >>> Peculiar, I hit it immediately when I ran it. Can you try using
> >>> --gtest_filter=KFDCWSRTest.BasicTest . That one hung every time for
> me.
> >>>
> >>>    Kent
> >>>
> >>>> -----Original Message-----
> >>>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> >>>> Sent: Tuesday, March 12, 2019 11:09 AM
> >>>> To: Russell, Kent <Kent.Russell@amd.com>; Koenig, Christian
> >>>> <Christian.Koenig@amd.com>; Kuehling, Felix
> >>>> <Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org
> >>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
> demand
> >>>>
> >>>> Yeah, same problem here.
> >>>>
> >>>> I removed libhsakmt package and installed it manually and now it
> >>>> seems to work.
> >>>>
> >>>> Doing some testing now, but at least offhand I can't seem to
> >>>> reproduce the VM fault on a Vega10.
> >>>>
> >>>> Christian.
> >>>>
> >>>> Am 12.03.19 um 16:01 schrieb Russell, Kent:
> >>>>> Oh right, I remember that issue. I had that happen to me once,
> >>>>> where my
> >>>> installed libhsakmt didn't match up with the latest source code, so
> >>>> I ended up having to remove the libhsakmt package and pointing it
> >>>> to the folders instead.
> >>>>>     Kent
> >>>>>
> >>>>>> -----Original Message-----
> >>>>>> From: Koenig, Christian
> >>>>>> Sent: Tuesday, March 12, 2019 10:49 AM
> >>>>>> To: Russell, Kent <Kent.Russell@amd.com>; Kuehling, Felix
> >>>>>> <Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org
> >>>>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
> >>>>>> demand
> >>>>>>
> >>>>>> Yeah, the problem is I do have the libhsakmt installed.
> >>>>>>
> >>>>>> Going to give it a try to specify the directory directly.
> >>>>>>
> >>>>>> Christian.
> >>>>>>
> >>>>>> Am 12.03.19 um 15:47 schrieb Russell, Kent:
> >>>>>>> The README.txt file inside the tests/kfdtest folder has
> >>>>>>> instructions on how
> >>>>>> to do it if you don't have the libhsakmt package installed on
> >>>>>> your system:
> >>>>>>> export LIBHSAKMT_PATH=/*your local libhsakmt folder*/ With
> that,
> >>>>>>> the headers and libraries are searched under
> >>>>>>> LIBHSAKMT_PATH/include and LIBHSAKMT_PATH/lib respectively.
> >>>>>>>
> >>>>>>> So if you try export LIBHSAKMT_PATH as the root ROCT folder (the
> >>>>>>> one
> >>>>>> containing include, src, tests, etc), then that should cover it.
> >>>>>>>      Kent
> >>>>>>>
> >>>>>>>
> >>>>>>>> -----Original Message-----
> >>>>>>>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> >>>>>>>> Sent: Tuesday, March 12, 2019 9:13 AM
> >>>>>>>> To: Russell, Kent <Kent.Russell@amd.com>; Kuehling, Felix
> >>>>>>>> <Felix.Kuehling@amd.com>; Koenig, Christian
> >>>>>>>> <Christian.Koenig@amd.com>; amd-gfx@lists.freedesktop.org
> >>>>>>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
> >>>> demand
> >>>>>>>> Hi guys,
> >>>>>>>>
> >>>>>>>> so found a few minutes today to compile kfdtest.
> >>>>>>>>
> >>>>>>>> Problem is that during the compile I get a lot of this:
> >>>>>>>>> CMakeFiles/kfdtest.dir/src/BaseQueue.cpp.o: In Funktion
> >>>>>>>>> »BaseQueue::Create(unsigned int, unsigned int, unsigned
> long*)«:
> >>>>>>>>> /usr/src/ROCT-Thunk-
> Interface/tests/kfdtest/src/BaseQueue.cpp:57:
> >>>>>>>>> Warnung: undefinierter Verweis auf »hsaKmtCreateQueue«
> >>>>>>>> Any idea?
> >>>>>>>>
> >>>>>>>> Christian.
> >>>>>>>>
> >>>>>>>> Am 11.03.19 um 17:55 schrieb Christian König:
> >>>>>>>>> Hi guys,
> >>>>>>>>>
> >>>>>>>>> well it's most likely some missing handling in the KFD, so I'm
> >>>>>>>>> rather reluctant to revert the change immediately.
> >>>>>>>>>
> >>>>>>>>> Problem is that I don't have time right now to look into it
> >>>>>>>>> immediately. So Kent can you continue to take a look?
> >>>>>>>>>
> >>>>>>>>> Sounds like it's crashing immediately, so it should be
> >>>>>>>>> something
> >>>> obvious.
> >>>>>>>>> Christian.
> >>>>>>>>>
> >>>>>>>>> Am 11.03.19 um 10:49 schrieb Russell, Kent:
> >>>>>>>>>>      From what I've been able to dig through, the VM Fault
> >>>>>>>>>> seems to occur right after a doorbell mmap, but that's as far
> >>>>>>>>>> as I got. I can try to revert it in today's merge and see how
> >>>>>>>>>> things go.
> >>>>>>>>>>
> >>>>>>>>>>       Kent
> >>>>>>>>>>
> >>>>>>>>>>> -----Original Message-----
> >>>>>>>>>>> From: Kuehling, Felix
> >>>>>>>>>>> Sent: Friday, March 08, 2019 11:16 PM
> >>>>>>>>>>> To: Koenig, Christian <Christian.Koenig@amd.com>; Russell,
> >>>>>>>>>>> Kent <Kent.Russell@amd.com>; amd-
> gfx@lists.freedesktop.org
> >>>>>>>>>>> Subject: RE: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs
> on
> >>>>>> demand
> >>>>>>>>>>> My concerns were related to eviction fence handling. It would
> >>>>>>>>>>> manifest by unnecessary eviction callbacks into KFD that
> >>>>>>>>>>> aren't caused by real evictions. I addressed that with a
> >>>>>>>>>>> previous patch series that removed the need to remove
> >>>>>>>>>>> eviction fences and add them back around page table updates
> >>>>>>>>>>> in
> >>>> amdgpu_amdkfd_gpuvm.c.
> >>>>>>>>>>> I don't know what's going on here. I can probably take a
> >>>>>>>>>>> look on Monday. I haven't considered what changed with
> >>>>>>>>>>> respect to PD updates.
> >>>>>>>>>>>
> >>>>>>>>>>> Kent, can we temporarily revert the offending change in
> >>>>>>>>>>> amd-kfd-staging just to unblock the merge?
> >>>>>>>>>>>
> >>>>>>>>>>> Christian, I think KFD is currently broken on
> >>>>>>>>>>> amd-staging-drm-next.
> >>>>>>>>>>> If we're
> >>>>>>>>>>> serious about supporting KFD upstream, you may also want to
> >>>>>>>>>>> consider reverting your change there for now. Also consider
> >>>>>>>>>>> building the Thunk and kfdtest so you can do quick smoke
> >>>>>>>>>>> tests locally whenever you make amdgpu_vm changes that
> can
> >>>>>>>>>>> affect
> >>>> KFD.
> >>>>>>>>>>> https://github.com/RadeonOpenCompute/ROCT-Thunk-
> Interface
> >>>>>>>>>>>
> >>>>>>>>>>> Regards,
> >>>>>>>>>>>        Felix
> >>>>>>>>>>>
> >>>>>>>>>>> -----Original Message-----
> >>>>>>>>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On
> >>>> Behalf
> >>>>>> Of
> >>>>>>>>>>> Christian König
> >>>>>>>>>>> Sent: Friday, March 08, 2019 9:14 AM
> >>>>>>>>>>> To: Russell, Kent <Kent.Russell@amd.com>;
> >>>>>>>>>>> amd-gfx@lists.freedesktop.org
> >>>>>>>>>>> Subject: Re: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs
> on
> >>>>>> demand
> >>>>>>>>>>> My best guess is that we forget somewhere to update the
> PDs.
> >>>>>>>>>>> What hardware is that on?
> >>>>>>>>>>>
> >>>>>>>>>>> Felix already mentioned that this could be problematic for
> >>>>>>>>>>> the KFD.
> >>>>>>>>>>>
> >>>>>>>>>>> Maybe he has an idea,
> >>>>>>>>>>> Christian.
> >>>>>>>>>>>
> >>>>>>>>>>> Am 08.03.19 um 15:04 schrieb Russell, Kent:
> >>>>>>>>>>>> Hi Christian,
> >>>>>>>>>>>>
> >>>>>>>>>>>> This patch ended up causing a VM Fault in KFDTest.
> >>>>>>>>>>>> Reverting just this
> >>>>>>>>>>> patch addressed the issue:
> >>>>>>>>>>>> [   82.703503] amdgpu 0000:0c:00.0: GPU fault detected: 146
> >>>>>>>>>>>> 0x0000480c for
> >>>>>>>>>>> process  pid 0 thread  pid 0
> >>>>>>>>>>>> [   82.703512] amdgpu 0000:0c:00.0:
> >>>>>>>>>>> VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x00001000
> >>>>>>>>>>>> [   82.703516] amdgpu 0000:0c:00.0:
> >>>>>>>>>>> VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x1004800C
> >>>>>>>>>>>> [   82.703522] amdgpu 0000:0c:00.0: VM fault (0x0c, vmid 8,
> >>>>>>>>>>>> pasid
> >>>>>>>>>>>> 32769) at
> >>>>>>>>>>> page 4096, read from 'TC0' (0x54433000) (72)
> >>>>>>>>>>>> [   82.703585] Evicting PASID 32769 queues
> >>>>>>>>>>>>
> >>>>>>>>>>>> I am looking into it, but if you have any insight that
> >>>>>>>>>>>> would be great in
> >>>>>>>>>>> helping to resolve it quickly.
> >>>>>>>>>>>>        Kent
> >>>>>>>>>>>>> -----Original Message-----
> >>>>>>>>>>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org>
> On
> >>>>>> Behalf
> >>>>>>>> Of
> >>>>>>>>>>>>> Christian König
> >>>>>>>>>>>>> Sent: Tuesday, February 26, 2019 7:47 AM
> >>>>>>>>>>>>> To: amd-gfx@lists.freedesktop.org
> >>>>>>>>>>>>> Subject: [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on
> >>>>>> demand
> >>>>>>>>>>>>> Let's start to allocate VM PDs/PTs on demand instead of
> >>>>>>>>>>>>> pre-allocating them during mapping.
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> Signed-off-by: Christian König
> <christian.koenig@amd.com>
> >>>>>>>>>>>>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
> >>>>>>>>>>>>> ---
> >>>>>>>>>>>>> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  10
> >>>> +-
> >>>>>>>>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c |   9 --
> >>>>>>>>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c |  10 --
> >>>>>>>>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 136
> >>>>>>>>>>>>> +++++------------
> >>>>>>>>>>> -
> >>>>>>>>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |   3 -
> >>>>>>>>>>>>>        5 files changed, 39 insertions(+), 129 deletions(-)
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> diff --git
> >>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >>>>>>>>>>>>>
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >>>>>>>>>>>>> index 31e3953dcb6e..088e9b6b765b 100644
> >>>>>>>>>>>>> ---
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >>>>>>>>>>>>> +++
> >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> >>>>>>>>>>>>> @@ -410,15 +410,7 @@ static int add_bo_to_vm(struct
> >>>>>>>> amdgpu_device
> >>>>>>>>>>>>> *adev, struct kgd_mem *mem,
> >>>>>>>>>>>>>            if (p_bo_va_entry)
> >>>>>>>>>>>>>                *p_bo_va_entry = bo_va_entry;
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> -    /* Allocate new page tables if needed and validate
> >>>>>>>>>>>>> -     * them.
> >>>>>>>>>>>>> -     */
> >>>>>>>>>>>>> -    ret = amdgpu_vm_alloc_pts(adev, vm, va,
> >>>>>>>>>>>>> amdgpu_bo_size(bo));
> >>>>>>>>>>>>> -    if (ret) {
> >>>>>>>>>>>>> -        pr_err("Failed to allocate pts, err=%d\n", ret);
> >>>>>>>>>>>>> -        goto err_alloc_pts;
> >>>>>>>>>>>>> -    }
> >>>>>>>>>>>>> -
> >>>>>>>>>>>>> +    /* Allocate validate page tables if needed */
> >>>>>>>>>>>>>            ret = vm_validate_pt_pd_bos(vm);
> >>>>>>>>>>>>>            if (ret) {
> >>>>>>>>>>>>> pr_err("validate_pt_pd_bos() failed\n"); diff --git
> >>>>>>>>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >>>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >>>>>>>>>>>>> index 7e22be7ca68a..54dd02a898b9 100644
> >>>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >>>>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> >>>>>>>>>>>>> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct
> >>>>>>>> amdgpu_device
> >>>>>>>>>>>>> *adev, struct amdgpu_vm *vm,
> >>>>>>>>>>>>>                return -ENOMEM;
> >>>>>>>>>>>>>            }
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> -    r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm,
> >>>>>>>>>>>>> csa_addr,
> >>>>>>>>>>>>> -                size);
> >>>>>>>>>>>>> -    if (r) {
> >>>>>>>>>>>>> -        DRM_ERROR("failed to allocate pts for static CSA,
> >>>>>>>>>>>>> err=%d\n", r);
> >>>>>>>>>>>>> -        amdgpu_vm_bo_rmv(adev, *bo_va);
> >>>>>>>>>>>>> - ttm_eu_backoff_reservation(&ticket, &list);
> >>>>>>>>>>>>> -        return r;
> >>>>>>>>>>>>> -    }
> >>>>>>>>>>>>> -
> >>>>>>>>>>>>>            r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0,
> >>>>>>>>>>>>> size, AMDGPU_PTE_READABLE |
> AMDGPU_PTE_WRITEABLE
> >>>>>>>>>>>>> |
> >>>>>>>>>>>>> AMDGPU_PTE_EXECUTABLE); diff --git
> >>>>>>>>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >>>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >>>>>>>>>>>>> index 555285e329ed..fcaaac30e84b 100644
> >>>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >>>>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >>>>>>>>>>>>> @@ -625,11 +625,6 @@ int amdgpu_gem_va_ioctl(struct
> >>>>>> drm_device
> >>>>>>>>>>> *dev,
> >>>>>>>>>>>>> void *data,
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>            switch (args->operation) {
> >>>>>>>>>>>>>            case AMDGPU_VA_OP_MAP:
> >>>>>>>>>>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm,
> >>>>>>>>>>>>> args-
> >>>>>>>>>>>>>> va_address,
> >>>>>>>>>>>>> -                    args->map_size);
> >>>>>>>>>>>>> -        if (r)
> >>>>>>>>>>>>> -            goto error_backoff;
> >>>>>>>>>>>>> -
> >>>>>>>>>>>>>                va_flags = amdgpu_gmc_get_pte_flags(adev,
> >>>>>>>>>>>>> args->flags);
> >>>>>>>>>>>>>                r = amdgpu_vm_bo_map(adev, bo_va,
> >>>>>>>>>>>>> args->va_address,
> >>>>>>>>>>>>> args->offset_in_bo, args->map_size,
> >>>>>>>>>>>>> @@
> >>>>>>>>>>>>> -
> >>>>>>>>>>>>> 645,11 +640,6 @@ int amdgpu_gem_va_ioctl(struct
> drm_device
> >>>>>> *dev,
> >>>>>>>>>>> void
> >>>>>>>>>>>>> *data,
> >>>>>>>>>>>>> args->map_size);
> >>>>>>>>>>>>>                break;
> >>>>>>>>>>>>>            case AMDGPU_VA_OP_REPLACE:
> >>>>>>>>>>>>> -        r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm,
> >>>>>>>>>>>>> args-
> >>>>>>>>>>>>>> va_address,
> >>>>>>>>>>>>> -                    args->map_size);
> >>>>>>>>>>>>> -        if (r)
> >>>>>>>>>>>>> -            goto error_backoff;
> >>>>>>>>>>>>> -
> >>>>>>>>>>>>>                va_flags = amdgpu_gmc_get_pte_flags(adev,
> >>>>>>>>>>>>> args->flags);
> >>>>>>>>>>>>>                r = amdgpu_vm_bo_replace_map(adev, bo_va,
> >>>>>>>>>>>>> args-
> >>>>>>>>>>>>>> va_address,
> >>>>>>>>>>>>> args->offset_in_bo, args-
> >>>>>>>>>>>>>> map_size, diff --git
> >>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>>>>>>>>>>>> index 362436f4e856..dfad543fc000 100644
> >>>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>>>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >>>>>>>>>>>>> @@ -504,47 +504,6 @@ static void
> amdgpu_vm_pt_next(struct
> >>>>>>>>>>>>> amdgpu_device *adev,
> >>>>>>>>>>>>>            }
> >>>>>>>>>>>>>        }
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> -/**
> >>>>>>>>>>>>> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
> >>>>>>>>>>>>> - *
> >>>>>>>>>>>>> - * @adev: amdgpu_device pointer
> >>>>>>>>>>>>> - * @vm: amdgpu_vm structure
> >>>>>>>>>>>>> - * @start: start addr of the walk
> >>>>>>>>>>>>> - * @cursor: state to initialize
> >>>>>>>>>>>>> - *
> >>>>>>>>>>>>> - * Start a walk and go directly to the leaf node.
> >>>>>>>>>>>>> - */
> >>>>>>>>>>>>> -static void amdgpu_vm_pt_first_leaf(struct
> amdgpu_device
> >>>>>> *adev,
> >>>>>>>>>>>>> - struct amdgpu_vm *vm, uint64_t start,
> >>>>>>>>>>>>> -                    struct amdgpu_vm_pt_cursor *cursor)
> >>>>>>>>>>>>> -{
> >>>>>>>>>>>>> -    amdgpu_vm_pt_start(adev, vm, start, cursor);
> >>>>>>>>>>>>> -    while (amdgpu_vm_pt_descendant(adev, cursor)); -}
> >>>>>>>>>>>>> -
> >>>>>>>>>>>>> -/**
> >>>>>>>>>>>>> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
> >>>>>>>>>>>>> - *
> >>>>>>>>>>>>> - * @adev: amdgpu_device pointer
> >>>>>>>>>>>>> - * @cursor: current state
> >>>>>>>>>>>>> - *
> >>>>>>>>>>>>> - * Walk the PD/PT tree to the next leaf node.
> >>>>>>>>>>>>> - */
> >>>>>>>>>>>>> -static void amdgpu_vm_pt_next_leaf(struct
> amdgpu_device
> >>>>>> *adev,
> >>>>>>>>>>>>> - struct amdgpu_vm_pt_cursor *cursor) -{
> >>>>>>>>>>>>> -    amdgpu_vm_pt_next(adev, cursor);
> >>>>>>>>>>>>> -    if (cursor->pfn != ~0ll)
> >>>>>>>>>>>>> -        while (amdgpu_vm_pt_descendant(adev, cursor)); -}
> >>>>>>>>>>>>> -
> >>>>>>>>>>>>> -/**
> >>>>>>>>>>>>> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf
> >>>>>>>>>>>>> PDs/PTs in the hierarchy
> >>>>>>>>>>>>> - */
> >>>>>>>>>>>>> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start,
> end,
> >>>>>>>>>>>>> cursor)
> >>>>>>>>>>>>>         \
> >>>>>>>>>>>>> -    for (amdgpu_vm_pt_first_leaf((adev), (vm), (start),
> >>>>>>>>>>>>> &(cursor));
> >>>>>>>>>>>>>             \
> >>>>>>>>>>>>> -         (cursor).pfn <= end;
> >>>>>>>>>>>>> amdgpu_vm_pt_next_leaf((adev),
> >>>>>>>>>>>>> &(cursor)))
> >>>>>>>>>>>>> -
> >>>>>>>>>>>>>        /**
> >>>>>>>>>>>>>         * amdgpu_vm_pt_first_dfs - start a deep first
> >>>>>>>>>>>>> search
> >>>>>>>>>>>>>         *
> >>>>>>>>>>>>> @@ -915,74 +874,51 @@ static void
> >>>> amdgpu_vm_bo_param(struct
> >>>>>>>>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
> >>>>>>>>>>>>>         * Returns:
> >>>>>>>>>>>>>         * 0 on success, errno otherwise.
> >>>>>>>>>>>>>         */
> >>>>>>>>>>>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> >>>>>>>>>>>>> -            struct amdgpu_vm *vm,
> >>>>>>>>>>>>> -            uint64_t saddr, uint64_t size)
> >>>>>>>>>>>>> +static int amdgpu_vm_alloc_pts(struct amdgpu_device
> >>>>>>>>>>>>> +*adev,
> >>>>>>>>>>>>> +                   struct amdgpu_vm *vm,
> >>>>>>>>>>>>> +                   struct amdgpu_vm_pt_cursor *cursor)
> >>>>>>>>>>>>>        {
> >>>>>>>>>>>>> -    struct amdgpu_vm_pt_cursor cursor;
> >>>>>>>>>>>>> +    struct amdgpu_vm_pt *entry = cursor->entry;
> >>>>>>>>>>>>> +    struct amdgpu_bo_param bp;
> >>>>>>>>>>>>>            struct amdgpu_bo *pt;
> >>>>>>>>>>>>> -    uint64_t eaddr;
> >>>>>>>>>>>>>            int r;
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> -    /* validate the parameters */
> >>>>>>>>>>>>> -    if (saddr & AMDGPU_GPU_PAGE_MASK || size &
> >>>>>>>>>>>>> AMDGPU_GPU_PAGE_MASK)
> >>>>>>>>>>>>> -        return -EINVAL;
> >>>>>>>>>>>>> +    if (cursor->level < AMDGPU_VM_PTB && !entry-
> >entries)
> >>>>>>>>>>>>> +{
> >>>>>>>>>>>>> +        unsigned num_entries;
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> -    eaddr = saddr + size - 1;
> >>>>>>>>>>>>> -
> >>>>>>>>>>>>> -    saddr /= AMDGPU_GPU_PAGE_SIZE;
> >>>>>>>>>>>>> -    eaddr /= AMDGPU_GPU_PAGE_SIZE;
> >>>>>>>>>>>>> -
> >>>>>>>>>>>>> -    if (eaddr >= adev->vm_manager.max_pfn) {
> >>>>>>>>>>>>> -        dev_err(adev->dev, "va above limit (0x%08llX >=
> >>>>>>>>>>>>> 0x%08llX)\n",
> >>>>>>>>>>>>> -            eaddr, adev->vm_manager.max_pfn);
> >>>>>>>>>>>>> -        return -EINVAL;
> >>>>>>>>>>>>> +        num_entries = amdgpu_vm_num_entries(adev,
> cursor-
> >>>>>>>>>>>>>> level);
> >>>>>>>>>>>>> +        entry->entries = kvmalloc_array(num_entries,
> >>>>>>>>>>>>> +sizeof(*entry->entries),
> >>>>>>>>>>>>> +                        GFP_KERNEL | __GFP_ZERO);
> >>>>>>>>>>>>> +        if (!entry->entries)
> >>>>>>>>>>>>> +            return -ENOMEM;
> >>>>>>>>>>>>>            }
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> -    for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr,
> >>>>>>>>>>>>> cursor) {
> >>>>>>>>>>>>> -        struct amdgpu_vm_pt *entry = cursor.entry;
> >>>>>>>>>>>>> -        struct amdgpu_bo_param bp;
> >>>>>>>>>>>>> -
> >>>>>>>>>>>>> -        if (cursor.level < AMDGPU_VM_PTB) {
> >>>>>>>>>>>>> -            unsigned num_entries;
> >>>>>>>>>>>>> -
> >>>>>>>>>>>>> -            num_entries = amdgpu_vm_num_entries(adev,
> >>>>>>>>>>>>> cursor.level);
> >>>>>>>>>>>>> -            entry->entries = kvmalloc_array(num_entries,
> >>>>>>>>>>>>> - sizeof(*entry-
> >>>>>>>>>>>>>> entries),
> >>>>>>>>>>>>> -                            GFP_KERNEL |
> >>>>>>>>>>>>> -                            __GFP_ZERO);
> >>>>>>>>>>>>> -            if (!entry->entries)
> >>>>>>>>>>>>> -                return -ENOMEM;
> >>>>>>>>>>>>> -        }
> >>>>>>>>>>>>> -
> >>>>>>>>>>>>> -
> >>>>>>>>>>>>> -        if (entry->base.bo)
> >>>>>>>>>>>>> -            continue;
> >>>>>>>>>>>>> -
> >>>>>>>>>>>>> -        amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
> >>>>>>>>>>>>> -
> >>>>>>>>>>>>> -        r = amdgpu_bo_create(adev, &bp, &pt);
> >>>>>>>>>>>>> -        if (r)
> >>>>>>>>>>>>> -            return r;
> >>>>>>>>>>>>> -
> >>>>>>>>>>>>> -        if (vm->use_cpu_for_update) {
> >>>>>>>>>>>>> -            r = amdgpu_bo_kmap(pt, NULL);
> >>>>>>>>>>>>> -            if (r)
> >>>>>>>>>>>>> -                goto error_free_pt;
> >>>>>>>>>>>>> -        }
> >>>>>>>>>>>>> +    if (entry->base.bo)
> >>>>>>>>>>>>> +        return 0;
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> -        /* Keep a reference to the root directory to
> >>>>>>>>>>>>> avoid
> >>>>>>>>>>>>> -        * freeing them up in the wrong order.
> >>>>>>>>>>>>> -        */
> >>>>>>>>>>>>> -        pt->parent =
> >>>>>>>>>>>>> amdgpu_bo_ref(cursor.parent->base.bo);
> >>>>>>>>>>>>> +    amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> - amdgpu_vm_bo_base_init(&entry->base, vm, pt);
> >>>>>>>>>>>>> +    r = amdgpu_bo_create(adev, &bp, &pt);
> >>>>>>>>>>>>> +    if (r)
> >>>>>>>>>>>>> +        return r;
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> -        r = amdgpu_vm_clear_bo(adev, vm, pt);
> >>>>>>>>>>>>> +    if (vm->use_cpu_for_update) {
> >>>>>>>>>>>>> +        r = amdgpu_bo_kmap(pt, NULL);
> >>>>>>>>>>>>>                if (r)
> >>>>>>>>>>>>>                    goto error_free_pt;
> >>>>>>>>>>>>>            }
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> +    /* Keep a reference to the root directory to avoid
> >>>>>>>>>>>>> +     * freeing them up in the wrong order.
> >>>>>>>>>>>>> +     */
> >>>>>>>>>>>>> +    pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
> >>>>>>>>>>>>> + amdgpu_vm_bo_base_init(&entry->base, vm, pt);
> >>>>>>>>>>>>> +
> >>>>>>>>>>>>> +    r = amdgpu_vm_clear_bo(adev, vm, pt);
> >>>>>>>>>>>>> +    if (r)
> >>>>>>>>>>>>> +        goto error_free_pt;
> >>>>>>>>>>>>> +
> >>>>>>>>>>>>>            return 0;
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>        error_free_pt:
> >>>>>>>>>>>>> @@ -1627,6 +1563,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
> >>>>>>>>>>>>>            struct amdgpu_vm_pt_cursor cursor;
> >>>>>>>>>>>>>            uint64_t frag_start = start, frag_end;
> >>>>>>>>>>>>>            unsigned int frag;
> >>>>>>>>>>>>> +    int r;
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>            /* figure out the initial fragment */
> >>>>>>>>>>>>>            amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);
> >>>>>>>>>>>>> @@ -1634,12 +1571,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
> >>>>>>>>>>>>>            /* walk over the address space and update the
> >>>>>>>>>>>>> PTs */
> >>>>>>>>>>>>>            amdgpu_vm_pt_start(adev, params->vm, start,
> >>>>>>>>>>>>> &cursor);
> >>>>>>>>>>>>>            while (cursor.pfn < end) {
> >>>>>>>>>>>>> -        struct amdgpu_bo *pt = cursor.entry->base.bo;
> >>>>>>>>>>>>>                unsigned shift, parent_shift, mask;
> >>>>>>>>>>>>>                uint64_t incr, entry_end, pe_start;
> >>>>>>>>>>>>> +        struct amdgpu_bo *pt;
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> -        if (!pt)
> >>>>>>>>>>>>> -            return -ENOENT;
> >>>>>>>>>>>>> +        r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);
> >>>>>>>>>>>>> +        if (r)
> >>>>>>>>>>>>> +            return r;
> >>>>>>>>>>>>> +
> >>>>>>>>>>>>> +        pt = cursor.entry->base.bo;
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>                /* The root level can't be a huge page */
> >>>>>>>>>>>>>                if (cursor.level == adev->vm_manager.root_level) {
> >>>>>>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >>>>>>>>>>>>> index 81ff8177f092..116605c038d2 100644
> >>>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >>>>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> >>>>>>>>>>>>> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm);
> >>>>>>>>>>>>>        int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> >>>>>>>>>>>>>                          int (*callback)(void *p, struct amdgpu_bo *bo),
> >>>>>>>>>>>>>                          void *param);
> >>>>>>>>>>>>> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> >>>>>>>>>>>>> -            struct amdgpu_vm *vm,
> >>>>>>>>>>>>> -            uint64_t saddr, uint64_t size);
> >>>>>>>>>>>>>        int amdgpu_vm_flush(struct amdgpu_ring *ring,
> >>>>>>>>>>>>> struct amdgpu_job *job, bool need_pipe_sync); int
> >>>>>>>>>>>>> amdgpu_vm_update_directories(struct
> >>>>>>>>>>>>> amdgpu_device *adev,
> >>>>>>>>>>>>>                         struct amdgpu_vm *vm);
> >>>>>>>>>>>>> --
> >>>>>>>>>>>>> 2.17.1
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>
> _______________________________________________
> >>>>>>>>>>>>> amd-gfx mailing list
> >>>>>>>>>>>>> amd-gfx@lists.freedesktop.org
> >>>>>>>>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> >>>>>>>>>>> _______________________________________________
> >>>>>>>>>>> amd-gfx mailing list
> >>>>>>>>>>> amd-gfx@lists.freedesktop.org
> >>>>>>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> >>>>>>>>>> _______________________________________________
> >>>>>>>>>> amd-gfx mailing list
> >>>>>>>>>> amd-gfx@lists.freedesktop.org
> >>>>>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> >>>>> _______________________________________________
> >>>>> amd-gfx mailing list
> >>>>> amd-gfx@lists.freedesktop.org
> >>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> >>> _______________________________________________
> >>> amd-gfx mailing list
> >>> amd-gfx@lists.freedesktop.org
> >>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> >>>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2019-03-13 10:52 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-26 12:46 [PATCH 1/6] drm/amdgpu: rework shadow handling during PD clear v2 Christian König
     [not found] ` <20190226124658.25334-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2019-02-26 12:46   ` [PATCH 2/6] drm/amdgpu: let amdgpu_vm_clear_bo figure out ats status v2 Christian König
     [not found]     ` <20190226124658.25334-2-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2019-02-27 19:25       ` Zeng, Oak
2019-02-26 12:46   ` [PATCH 3/6] drm/amdgpu: allocate VM PDs/PTs on demand Christian König
     [not found]     ` <20190226124658.25334-3-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2019-03-08 14:04       ` Russell, Kent
     [not found]         ` <BN6PR12MB1618B9E60AC6E9B323042F1D854D0-/b2+HYfkarRqaFUXYJa4HgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-03-08 14:14           ` Christian König
     [not found]             ` <2bd27a3b-9f96-b2e4-5070-3413a14e9c7f-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-03-08 14:17               ` Russell, Kent
2019-03-09  4:15               ` Kuehling, Felix
     [not found]                 ` <BYAPR12MB3176F9AA995D5C7DFAD5ABC1924E0-ZGDeBxoHBPks/z/azo2A2AdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-03-11  9:49                   ` Russell, Kent
     [not found]                     ` <BN6PR12MB16181059CE65EF869EB044A985480-/b2+HYfkarRqaFUXYJa4HgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-03-11 16:55                       ` Christian König
     [not found]                         ` <b0383667-53ef-31c5-5a73-270bc90ab8c8-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-03-12 13:13                           ` Christian König
     [not found]                             ` <6b1cfc95-fde8-e25f-121e-cbb03592ec3a-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-03-12 14:47                               ` Russell, Kent
     [not found]                                 ` <CY4PR12MB16224445596C88A29C3AE63D85490-rpdhrqHFk05g4+I42y6h/AdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-03-12 14:49                                   ` Koenig, Christian
     [not found]                                     ` <5b125e82-e106-bc60-b8a2-37161aac4260-5C7GfCeVMHo@public.gmane.org>
2019-03-12 15:01                                       ` Russell, Kent
     [not found]                                         ` <CY4PR12MB162238C544AF46475778E18985490-rpdhrqHFk05g4+I42y6h/AdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-03-12 15:09                                           ` Christian König
     [not found]                                             ` <307878c8-73eb-598f-f40a-7cf285b0b60b-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-03-12 15:12                                               ` Russell, Kent
     [not found]                                                 ` <CY4PR12MB1622A73D027D2AC53AF2A2B985490-rpdhrqHFk05g4+I42y6h/AdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-03-12 21:13                                                   ` Yang, Philip
     [not found]                                                     ` <01deb600-d1b9-8875-ee73-7796702792d5-5C7GfCeVMHo@public.gmane.org>
2019-03-12 21:19                                                       ` Kuehling, Felix
     [not found]                                                         ` <baf94765-e2a2-6e2f-7e6d-69575a2cdbd2-5C7GfCeVMHo@public.gmane.org>
2019-03-12 23:30                                                           ` Kuehling, Felix
     [not found]                                                             ` <3e185bb0-ec43-4784-2130-084d46167605-5C7GfCeVMHo@public.gmane.org>
2019-03-13 10:52                                                               ` Russell, Kent
2019-03-12 19:02                           ` Kuehling, Felix
     [not found]                             ` <9ad4e7aa-94e4-1d81-6da0-ce9050c4ca0a-5C7GfCeVMHo@public.gmane.org>
2019-03-12 20:25                               ` Kuehling, Felix
2019-02-26 12:46   ` [PATCH 4/6] drm/amdgpu: free " Christian König
2019-02-26 12:46   ` [PATCH 5/6] drm/amdgpu: drop the huge page flag Christian König
2019-02-26 12:46   ` [PATCH 6/6] drm/amdgpu: allow huge invalid mappings on GMC8 Christian König
2019-02-27 11:49   ` [PATCH 1/6] drm/amdgpu: rework shadow handling during PD clear v2 Huang, Ray

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.