* [PATCH 1/5] drm/amdgpu: allow direct submission in the VM backends
From: Christian König @ 2019-06-28 12:18 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Felix.Kuehling-5C7GfCeVMHo

This allows us to update page tables directly while in a page fault.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      |  5 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  |  4 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 29 +++++++++++++--------
 3 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 489a162ca620..5941accea061 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -197,6 +197,11 @@ struct amdgpu_vm_update_params {
 	 */
 	struct amdgpu_vm *vm;
 
+	/**
+	 * @direct: if changes should be made directly
+	 */
+	bool direct;
+
 	/**
 	 * @pages_addr:
 	 *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
index 5222d165abfc..f94e4896079c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
@@ -49,6 +49,10 @@ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, void *owner,
 {
 	int r;
 
+	/* Don't wait for anything during page fault */
+	if (p->direct)
+		return 0;
+
 	/* Wait for PT BOs to be idle. PTs share the same resv. object
 	 * as the root PD BO
 	 */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index ddd181f5ed37..891d597063cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -68,17 +68,17 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
 	if (r)
 		return r;
 
-	r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false);
-	if (r)
-		return r;
+	p->num_dw_left = ndw;
+
+	if (p->direct)
+		return 0;
 
-	r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv,
-			     owner, false);
+	r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false);
 	if (r)
 		return r;
 
-	p->num_dw_left = ndw;
-	return 0;
+	return amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv,
+				owner, false);
 }
 
 /**
@@ -99,13 +99,21 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
 	struct dma_fence *f;
 	int r;
 
-	ring = container_of(p->vm->entity.rq->sched, struct amdgpu_ring, sched);
+	if (p->direct)
+		ring = p->adev->vm_manager.page_fault;
+	else
+		ring = container_of(p->vm->entity.rq->sched,
+				    struct amdgpu_ring, sched);
 
 	WARN_ON(ib->length_dw == 0);
 	amdgpu_ring_pad_ib(ring, ib);
 	WARN_ON(ib->length_dw > p->num_dw_left);
-	r = amdgpu_job_submit(p->job, &p->vm->entity,
-			      AMDGPU_FENCE_OWNER_VM, &f);
+
+	if (p->direct)
+		r = amdgpu_job_submit_direct(p->job, ring, &f);
+	else
+		r = amdgpu_job_submit(p->job, &p->vm->entity,
+				      AMDGPU_FENCE_OWNER_VM, &f);
 	if (r)
 		goto error;
 
@@ -120,7 +128,6 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
 	return r;
 }
 
-
 /**
  * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping
  *
-- 
2.17.1


* [PATCH 2/5] drm/amdgpu: allow direct submission of PDE updates.
From: Christian König @ 2019-06-28 12:18 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Felix.Kuehling-5C7GfCeVMHo

For handling PDE updates directly in the fault handler.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c           | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c          | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c           | 8 +++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h           | 4 ++--
 5 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 10abae398e51..9954f4ec44b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -348,7 +348,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
 	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
 	int ret;
 
-	ret = amdgpu_vm_update_directories(adev, vm);
+	ret = amdgpu_vm_update_pdes(adev, vm, false);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index c25e1ebc76c3..3fd7730612d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -913,7 +913,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	r = amdgpu_vm_update_directories(adev, vm);
+	r = amdgpu_vm_update_pdes(adev, vm, false);
 	if (r)
 		return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index ed25a4e14404..579957e3ea67 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -522,7 +522,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 			goto error;
 	}
 
-	r = amdgpu_vm_update_directories(adev, vm);
+	r = amdgpu_vm_update_pdes(adev, vm, false);
 
 error:
 	if (r && r != -ERESTARTSYS)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 1951f2abbdbc..5bd0408e4e56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1221,18 +1221,19 @@ static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev,
 }
 
 /*
- * amdgpu_vm_update_directories - make sure that all directories are valid
+ * amdgpu_vm_update_pdes - make sure that all directories are valid
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
+ * @direct: submit directly to the paging queue
  *
  * Makes sure all directories are up to date.
  *
  * Returns:
  * 0 for success, error for failure.
  */
-int amdgpu_vm_update_directories(struct amdgpu_device *adev,
-				 struct amdgpu_vm *vm)
+int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
+			  struct amdgpu_vm *vm, bool direct)
 {
 	struct amdgpu_vm_update_params params;
 	int r;
@@ -1243,6 +1244,7 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
 	params.vm = vm;
+	params.direct = direct;
 
 	r = vm->update_funcs->prepare(&params, AMDGPU_FENCE_OWNER_VM, NULL);
 	if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 5941accea061..7227c8da1a2a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -361,8 +361,8 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			      int (*callback)(void *p, struct amdgpu_bo *bo),
 			      void *param);
 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
-int amdgpu_vm_update_directories(struct amdgpu_device *adev,
-				 struct amdgpu_vm *vm);
+int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
+			  struct amdgpu_vm *vm, bool direct);
 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			  struct amdgpu_vm *vm,
 			  struct dma_fence **fence);
-- 
2.17.1


* [PATCH 3/5] drm/amdgpu: allow direct submission of PTE updates
From: Christian König @ 2019-06-28 12:18 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Felix.Kuehling-5C7GfCeVMHo

For handling PTE updates directly in the fault handler.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 5bd0408e4e56..a8ea7bf18cf6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1486,13 +1486,14 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
  * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
  *
  * @adev: amdgpu_device pointer
- * @exclusive: fence we need to sync to
- * @pages_addr: DMA addresses to use for mapping
  * @vm: requested vm
+ * @direct: direct submission in a page fault
+ * @exclusive: fence we need to sync to
  * @start: start of mapped range
  * @last: last mapped entry
  * @flags: flags for the entries
  * @addr: addr to set the area to
+ * @pages_addr: DMA addresses to use for mapping
  * @fence: optional resulting fence
  *
  * Fill in the page table entries between @start and @last.
@@ -1501,11 +1502,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
  * 0 for success, -EINVAL for failure.
  */
 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+				       struct amdgpu_vm *vm, bool direct,
 				       struct dma_fence *exclusive,
-				       dma_addr_t *pages_addr,
-				       struct amdgpu_vm *vm,
 				       uint64_t start, uint64_t last,
 				       uint64_t flags, uint64_t addr,
+				       dma_addr_t *pages_addr,
 				       struct dma_fence **fence)
 {
 	struct amdgpu_vm_update_params params;
@@ -1515,6 +1516,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
 	params.vm = vm;
+	params.direct = direct;
 	params.pages_addr = pages_addr;
 
 	/* sync to everything except eviction fences on unmapping */
@@ -1646,9 +1648,9 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 		}
 
 		last = min((uint64_t)mapping->last, start + max_entries - 1);
-		r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm,
+		r = amdgpu_vm_bo_update_mapping(adev, vm, false, exclusive,
 						start, last, flags, addr,
-						fence);
+						dma_addr, fence);
 		if (r)
 			return r;
 
@@ -1942,9 +1944,9 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 		    mapping->start < AMDGPU_GMC_HOLE_START)
 			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
 
-		r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
+		r = amdgpu_vm_bo_update_mapping(adev, vm, false, NULL,
 						mapping->start, mapping->last,
-						init_pte_value, 0, &f);
+						init_pte_value, 0, NULL, &f);
 		amdgpu_vm_free_mapping(adev, vm, mapping, f);
 		if (r) {
 			dma_fence_put(f);
-- 
2.17.1


* [PATCH 4/5] drm/amdgpu: allow direct submission of clears
From: Christian König @ 2019-06-28 12:18 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Felix.Kuehling-5C7GfCeVMHo

For handling PD/PT clears directly in the fault handler.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a8ea7bf18cf6..a2d18fe46691 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -695,6 +695,7 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
  * @adev: amdgpu_device pointer
  * @vm: VM to clear BO from
  * @bo: BO to clear
+ * @direct: use a direct update
  *
  * Root PD needs to be reserved when calling this.
  *
@@ -703,7 +704,8 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
  */
 static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 			      struct amdgpu_vm *vm,
-			      struct amdgpu_bo *bo)
+			      struct amdgpu_bo *bo,
+			      bool direct)
 {
 	struct ttm_operation_ctx ctx = { true, false };
 	unsigned level = adev->vm_manager.root_level;
@@ -762,6 +764,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
 	params.vm = vm;
+	params.direct = direct;
 
 	r = vm->update_funcs->prepare(&params, AMDGPU_FENCE_OWNER_KFD, NULL);
 	if (r)
@@ -852,7 +855,8 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  */
 static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 			       struct amdgpu_vm *vm,
-			       struct amdgpu_vm_pt_cursor *cursor)
+			       struct amdgpu_vm_pt_cursor *cursor,
+			       bool direct)
 {
 	struct amdgpu_vm_pt *entry = cursor->entry;
 	struct amdgpu_bo_param bp;
@@ -885,7 +889,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 	pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
 	amdgpu_vm_bo_base_init(&entry->base, vm, pt);
 
-	r = amdgpu_vm_clear_bo(adev, vm, pt);
+	r = amdgpu_vm_clear_bo(adev, vm, pt, direct);
 	if (r)
 		goto error_free_pt;
 
@@ -1395,7 +1399,8 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
 		uint64_t incr, entry_end, pe_start;
 		struct amdgpu_bo *pt;
 
-		r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);
+		r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor,
+					params->direct);
 		if (r)
 			return r;
 
@@ -2733,7 +2738,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
 
-	r = amdgpu_vm_clear_bo(adev, vm, root);
+	r = amdgpu_vm_clear_bo(adev, vm, root, false);
 	if (r)
 		goto error_unreserve;
 
@@ -2853,7 +2858,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns
 	 */
 	if (pte_support_ats != vm->pte_support_ats) {
 		vm->pte_support_ats = pte_support_ats;
-		r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo);
+		r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, false);
 		if (r)
 			goto free_idr;
 	}
-- 
2.17.1


* [PATCH 5/5] drm/amdgpu: add graceful VM fault handling
From: Christian König @ 2019-06-28 12:18 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Felix.Kuehling-5C7GfCeVMHo

Next step towards HMM support. For now just silence the retry fault and
optionally redirect the request to the dummy page.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 59 ++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  4 ++
 3 files changed, 65 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a2d18fe46691..6f8c7cc07e8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -3122,3 +3122,62 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
 		}
 	}
 }
+
+/**
+ * amdgpu_vm_handle_fault - graceful handling of VM faults.
+ * @adev: amdgpu device pointer
+ * @pasid: PASID of the VM
+ * @addr: Address of the fault
+ *
+ * Try to gracefully handle a VM fault. Return true if the fault was handled and
+ * shouldn't be reported any more.
+ */
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
+			    uint64_t addr)
+{
+	struct amdgpu_ring *ring = &adev->sdma.instance[0].page;
+	uint64_t value, flags;
+	struct amdgpu_vm *vm;
+	long r;
+
+	if (!ring->sched.ready)
+		return false;
+
+	spin_lock(&adev->vm_manager.pasid_lock);
+	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+	spin_unlock(&adev->vm_manager.pasid_lock);
+
+	if (!vm)
+		return false;
+
+	r = amdgpu_bo_reserve(vm->root.base.bo, true);
+	if (r)
+		return false;
+
+	addr /= AMDGPU_GPU_PAGE_SIZE;
+	flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
+		AMDGPU_PTE_SYSTEM;
+
+	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
+		/* Redirect the access to the dummy page */
+		value = adev->dummy_page_addr;
+		flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
+			AMDGPU_PTE_WRITEABLE;
+	} else {
+		value = 0;
+	}
+
+	r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1,
+					flags, value, NULL, NULL);
+	if (r)
+		goto error;
+
+	r = amdgpu_vm_update_pdes(adev, vm, true);
+
+error:
+	amdgpu_bo_unreserve(vm->root.base.bo);
+	if (r < 0)
+		DRM_ERROR("Can't handle page fault (%ld)\n", r);
+
+	return false;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 7227c8da1a2a..d0a30614f70a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -408,6 +408,8 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
 
 void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
 			     struct amdgpu_task_info *task_info);
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
+			    uint64_t addr);
 
 void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index bd5d36944481..faf6d64cd4d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -324,6 +324,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 		return 1; /* This also prevents sending it to KFD */
 
 	/* If it's the first fault for this address, process it normally */
+	if (retry_fault && !in_interrupt() &&
+	    amdgpu_vm_handle_fault(adev, entry->pasid, addr))
+		return 1; /* This also prevents sending it to KFD */
+
 	if (!amdgpu_sriov_vf(adev)) {
 		status = RREG32(hub->vm_l2_pro_fault_status);
 		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
-- 
2.17.1


* Re: [PATCH 1/5] drm/amdgpu: allow direct submission in the VM backends
From: Chunming Zhou @ 2019-06-28 14:30 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Kuehling, Felix


On 2019/6/28 20:18, Christian König wrote:
> This allows us to update page tables directly while in a page fault.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      |  5 ++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  |  4 +++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 29 +++++++++++++--------
>   3 files changed, 27 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 489a162ca620..5941accea061 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -197,6 +197,11 @@ struct amdgpu_vm_update_params {
>   	 */
>   	struct amdgpu_vm *vm;
>   
> +	/**
> +	 * @direct: if changes should be made directly
> +	 */
> +	bool direct;
> +
>   	/**
>   	 * @pages_addr:
>   	 *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
> index 5222d165abfc..f94e4896079c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
> @@ -49,6 +49,10 @@ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, void *owner,
>   {
>   	int r;
>   
> +	/* Don't wait for anything during page fault */
> +	if (p->direct)
> +		return 0;
> +
>   	/* Wait for PT BOs to be idle. PTs share the same resv. object
>   	 * as the root PD BO
>   	 */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> index ddd181f5ed37..891d597063cb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> @@ -68,17 +68,17 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
>   	if (r)
>   		return r;
>   
> -	r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false);
> -	if (r)
> -		return r;
> +	p->num_dw_left = ndw;
> +
> +	if (p->direct)
> +		return 0;
>   
> -	r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv,
> -			     owner, false);
> +	r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false);
>   	if (r)
>   		return r;
>   
> -	p->num_dw_left = ndw;
> -	return 0;
> +	return amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv,
> +				owner, false);
>   }
>   
>   /**
> @@ -99,13 +99,21 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
>   	struct dma_fence *f;
>   	int r;
>   
> -	ring = container_of(p->vm->entity.rq->sched, struct amdgpu_ring, sched);
> +	if (p->direct)
> +		ring = p->adev->vm_manager.page_fault;
> +	else
> +		ring = container_of(p->vm->entity.rq->sched,
> +				    struct amdgpu_ring, sched);
>   
>   	WARN_ON(ib->length_dw == 0);
>   	amdgpu_ring_pad_ib(ring, ib);
>   	WARN_ON(ib->length_dw > p->num_dw_left);
> -	r = amdgpu_job_submit(p->job, &p->vm->entity,
> -			      AMDGPU_FENCE_OWNER_VM, &f);
> +
> +	if (p->direct)
> +		r = amdgpu_job_submit_direct(p->job, ring, &f);

When we use direct submission after initialization, we need to take care 
of the ring race condition, don't we? Am I missing anything?


-David

> +	else
> +		r = amdgpu_job_submit(p->job, &p->vm->entity,
> +				      AMDGPU_FENCE_OWNER_VM, &f);
>   	if (r)
>   		goto error;
>   
> @@ -120,7 +128,6 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
>   	return r;
>   }
>   
> -
>   /**
>    * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping
>    *

* Re: [PATCH 1/5] drm/amdgpu: allow direct submission in the VM backends
From: Christian König @ 2019-06-28 15:26 UTC (permalink / raw)
  To: Chunming Zhou, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Kuehling, Felix

On 28.06.19 at 16:30, Chunming Zhou wrote:
> On 2019/6/28 20:18, Christian König wrote:
>> This allows us to update page tables directly while in a page fault.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      |  5 ++++
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  |  4 +++
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 29 +++++++++++++--------
>>    3 files changed, 27 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 489a162ca620..5941accea061 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -197,6 +197,11 @@ struct amdgpu_vm_update_params {
>>    	 */
>>    	struct amdgpu_vm *vm;
>>    
>> +	/**
>> +	 * @direct: if changes should be made directly
>> +	 */
>> +	bool direct;
>> +
>>    	/**
>>    	 * @pages_addr:
>>    	 *
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
>> index 5222d165abfc..f94e4896079c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
>> @@ -49,6 +49,10 @@ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, void *owner,
>>    {
>>    	int r;
>>    
>> +	/* Don't wait for anything during page fault */
>> +	if (p->direct)
>> +		return 0;
>> +
>>    	/* Wait for PT BOs to be idle. PTs share the same resv. object
>>    	 * as the root PD BO
>>    	 */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>> index ddd181f5ed37..891d597063cb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>> @@ -68,17 +68,17 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
>>    	if (r)
>>    		return r;
>>    
>> -	r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false);
>> -	if (r)
>> -		return r;
>> +	p->num_dw_left = ndw;
>> +
>> +	if (p->direct)
>> +		return 0;
>>    
>> -	r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv,
>> -			     owner, false);
>> +	r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false);
>>    	if (r)
>>    		return r;
>>    
>> -	p->num_dw_left = ndw;
>> -	return 0;
>> +	return amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv,
>> +				owner, false);
>>    }
>>    
>>    /**
>> @@ -99,13 +99,21 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
>>    	struct dma_fence *f;
>>    	int r;
>>    
>> -	ring = container_of(p->vm->entity.rq->sched, struct amdgpu_ring, sched);
>> +	if (p->direct)
>> +		ring = p->adev->vm_manager.page_fault;
>> +	else
>> +		ring = container_of(p->vm->entity.rq->sched,
>> +				    struct amdgpu_ring, sched);
>>    
>>    	WARN_ON(ib->length_dw == 0);
>>    	amdgpu_ring_pad_ib(ring, ib);
>>    	WARN_ON(ib->length_dw > p->num_dw_left);
>> -	r = amdgpu_job_submit(p->job, &p->vm->entity,
>> -			      AMDGPU_FENCE_OWNER_VM, &f);
>> +
>> +	if (p->direct)
>> +		r = amdgpu_job_submit_direct(p->job, ring, &f);
> When we use direct submission after initialization, we need to take care
> of the ring race condition, don't we? Am I missing anything?

Direct submission can only be used by the page fault worker thread. And at 
least for now there is exactly one worker thread.
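
For illustration only, a rough sketch of that single-worker model. The work 
item, the fifo and the entry type below are made-up names rather than 
existing amdgpu code; only amdgpu_vm_handle_fault() comes from patch #5:

/* Hypothetical sketch: a single worker drains the fault queue, so all
 * direct submissions to vm_manager.page_fault are serialized by design. */
static void amdgpu_vm_fault_work(struct work_struct *work)
{
        /* vm_fault_work and vm_fault_fifo are assumed members here */
        struct amdgpu_device *adev =
                container_of(work, struct amdgpu_device, vm_fault_work);
        struct amdgpu_vm_fault_entry entry;

        while (kfifo_get(&adev->vm_fault_fifo, &entry))
                amdgpu_vm_handle_fault(adev, entry.pasid, entry.addr);
}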

Christian.

>
>
> -David
>
>> +	else
>> +		r = amdgpu_job_submit(p->job, &p->vm->entity,
>> +				      AMDGPU_FENCE_OWNER_VM, &f);
>>    	if (r)
>>    		goto error;
>>    
>> @@ -120,7 +128,6 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
>>    	return r;
>>    }
>>    
>> -
>>    /**
>>     * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping
>>     *


* Re: [PATCH 1/5] drm/amdgpu: allow direct submission in the VM backends
From: Kuehling, Felix @ 2019-07-02 19:35 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

This assumes that page tables are resident when a page fault is handled. 
I think it's possible that a page table gets evicted while a page fault 
is pending. Maybe not with graphics, but definitely with compute where 
waves can be preempted while waiting for a page fault. In that case the 
direct access would break.

Even with graphics I think it's still possible that new page tables need 
to be allocated to handle a page fault. When that happens, you need to 
wait for fences to let new page tables be validated and initialized.

Patch #2 deals with updating page directories. That pretty much implies 
that page tables have moved or been reallocated. Wouldn't that be a 
counter-indication for using direct page table updates? In other words, 
if you need to update page directories, a direct page table update is 
unsafe by definition.

Regards,
   Felix

On 2019-06-28 8:18 a.m., Christian König wrote:
> This allows us to update page tables directly while in a page fault.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      |  5 ++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  |  4 +++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 29 +++++++++++++--------
>   3 files changed, 27 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 489a162ca620..5941accea061 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -197,6 +197,11 @@ struct amdgpu_vm_update_params {
>   	 */
>   	struct amdgpu_vm *vm;
>   
> +	/**
> +	 * @direct: if changes should be made directly
> +	 */
> +	bool direct;
> +
>   	/**
>   	 * @pages_addr:
>   	 *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
> index 5222d165abfc..f94e4896079c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
> @@ -49,6 +49,10 @@ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, void *owner,
>   {
>   	int r;
>   
> +	/* Don't wait for anything during page fault */
> +	if (p->direct)
> +		return 0;
> +
>   	/* Wait for PT BOs to be idle. PTs share the same resv. object
>   	 * as the root PD BO
>   	 */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> index ddd181f5ed37..891d597063cb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> @@ -68,17 +68,17 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
>   	if (r)
>   		return r;
>   
> -	r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false);
> -	if (r)
> -		return r;
> +	p->num_dw_left = ndw;
> +
> +	if (p->direct)
> +		return 0;
>   
> -	r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv,
> -			     owner, false);
> +	r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false);
>   	if (r)
>   		return r;
>   
> -	p->num_dw_left = ndw;
> -	return 0;
> +	return amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv,
> +				owner, false);
>   }
>   
>   /**
> @@ -99,13 +99,21 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
>   	struct dma_fence *f;
>   	int r;
>   
> -	ring = container_of(p->vm->entity.rq->sched, struct amdgpu_ring, sched);
> +	if (p->direct)
> +		ring = p->adev->vm_manager.page_fault;
> +	else
> +		ring = container_of(p->vm->entity.rq->sched,
> +				    struct amdgpu_ring, sched);
>   
>   	WARN_ON(ib->length_dw == 0);
>   	amdgpu_ring_pad_ib(ring, ib);
>   	WARN_ON(ib->length_dw > p->num_dw_left);
> -	r = amdgpu_job_submit(p->job, &p->vm->entity,
> -			      AMDGPU_FENCE_OWNER_VM, &f);
> +
> +	if (p->direct)
> +		r = amdgpu_job_submit_direct(p->job, ring, &f);
> +	else
> +		r = amdgpu_job_submit(p->job, &p->vm->entity,
> +				      AMDGPU_FENCE_OWNER_VM, &f);
>   	if (r)
>   		goto error;
>   
> @@ -120,7 +128,6 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
>   	return r;
>   }
>   
> -
>   /**
>    * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping
>    *

* Re: [PATCH 1/5] drm/amdgpu: allow direct submission in the VM backends
From: Christian König @ 2019-07-16 13:36 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 02.07.19 at 21:35, Kuehling, Felix wrote:
> This assumes that page tables are resident when a page fault is handled.

Yeah, that is correct. I also haven't completely figured out how we can 
prevent the VM from being destroyed while handling the fault.

I mean it's perfectly possible that the process is killed while faults 
are still in the pipeline.

> I think it's possible that a page table gets evicted while a page fault
> is pending. Maybe not with graphics, but definitely with compute where
> waves can be preempted while waiting for a page fault. In that case the
> direct access would break.
>
> Even with graphics I think it's still possible that new page tables need
> to be allocated to handle a page fault. When that happens, you need to
> wait for fences to let new page tables be validated and initialized.

Yeah, the problem here is that when you wait on fences which in turn 
depend on your submission, you end up in a deadlock.

> Patch #2 deals with updating page directories. That pretty much implies
> that page tables have moved or been reallocated. Wouldn't that be a
> counter-indication for using direct page table updates? In other words,
>> if you need to update page directories, a direct page table update is
> unsafe by definition.

No, page tables are allocated because this is for handling invalid faults.

E.g. we get a fault for an address where nothing is mapped and just want 
to silence it.

I will try to implement something to at least avoid accessing a 
destructed VM.
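
Roughly, the lookup in amdgpu_vm_handle_fault() could be extended like the 
sketch below (untested, just to show the idea: pin the root PD and re-check 
the VM after taking the reservation):

        struct amdgpu_bo *root;

        spin_lock(&adev->vm_manager.pasid_lock);
        vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
        root = vm ? amdgpu_bo_ref(vm->root.base.bo) : NULL;
        spin_unlock(&adev->vm_manager.pasid_lock);
        if (!root)
                return false;

        r = amdgpu_bo_reserve(root, true);
        if (r) {
                amdgpu_bo_unref(&root);
                return false;
        }

        /* Double check that the VM wasn't destroyed while we waited for
         * the reservation. */
        spin_lock(&adev->vm_manager.pasid_lock);
        vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
        if (vm && vm->root.base.bo != root)
                vm = NULL;
        spin_unlock(&adev->vm_manager.pasid_lock);
        if (!vm) {
                amdgpu_bo_unreserve(root);
                amdgpu_bo_unref(&root);
                return false;
        }

        /* ... handle the fault as before, then unreserve and unref root ... */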

Regards,
Christian.

>
> Regards,
>     Felix
>
> On 2019-06-28 8:18 a.m., Christian König wrote:
>> This allows us to update page tables directly while in a page fault.
>>
>> Signed-off-by: Christian König<christian.koenig@amd.com>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      |  5 ++++
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  |  4 +++
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 29 +++++++++++++--------
>>    3 files changed, 27 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 489a162ca620..5941accea061 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -197,6 +197,11 @@ struct amdgpu_vm_update_params {
>>    	 */
>>    	struct amdgpu_vm *vm;
>>    
>> +	/**
>> +	 * @direct: if changes should be made directly
>> +	 */
>> +	bool direct;
>> +
>>    	/**
>>    	 * @pages_addr:
>>    	 *
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
>> index 5222d165abfc..f94e4896079c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
>> @@ -49,6 +49,10 @@ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, void *owner,
>>    {
>>    	int r;
>>    
>> +	/* Don't wait for anything during page fault */
>> +	if (p->direct)
>> +		return 0;
>> +
>>    	/* Wait for PT BOs to be idle. PTs share the same resv. object
>>    	 * as the root PD BO
>>    	 */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>> index ddd181f5ed37..891d597063cb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>> @@ -68,17 +68,17 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
>>    	if (r)
>>    		return r;
>>    
>> -	r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false);
>> -	if (r)
>> -		return r;
>> +	p->num_dw_left = ndw;
>> +
>> +	if (p->direct)
>> +		return 0;
>>    
>> -	r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv,
>> -			     owner, false);
>> +	r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false);
>>    	if (r)
>>    		return r;
>>    
>> -	p->num_dw_left = ndw;
>> -	return 0;
>> +	return amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv,
>> +				owner, false);
>>    }
>>    
>>    /**
>> @@ -99,13 +99,21 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
>>    	struct dma_fence *f;
>>    	int r;
>>    
>> -	ring = container_of(p->vm->entity.rq->sched, struct amdgpu_ring, sched);
>> +	if (p->direct)
>> +		ring = p->adev->vm_manager.page_fault;
>> +	else
>> +		ring = container_of(p->vm->entity.rq->sched,
>> +				    struct amdgpu_ring, sched);
>>    
>>    	WARN_ON(ib->length_dw == 0);
>>    	amdgpu_ring_pad_ib(ring, ib);
>>    	WARN_ON(ib->length_dw > p->num_dw_left);
>> -	r = amdgpu_job_submit(p->job, &p->vm->entity,
>> -			      AMDGPU_FENCE_OWNER_VM, &f);
>> +
>> +	if (p->direct)
>> +		r = amdgpu_job_submit_direct(p->job, ring, &f);
>> +	else
>> +		r = amdgpu_job_submit(p->job, &p->vm->entity,
>> +				      AMDGPU_FENCE_OWNER_VM, &f);
>>    	if (r)
>>    		goto error;
>>    
>> @@ -120,7 +128,6 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
>>    	return r;
>>    }
>>    
>> -
>>    /**
>>     * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping
>>     *


* Re: [PATCH 1/5] drm/amdgpu: allow direct submission in the VM backends
From: Kuehling, Felix @ 2019-07-16 16:40 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 2019-07-16 9:36 a.m., Christian König wrote:
> On 02.07.19 at 21:35, Kuehling, Felix wrote:
>> This assumes that page tables are resident when a page fault is handled.
>
> Yeah, that is correct. I also haven't completely figured out how we 
> can prevent the VM from being destroyed while handling the fault.

There are other cases I had in mind: Page tables can be evicted. For KFD 
processes which can be preempted with CWSR, it's possible that a wave 
that caused a page fault is preempted due to a page table eviction. That 
means, by the time the page fault is handled, the page table is no 
longer resident.


>
> I mean it's perfectly possible that the process is killed while faults 
> are still in the pipeline.
>
>> I think it's possible that a page table gets evicted while a page fault
>> is pending. Maybe not with graphics, but definitely with compute where
>> waves can be preempted while waiting for a page fault. In that case the
>> direct access would break.
>>
>> Even with graphics I think it's still possible that new page tables need
>> to be allocated to handle a page fault. When that happens, you need to
>> wait for fences to let new page tables be validated and initialized.
>
> Yeah, the problem here is that when you wait on fences which in turn 
> depend on your submission, you end up in a deadlock.
>
I think this implies that you have amdgpu_cs fences attached to page 
tables. I believe this is the fundamental issue that needs to be fixed. 
If you want to manage page tables in page fault interrupt handlers, you 
can't have command submission fences attached to your page tables. You 
can allow page tables to be evicted while the command submission is in 
progress. A page fault will fault it back in if it's needed. If you 
eliminate command submission fences on the page tables, you remove the 
potential for deadlocks.

But you do need fences on page tables related to the allocation and 
migration of page tables themselves. And your page table updates must 
wait for those fences. Therefore I think the whole approach of direct 
submission for page table updates is fundamentally broken.
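
As a concrete illustration of what "waiting for those fences" means with the 
helpers already used in this series, a page table update job would have to 
pick up the PT BO's own reservation fences before being committed, roughly 
(sketch only, reusing amdgpu_sync_resv() as in patch #1):

        /* sketch: make the update job wait for moves/clears of the PT BO
         * itself before committing it */
        r = amdgpu_sync_resv(p->adev, &p->job->sync, pt->tbo.resv,
                             AMDGPU_FENCE_OWNER_UNDEFINED, false);

And that is exactly the kind of dependency the direct submission path skips.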


>> Patch #2 deals with updating page directories. That pretty much implies
>> that page tables have moved or been reallocated. Wouldn't that be a
>> counter-indication for using direct page table updates? In other words,
>> if you need to update page directories, a direct page table update is
>> unsafe by definition.
>
> No, page tables are allocated because this is for handling invalid 
> faults.
>
> E.g. we get a fault for an address where nothing is mapped and just 
> want to silence it.

That's the scenario you have in mind now. But in the future we'll get 
page faults for addresses that have a valid VMA, and we want to use HMM 
to map that into the GPU page table.

Regards,
   Felix


>
> I will try to implement something to at least avoid accessing a 
> destructed VM.
>
> Regards,
> Christian.
>
>>
>> Regards,
>>     Felix
>>
>> On 2019-06-28 8:18 a.m., Christian König wrote:
>>> This allows us to update page tables directly while in a page fault.
>>>
>>> Signed-off-by: Christian König<christian.koenig@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      |  5 ++++
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  |  4 +++
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 29 
>>> +++++++++++++--------
>>>    3 files changed, 27 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index 489a162ca620..5941accea061 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -197,6 +197,11 @@ struct amdgpu_vm_update_params {
>>>         */
>>>        struct amdgpu_vm *vm;
>>>    +    /**
>>> +     * @direct: if changes should be made directly
>>> +     */
>>> +    bool direct;
>>> +
>>>        /**
>>>         * @pages_addr:
>>>         *
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
>>> index 5222d165abfc..f94e4896079c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
>>> @@ -49,6 +49,10 @@ static int amdgpu_vm_cpu_prepare(struct 
>>> amdgpu_vm_update_params *p, void *owner,
>>>    {
>>>        int r;
>>>    +    /* Don't wait for anything during page fault */
>>> +    if (p->direct)
>>> +        return 0;
>>> +
>>>        /* Wait for PT BOs to be idle. PTs share the same resv. object
>>>         * as the root PD BO
>>>         */
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>> index ddd181f5ed37..891d597063cb 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>> @@ -68,17 +68,17 @@ static int amdgpu_vm_sdma_prepare(struct 
>>> amdgpu_vm_update_params *p,
>>>        if (r)
>>>            return r;
>>>    -    r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, 
>>> false);
>>> -    if (r)
>>> -        return r;
>>> +    p->num_dw_left = ndw;
>>> +
>>> +    if (p->direct)
>>> +        return 0;
>>>    -    r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv,
>>> -                 owner, false);
>>> +    r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false);
>>>        if (r)
>>>            return r;
>>>    -    p->num_dw_left = ndw;
>>> -    return 0;
>>> +    return amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv,
>>> +                owner, false);
>>>    }
>>>       /**
>>> @@ -99,13 +99,21 @@ static int amdgpu_vm_sdma_commit(struct 
>>> amdgpu_vm_update_params *p,
>>>        struct dma_fence *f;
>>>        int r;
>>>    -    ring = container_of(p->vm->entity.rq->sched, struct 
>>> amdgpu_ring, sched);
>>> +    if (p->direct)
>>> +        ring = p->adev->vm_manager.page_fault;
>>> +    else
>>> +        ring = container_of(p->vm->entity.rq->sched,
>>> +                    struct amdgpu_ring, sched);
>>>           WARN_ON(ib->length_dw == 0);
>>>        amdgpu_ring_pad_ib(ring, ib);
>>>        WARN_ON(ib->length_dw > p->num_dw_left);
>>> -    r = amdgpu_job_submit(p->job, &p->vm->entity,
>>> -                  AMDGPU_FENCE_OWNER_VM, &f);
>>> +
>>> +    if (p->direct)
>>> +        r = amdgpu_job_submit_direct(p->job, ring, &f);
>>> +    else
>>> +        r = amdgpu_job_submit(p->job, &p->vm->entity,
>>> +                      AMDGPU_FENCE_OWNER_VM, &f);
>>>        if (r)
>>>            goto error;
>>>    @@ -120,7 +128,6 @@ static int amdgpu_vm_sdma_commit(struct 
>>> amdgpu_vm_update_params *p,
>>>        return r;
>>>    }
>>>    -
>>>    /**
>>>     * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping
>>>     *
>

* Re: [PATCH 1/5] drm/amdgpu: allow direct submission in the VM backends
From: Christian König @ 2019-07-17  9:10 UTC (permalink / raw)
  To: Kuehling, Felix, Koenig, Christian,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 16.07.19 at 18:40, Kuehling, Felix wrote:
> On 2019-07-16 9:36 a.m., Christian König wrote:
>> On 02.07.19 at 21:35, Kuehling, Felix wrote:
>>> This assumes that page tables are resident when a page fault is handled.
>> Yeah, that is correct. I also haven't completely figured out how we
>> can prevent the VM from being destroyed while handling the fault.
> There are other cases I had in mind: Page tables can be evicted. For KFD
> processes which can be preempted with CWSR, it's possible that a wave
> that caused a page fault is preempted due to a page table eviction. That
> means, by the time the page fault is handled, the page table is no
> longer resident.

This is a corner case we can handle later on. As long as the VM is still 
alive, just allocating page tables again should be sufficient for this.

>> I mean it's perfectly possible that the process is killed while faults
>> are still in the pipeline.
>>
>>> I think it's possible that a page table gets evicted while a page fault
>>> is pending. Maybe not with graphics, but definitely with compute where
>>> waves can be preempted while waiting for a page fault. In that case the
>>> direct access would break.
>>>
>>> Even with graphics I think it's still possible that new page tables need
>>> to be allocated to handle a page fault. When that happens, you need to
>>> wait for fences to let new page tables be validated and initialized.
>> Yeah, the problem here is that when you wait on fences which in turn
>> depend on your submission, you end up in a deadlock.
>>
> I think this implies that you have amdgpu_cs fences attached to page
> tables. I believe this is the fundamental issue that needs to be fixed.

We still need this because even with page faults the root PD can't be evicted.

What we can probably do is to split up the PDs/PTs into the root PD and 
everything else.

> If you want to manage page tables in page fault interrupt handlers, you
> can't have command submission fences attached to your page tables. You
> can allow page tables to be evicted while the command submission is in
> progress. A page fault will fault it back in if it's needed. If you
> eliminate command submission fences on the page tables, you remove the
> potential for deadlocks.

No, there is still a huge potential for deadlocks here.

In addition to the root PDs, you can have an MM submission which needs to 
wait for a compute submission to be finished.

If you then make your new allocation depend on the MM submission to be 
finished, you have a classic circular dependency and a deadlock at hand.

The only way around that is to allocate the new page tables with the 
no_wait_gpu flag set and so avoid having any dependencies on ongoing 
operations.
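
As a sketch, that could reuse the ttm_operation_ctx pattern we already have 
in amdgpu_vm_clear_bo(), just with no_wait_gpu set (the helper name here is 
made up):

/* Sketch: validate a newly allocated PT without picking up dependencies
 * on in-flight GPU work; returns an error instead of waiting. */
static int amdgpu_vm_validate_pt_nowait(struct amdgpu_bo *bo)
{
        struct ttm_operation_ctx ctx = {
                .interruptible = false,
                .no_wait_gpu = true,
        };

        return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}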

> But you do need fences on page tables related to the allocation and
> migration of page tables themselves. And your page table updates must
> wait for those fences. Therefore I think the whole approach of direct
> submission for page table updates is fundamentally broken.

For the reasons noted above you can't have any fences related to the 
allocation and migration on page tables.

What can happen later on is that we need to wait for a BO move to 
finish before we can update the page tables.

But I think that this is a completely different operation which 
shouldn't be handled in the fault handler.

In those cases the fault handler would just silence the retry fault and 
continue crunching on other faults.

As soon as the BO is moved in place we should update the page tables 
again using the normal SDMA scheduler entity.
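
With the reordered amdgpu_vm_bo_update_mapping() signature from patch #3, 
the two paths would look roughly like this (illustrative only, variables as 
in the patches):

/* in the fault worker: silence the retry fault right away, submitted
 * directly to the paging queue without waiting on anything */
r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1,
                                flags, value, NULL, NULL);

/* later, once the BO move has completed: the normal fenced update
 * through the per-VM scheduler entity */
r = amdgpu_vm_bo_update_mapping(adev, vm, false, exclusive, start, last,
                                flags, addr, pages_addr, &f);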

>>> Patch #2 deals with updating page directories. That pretty much implies
>>> that page tables have moved or been reallocated. Wouldn't that be a
>>> counter-indication for using direct page table updates? In other words,
>>> if you need to update page directories, a direct page table update is
>>> unsafe by definition.
>> No, page tables are allocated because this is for handling invalid
>> faults.
>>
>> E.g. we get a fault for an address where nothing is mapped and just
>> want to silence it.
> That's the scenario you have in mind now. But in the future we'll get
> page faults for addresses that have a valid VMA, and we want to use HMM
> to map that into the GPU page table.

Yeah, but we will still need to use the same infrastructure.

Avoiding waiting on ongoing operations is mandatory or otherwise we will 
immediately run into deadlocks.

Regards,
Christian.

>
> Regards,
>     Felix
>
>
>> I will try to implement something to at least avoid accessing a
>> destructed VM.
>>
>> Regards,
>> Christian.
>>
>>> Regards,
>>>      Felix
>>>
>>> On 2019-06-28 8:18 a.m., Christian König wrote:
>>>> This allows us to update page tables directly while in a page fault.
>>>>
>>>> Signed-off-by: Christian König<christian.koenig@amd.com>
>>>> ---
>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      |  5 ++++
>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  |  4 +++
>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 29
>>>> +++++++++++++--------
>>>>     3 files changed, 27 insertions(+), 11 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> index 489a162ca620..5941accea061 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> @@ -197,6 +197,11 @@ struct amdgpu_vm_update_params {
>>>>          */
>>>>         struct amdgpu_vm *vm;
>>>>     +    /**
>>>> +     * @direct: if changes should be made directly
>>>> +     */
>>>> +    bool direct;
>>>> +
>>>>         /**
>>>>          * @pages_addr:
>>>>          *
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
>>>> index 5222d165abfc..f94e4896079c 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
>>>> @@ -49,6 +49,10 @@ static int amdgpu_vm_cpu_prepare(struct
>>>> amdgpu_vm_update_params *p, void *owner,
>>>>     {
>>>>         int r;
>>>>     +    /* Don't wait for anything during page fault */
>>>> +    if (p->direct)
>>>> +        return 0;
>>>> +
>>>>         /* Wait for PT BOs to be idle. PTs share the same resv. object
>>>>          * as the root PD BO
>>>>          */
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>>> index ddd181f5ed37..891d597063cb 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>>> @@ -68,17 +68,17 @@ static int amdgpu_vm_sdma_prepare(struct
>>>> amdgpu_vm_update_params *p,
>>>>         if (r)
>>>>             return r;
>>>>     -    r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive,
>>>> false);
>>>> -    if (r)
>>>> -        return r;
>>>> +    p->num_dw_left = ndw;
>>>> +
>>>> +    if (p->direct)
>>>> +        return 0;
>>>>     -    r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv,
>>>> -                 owner, false);
>>>> +    r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false);
>>>>         if (r)
>>>>             return r;
>>>>     -    p->num_dw_left = ndw;
>>>> -    return 0;
>>>> +    return amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv,
>>>> +                owner, false);
>>>>     }
>>>>        /**
>>>> @@ -99,13 +99,21 @@ static int amdgpu_vm_sdma_commit(struct
>>>> amdgpu_vm_update_params *p,
>>>>         struct dma_fence *f;
>>>>         int r;
>>>>     -    ring = container_of(p->vm->entity.rq->sched, struct
>>>> amdgpu_ring, sched);
>>>> +    if (p->direct)
>>>> +        ring = p->adev->vm_manager.page_fault;
>>>> +    else
>>>> +        ring = container_of(p->vm->entity.rq->sched,
>>>> +                    struct amdgpu_ring, sched);
>>>>            WARN_ON(ib->length_dw == 0);
>>>>         amdgpu_ring_pad_ib(ring, ib);
>>>>         WARN_ON(ib->length_dw > p->num_dw_left);
>>>> -    r = amdgpu_job_submit(p->job, &p->vm->entity,
>>>> -                  AMDGPU_FENCE_OWNER_VM, &f);
>>>> +
>>>> +    if (p->direct)
>>>> +        r = amdgpu_job_submit_direct(p->job, ring, &f);
>>>> +    else
>>>> +        r = amdgpu_job_submit(p->job, &p->vm->entity,
>>>> +                      AMDGPU_FENCE_OWNER_VM, &f);
>>>>         if (r)
>>>>             goto error;
>>>>     @@ -120,7 +128,6 @@ static int amdgpu_vm_sdma_commit(struct
>>>> amdgpu_vm_update_params *p,
>>>>         return r;
>>>>     }
>>>>     -
>>>>     /**
>>>>      * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping
>>>>      *


* Re: [PATCH 1/5] drm/amdgpu: allow direct submission in the VM backends
       [not found]                 ` <5f35fc24-3c35-4fe8-6b54-bafb9c9f069e-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-07-17 20:31                   ` Kuehling, Felix
       [not found]                     ` <0a70b4ff-fe80-5f2a-1727-48abc3f71278-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Kuehling, Felix @ 2019-07-17 20:31 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 2019-07-17 5:10, Christian König wrote:
> Am 16.07.19 um 18:40 schrieb Kuehling, Felix:
>> On 2019-07-16 9:36 a.m., Christian König wrote:
>>> Am 02.07.19 um 21:35 schrieb Kuehling, Felix:
>>>> This assumes that page tables are resident when a page fault is 
>>>> handled.
>>> Yeah, that is correct. I also haven't completely figured out how we
>>> can prevent the VM from being destroyed while handling the fault.
>> There are other cases I had in mind: Page tables can be evicted. For KFD
>> processes which can be preempted with CWSR, it's possible that a wave
>> that caused a page fault is preempted due to a page table eviction. That
>> means, by the time the page fault is handled, the page table is no
>> longer resident.
>
> This is a corner case we can handle later on. As long as the VM is 
> still alive just allocating page tables again should be sufficient for 
> this.

Do you mean, instead of migrating page tables back, throwing them away 
and allocating a new one?

Also, this may be a corner case. But I feel you're limiting yourself to 
a small range of current use cases. I'm not convinced that the design 
you're building here will scale to future use cases for HMM updating 
page tables for random virtual addresses. I'm looking for a general 
solution that will work for those future use cases. Otherwise we'll end 
up having to rewrite this page-table-update-in-fault-handler code from 
scratch in a month or two.


>
>>> I mean it's perfectly possible that the process is killed while faults
>>> are still in the pipeline.
>>>
>>>> I think it's possible that a page table gets evicted while a page 
>>>> fault
>>>> is pending. Maybe not with graphics, but definitely with compute where
>>>> waves can be preempted while waiting for a page fault. In that case 
>>>> the
>>>> direct access would break.
>>>>
>>>> Even with graphics I think it's still possible that new page tables 
>>>> need
>>>> to be allocated to handle a page fault. When that happens, you need to
>>>> wait for fences to let new page tables be validated and initialized.
>>> Yeah, the problem here is that when you wait on fences which in turn
>>> depend on your submission you end up in a deadlock.
>>>
>> I think this implies that you have amdgpu_cs fences attached to page
>> tables. I believe this is the fundamental issue that needs to be fixed.
>
> We still need this cause even with page faults the root PD can't be 
> evicted.
>
> What we can probably do is to split up the PDs/PTs into the root PD 
> and everything else.

Yeah, the root PD always exists as long as the VM exists. Everything 
else can be created/destroyed/moved dynamically.


>
>> If you want to manage page tables in page fault interrupt handlers, you
>> can't have command submission fences attached to your page tables. You
>> can allow page tables to be evicted while the command submission is in
>> progress. A page fault will fault it back in if it's needed. If you
>> eliminate command submission fences on the page tables, you remove the
>> potential for deadlocks.
>
> No, there is still a huge potential for deadlocks here.
>
> Additional to the root PDs you can have a MM submission which needs to 
> wait for a compute submission to be finished.

I assume by MM you mean "memory manager", not "multi-media". What kind of 
MM submission specifically? It can't be a VM page table update, with my 
proposal to never add user CS fences to VM page table reservations. Are 
you talking about buffer moves? They could go on a separate scheduler 
entity from VM submissions, so they don't hold up VM updates. VM updates 
only need to wait for MM fences that affect the page table BOs themselves.


>
> If you then make your new allocation depend on the MM submission to be 
> finished you have a classical circle dependency and a deadlock at hand.

I don't see it. Allocate page table, wait for fence associated with that 
page table initialization, update PTEs. At no point do we depend on the 
user CS being stalled by the page fault. There is no user submission on 
the paging ring. Anything that has been scheduled on the paging ring has 
its dependencies satisfied. We may need separate scheduler entities 
(queues) for regular MM submissions that can depend on user fences and 
VM submissions that must not.
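
A rough sketch of that sequence (the helper names below are made up for 
illustration, only dma_fence_wait()/dma_fence_put() are the real API):

    /* Fault path that never syncs to user CS fences: allocate a PT,
     * wait only for that PT's own clear/init fence, then write the PTE
     * on the paging queue.
     */
    static int vm_fault_map_addr(struct amdgpu_vm *vm, u64 addr, u64 flags)
    {
        struct dma_fence *init_fence = NULL;
        struct amdgpu_bo *pt;
        int r;

        /* hypothetical: allocate and clear a PT without user fences */
        r = vm_fault_alloc_pt(vm, addr, &pt, &init_fence);
        if (r)
            return r;

        /* wait only for the PT clear job, never for user CS */
        if (init_fence) {
            r = dma_fence_wait(init_fence, false);
            dma_fence_put(init_fence);
            if (r)
                return r;
        }

        /* hypothetical: emit the PTE update on the paging queue */
        return vm_fault_update_pte(vm, addr, flags);
    }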


>
> The only way around that is to allocate the new page tables with the 
> no_wait_gpu flag set and so avoid having any dependencies on ongoing 
> operations.

We discussed this before. I suggested an emergency pool for page tables. 
That pool can have a limited size. If page tables don't have user fences 
on them, they can always be evicted, so we can always make room in this 
emergency pool.
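
Purely illustrative, none of these structures exist today, but something 
like this is what I mean by an emergency pool:

    /* Small pool of pre-cleared PT BOs the fault handler can take from
     * without waiting on anything; a worker refills it outside the
     * fault path.
     */
    struct pt_emergency_pool {
        spinlock_t lock;
        struct list_head free;
    };

    struct pt_pool_entry {
        struct list_head node;
        struct amdgpu_bo *bo;
    };

    static struct amdgpu_bo *pt_pool_get(struct pt_emergency_pool *pool)
    {
        struct amdgpu_bo *bo = NULL;
        struct pt_pool_entry *e;

        spin_lock(&pool->lock);
        if (!list_empty(&pool->free)) {
            e = list_first_entry(&pool->free, struct pt_pool_entry, node);
            list_del(&e->node);
            bo = e->bo;
            kfree(e);
        }
        spin_unlock(&pool->lock);

        return bo; /* NULL means the pool needs a refill first */
    }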


>
>> But you do need fences on page tables related to the allocation and
>> migration of page tables themselves. And your page table updates must
>> wait for those fences. Therefore I think the whole approach of direct
>> submission for page table updates is fundamentally broken.
>
> For the reasons noted above you can't have any fences related to the 
> allocation and migration on page tables.
>
> What can happen later on is that you need to wait for a BO move to 
> finish before we can update the page tables.

A page table update coming from a page fault handler should never have 
to wait for any BO move. The fact that there was a page fault means, 
someone is trying to access this memory right now.


>
>
> But I think that this is a completely different operation which 
> shouldn't be handled in the fault handler.

Right. If you have page table updates done to prepare for a CS, they can 
depend on user fences. Page table updates done as part of the page fault 
handler must not. Again, I think this could be handled by using separate 
scheduler entities to avoid false dependencies.
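
Roughly what I mean, as a sketch only (vm->direct_entity is an assumed 
new field, and I'm simply reusing the existing VM PTE run queues here):

    /* existing entity, used for CS-prep updates that may depend on
     * user fences
     */
    r = drm_sched_entity_init(&vm->entity,
                              adev->vm_manager.vm_pte_rqs,
                              adev->vm_manager.vm_pte_num_rqs, NULL);
    if (r)
        return r;

    /* assumed second entity, used only from the fault path; jobs queued
     * here must never sync to user CS fences
     */
    r = drm_sched_entity_init(&vm->direct_entity,
                              adev->vm_manager.vm_pte_rqs,
                              adev->vm_manager.vm_pte_num_rqs, NULL);
    if (r)
        drm_sched_entity_destroy(&vm->entity);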


>
> In those cases the fault handler would just silence the retry fault 
> and continue crunching on other faults.

I don't think that's the right approach. If you have a retry fault for a 
virtual address, it means you already have something running on the GPU 
accessing it. It can't be something that depends on an in-flight page 
table update, because the scheduler would not have emitted that to the 
ring. You either need to fill in a valid address, or if there is nothing 
mapped at that address (yet), treat it as an application error and 
convert it into a no-retry-fault which kills the application.

Regards,
   Felix


>
> As soon as the BO is moved in place we should update the page tables 
> again using the normal SDMA scheduler entity.
>
>>>> Patch #2 deals with updating page directories. That pretty much 
>>>> implies
>>>> that page tables have moved or been reallocated. Wouldn't that be a
>>>> counter-indication for using direct page table updates? In other 
>>>> words,
>>>> if you need to update page directories, a direct page table updates is
>>>> unsafe by definition.
>>> No, page tables are allocated because this is for handling invalid
>>> faults.
>>>
>>> E.g. we get a fault for an address where nothing is mapped and just
>>> want to silence it.
>> That's the scenario you have in mind now. But in the future we'll get
>> page faults for addresses that have a valid VMA, and we want to use HMM
>> to map that into the GPU page table.
>
> Yeah, but we will still need to use the same infrastructure.
>
> Avoiding waiting on ongoing operations is mandatory or otherwise we 
> will immediately run into deadlocks.
>
> Regards,
> Christian.
>
>>
>> Regards,
>>     Felix
>>
>>
>>> I will try to implement something to at least avoid accessing a
>>> destructed VM.
>>>
>>> Regards,
>>> Christian.
>>>
>>>> Regards,
>>>>      Felix
>>>>
>>>> On 2019-06-28 8:18 a.m., Christian König wrote:
>>>>> This allows us to update page tables directly while in a page fault.
>>>>>
>>>>> Signed-off-by: Christian König<christian.koenig@amd.com>
>>>>> ---
>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      |  5 ++++
>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  |  4 +++
>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 29
>>>>> +++++++++++++--------
>>>>>     3 files changed, 27 insertions(+), 11 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> index 489a162ca620..5941accea061 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> @@ -197,6 +197,11 @@ struct amdgpu_vm_update_params {
>>>>>          */
>>>>>         struct amdgpu_vm *vm;
>>>>>     +    /**
>>>>> +     * @direct: if changes should be made directly
>>>>> +     */
>>>>> +    bool direct;
>>>>> +
>>>>>         /**
>>>>>          * @pages_addr:
>>>>>          *
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
>>>>> index 5222d165abfc..f94e4896079c 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
>>>>> @@ -49,6 +49,10 @@ static int amdgpu_vm_cpu_prepare(struct
>>>>> amdgpu_vm_update_params *p, void *owner,
>>>>>     {
>>>>>         int r;
>>>>>     +    /* Don't wait for anything during page fault */
>>>>> +    if (p->direct)
>>>>> +        return 0;
>>>>> +
>>>>>         /* Wait for PT BOs to be idle. PTs share the same resv. 
>>>>> object
>>>>>          * as the root PD BO
>>>>>          */
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>>>> index ddd181f5ed37..891d597063cb 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>>>> @@ -68,17 +68,17 @@ static int amdgpu_vm_sdma_prepare(struct
>>>>> amdgpu_vm_update_params *p,
>>>>>         if (r)
>>>>>             return r;
>>>>>     -    r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive,
>>>>> false);
>>>>> -    if (r)
>>>>> -        return r;
>>>>> +    p->num_dw_left = ndw;
>>>>> +
>>>>> +    if (p->direct)
>>>>> +        return 0;
>>>>>     -    r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv,
>>>>> -                 owner, false);
>>>>> +    r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false);
>>>>>         if (r)
>>>>>             return r;
>>>>>     -    p->num_dw_left = ndw;
>>>>> -    return 0;
>>>>> +    return amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv,
>>>>> +                owner, false);
>>>>>     }
>>>>>        /**
>>>>> @@ -99,13 +99,21 @@ static int amdgpu_vm_sdma_commit(struct
>>>>> amdgpu_vm_update_params *p,
>>>>>         struct dma_fence *f;
>>>>>         int r;
>>>>>     -    ring = container_of(p->vm->entity.rq->sched, struct
>>>>> amdgpu_ring, sched);
>>>>> +    if (p->direct)
>>>>> +        ring = p->adev->vm_manager.page_fault;
>>>>> +    else
>>>>> +        ring = container_of(p->vm->entity.rq->sched,
>>>>> +                    struct amdgpu_ring, sched);
>>>>>            WARN_ON(ib->length_dw == 0);
>>>>>         amdgpu_ring_pad_ib(ring, ib);
>>>>>         WARN_ON(ib->length_dw > p->num_dw_left);
>>>>> -    r = amdgpu_job_submit(p->job, &p->vm->entity,
>>>>> -                  AMDGPU_FENCE_OWNER_VM, &f);
>>>>> +
>>>>> +    if (p->direct)
>>>>> +        r = amdgpu_job_submit_direct(p->job, ring, &f);
>>>>> +    else
>>>>> +        r = amdgpu_job_submit(p->job, &p->vm->entity,
>>>>> +                      AMDGPU_FENCE_OWNER_VM, &f);
>>>>>         if (r)
>>>>>             goto error;
>>>>>     @@ -120,7 +128,6 @@ static int amdgpu_vm_sdma_commit(struct
>>>>> amdgpu_vm_update_params *p,
>>>>>         return r;
>>>>>     }
>>>>>     -
>>>>>     /**
>>>>>      * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping
>>>>>      *
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/5] drm/amdgpu: allow direct submission in the VM backends
       [not found]                     ` <0a70b4ff-fe80-5f2a-1727-48abc3f71278-5C7GfCeVMHo@public.gmane.org>
@ 2019-07-18  8:47                       ` Christian König
       [not found]                         ` <005fc745-8c58-4f56-326f-b0ebc5b3a5e6-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Christian König @ 2019-07-18  8:47 UTC (permalink / raw)
  To: Kuehling, Felix, Koenig, Christian,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 17.07.19 um 22:31 schrieb Kuehling, Felix:
> On 2019-07-17 5:10, Christian König wrote:
>> Am 16.07.19 um 18:40 schrieb Kuehling, Felix:
>>> On 2019-07-16 9:36 a.m., Christian König wrote:
>>>> Am 02.07.19 um 21:35 schrieb Kuehling, Felix:
>>>>> This assumes that page tables are resident when a page fault is
>>>>> handled.
>>>> Yeah, that is correct. I also haven't completely figured out how we
>>>> can prevent the VM from being destroyed while handling the fault.
>>> There are other cases I had in mind: Page tables can be evicted. For KFD
>>> processes which can be preempted with CWSR, it's possible that a wave
>>> that caused a page fault is preempted due to a page table eviction. That
>>> means, by the time the page fault is handled, the page table is no
>>> longer resident.
>> This is a corner case we can handle later on. As long as the VM is
>> still alive just allocating page tables again should be sufficient for
>> this.
> Do you mean, instead of migrating page tables back, throwing them away
> and allocating a new one?

Yes, exactly that's the idea here.

> Also, this may be a corner case. But I feel you're limiting yourself to
> a small range of current use cases. I'm not convinced that the design
> you're building here will scale to future use cases for HMM updating
> page tables for random virtual addresses. I'm looking for a general
> solution that will work for those future use cases. Otherwise we'll end
> up having to rewrite this page-table-update-in-fault-handler code from
> scratch in a month or two.

Well, actually I'm mostly keeping HMM in mind. Filling page tables on 
demand is just a step in between.

I also want to support a use case where per-VM-BOs are swapped in and 
out on demand, but I think that we will just use that for testing.

>>>> I mean it's perfectly possible that the process is killed while faults
>>>> are still in the pipeline.
>>>>
>>>>> I think it's possible that a page table gets evicted while a page
>>>>> fault
>>>>> is pending. Maybe not with graphics, but definitely with compute where
>>>>> waves can be preempted while waiting for a page fault. In that case
>>>>> the
>>>>> direct access would break.
>>>>>
>>>>> Even with graphics I think it's still possible that new page tables
>>>>> need
>>>>> to be allocated to handle a page fault. When that happens, you need to
>>>>> wait for fences to let new page tables be validated and initialized.
>>>> Yeah, the problem here is that when you wait on fences which in turn
>>>> depend on your submission you end up in a deadlock.
>>>>
>>> I think this implies that you have amdgpu_cs fences attached to page
>>> tables. I believe this is the fundamental issue that needs to be fixed.
>> We still need this cause even with page faults the root PD can't be
>> evicted.
>>
>> What we can probably do is to split up the PDs/PTs into the root PD
>> and everything else.
> Yeah, the root PD always exists as long as the VM exists. Everything
> else can be created/destroyed/moved dynamically.

Yeah, exactly. The question is how do we want to keep the root PD in place?

We could still add the fence or we could pin it permanently.

>>> If you want to manage page tables in page fault interrupt handlers, you
>>> can't have command submission fences attached to your page tables. You
>>> can allow page tables to be evicted while the command submission is in
>>> progress. A page fault will fault it back in if it's needed. If you
>>> eliminate command submission fences on the page tables, you remove the
>>> potential for deadlocks.
>> No, there is still a huge potential for deadlocks here.
>>
>> Additional to the root PDs you can have a MM submission which needs to
>> wait for a compute submission to be finished.
> I assume by MM you mean "memory manager", not "multi-media". [SNIP]

Sorry I meant "multi-media", so just snipped your response.

What I want to say here is that I don't believe we can keep user CS 
fences out of memory management.

See, there can be submissions from engines which don't support or don't 
want to enable recoverable page faults, but which depend on submissions 
which do use recoverable page faults.

I mean it was your requirement that we have a mix of page fault and 
pre-filled page tables in the same process.

>> If you then make your new allocation depend on the MM submission to be
>> finished you have a classical circle dependency and a deadlock at hand.
> I don't see it. Allocate page table, wait for fence associated with that
> page table initialization, update PTEs. At no point do we depend on the
> user CS being stalled by the page fault. There is no user submission on
> the paging ring. Anything that has been scheduled on the paging ring has
> its dependencies satisfied.

Allocation is the main problem here. We need to make sure that we never 
ever depend on user CS when making memory allocation in the page fault 
handler.

> We may need separate scheduler entities
> (queues) for regular MM submissions that can depend on user fences and
> VM submissions that must not.

Yeah, thought about that as well but even then you need a way to note 
that you want to use this separate entity.

>> The only way around that is to allocate the new page tables with the
>> no_wait_gpu flag set and so avoid having any dependencies on ongoing
>> operations.
> We discussed this before. I suggested an emergency pool for page tables.
> That pool can have a limited size. If page tables don't have user fences
> on them, they can always be evicted, so we can always make room in this
> emergency pool.

You underestimate the problem. For page tables I can make sure rather 
easily that we can always allocate something, but ALL allocations made 
during page fault can't depend on user CS.

This means we need to use this for pages which are used for HMM based 
migration and for this you can't have a fixed pool.
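
For reference, the no_wait_gpu route mentioned above boils down to 
something like this when (re)validating a PT BO from the fault path (the 
wrapper is made up; ttm_operation_ctx, ttm_bo_validate() and 
amdgpu_bo_placement_from_domain() are the real interfaces):

    static int vm_fault_validate_pt(struct amdgpu_bo *pt)
    {
        struct ttm_operation_ctx ctx = {
            .interruptible = false,
            /* fail the validation instead of waiting on other work */
            .no_wait_gpu = true,
        };

        amdgpu_bo_placement_from_domain(pt, AMDGPU_GEM_DOMAIN_VRAM);
        return ttm_bo_validate(&pt->tbo, &pt->placement, &ctx);
    }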

>>> But you do need fences on page tables related to the allocation and
>>> migration of page tables themselves. And your page table updates must
>>> wait for those fences. Therefore I think the whole approach of direct
>>> submission for page table updates is fundamentally broken.
>> For the reasons noted above you can't have any fences related to the
>> allocation and migration on page tables.
>>
>> What can happen later on is that you need to wait for a BO move to
>> finish before we can update the page tables.
> A page table update coming from a page fault handler should never have
> to wait for any BO move. The fact that there was a page fault means,
> someone is trying to access this memory right now.

Well essentially with HMM we want to migrate memory to VRAM during the 
page fault handler, don't we?

>> But I think that this is a completely different operation which
>> shouldn't be handled in the fault handler.
> Right. If you have page table updates done to prepare for a CS, they can
> depend on user fences. Page table updates done as part of the page fault 
> handler must not. Again, I think this could be handled by using separate
> scheduler entities to avoid false dependencies.

Agreed, but using a separate entity means that we are sending the 
updates to a separate kernel thread first which then commits them to the 
ring buffer.

I was already a step further and thought that we can avoid this extra 
overhead and write directly to the ring buffer.

>> In those cases the fault handler would just silence the retry fault
>> and continue crunching on other faults.
> I don't think that's the right approach. If you have a retry fault for a
> virtual address, it means you already have something running on the GPU
> accessing it. It can't be something that depends on an in-flight page
> table update, because the scheduler would not have emitted that to the
> ring. You either need to fill in a valid address, or if there is nothing
> mapped at that address (yet), treat it as an application error and
> convert it into a no-retry-fault which kills the application.

Mhm, and what do we do if we want to migrate a page to VRAM in a fault 
handler?

I mean that's what HMM is mostly all about, isn't it?

Regards,
Christian.

>
> Regards,
>     Felix
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/5] drm/amdgpu: allow direct submission in the VM backends
       [not found]                         ` <005fc745-8c58-4f56-326f-b0ebc5b3a5e6-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-07-18 22:34                           ` Kuehling, Felix
       [not found]                             ` <c1a9298a-cbb6-7de8-ebda-16755280e8af-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Kuehling, Felix @ 2019-07-18 22:34 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 2019-07-18 4:47 a.m., Christian König wrote:

[snip]

>>> This is a corner case we can handle later on. As long as the VM is
>>> still alive just allocating page tables again should be sufficient for
>>> this.
>> Do you mean, instead of migrating page tables back, throwing them away
>> and allocating a new one?
>
> Yes, exactly that's the idea here. 

OK, I think that would work. The thing with direct submission and not 
waiting for fences is that you only have implicit synchronization with 
anything else that was also submitted directly to the same SDMA ring. So 
page table allocation and initialization would work fine. Migration 
would not, unless you have special cases for migration of page table BOs.

There is also a more general issue with direct submission that I found 
while you were on vacation. There is no locking of the ring buffer. So 
direct and non-direct submission to the same ring is broken at the moment.
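
To illustrate the race (heavily simplified, the real submissions go 
through amdgpu_ib_schedule()): both paths advance the same ring without 
a common lock, so their alloc/commit windows can interleave:

    /* scheduler worker, queued VM update */
    r = amdgpu_ring_alloc(ring, ndw);   /* reserves space on the ring */
    /* ... amdgpu_ring_write() emits the update packets ... */
    amdgpu_ring_commit(ring);

    /* page fault handler, direct submission to the same ring */
    r = amdgpu_ring_alloc(ring, ndw);   /* can grab the same window */
    /* ... packets interleave with or overwrite the ones above ... */
    amdgpu_ring_commit(ring);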


>>>>> I mean it's perfectly possible that the process is killed while 
>>>>> faults
>>>>> are still in the pipeline.
>>>>>
>>>>>> I think it's possible that a page table gets evicted while a page
>>>>>> fault
>>>>>> is pending. Maybe not with graphics, but definitely with compute 
>>>>>> where
>>>>>> waves can be preempted while waiting for a page fault. In that case
>>>>>> the
>>>>>> direct access would break.
>>>>>>
>>>>>> Even with graphics I think it's still possible that new page tables
>>>>>> need
>>>>>> to be allocated to handle a page fault. When that happens, you 
>>>>>> need to
>>>>>> wait for fences to let new page tables be validated and initialized.
>>>>> Yeah, the problem here is that when you wait on fences which in turn
>>>>> depend on your submission you end up in a deadlock.
>>>>>
>>>> I think this implies that you have amdgpu_cs fences attached to page
>>>> tables. I believe this is the fundamental issue that needs to be 
>>>> fixed.
>>> We still need this cause even with page faults the root PD can't be
>>> evicted.
>>>
>>> What we can probably do is to split up the PDs/PTs into the root PD
>>> and everything else.
>> Yeah, the root PD always exists as long as the VM exists. Everything
>> else can be created/destroyed/moved dynamically.
>
> Yeah, exactly. The question is how do we want to keep the root PD in 
> place?
>
> We could still add the fence or we could pin it permanently.

Right. I was thinking permanent pinning can lead to fragmentation. It 
would be good if those small root PDs could be moved around to make room 
for bigger contiguous allocations when needed.


>
>>>> If you want to manage page tables in page fault interrupt handlers, 
>>>> you
>>>> can't have command submission fences attached to your page tables. You
>>>> can allow page tables to be evicted while the command submission is in
>>>> progress. A page fault will fault it back in if it's needed. If you
>>>> eliminate command submission fences on the page tables, you remove the
>>>> potential for deadlocks.
>>> No, there is still a huge potential for deadlocks here.
>>>
>>> Additional to the root PDs you can have a MM submission which needs to
>>> wait for a compute submission to be finished.
>> I assume by MM you mean "memory manager", not "multi-media". [SNIP]
>
> Sorry I meant "multi-media", so just snipped your response.
>
> What I want to say here is that I don't believe we can keep user CS 
> fences out of memory management.
>
> See, there can be submissions from engines which don't support or don't 
> want to enable recoverable page faults, but which depend on submissions 
> which do use recoverable page faults.
>
> I mean it was your requirement that we have a mix of page fault and 
> pre-filled page tables in the same process.

Right. There are a few different requirements:

 1. Disable retry faults and instruction replay for a VM completely
    (better performance for ML shaders)
 2. Pre-fill page tables even when retry faults are enabled

In case #2 we could deal with page tables being evicted (not fenced). 
But MM engines that don't support retry faults would throw a wrench in 
this idea.


>
>>> If you then make your new allocation depend on the MM submission to be
>>> finished you have a classical circle dependency and a deadlock at hand.
>> I don't see it. Allocate page table, wait for fence associated with that
>> page table initialization, update PTEs. At no point do we depend on the
>> user CS being stalled by the page fault. There is no user submission on
>> the paging ring. Anything that has been scheduled on the paging ring has
>> its dependencies satisfied.
>
> Allocation is the main problem here. We need to make sure that we 
> never ever depend on user CS when making memory allocation in the page 
> fault handler.
>> We may need separate scheduler entities
>> (queues) for regular MM submissions that can depend on user fences and
>> VM submissions that must not.
>
> Yeah, thought about that as well but even then you need a way to note 
> that you want to use this separate entity.
>
>>> The only way around that is to allocate the new page tables with the
>>> no_wait_gpu flag set and so avoid having any dependencies on ongoing
>>> operations.
>> We discussed this before. I suggested an emergency pool for page tables.
>> That pool can have a limited size. If page tables don't have user fences
>> on them, they can always be evicted, so we can always make room in this
>> emergency pool.
>
> You underestimate the problem. For page tables I can make sure rather 
> easily that we can always allocate something, but ALL allocations made 
> during page fault can't depend on user CS.
>
> This means we need to use this for pages which are used for HMM based 
> migration and for this you can't have a fixed pool.

That is if you do migration in the page fault handler. We could do 
migration outside of the page fault handler. See below.


>
>>>> But you do need fences on page tables related to the allocation and
>>>> migration of page tables themselves. And your page table updates must
>>>> wait for those fences. Therefore I think the whole approach of direct
>>>> submission for page table updates is fundamentally broken.
>>> For the reasons noted above you can't have any fences related to the
>>> allocation and migration on page tables.
>>>
>>> What can happen later on is that you need to wait for a BO move to
>>> finish before we can update the page tables.
>> A page table update coming from a page fault handler should never have
>> to wait for any BO move. The fact that there was a page fault means,
>> someone is trying to access this memory right now.
>
> Well essentially with HMM we want to migrate memory to VRAM during the 
> page fault handler, don't we?

The page fault handler could inform migration decisions. But the 
migration itself doesn't need to be in the page fault handler. A 
migration can be on an application thread (e.g. triggered by an hbind or 
similar call) or on a worker thread that gets triggered by asynchonous 
events such as page faults, polling of performance counters, etc. A 
migration would trigger an MMU notifier that would invalidate the 
respective page table entries. Updating the page table entries with the 
new physical addresses would happen likely in a page fault handler after 
the migration is complete.

To minimize the amount of page faults while the migration is in progress 
we could also put PTEs in silent retry mode first. After the migration 
is complete we could update the PTEs with non-silent retry or with the 
new physical addresses.


>
>>> But I think that this is a completely different operation which
>>> shouldn't be handled in the fault handler.
>> Right. If you have page table updates done to prepare for a CS, they can
>> depend on user fences. Page table updates done as part of the page fault
>> handler must not. Again, I think this could be handled by using separate
>> scheduler entities to avoid false dependencies.
>
> Agreed, but using a separate entity means that we are sending the 
> updates to a separate kernel thread first which then commits them to 
> the ring buffer.
>
> I was already a step further and thought that we can avoid this extra 
> overhead and write directly to the ring buffer.

OK. If we can justify the assumptions made in the direct submission 
code. Basically we can only rely on implicit synchronization with other 
operations that use direct submission. That means all page table 
migration would have to use direct submission. Or we can't migrate page 
tables and instead reallocate them every time.


>
>
>>> In those cases the fault handler would just silence the retry fault
>>> and continue crunching on other faults.
>> I don't think that's the right approach. If you have a retry fault for a
>> virtual address, it means you already have something running on the GPU
>> accessing it. It can't be something that depends on an in-flight page
>> table update, because the scheduler would not have emitted that to the
>> ring. You either need to fill in a valid address, or if there is nothing
>> mapped at that address (yet), treat it as an application error and
>> convert it into a no-retry-fault which kills the application.
>
> Mhm, and what do we do if we want to migrate a page to VRAM in a fault 
> handler?
>
> I mean that's what HMM is mostly all about, isn't it?

Not really. I don't think we need or want to migrate in a page fault 
handler. It's the other way around. Page faults may be the result of a 
migration, because it would trigger an MMU notifier that invalidates 
PTEs. Page faults are GPU-specific. Migrations can affect page tables on 
multiple GPUs that access the same pages.

We'll need to deal with some interesting cases when multiple GPUs fault 
on the same page nearly at the same time. In that case it may be better 
to leave that page in system memory where it can be accessed by multiple 
GPUs, rather than bouncing it around between GPUs. With XGMI we'd have 
even more options to consider.

Initially I'm planning not to put too much intelligence into any 
automatic migration heuristics and rely more on hints from applications. 
Get the mechanics right first, then add policy and heuristics on top.

Regards,
   Felix


>
> Regards,
> Christian.
>
>>
>> Regards,
>>     Felix
>>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/5] drm/amdgpu: allow direct submission in the VM backends
       [not found]                             ` <c1a9298a-cbb6-7de8-ebda-16755280e8af-5C7GfCeVMHo@public.gmane.org>
@ 2019-07-19 11:08                               ` Christian König
  0 siblings, 0 replies; 15+ messages in thread
From: Christian König @ 2019-07-19 11:08 UTC (permalink / raw)
  To: Kuehling, Felix, Koenig, Christian,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 19.07.19 um 00:34 schrieb Kuehling, Felix:
> On 2019-07-18 4:47 a.m., Christian König wrote:
>
> [SNIP]
> There is also a more general issue with direct submission that I found
> while you were on vacation. There is no locking of the ring buffer. So
> direct and non-direct submission to the same ring is broken at the moment.

Correct. That's why I reserved an extra paging queue for this.

Thinking more about it, I've found that this is actually quite a killer 
argument for using a separate scheduler entity, because for updates 
outside of the fault handler we actually need to have everything in 
order here.

> [SNIP]
>>> Yeah, the root PD always exists as long as the VM exists. Everything
>>> else can be created/destroyed/moved dynamically.
>> Yeah, exactly. The question is how do we want to keep the root PD in
>> place?
>>
>> We could still add the fence or we could pin it permanently.
> Right. I was thinking permanent pinning can lead to fragmentation. It
> would be good if those small root PDs could be moved around to make room
> for bigger contiguous allocations when needed.

Yeah, exactly my fear as well.

It's just that adding the user CS fence to the root PD will definitely 
make it leak into the memory management.

>> [SNIP]
>> I mean it was your requirement that we have a mix of page fault and
>> pre-filled page tables in the same process.
> Right. There are a few different requirements:
>
>   1. Disable retry faults and instruction replay for a VM completely
>      (better performance for ML shaders)
>   2. Pre-fill page tables even when retry faults are enabled
>
> In case #2 we could deal with page tables being evicted (not fenced).
> But MM engines that don't support retry faults would throw a wrench in
> this idea.

Yeah, MM-engines still need to add all user CS fences to the BOs for the 
foreseeable future.

So there will be a mix of page fault and fenced handling in the MM 
anyway, the optional pre-filling doesn't make that any more complicated.

>>>> The only way around that is to allocate the new page tables with the
>>>> no_wait_gpu flag set and so avoid having any dependencies on ongoing
>>>> operations.
>>> We discussed this before. I suggested an emergency pool for page tables.
>>> That pool can have a limited size. If page tables don't have user fences
>>> on them, they can always be evicted, so we can always make room in this
>>> emergency pool.
>> You underestimate the problem. For page tables I can make sure rather
>> easily that we can always allocate something, but ALL allocations made
>> during page fault can't depend on user CS.
>>
>> This means we need to use this for pages which are used for HMM based
>> migration and for this you can't have a fixed pool.
> That is if you do migration in the page fault handler. We could do
> migration outside of the page fault handler. See below.

You can always have the case that a page fault will move back something 
which was evicted or swapped out.

>>>>> But you do need fences on page tables related to the allocation and
>>>>> migration of page tables themselves. And your page table updates must
>>>>> wait for those fences. Therefore I think the whole approach of direct
>>>>> submission for page table updates is fundamentally broken.
>>>> For the reasons noted above you can't have any fences related to the
>>>> allocation and migration on page tables.
>>>>
>>>> What can happen later on is that you need to wait for a BO move to
>>>> finish before we can update the page tables.
>>> A page table update coming from a page fault handler should never have
>>> to wait for any BO move. The fact that there was a page fault means,
>>> someone is trying to access this memory right now.
>> Well essentially with HMM we want to migrate memory to VRAM during the
>> page fault handler, don't we?
> The page fault handler could inform migration decisions. But the
> migration itself doesn't need to be in the page fault handler. A
> migration can be on an application thread (e.g. triggered by an hbind or
> similar call) or on a worker thread that gets triggered by asynchonous
> events such as page faults, polling of performance counters, etc. A
> migration would trigger an MMU notifier that would invalidate the
> respective page table entries. Updating the page table entries with the
> new physical addresses would happen likely in a page fault handler after
> the migration is complete.

Well, now you are limiting the use case to HMM. As I noted before, with 
both Vega and Navi based hw generations we probably can't always do this 
because of performance restrictions.

> To minimize the amount of page faults while the migration is in progress
> we could also put PTEs in silent retry mode first. After the migration
> is complete we could update the PTEs with non-silent retry or with the
> new physical addresses.

Way too risky. We can only do this if we can be absolutely sure that the 
migration is happening.

E.g. we can do this if we have a BO which is moving under driver 
control, but not with an MMU notifier. The handling here is simply not 
under driver control.

>>>> But I think that this is a completely different operation which
>>>> shouldn't be handled in the fault handler.
>>> Right. If you have page table updates done to prepare for a CS, they can
>>> depend on user fences. Page table updates done as part of the page fault
>>> handler must not. Again, I think this could be handled by using separate
>>> scheduler entities to avoid false dependencies.
>> Agreed, but using a separate entity means that we are sending the
>> updates to a separate kernel thread first which then commits them to
>> the ring buffer.
>>
>> I was already a step further and thought that we can avoid this extra
>> overhead and write directly to the ring buffer.
> OK. If we can justify the assumptions made in the direct submission
> code. Basically we can only rely on implicit synchronization with other
> operations that use direct submission. That means all page table
> migration would have to use direct submission. Or we can't migrate page
> tables and instead reallocate them every time.

Correct. Well I will drop this approach for now and just use a separate 
entity instead.

>>>> In those cases the fault handler would just silence the retry fault
>>>> and continue crunching on other faults.
>>> I don't think that's the right approach. If you have a retry fault for a
>>> virtual address, it means you already have something running on the GPU
>>> accessing it. It can't be something that depends on an in-flight page
>>> table update, because the scheduler would not have emitted that to the
>>> ring. You either need to fill in a valid address, or if there is nothing
>>> mapped at that address (yet), treat it as an application error and
>>> convert it into a no-retry-fault which kills the application.
>> Mhm, and what do we do if we want to migrate a page to VRAM in a fault
>> handler?
>>
>> I mean that's what HMM is mostly all about, isn't it?
> Not really. I don't think we need or want to migrate in a page fault
> handler. It's the other way around. Page faults may be the result of a
> migration, because it would trigger an MMU notifier that invalidates
> PTEs. Page faults are GPU-specific. Migrations can affect page tables on
> multiple GPUs that access the same pages.
>
> We'll need to deal with some interesting cases when multiple GPUs fault
> on the same page nearly at the same time. In that case it may be better
> to leave that page in system memory where it can be accessed by multiple
> GPUs, rather than bouncing it around between GPUs. With XGMI we'd have
> even more options to consider.
>
> Initially I'm planning not to put too much intelligence into any
> automatic migration heuristics and rely more on hints from applications.
> Get the mechanics right first, then add policy and heuristics on top.

Yeah, completely agree.

Christian.

>
> Regards,
>     Felix
>
>
>> Regards,
>> Christian.
>>
>>> Regards,
>>>      Felix
>>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2019-07-19 11:08 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-28 12:18 [PATCH 1/5] drm/amdgpu: allow direct submission in the VM backends Christian König
     [not found] ` <20190628121812.1400-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2019-06-28 12:18   ` [PATCH 2/5] drm/amdgpu: allow direct submission of PDE updates Christian König
2019-06-28 12:18   ` [PATCH 3/5] drm/amdgpu: allow direct submission of PTE updates Christian König
2019-06-28 12:18   ` [PATCH 4/5] drm/amdgpu: allow direct submission of clears Christian König
2019-06-28 12:18   ` [PATCH 5/5] drm/amdgpu: add graceful VM fault handling Christian König
2019-06-28 14:30   ` [PATCH 1/5] drm/amdgpu: allow direct submission in the VM backends Chunming Zhou
     [not found]     ` <58a7ff4f-47f2-42a6-f9af-ca28726535bf-5C7GfCeVMHo@public.gmane.org>
2019-06-28 15:26       ` Christian König
2019-07-02 19:35   ` Kuehling, Felix
     [not found]     ` <a6e1e599-d972-1dc0-e0e7-5201895dee35-5C7GfCeVMHo@public.gmane.org>
2019-07-16 13:36       ` Christian König
     [not found]         ` <0ad76267-d76d-c2fc-0397-7a696cc9926c-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-07-16 16:40           ` Kuehling, Felix
     [not found]             ` <18ebb588-efba-2c5c-3f0b-05ef9e3542ad-5C7GfCeVMHo@public.gmane.org>
2019-07-17  9:10               ` Christian König
     [not found]                 ` <5f35fc24-3c35-4fe8-6b54-bafb9c9f069e-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-07-17 20:31                   ` Kuehling, Felix
     [not found]                     ` <0a70b4ff-fe80-5f2a-1727-48abc3f71278-5C7GfCeVMHo@public.gmane.org>
2019-07-18  8:47                       ` Christian König
     [not found]                         ` <005fc745-8c58-4f56-326f-b0ebc5b3a5e6-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-07-18 22:34                           ` Kuehling, Felix
     [not found]                             ` <c1a9298a-cbb6-7de8-ebda-16755280e8af-5C7GfCeVMHo@public.gmane.org>
2019-07-19 11:08                               ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.