amd-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/5] drm/amdgpu: cleanup freeing delayed mappings
@ 2022-03-31  9:47 Christian König
  2022-03-31  9:47 ` [PATCH 2/5] drm/amdgpu: add separate last_clear tracking Christian König
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Christian König @ 2022-03-31  9:47 UTC (permalink / raw)
  To: bas, alexander.deucher, amd-gfx; +Cc: Christian König

Move the list_del() to a central place and use list_move for the rest.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 1cac90ee3318..11ebfef6962f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1215,6 +1215,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
 				   struct amdgpu_bo_va_mapping *mapping,
 				   struct dma_fence *fence)
 {
+	list_del(&mapping->list);
 	if (mapping->flags & AMDGPU_PTE_PRT)
 		amdgpu_vm_add_prt_cb(adev, fence);
 	kfree(mapping);
@@ -1269,7 +1270,6 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 	while (!list_empty(&vm->freed)) {
 		mapping = list_first_entry(&vm->freed,
 			struct amdgpu_bo_va_mapping, list);
-		list_del(&mapping->list);
 
 		if (vm->pte_support_ats &&
 		    mapping->start < AMDGPU_GMC_HOLE_START)
@@ -1597,13 +1597,12 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 			return -ENOENT;
 	}
 
-	list_del(&mapping->list);
 	amdgpu_vm_it_remove(mapping, &vm->va);
 	mapping->bo_va = NULL;
 	trace_amdgpu_vm_bo_unmap(bo_va, mapping);
 
 	if (valid)
-		list_add(&mapping->list, &vm->freed);
+		list_move(&mapping->list, &vm->freed);
 	else
 		amdgpu_vm_free_mapping(adev, vm, mapping,
 				       bo_va->last_pt_update);
@@ -1803,14 +1802,12 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
 	spin_unlock(&vm->invalidated_lock);
 
 	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
-		list_del(&mapping->list);
 		amdgpu_vm_it_remove(mapping, &vm->va);
 		mapping->bo_va = NULL;
 		trace_amdgpu_vm_bo_unmap(bo_va, mapping);
-		list_add(&mapping->list, &vm->freed);
+		list_move(&mapping->list, &vm->freed);
 	}
 	list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
-		list_del(&mapping->list);
 		amdgpu_vm_it_remove(mapping, &vm->va);
 		amdgpu_vm_free_mapping(adev, vm, mapping,
 				       bo_va->last_pt_update);
@@ -2258,7 +2255,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 			prt_fini_needed = false;
 		}
 
-		list_del(&mapping->list);
 		amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
 	}
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/5] drm/amdgpu: add separate last_clear tracking
  2022-03-31  9:47 [PATCH 1/5] drm/amdgpu: cleanup freeing delayed mappings Christian König
@ 2022-03-31  9:47 ` Christian König
  2022-03-31  9:47 ` [PATCH 3/5] drm/amdgpu: add last update fence to amdgpu_vm_bo_update Christian König
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 11+ messages in thread
From: Christian König @ 2022-03-31  9:47 UTC (permalink / raw)
  To: bas, alexander.deucher, amd-gfx; +Cc: Christian König

Add separate tracking of last cleared of freed mappings and use this as
dependency when freeing GEM handles.

Still heavily oversyncing, but better than undersyncing.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 14 +++++++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++-
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 2e16484bf606..4509a59d499a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -232,12 +232,12 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
 		fence = NULL;
 	}
 
-	r = amdgpu_vm_clear_freed(adev, vm, &fence);
+	r = amdgpu_vm_clear_freed(adev, vm, NULL);
+	fence = vm->last_clear;
 	if (r || !fence)
 		goto out_unlock;
 
 	amdgpu_bo_fence(bo, fence, true);
-	dma_fence_put(fence);
 
 out_unlock:
 	if (unlikely(r < 0))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 11ebfef6962f..3f73e6097e6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1286,15 +1286,17 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 		}
 	}
 
-	if (fence && f) {
+	if (!f)
+		return 0;
+
+	if (fence) {
 		dma_fence_put(*fence);
-		*fence = f;
-	} else {
-		dma_fence_put(f);
+		*fence = dma_fence_get(f);
 	}
 
+	swap(vm->last_clear, f);
+	dma_fence_put(f);
 	return 0;
-
 }
 
 /**
@@ -2084,6 +2086,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	else
 		vm->update_funcs = &amdgpu_vm_sdma_funcs;
 	vm->last_update = NULL;
+	vm->last_clear = NULL;
 	vm->last_unlocked = dma_fence_get_stub();
 
 	mutex_init(&vm->eviction_lock);
@@ -2198,6 +2201,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		vm->update_funcs = &amdgpu_vm_sdma_funcs;
 	}
 	dma_fence_put(vm->last_update);
+	dma_fence_put(vm->last_clear);
 	vm->last_update = NULL;
 	vm->is_compute_context = true;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 1a814fbffff8..be82ef170926 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -272,12 +272,13 @@ struct amdgpu_vm {
 
 	/* BO mappings freed, but not yet updated in the PT */
 	struct list_head	freed;
+	struct dma_fence	*last_clear;
 
 	/* BOs which are invalidated, has been updated in the PTs */
 	struct list_head        done;
 
 	/* contains the page directory */
-	struct amdgpu_vm_bo_base     root;
+	struct amdgpu_vm_bo_base root;
 	struct dma_fence	*last_update;
 
 	/* Scheduler entities for page table updates */
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 3/5] drm/amdgpu: add last update fence to amdgpu_vm_bo_update
  2022-03-31  9:47 [PATCH 1/5] drm/amdgpu: cleanup freeing delayed mappings Christian König
  2022-03-31  9:47 ` [PATCH 2/5] drm/amdgpu: add separate last_clear tracking Christian König
@ 2022-03-31  9:47 ` Christian König
  2022-03-31  9:47 ` [PATCH 4/5] drm/amdgpu: add last_update to amdgpu_vm_update_pdes Christian König
  2022-03-31  9:47 ` [PATCH 5/5] drm/amdgpu: allow specifying a syncobj for VM map operations Christian König
  3 siblings, 0 replies; 11+ messages in thread
From: Christian König @ 2022-03-31  9:47 UTC (permalink / raw)
  To: bas, alexander.deucher, amd-gfx; +Cc: Christian König

Allows separate tracking of VM updates.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c           |  6 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c          |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c           | 14 ++++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h           |  5 ++---
 5 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 8b14c55a0793..57b521bb1eec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1104,7 +1104,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
 		return ret;
 
 	/* Update the page tables  */
-	ret = amdgpu_vm_bo_update(adev, bo_va, false);
+	ret = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
 	if (ret) {
 		pr_err("amdgpu_vm_bo_update failed\n");
 		return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index ea28942b0ede..50fddd6715db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -806,7 +806,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
+	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false, NULL);
 	if (r)
 		return r;
 
@@ -817,7 +817,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 	if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
 		bo_va = fpriv->csa_va;
 		BUG_ON(!bo_va);
-		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
 		if (r)
 			return r;
 
@@ -836,7 +836,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 		if (bo_va == NULL)
 			continue;
 
-		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
 		if (r)
 			return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 4509a59d499a..3f84eedb4d96 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -618,7 +618,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 
 	if (operation == AMDGPU_VA_OP_MAP ||
 	    operation == AMDGPU_VA_OP_REPLACE) {
-		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
 		if (r)
 			goto error;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 3f73e6097e6c..05024999556b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -992,6 +992,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
  * @adev: amdgpu_device pointer
  * @bo_va: requested BO and VM object
  * @clear: if true clear the entries
+ * @last_update: the last update operation
  *
  * Fill in the page table entries for @bo_va.
  *
@@ -999,14 +1000,13 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
  * 0 for success, -EINVAL for failure.
  */
 int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
-			bool clear)
+			bool clear, struct dma_fence **last_update)
 {
 	struct amdgpu_bo *bo = bo_va->base.bo;
 	struct amdgpu_vm *vm = bo_va->base.vm;
 	struct amdgpu_bo_va_mapping *mapping;
 	dma_addr_t *pages_addr = NULL;
 	struct ttm_resource *mem;
-	struct dma_fence **last_update;
 	bool flush_tlb = clear;
 	struct dma_resv *resv;
 	uint64_t vram_base;
@@ -1049,8 +1049,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 		vram_base = 0;
 	}
 
-	if (clear || (bo && bo->tbo.base.resv ==
-		      vm->root.bo->tbo.base.resv))
+	if (last_update)
+		dma_resv_assert_held(bo->tbo.base.resv);
+	else if (clear || (bo && bo->tbo.base.resv ==
+			   vm->root.bo->tbo.base.resv))
 		last_update = &vm->last_update;
 	else
 		last_update = &bo_va->last_pt_update;
@@ -1322,7 +1324,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 
 	list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
 		/* Per VM BOs never need to bo cleared in the page tables */
-		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
 		if (r)
 			return r;
 	}
@@ -1341,7 +1343,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 		else
 			clear = true;
 
-		r = amdgpu_vm_bo_update(adev, bo_va, clear);
+		r = amdgpu_vm_bo_update(adev, bo_va, clear, NULL);
 		if (r)
 			return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index be82ef170926..3c7df1b8e3c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -409,9 +409,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			   uint64_t flags, uint64_t offset, uint64_t vram_base,
 			   struct ttm_resource *res, dma_addr_t *pages_addr,
 			   struct dma_fence **fence);
-int amdgpu_vm_bo_update(struct amdgpu_device *adev,
-			struct amdgpu_bo_va *bo_va,
-			bool clear);
+int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
+			bool clear, struct dma_fence **last_update);
 bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
 void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 			     struct amdgpu_bo *bo, bool evicted);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 4/5] drm/amdgpu: add last_update to amdgpu_vm_update_pdes
  2022-03-31  9:47 [PATCH 1/5] drm/amdgpu: cleanup freeing delayed mappings Christian König
  2022-03-31  9:47 ` [PATCH 2/5] drm/amdgpu: add separate last_clear tracking Christian König
  2022-03-31  9:47 ` [PATCH 3/5] drm/amdgpu: add last update fence to amdgpu_vm_bo_update Christian König
@ 2022-03-31  9:47 ` Christian König
  2022-04-06  2:26   ` Bas Nieuwenhuizen
  2022-03-31  9:47 ` [PATCH 5/5] drm/amdgpu: allow specifying a syncobj for VM map operations Christian König
  3 siblings, 1 reply; 11+ messages in thread
From: Christian König @ 2022-03-31  9:47 UTC (permalink / raw)
  To: bas, alexander.deucher, amd-gfx; +Cc: Christian König

Allows separate tracking of VM updates.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c           | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c          | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c           | 9 +++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h           | 4 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c             | 2 +-
 6 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 57b521bb1eec..92a6ca415ab8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -410,7 +410,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
 	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
 	int ret;
 
-	ret = amdgpu_vm_update_pdes(adev, vm, false);
+	ret = amdgpu_vm_update_pdes(adev, vm, false, NULL);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 50fddd6715db..87daa75b57c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -849,7 +849,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	r = amdgpu_vm_update_pdes(adev, vm, false);
+	r = amdgpu_vm_update_pdes(adev, vm, false, NULL);
 	if (r)
 		return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 3f84eedb4d96..9cdfee67efeb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -623,7 +623,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 			goto error;
 	}
 
-	r = amdgpu_vm_update_pdes(adev, vm, false);
+	r = amdgpu_vm_update_pdes(adev, vm, false, NULL);
 
 error:
 	if (r && r != -ERESTARTSYS)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 05024999556b..3391256e2448 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -725,14 +725,15 @@ uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
  * @adev: amdgpu_device pointer
  * @vm: requested vm
  * @immediate: submit immediately to the paging queue
+ * @last:update: optional returned last update
  *
  * Makes sure all directories are up to date.
  *
  * Returns:
  * 0 for success, error for failure.
  */
-int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
-			  struct amdgpu_vm *vm, bool immediate)
+int amdgpu_vm_update_pdes(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			  bool immediate, struct dma_fence **last_update)
 {
 	struct amdgpu_vm_update_params params;
 	struct amdgpu_vm_bo_base *entry;
@@ -759,7 +760,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
 			goto error;
 	}
 
-	r = vm->update_funcs->commit(&params, &vm->last_update);
+	r = vm->update_funcs->commit(&params, last_update ? : &vm->last_update);
 	if (r)
 		goto error;
 
@@ -2529,7 +2530,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
 	if (r)
 		goto error_unlock;
 
-	r = amdgpu_vm_update_pdes(adev, vm, true);
+	r = amdgpu_vm_update_pdes(adev, vm, true, NULL);
 
 error_unlock:
 	amdgpu_bo_unreserve(root);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 3c7df1b8e3c9..692ec7b51ac2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -394,8 +394,8 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			      int (*callback)(void *p, struct amdgpu_bo *bo),
 			      void *param);
 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
-int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
-			  struct amdgpu_vm *vm, bool immediate);
+int amdgpu_vm_update_pdes(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			  bool immediate, struct dma_fence **last_update);
 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			  struct amdgpu_vm *vm,
 			  struct dma_fence **fence);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 907b02045824..74824e3806d5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1294,7 +1294,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
 		last_start = prange->start + i + 1;
 	}
 
-	r = amdgpu_vm_update_pdes(adev, vm, false);
+	r = amdgpu_vm_update_pdes(adev, vm, false, NULL);
 	if (r) {
 		pr_debug("failed %d to update directories 0x%lx\n", r,
 			 prange->start);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 5/5] drm/amdgpu: allow specifying a syncobj for VM map operations
  2022-03-31  9:47 [PATCH 1/5] drm/amdgpu: cleanup freeing delayed mappings Christian König
                   ` (2 preceding siblings ...)
  2022-03-31  9:47 ` [PATCH 4/5] drm/amdgpu: add last_update to amdgpu_vm_update_pdes Christian König
@ 2022-03-31  9:47 ` Christian König
  2022-04-04  1:32   ` Bas Nieuwenhuizen
  3 siblings, 1 reply; 11+ messages in thread
From: Christian König @ 2022-03-31  9:47 UTC (permalink / raw)
  To: bas, alexander.deucher, amd-gfx; +Cc: Christian König

To improve synchronization of command submissions with page table updates RADV
wants to manually wait for the updates to be completed without affecting
parallel submissions.

Implement this by allowing to specify a drm_sync_obj handle and a timeline
point for the GEM_VA IOCTL.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 79 ++++++++++++++++++++-----
 include/uapi/drm/amdgpu_drm.h           |  5 +-
 2 files changed, 67 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 9cdfee67efeb..bf0092f629f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -33,6 +33,7 @@
 
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_drv.h>
+#include <drm/drm_syncobj.h>
 #include <drm/drm_gem_ttm_helper.h>
 
 #include "amdgpu.h"
@@ -598,6 +599,7 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
  * @vm: vm to update
  * @bo_va: bo_va to update
  * @operation: map, unmap or clear
+ * @last_update: optional pointer to a dma_fence for the last VM update
  *
  * Update the bo_va directly after setting its address. Errors are not
  * vital here, so they are not reported back to userspace.
@@ -605,20 +607,21 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
 static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 				    struct amdgpu_vm *vm,
 				    struct amdgpu_bo_va *bo_va,
-				    uint32_t operation)
+				    uint32_t operation,
+				    struct dma_fence **last_update)
 {
 	int r;
 
 	if (!amdgpu_vm_ready(vm))
 		return;
 
-	r = amdgpu_vm_clear_freed(adev, vm, NULL);
+	r = amdgpu_vm_clear_freed(adev, vm, last_update);
 	if (r)
 		goto error;
 
 	if (operation == AMDGPU_VA_OP_MAP ||
 	    operation == AMDGPU_VA_OP_REPLACE) {
-		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
+		r = amdgpu_vm_bo_update(adev, bo_va, false, last_update);
 		if (r)
 			goto error;
 	}
@@ -671,6 +674,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 	struct drm_gem_object *gobj;
 	struct amdgpu_device *adev = drm_to_adev(dev);
 	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	struct dma_fence *fence = dma_fence_get_stub();
+	struct dma_fence_chain *chain = NULL;
+	struct drm_syncobj *syncobj = NULL;
 	struct amdgpu_bo *abo;
 	struct amdgpu_bo_va *bo_va;
 	struct amdgpu_bo_list_entry vm_pd;
@@ -714,17 +720,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	switch (args->operation) {
-	case AMDGPU_VA_OP_MAP:
-	case AMDGPU_VA_OP_UNMAP:
-	case AMDGPU_VA_OP_CLEAR:
-	case AMDGPU_VA_OP_REPLACE:
-		break;
-	default:
-		dev_dbg(dev->dev, "unsupported operation %d\n",
-			args->operation);
-		return -EINVAL;
-	}
+	/* For debugging delay all VM updates till CS time */
+	if (!amdgpu_vm_debug)
+		args->flags |= AMDGPU_VM_DELAY_UPDATE;
 
 	INIT_LIST_HEAD(&list);
 	INIT_LIST_HEAD(&duplicates);
@@ -763,6 +761,30 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 		bo_va = NULL;
 	}
 
+	if (args->syncobj) {
+		syncobj = drm_syncobj_find(filp, args->syncobj);
+		if (!syncobj) {
+			r = -EINVAL;
+			goto error_backoff;
+		}
+
+		if (args->timeline_point) {
+			chain = dma_fence_chain_alloc();
+			if (!chain) {
+				r = -ENOMEM;
+				goto error_put_syncobj;
+			}
+		}
+
+		/*
+		 * Update the VM once before to make sure there are no other
+		 * pending updates.
+		 */
+		if (!(args->flags & AMDGPU_VM_DELAY_UPDATE))
+			amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
+						args->operation, NULL);
+	}
+
 	switch (args->operation) {
 	case AMDGPU_VA_OP_MAP:
 		va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
@@ -786,17 +808,42 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 					     va_flags);
 		break;
 	default:
+		dev_dbg(dev->dev, "unsupported operation %d\n",
+			args->operation);
+		r = -EINVAL;
 		break;
 	}
-	if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
+	if (r)
+		goto error_free_chain;
+
+	if (!(args->flags & AMDGPU_VM_DELAY_UPDATE))
 		amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
-					args->operation);
+					args->operation, syncobj ?
+					&fence : NULL);
+
+	if (syncobj) {
+		if (chain) {
+			drm_syncobj_add_point(syncobj, chain, fence,
+					      args->timeline_point);
+			chain = NULL;
+		} else {
+			drm_syncobj_replace_fence(syncobj, fence);
+		}
+	}
+
+error_free_chain:
+	dma_fence_chain_free(chain);
+
+error_put_syncobj:
+	if (syncobj)
+		drm_syncobj_put(syncobj);
 
 error_backoff:
 	ttm_eu_backoff_reservation(&ticket, &list);
 
 error_unref:
 	drm_gem_object_put(gobj);
+	dma_fence_put(fence);
 	return r;
 }
 
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 1d65c1fbc4ec..f84b5f2c817c 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -533,7 +533,8 @@ struct drm_amdgpu_gem_op {
 struct drm_amdgpu_gem_va {
 	/** GEM object handle */
 	__u32 handle;
-	__u32 _pad;
+	/** Optional DRM Syncobj to signal when operation completes */
+	__u32 syncobj;
 	/** AMDGPU_VA_OP_* */
 	__u32 operation;
 	/** AMDGPU_VM_PAGE_* */
@@ -544,6 +545,8 @@ struct drm_amdgpu_gem_va {
 	__u64 offset_in_bo;
 	/** Specify mapping size. Must be correctly aligned. */
 	__u64 map_size;
+	/** Optional Syncobj timeline point to signal */
+	__u64 timeline_point;
 };
 
 #define AMDGPU_HW_IP_GFX          0
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 5/5] drm/amdgpu: allow specifying a syncobj for VM map operations
  2022-03-31  9:47 ` [PATCH 5/5] drm/amdgpu: allow specifying a syncobj for VM map operations Christian König
@ 2022-04-04  1:32   ` Bas Nieuwenhuizen
  2022-04-04  6:43     ` Christian König
  0 siblings, 1 reply; 11+ messages in thread
From: Bas Nieuwenhuizen @ 2022-04-04  1:32 UTC (permalink / raw)
  To: Christian König
  Cc: Alex Deucher, Christian König, amd-gfx mailing list

Hi Christian,

Couple of concerns here:

1) This is missing the option to wait on a syncobj before executing the VA op.
2) There are no mechanisms to disable implicit sync yet.
3) in radv we typically signal multiple syncobj, so it would be nice
if we could have that capability here too. Technically we can use the
transfer ioctls, but it is kinda annoying.

I had https://github.com/BNieuwenhuizen/linux/commit/d8a1cce0c4c5c87522ae8866faf4f67be7189ef0
+ radv implementation before we ended up in the situation of waits not
being a doable thing.

For (1) we can emulate in userspace by waiting for all syncobj to
signal first, but I'm concerned that will result in GPU bubbles due to
CPU work.  To test this out I'm starting to hook up more implicit sync
disabling stuff (starting with the submissions themselves, WIP at
https://github.com/BNieuwenhuizen/linux/tree/no-implicit-sync), which
is why you're seeing some random comments on your dma resv usage
series coming your way.

- Bas


On Thu, Mar 31, 2022 at 11:47 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> To improve synchronization of command submissions with page table updates RADV
> wants to manually wait for the updates to be completed without affecting
> parallel submissions.
>
> Implement this by allowing to specify a drm_sync_obj handle and a timeline
> point for the GEM_VA IOCTL.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 79 ++++++++++++++++++++-----
>  include/uapi/drm/amdgpu_drm.h           |  5 +-
>  2 files changed, 67 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 9cdfee67efeb..bf0092f629f9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -33,6 +33,7 @@
>
>  #include <drm/amdgpu_drm.h>
>  #include <drm/drm_drv.h>
> +#include <drm/drm_syncobj.h>
>  #include <drm/drm_gem_ttm_helper.h>
>
>  #include "amdgpu.h"
> @@ -598,6 +599,7 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
>   * @vm: vm to update
>   * @bo_va: bo_va to update
>   * @operation: map, unmap or clear
> + * @last_update: optional pointer to a dma_fence for the last VM update
>   *
>   * Update the bo_va directly after setting its address. Errors are not
>   * vital here, so they are not reported back to userspace.
> @@ -605,20 +607,21 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
>  static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>                                     struct amdgpu_vm *vm,
>                                     struct amdgpu_bo_va *bo_va,
> -                                   uint32_t operation)
> +                                   uint32_t operation,
> +                                   struct dma_fence **last_update)
>  {
>         int r;
>
>         if (!amdgpu_vm_ready(vm))
>                 return;
>
> -       r = amdgpu_vm_clear_freed(adev, vm, NULL);
> +       r = amdgpu_vm_clear_freed(adev, vm, last_update);
>         if (r)
>                 goto error;
>
>         if (operation == AMDGPU_VA_OP_MAP ||
>             operation == AMDGPU_VA_OP_REPLACE) {
> -               r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
> +               r = amdgpu_vm_bo_update(adev, bo_va, false, last_update);
>                 if (r)
>                         goto error;
>         }
> @@ -671,6 +674,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>         struct drm_gem_object *gobj;
>         struct amdgpu_device *adev = drm_to_adev(dev);
>         struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +       struct dma_fence *fence = dma_fence_get_stub();
> +       struct dma_fence_chain *chain = NULL;
> +       struct drm_syncobj *syncobj = NULL;
>         struct amdgpu_bo *abo;
>         struct amdgpu_bo_va *bo_va;
>         struct amdgpu_bo_list_entry vm_pd;
> @@ -714,17 +720,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>                 return -EINVAL;
>         }
>
> -       switch (args->operation) {
> -       case AMDGPU_VA_OP_MAP:
> -       case AMDGPU_VA_OP_UNMAP:
> -       case AMDGPU_VA_OP_CLEAR:
> -       case AMDGPU_VA_OP_REPLACE:
> -               break;
> -       default:
> -               dev_dbg(dev->dev, "unsupported operation %d\n",
> -                       args->operation);
> -               return -EINVAL;
> -       }
> +       /* For debugging delay all VM updates till CS time */
> +       if (!amdgpu_vm_debug)
> +               args->flags |= AMDGPU_VM_DELAY_UPDATE;
>
>         INIT_LIST_HEAD(&list);
>         INIT_LIST_HEAD(&duplicates);
> @@ -763,6 +761,30 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>                 bo_va = NULL;
>         }
>
> +       if (args->syncobj) {
> +               syncobj = drm_syncobj_find(filp, args->syncobj);
> +               if (!syncobj) {
> +                       r = -EINVAL;
> +                       goto error_backoff;
> +               }
> +
> +               if (args->timeline_point) {
> +                       chain = dma_fence_chain_alloc();
> +                       if (!chain) {
> +                               r = -ENOMEM;
> +                               goto error_put_syncobj;
> +                       }
> +               }
> +
> +               /*
> +                * Update the VM once before to make sure there are no other
> +                * pending updates.
> +                */
> +               if (!(args->flags & AMDGPU_VM_DELAY_UPDATE))
> +                       amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
> +                                               args->operation, NULL);
> +       }
> +
>         switch (args->operation) {
>         case AMDGPU_VA_OP_MAP:
>                 va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
> @@ -786,17 +808,42 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>                                              va_flags);
>                 break;
>         default:
> +               dev_dbg(dev->dev, "unsupported operation %d\n",
> +                       args->operation);
> +               r = -EINVAL;
>                 break;
>         }
> -       if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
> +       if (r)
> +               goto error_free_chain;
> +
> +       if (!(args->flags & AMDGPU_VM_DELAY_UPDATE))
>                 amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
> -                                       args->operation);
> +                                       args->operation, syncobj ?
> +                                       &fence : NULL);
> +
> +       if (syncobj) {
> +               if (chain) {
> +                       drm_syncobj_add_point(syncobj, chain, fence,
> +                                             args->timeline_point);
> +                       chain = NULL;
> +               } else {
> +                       drm_syncobj_replace_fence(syncobj, fence);
> +               }
> +       }
> +
> +error_free_chain:
> +       dma_fence_chain_free(chain);
> +
> +error_put_syncobj:
> +       if (syncobj)
> +               drm_syncobj_put(syncobj);
>
>  error_backoff:
>         ttm_eu_backoff_reservation(&ticket, &list);
>
>  error_unref:
>         drm_gem_object_put(gobj);
> +       dma_fence_put(fence);
>         return r;
>  }
>
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 1d65c1fbc4ec..f84b5f2c817c 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -533,7 +533,8 @@ struct drm_amdgpu_gem_op {
>  struct drm_amdgpu_gem_va {
>         /** GEM object handle */
>         __u32 handle;
> -       __u32 _pad;
> +       /** Optional DRM Syncobj to signal when operation completes */
> +       __u32 syncobj;
>         /** AMDGPU_VA_OP_* */
>         __u32 operation;
>         /** AMDGPU_VM_PAGE_* */
> @@ -544,6 +545,8 @@ struct drm_amdgpu_gem_va {
>         __u64 offset_in_bo;
>         /** Specify mapping size. Must be correctly aligned. */
>         __u64 map_size;
> +       /** Optional Syncobj timeline point to signal */
> +       __u64 timeline_point;
>  };
>
>  #define AMDGPU_HW_IP_GFX          0
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 5/5] drm/amdgpu: allow specifying a syncobj for VM map operations
  2022-04-04  1:32   ` Bas Nieuwenhuizen
@ 2022-04-04  6:43     ` Christian König
  2022-04-06  2:25       ` Bas Nieuwenhuizen
  0 siblings, 1 reply; 11+ messages in thread
From: Christian König @ 2022-04-04  6:43 UTC (permalink / raw)
  To: Bas Nieuwenhuizen, Christian König
  Cc: Alex Deucher, amd-gfx mailing list

Hi Bas,

Am 04.04.22 um 03:32 schrieb Bas Nieuwenhuizen:
> Hi Christian,
>
> Couple of concerns here:
>
> 1) This is missing the option to wait on a syncobj before executing the VA op.

Uff, that was not really planned in any way.

Currently all VM operations are scheduled to run behind all CS 
operations and that is not easily changeable.

In other words we planned that disable the VM->CS implicit sync, but not 
CS->VM.

> 2) There are no mechanisms to disable implicit sync yet.

Specifying a sync_obj will do that.

> 3) in radv we typically signal multiple syncobj, so it would be nice
> if we could have that capability here too. Technically we can use the
> transfer ioctls, but it is kinda annoying.

That shouldn't be much of a problem.

> I had https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2FBNieuwenhuizen%2Flinux%2Fcommit%2Fd8a1cce0c4c5c87522ae8866faf4f67be7189ef0&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C6d4dec03a52b4ce510b208da15daf2fa%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637846327828400310%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=K6J%2FHb2NAL8NpoTkaqmpjq4xJS9ozpu2UJBZwSA8OIk%3D&amp;reserved=0
> + radv implementation before we ended up in the situation of waits not
> being a doable thing.

Well I changed the TLB handling so that waits are doable now :)

>
> For (1) we can emulate in userspace by waiting for all syncobj to
> signal first, but I'm concerned that will result in GPU bubbles due to
> CPU work.  To test this out I'm starting to hook up more implicit sync
> disabling stuff (starting with the submissions themselves, WIP at
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2FBNieuwenhuizen%2Flinux%2Ftree%2Fno-implicit-sync&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C6d4dec03a52b4ce510b208da15daf2fa%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637846327828400310%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=taf3fjRYd2OT9GR%2FgtBsCcoIOtOW0pYjdcsGAe%2BnJSw%3D&amp;reserved=0), which
> is why you're seeing some random comments on your dma resv usage
> series coming your way.

Which is rather welcomed. That patch set certainly needs more eyes on it.

Thanks,
Christian.

>
> - Bas
>
>
> On Thu, Mar 31, 2022 at 11:47 AM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> To improve synchronization of command submissions with page table updates RADV
>> wants to manually wait for the updates to be completed without affecting
>> parallel submissions.
>>
>> Implement this by allowing to specify a drm_sync_obj handle and a timeline
>> point for the GEM_VA IOCTL.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 79 ++++++++++++++++++++-----
>>   include/uapi/drm/amdgpu_drm.h           |  5 +-
>>   2 files changed, 67 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index 9cdfee67efeb..bf0092f629f9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -33,6 +33,7 @@
>>
>>   #include <drm/amdgpu_drm.h>
>>   #include <drm/drm_drv.h>
>> +#include <drm/drm_syncobj.h>
>>   #include <drm/drm_gem_ttm_helper.h>
>>
>>   #include "amdgpu.h"
>> @@ -598,6 +599,7 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
>>    * @vm: vm to update
>>    * @bo_va: bo_va to update
>>    * @operation: map, unmap or clear
>> + * @last_update: optional pointer to a dma_fence for the last VM update
>>    *
>>    * Update the bo_va directly after setting its address. Errors are not
>>    * vital here, so they are not reported back to userspace.
>> @@ -605,20 +607,21 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
>>   static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>>                                      struct amdgpu_vm *vm,
>>                                      struct amdgpu_bo_va *bo_va,
>> -                                   uint32_t operation)
>> +                                   uint32_t operation,
>> +                                   struct dma_fence **last_update)
>>   {
>>          int r;
>>
>>          if (!amdgpu_vm_ready(vm))
>>                  return;
>>
>> -       r = amdgpu_vm_clear_freed(adev, vm, NULL);
>> +       r = amdgpu_vm_clear_freed(adev, vm, last_update);
>>          if (r)
>>                  goto error;
>>
>>          if (operation == AMDGPU_VA_OP_MAP ||
>>              operation == AMDGPU_VA_OP_REPLACE) {
>> -               r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
>> +               r = amdgpu_vm_bo_update(adev, bo_va, false, last_update);
>>                  if (r)
>>                          goto error;
>>          }
>> @@ -671,6 +674,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>          struct drm_gem_object *gobj;
>>          struct amdgpu_device *adev = drm_to_adev(dev);
>>          struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> +       struct dma_fence *fence = dma_fence_get_stub();
>> +       struct dma_fence_chain *chain = NULL;
>> +       struct drm_syncobj *syncobj = NULL;
>>          struct amdgpu_bo *abo;
>>          struct amdgpu_bo_va *bo_va;
>>          struct amdgpu_bo_list_entry vm_pd;
>> @@ -714,17 +720,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>                  return -EINVAL;
>>          }
>>
>> -       switch (args->operation) {
>> -       case AMDGPU_VA_OP_MAP:
>> -       case AMDGPU_VA_OP_UNMAP:
>> -       case AMDGPU_VA_OP_CLEAR:
>> -       case AMDGPU_VA_OP_REPLACE:
>> -               break;
>> -       default:
>> -               dev_dbg(dev->dev, "unsupported operation %d\n",
>> -                       args->operation);
>> -               return -EINVAL;
>> -       }
>> +       /* For debugging delay all VM updates till CS time */
>> +       if (!amdgpu_vm_debug)
>> +               args->flags |= AMDGPU_VM_DELAY_UPDATE;
>>
>>          INIT_LIST_HEAD(&list);
>>          INIT_LIST_HEAD(&duplicates);
>> @@ -763,6 +761,30 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>                  bo_va = NULL;
>>          }
>>
>> +       if (args->syncobj) {
>> +               syncobj = drm_syncobj_find(filp, args->syncobj);
>> +               if (!syncobj) {
>> +                       r = -EINVAL;
>> +                       goto error_backoff;
>> +               }
>> +
>> +               if (args->timeline_point) {
>> +                       chain = dma_fence_chain_alloc();
>> +                       if (!chain) {
>> +                               r = -ENOMEM;
>> +                               goto error_put_syncobj;
>> +                       }
>> +               }
>> +
>> +               /*
>> +                * Update the VM once before to make sure there are no other
>> +                * pending updates.
>> +                */
>> +               if (!(args->flags & AMDGPU_VM_DELAY_UPDATE))
>> +                       amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
>> +                                               args->operation, NULL);
>> +       }
>> +
>>          switch (args->operation) {
>>          case AMDGPU_VA_OP_MAP:
>>                  va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
>> @@ -786,17 +808,42 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>                                               va_flags);
>>                  break;
>>          default:
>> +               dev_dbg(dev->dev, "unsupported operation %d\n",
>> +                       args->operation);
>> +               r = -EINVAL;
>>                  break;
>>          }
>> -       if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
>> +       if (r)
>> +               goto error_free_chain;
>> +
>> +       if (!(args->flags & AMDGPU_VM_DELAY_UPDATE))
>>                  amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
>> -                                       args->operation);
>> +                                       args->operation, syncobj ?
>> +                                       &fence : NULL);
>> +
>> +       if (syncobj) {
>> +               if (chain) {
>> +                       drm_syncobj_add_point(syncobj, chain, fence,
>> +                                             args->timeline_point);
>> +                       chain = NULL;
>> +               } else {
>> +                       drm_syncobj_replace_fence(syncobj, fence);
>> +               }
>> +       }
>> +
>> +error_free_chain:
>> +       dma_fence_chain_free(chain);
>> +
>> +error_put_syncobj:
>> +       if (syncobj)
>> +               drm_syncobj_put(syncobj);
>>
>>   error_backoff:
>>          ttm_eu_backoff_reservation(&ticket, &list);
>>
>>   error_unref:
>>          drm_gem_object_put(gobj);
>> +       dma_fence_put(fence);
>>          return r;
>>   }
>>
>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>> index 1d65c1fbc4ec..f84b5f2c817c 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -533,7 +533,8 @@ struct drm_amdgpu_gem_op {
>>   struct drm_amdgpu_gem_va {
>>          /** GEM object handle */
>>          __u32 handle;
>> -       __u32 _pad;
>> +       /** Optional DRM Syncobj to signal when operation completes */
>> +       __u32 syncobj;
>>          /** AMDGPU_VA_OP_* */
>>          __u32 operation;
>>          /** AMDGPU_VM_PAGE_* */
>> @@ -544,6 +545,8 @@ struct drm_amdgpu_gem_va {
>>          __u64 offset_in_bo;
>>          /** Specify mapping size. Must be correctly aligned. */
>>          __u64 map_size;
>> +       /** Optional Syncobj timeline point to signal */
>> +       __u64 timeline_point;
>>   };
>>
>>   #define AMDGPU_HW_IP_GFX          0
>> --
>> 2.25.1
>>


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 5/5] drm/amdgpu: allow specifying a syncobj for VM map operations
  2022-04-04  6:43     ` Christian König
@ 2022-04-06  2:25       ` Bas Nieuwenhuizen
  2022-04-16 18:43         ` Bas Nieuwenhuizen
  0 siblings, 1 reply; 11+ messages in thread
From: Bas Nieuwenhuizen @ 2022-04-06  2:25 UTC (permalink / raw)
  To: Christian König
  Cc: Alex Deucher, Christian König, amd-gfx mailing list

On Mon, Apr 4, 2022 at 8:43 AM Christian König <christian.koenig@amd.com> wrote:
>
> Hi Bas,
>
> Am 04.04.22 um 03:32 schrieb Bas Nieuwenhuizen:
> > Hi Christian,
> >
> > Couple of concerns here:
> >
> > 1) This is missing the option to wait on a syncobj before executing the VA op.
>
> Uff, that was not really planned in any way.
>
> Currently all VM operations are scheduled to run behind all CS
> operations and that is not easily changeable.
>
> In other words we planned that disable the VM->CS implicit sync, but not
> CS->VM.
>
> > 2) There are no mechanisms to disable implicit sync yet.
>
> Specifying a sync_obj will do that.
>
> > 3) in radv we typically signal multiple syncobj, so it would be nice
> > if we could have that capability here too. Technically we can use the
> > transfer ioctls, but it is kinda annoying.
>
> That shouldn't be much of a problem.
>
> > I had https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2FBNieuwenhuizen%2Flinux%2Fcommit%2Fd8a1cce0c4c5c87522ae8866faf4f67be7189ef0&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C6d4dec03a52b4ce510b208da15daf2fa%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637846327828400310%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=K6J%2FHb2NAL8NpoTkaqmpjq4xJS9ozpu2UJBZwSA8OIk%3D&amp;reserved=0
> > + radv implementation before we ended up in the situation of waits not
> > being a doable thing.
>
> Well I changed the TLB handling so that waits are doable now :)
>
> >
> > For (1) we can emulate in userspace by waiting for all syncobj to
> > signal first, but I'm concerned that will result in GPU bubbles due to
> > CPU work.  To test this out I'm starting to hook up more implicit sync
> > disabling stuff (starting with the submissions themselves, WIP at
> > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2FBNieuwenhuizen%2Flinux%2Ftree%2Fno-implicit-sync&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C6d4dec03a52b4ce510b208da15daf2fa%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637846327828400310%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=taf3fjRYd2OT9GR%2FgtBsCcoIOtOW0pYjdcsGAe%2BnJSw%3D&amp;reserved=0), which
> > is why you're seeing some random comments on your dma resv usage
> > series coming your way.
>
> Which is rather welcomed. That patch set certainly needs more eyes on it.
>
> Thanks,
> Christian.
>
> >
> > - Bas
> >
> >
> > On Thu, Mar 31, 2022 at 11:47 AM Christian König
> > <ckoenig.leichtzumerken@gmail.com> wrote:
> >> To improve synchronization of command submissions with page table updates RADV
> >> wants to manually wait for the updates to be completed without affecting
> >> parallel submissions.
> >>
> >> Implement this by allowing to specify a drm_sync_obj handle and a timeline
> >> point for the GEM_VA IOCTL.
> >>
> >> Signed-off-by: Christian König <christian.koenig@amd.com>
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 79 ++++++++++++++++++++-----
> >>   include/uapi/drm/amdgpu_drm.h           |  5 +-
> >>   2 files changed, 67 insertions(+), 17 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >> index 9cdfee67efeb..bf0092f629f9 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> >> @@ -33,6 +33,7 @@
> >>
> >>   #include <drm/amdgpu_drm.h>
> >>   #include <drm/drm_drv.h>
> >> +#include <drm/drm_syncobj.h>
> >>   #include <drm/drm_gem_ttm_helper.h>
> >>
> >>   #include "amdgpu.h"
> >> @@ -598,6 +599,7 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
> >>    * @vm: vm to update
> >>    * @bo_va: bo_va to update
> >>    * @operation: map, unmap or clear
> >> + * @last_update: optional pointer to a dma_fence for the last VM update
> >>    *
> >>    * Update the bo_va directly after setting its address. Errors are not
> >>    * vital here, so they are not reported back to userspace.
> >> @@ -605,20 +607,21 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
> >>   static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
> >>                                      struct amdgpu_vm *vm,
> >>                                      struct amdgpu_bo_va *bo_va,
> >> -                                   uint32_t operation)
> >> +                                   uint32_t operation,
> >> +                                   struct dma_fence **last_update)
> >>   {
> >>          int r;
> >>
> >>          if (!amdgpu_vm_ready(vm))
> >>                  return;
> >>
> >> -       r = amdgpu_vm_clear_freed(adev, vm, NULL);
> >> +       r = amdgpu_vm_clear_freed(adev, vm, last_update);
> >>          if (r)
> >>                  goto error;
> >>
> >>          if (operation == AMDGPU_VA_OP_MAP ||
> >>              operation == AMDGPU_VA_OP_REPLACE) {
> >> -               r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
> >> +               r = amdgpu_vm_bo_update(adev, bo_va, false, last_update);
> >>                  if (r)
> >>                          goto error;
> >>          }
> >> @@ -671,6 +674,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
> >>          struct drm_gem_object *gobj;
> >>          struct amdgpu_device *adev = drm_to_adev(dev);
> >>          struct amdgpu_fpriv *fpriv = filp->driver_priv;
> >> +       struct dma_fence *fence = dma_fence_get_stub();
> >> +       struct dma_fence_chain *chain = NULL;
> >> +       struct drm_syncobj *syncobj = NULL;
> >>          struct amdgpu_bo *abo;
> >>          struct amdgpu_bo_va *bo_va;
> >>          struct amdgpu_bo_list_entry vm_pd;
> >> @@ -714,17 +720,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
> >>                  return -EINVAL;
> >>          }
> >>
> >> -       switch (args->operation) {
> >> -       case AMDGPU_VA_OP_MAP:
> >> -       case AMDGPU_VA_OP_UNMAP:
> >> -       case AMDGPU_VA_OP_CLEAR:
> >> -       case AMDGPU_VA_OP_REPLACE:
> >> -               break;
> >> -       default:
> >> -               dev_dbg(dev->dev, "unsupported operation %d\n",
> >> -                       args->operation);
> >> -               return -EINVAL;
> >> -       }
> >> +       /* For debugging delay all VM updates till CS time */
> >> +       if (!amdgpu_vm_debug)
> >> +               args->flags |= AMDGPU_VM_DELAY_UPDATE;

FWIW in my testing syncobj + AMDGPU_VM_DELAY_UPDATE seems to make the
explicit sync not work (because we don't execute the map operations,
and then the delayed operations don't know about the syncobj). So
besides deleting this test code, should we reject
AMDGPU_VM_DELAY_UPDATE + syncobj?

With that I've had success getting some overlap (e.g.
https://drive.google.com/file/d/1ER1fmx6jmZi1yHDyn0gefitmqHUGM0iL/view?usp=sharing),
though I still got a bit more than expected implicit synchronization
going on due to creation of new buffers and their initial map
operations. Though that seems to be squarely a radv problem :)

As a side note, are MAP/UNMAP operations without AMDGPU_VM_PAGE_PRT
valid or do we need to guard for them?


> >>
> >>          INIT_LIST_HEAD(&list);
> >>          INIT_LIST_HEAD(&duplicates);
> >> @@ -763,6 +761,30 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
> >>                  bo_va = NULL;
> >>          }
> >>
> >> +       if (args->syncobj) {
> >> +               syncobj = drm_syncobj_find(filp, args->syncobj);
> >> +               if (!syncobj) {
> >> +                       r = -EINVAL;
> >> +                       goto error_backoff;
> >> +               }
> >> +
> >> +               if (args->timeline_point) {
> >> +                       chain = dma_fence_chain_alloc();
> >> +                       if (!chain) {
> >> +                               r = -ENOMEM;
> >> +                               goto error_put_syncobj;
> >> +                       }
> >> +               }
> >> +
> >> +               /*
> >> +                * Update the VM once before to make sure there are no other
> >> +                * pending updates.
> >> +                */
> >> +               if (!(args->flags & AMDGPU_VM_DELAY_UPDATE))
> >> +                       amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
> >> +                                               args->operation, NULL);
> >> +       }
> >> +
> >>          switch (args->operation) {
> >>          case AMDGPU_VA_OP_MAP:
> >>                  va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
> >> @@ -786,17 +808,42 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
> >>                                               va_flags);
> >>                  break;
> >>          default:
> >> +               dev_dbg(dev->dev, "unsupported operation %d\n",
> >> +                       args->operation);
> >> +               r = -EINVAL;
> >>                  break;
> >>          }
> >> -       if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
> >> +       if (r)
> >> +               goto error_free_chain;
> >> +
> >> +       if (!(args->flags & AMDGPU_VM_DELAY_UPDATE))
> >>                  amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
> >> -                                       args->operation);
> >> +                                       args->operation, syncobj ?
> >> +                                       &fence : NULL);
> >> +
> >> +       if (syncobj) {
> >> +               if (chain) {
> >> +                       drm_syncobj_add_point(syncobj, chain, fence,
> >> +                                             args->timeline_point);
> >> +                       chain = NULL;
> >> +               } else {
> >> +                       drm_syncobj_replace_fence(syncobj, fence);
> >> +               }
> >> +       }
> >> +
> >> +error_free_chain:
> >> +       dma_fence_chain_free(chain);
> >> +
> >> +error_put_syncobj:
> >> +       if (syncobj)
> >> +               drm_syncobj_put(syncobj);
> >>
> >>   error_backoff:
> >>          ttm_eu_backoff_reservation(&ticket, &list);
> >>
> >>   error_unref:
> >>          drm_gem_object_put(gobj);
> >> +       dma_fence_put(fence);
> >>          return r;
> >>   }
> >>
> >> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> >> index 1d65c1fbc4ec..f84b5f2c817c 100644
> >> --- a/include/uapi/drm/amdgpu_drm.h
> >> +++ b/include/uapi/drm/amdgpu_drm.h
> >> @@ -533,7 +533,8 @@ struct drm_amdgpu_gem_op {
> >>   struct drm_amdgpu_gem_va {
> >>          /** GEM object handle */
> >>          __u32 handle;
> >> -       __u32 _pad;
> >> +       /** Optional DRM Syncobj to signal when operation completes */
> >> +       __u32 syncobj;
> >>          /** AMDGPU_VA_OP_* */
> >>          __u32 operation;
> >>          /** AMDGPU_VM_PAGE_* */
> >> @@ -544,6 +545,8 @@ struct drm_amdgpu_gem_va {
> >>          __u64 offset_in_bo;
> >>          /** Specify mapping size. Must be correctly aligned. */
> >>          __u64 map_size;
> >> +       /** Optional Syncobj timeline point to signal */
> >> +       __u64 timeline_point;
> >>   };
> >>
> >>   #define AMDGPU_HW_IP_GFX          0
> >> --
> >> 2.25.1
> >>
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4/5] drm/amdgpu: add last_update to amdgpu_vm_update_pdes
  2022-03-31  9:47 ` [PATCH 4/5] drm/amdgpu: add last_update to amdgpu_vm_update_pdes Christian König
@ 2022-04-06  2:26   ` Bas Nieuwenhuizen
  0 siblings, 0 replies; 11+ messages in thread
From: Bas Nieuwenhuizen @ 2022-04-06  2:26 UTC (permalink / raw)
  To: Christian König
  Cc: Alex Deucher, Christian König, amd-gfx mailing list

On Thu, Mar 31, 2022 at 11:47 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Allows separate tracking of VM updates.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c           | 2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c          | 2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c           | 9 +++++----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h           | 4 ++--
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c             | 2 +-
>  6 files changed, 11 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 57b521bb1eec..92a6ca415ab8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -410,7 +410,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
>         struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
>         int ret;
>
> -       ret = amdgpu_vm_update_pdes(adev, vm, false);
> +       ret = amdgpu_vm_update_pdes(adev, vm, false, NULL);
>         if (ret)
>                 return ret;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 50fddd6715db..87daa75b57c9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -849,7 +849,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
>         if (r)
>                 return r;
>
> -       r = amdgpu_vm_update_pdes(adev, vm, false);
> +       r = amdgpu_vm_update_pdes(adev, vm, false, NULL);
>         if (r)
>                 return r;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 3f84eedb4d96..9cdfee67efeb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -623,7 +623,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>                         goto error;
>         }
>
> -       r = amdgpu_vm_update_pdes(adev, vm, false);
> +       r = amdgpu_vm_update_pdes(adev, vm, false, NULL);
>
>  error:
>         if (r && r != -ERESTARTSYS)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 05024999556b..3391256e2448 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -725,14 +725,15 @@ uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
>   * @adev: amdgpu_device pointer
>   * @vm: requested vm
>   * @immediate: submit immediately to the paging queue
> + * @last:update: optional returned last update
>   *
>   * Makes sure all directories are up to date.
>   *
>   * Returns:
>   * 0 for success, error for failure.
>   */
> -int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
> -                         struct amdgpu_vm *vm, bool immediate)
> +int amdgpu_vm_update_pdes(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> +                         bool immediate, struct dma_fence **last_update)
>  {
>         struct amdgpu_vm_update_params params;
>         struct amdgpu_vm_bo_base *entry;
> @@ -759,7 +760,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
>                         goto error;
>         }
>
> -       r = vm->update_funcs->commit(&params, &vm->last_update);
> +       r = vm->update_funcs->commit(&params, last_update ? : &vm->last_update);

This ternary is pretty incomplete.
>         if (r)
>                 goto error;
>
> @@ -2529,7 +2530,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
>         if (r)
>                 goto error_unlock;
>
> -       r = amdgpu_vm_update_pdes(adev, vm, true);
> +       r = amdgpu_vm_update_pdes(adev, vm, true, NULL);
>
>  error_unlock:
>         amdgpu_bo_unreserve(root);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 3c7df1b8e3c9..692ec7b51ac2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -394,8 +394,8 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>                               int (*callback)(void *p, struct amdgpu_bo *bo),
>                               void *param);
>  int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
> -int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
> -                         struct amdgpu_vm *vm, bool immediate);
> +int amdgpu_vm_update_pdes(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> +                         bool immediate, struct dma_fence **last_update);
>  int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>                           struct amdgpu_vm *vm,
>                           struct dma_fence **fence);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 907b02045824..74824e3806d5 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -1294,7 +1294,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
>                 last_start = prange->start + i + 1;
>         }
>
> -       r = amdgpu_vm_update_pdes(adev, vm, false);
> +       r = amdgpu_vm_update_pdes(adev, vm, false, NULL);
>         if (r) {
>                 pr_debug("failed %d to update directories 0x%lx\n", r,
>                          prange->start);
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 5/5] drm/amdgpu: allow specifying a syncobj for VM map operations
  2022-04-06  2:25       ` Bas Nieuwenhuizen
@ 2022-04-16 18:43         ` Bas Nieuwenhuizen
  2022-04-16 21:08           ` Bas Nieuwenhuizen
  0 siblings, 1 reply; 11+ messages in thread
From: Bas Nieuwenhuizen @ 2022-04-16 18:43 UTC (permalink / raw)
  To: Christian König
  Cc: Alex Deucher, Christian König, amd-gfx mailing list

On Wed, Apr 6, 2022 at 4:25 AM Bas Nieuwenhuizen
<bas@basnieuwenhuizen.nl> wrote:
>
> On Mon, Apr 4, 2022 at 8:43 AM Christian König <christian.koenig@amd.com> wrote:
> >
> > Hi Bas,
> >
> > Am 04.04.22 um 03:32 schrieb Bas Nieuwenhuizen:
> > > Hi Christian,
> > >
> > > Couple of concerns here:
> > >
> > > 1) This is missing the option to wait on a syncobj before executing the VA op.
> >
> > Uff, that was not really planned in any way.
> >
> > Currently all VM operations are scheduled to run behind all CS
> > operations and that is not easily changeable.
> >
> > In other words we planned that disable the VM->CS implicit sync, but not
> > CS->VM.
> >
> > > 2) There are no mechanisms to disable implicit sync yet.
> >
> > Specifying a sync_obj will do that.
> >
> > > 3) in radv we typically signal multiple syncobj, so it would be nice
> > > if we could have that capability here too. Technically we can use the
> > > transfer ioctls, but it is kinda annoying.
> >
> > That shouldn't be much of a problem.

Update: I don't really think this would help, and is not a big issue
anyway. The overhead is only 1-5% on the remap operation and the
alternative doesn't provide the savings I expected.
> >
> > > I had https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2FBNieuwenhuizen%2Flinux%2Fcommit%2Fd8a1cce0c4c5c87522ae8866faf4f67be7189ef0&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C6d4dec03a52b4ce510b208da15daf2fa%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637846327828400310%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=K6J%2FHb2NAL8NpoTkaqmpjq4xJS9ozpu2UJBZwSA8OIk%3D&amp;reserved=0
> > > + radv implementation before we ended up in the situation of waits not
> > > being a doable thing.
> >
> > Well I changed the TLB handling so that waits are doable now :)
> >
> > >
> > > For (1) we can emulate in userspace by waiting for all syncobj to
> > > signal first, but I'm concerned that will result in GPU bubbles due to
> > > CPU work.  To test this out I'm starting to hook up more implicit sync
> > > disabling stuff (starting with the submissions themselves, WIP at
> > > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2FBNieuwenhuizen%2Flinux%2Ftree%2Fno-implicit-sync&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C6d4dec03a52b4ce510b208da15daf2fa%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637846327828400310%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=taf3fjRYd2OT9GR%2FgtBsCcoIOtOW0pYjdcsGAe%2BnJSw%3D&amp;reserved=0), which
> > > is why you're seeing some random comments on your dma resv usage
> > > series coming your way.
> >
> > Which is rather welcomed. That patch set certainly needs more eyes on it.
> >
> > Thanks,
> > Christian.
> >
> > >
> > > - Bas
> > >
> > >
> > > On Thu, Mar 31, 2022 at 11:47 AM Christian König
> > > <ckoenig.leichtzumerken@gmail.com> wrote:
> > >> To improve synchronization of command submissions with page table updates RADV
> > >> wants to manually wait for the updates to be completed without affecting
> > >> parallel submissions.
> > >>
> > >> Implement this by allowing to specify a drm_sync_obj handle and a timeline
> > >> point for the GEM_VA IOCTL.
> > >>
> > >> Signed-off-by: Christian König <christian.koenig@amd.com>
> > >> ---
> > >>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 79 ++++++++++++++++++++-----
> > >>   include/uapi/drm/amdgpu_drm.h           |  5 +-
> > >>   2 files changed, 67 insertions(+), 17 deletions(-)
> > >>
> > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> > >> index 9cdfee67efeb..bf0092f629f9 100644
> > >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> > >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> > >> @@ -33,6 +33,7 @@
> > >>
> > >>   #include <drm/amdgpu_drm.h>
> > >>   #include <drm/drm_drv.h>
> > >> +#include <drm/drm_syncobj.h>
> > >>   #include <drm/drm_gem_ttm_helper.h>
> > >>
> > >>   #include "amdgpu.h"
> > >> @@ -598,6 +599,7 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
> > >>    * @vm: vm to update
> > >>    * @bo_va: bo_va to update
> > >>    * @operation: map, unmap or clear
> > >> + * @last_update: optional pointer to a dma_fence for the last VM update
> > >>    *
> > >>    * Update the bo_va directly after setting its address. Errors are not
> > >>    * vital here, so they are not reported back to userspace.
> > >> @@ -605,20 +607,21 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
> > >>   static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
> > >>                                      struct amdgpu_vm *vm,
> > >>                                      struct amdgpu_bo_va *bo_va,
> > >> -                                   uint32_t operation)
> > >> +                                   uint32_t operation,
> > >> +                                   struct dma_fence **last_update)
> > >>   {
> > >>          int r;
> > >>
> > >>          if (!amdgpu_vm_ready(vm))
> > >>                  return;
> > >>
> > >> -       r = amdgpu_vm_clear_freed(adev, vm, NULL);
> > >> +       r = amdgpu_vm_clear_freed(adev, vm, last_update);
> > >>          if (r)
> > >>                  goto error;
> > >>
> > >>          if (operation == AMDGPU_VA_OP_MAP ||
> > >>              operation == AMDGPU_VA_OP_REPLACE) {
> > >> -               r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
> > >> +               r = amdgpu_vm_bo_update(adev, bo_va, false, last_update);
> > >>                  if (r)
> > >>                          goto error;

Two further issues here:

1) We're missing the fence in the pde update:
https://github.com/BNieuwenhuizen/linux/commit/0762af558151136a74681944ccb2d5e815f5cc57

2) in a replace, we're not emitting the shrunk mappings of the old
bo_va (e.g. the prt_va) in the ioctl, which ends up doing a mapping
update at CS time with implicit sync. I needed
https://github.com/BNieuwenhuizen/linux/commit/fe2559529f5ce3cafa4efb40b025d3deb8beab68
to avoid this, but I'm not sure about the lack of  reservation story
here (especially given you called something that looked like this
problem very deadlock prone in one of our earlier threads).


> > >>          }
> > >> @@ -671,6 +674,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
> > >>          struct drm_gem_object *gobj;
> > >>          struct amdgpu_device *adev = drm_to_adev(dev);
> > >>          struct amdgpu_fpriv *fpriv = filp->driver_priv;
> > >> +       struct dma_fence *fence = dma_fence_get_stub();
> > >> +       struct dma_fence_chain *chain = NULL;
> > >> +       struct drm_syncobj *syncobj = NULL;
> > >>          struct amdgpu_bo *abo;
> > >>          struct amdgpu_bo_va *bo_va;
> > >>          struct amdgpu_bo_list_entry vm_pd;
> > >> @@ -714,17 +720,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
> > >>                  return -EINVAL;
> > >>          }
> > >>
> > >> -       switch (args->operation) {
> > >> -       case AMDGPU_VA_OP_MAP:
> > >> -       case AMDGPU_VA_OP_UNMAP:
> > >> -       case AMDGPU_VA_OP_CLEAR:
> > >> -       case AMDGPU_VA_OP_REPLACE:
> > >> -               break;
> > >> -       default:
> > >> -               dev_dbg(dev->dev, "unsupported operation %d\n",
> > >> -                       args->operation);
> > >> -               return -EINVAL;
> > >> -       }
> > >> +       /* For debugging delay all VM updates till CS time */
> > >> +       if (!amdgpu_vm_debug)
> > >> +               args->flags |= AMDGPU_VM_DELAY_UPDATE;
>
> FWIW in my testing syncobj + AMDGPU_VM_DELAY_UPDATE seems to make the
> explicit sync not work (because we don't execute the map operations,
> and then the delayed operations don't know about the syncobj). So
> besides deleting this test code, should we reject
> AMDGPU_VM_DELAY_UPDATE + syncobj?
>
> With that I've had success getting some overlap (e.g.
> https://drive.google.com/file/d/1ER1fmx6jmZi1yHDyn0gefitmqHUGM0iL/view?usp=sharing),
> though I still got a bit more than expected implicit synchronization
> going on due to creation of new buffers and their initial map
> operations. Though that seems to be squarely a radv problem :)
>
> As a side note, are MAP/UNMAP operations without AMDGPU_VM_PAGE_PRT
> valid or do we need to guard for them?
>
>
> > >>
> > >>          INIT_LIST_HEAD(&list);
> > >>          INIT_LIST_HEAD(&duplicates);
> > >> @@ -763,6 +761,30 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
> > >>                  bo_va = NULL;
> > >>          }
> > >>
> > >> +       if (args->syncobj) {
> > >> +               syncobj = drm_syncobj_find(filp, args->syncobj);
> > >> +               if (!syncobj) {
> > >> +                       r = -EINVAL;
> > >> +                       goto error_backoff;
> > >> +               }
> > >> +
> > >> +               if (args->timeline_point) {
> > >> +                       chain = dma_fence_chain_alloc();
> > >> +                       if (!chain) {
> > >> +                               r = -ENOMEM;
> > >> +                               goto error_put_syncobj;
> > >> +                       }
> > >> +               }
> > >> +
> > >> +               /*
> > >> +                * Update the VM once before to make sure there are no other
> > >> +                * pending updates.
> > >> +                */
> > >> +               if (!(args->flags & AMDGPU_VM_DELAY_UPDATE))
> > >> +                       amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
> > >> +                                               args->operation, NULL);
> > >> +       }
> > >> +
> > >>          switch (args->operation) {
> > >>          case AMDGPU_VA_OP_MAP:
> > >>                  va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
> > >> @@ -786,17 +808,42 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
> > >>                                               va_flags);
> > >>                  break;
> > >>          default:
> > >> +               dev_dbg(dev->dev, "unsupported operation %d\n",
> > >> +                       args->operation);
> > >> +               r = -EINVAL;
> > >>                  break;
> > >>          }
> > >> -       if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
> > >> +       if (r)
> > >> +               goto error_free_chain;
> > >> +
> > >> +       if (!(args->flags & AMDGPU_VM_DELAY_UPDATE))
> > >>                  amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
> > >> -                                       args->operation);
> > >> +                                       args->operation, syncobj ?
> > >> +                                       &fence : NULL);
> > >> +
> > >> +       if (syncobj) {
> > >> +               if (chain) {
> > >> +                       drm_syncobj_add_point(syncobj, chain, fence,
> > >> +                                             args->timeline_point);
> > >> +                       chain = NULL;
> > >> +               } else {
> > >> +                       drm_syncobj_replace_fence(syncobj, fence);
> > >> +               }
> > >> +       }
> > >> +
> > >> +error_free_chain:
> > >> +       dma_fence_chain_free(chain);
> > >> +
> > >> +error_put_syncobj:
> > >> +       if (syncobj)
> > >> +               drm_syncobj_put(syncobj);
> > >>
> > >>   error_backoff:
> > >>          ttm_eu_backoff_reservation(&ticket, &list);
> > >>
> > >>   error_unref:
> > >>          drm_gem_object_put(gobj);
> > >> +       dma_fence_put(fence);
> > >>          return r;
> > >>   }
> > >>
> > >> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> > >> index 1d65c1fbc4ec..f84b5f2c817c 100644
> > >> --- a/include/uapi/drm/amdgpu_drm.h
> > >> +++ b/include/uapi/drm/amdgpu_drm.h
> > >> @@ -533,7 +533,8 @@ struct drm_amdgpu_gem_op {
> > >>   struct drm_amdgpu_gem_va {
> > >>          /** GEM object handle */
> > >>          __u32 handle;
> > >> -       __u32 _pad;
> > >> +       /** Optional DRM Syncobj to signal when operation completes */
> > >> +       __u32 syncobj;
> > >>          /** AMDGPU_VA_OP_* */
> > >>          __u32 operation;
> > >>          /** AMDGPU_VM_PAGE_* */
> > >> @@ -544,6 +545,8 @@ struct drm_amdgpu_gem_va {
> > >>          __u64 offset_in_bo;
> > >>          /** Specify mapping size. Must be correctly aligned. */
> > >>          __u64 map_size;
> > >> +       /** Optional Syncobj timeline point to signal */
> > >> +       __u64 timeline_point;
> > >>   };
> > >>
> > >>   #define AMDGPU_HW_IP_GFX          0
> > >> --
> > >> 2.25.1
> > >>
> >

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 5/5] drm/amdgpu: allow specifying a syncobj for VM map operations
  2022-04-16 18:43         ` Bas Nieuwenhuizen
@ 2022-04-16 21:08           ` Bas Nieuwenhuizen
  0 siblings, 0 replies; 11+ messages in thread
From: Bas Nieuwenhuizen @ 2022-04-16 21:08 UTC (permalink / raw)
  To: Christian König
  Cc: Alex Deucher, Christian König, amd-gfx mailing list

On Sat, Apr 16, 2022 at 8:43 PM Bas Nieuwenhuizen
<bas@basnieuwenhuizen.nl> wrote:
>
> On Wed, Apr 6, 2022 at 4:25 AM Bas Nieuwenhuizen
> <bas@basnieuwenhuizen.nl> wrote:
> >
> > On Mon, Apr 4, 2022 at 8:43 AM Christian König <christian.koenig@amd.com> wrote:
> > >
> > > Hi Bas,
> > >
> > > Am 04.04.22 um 03:32 schrieb Bas Nieuwenhuizen:
> > > > Hi Christian,
> > > >
> > > > Couple of concerns here:
> > > >
> > > > 1) This is missing the option to wait on a syncobj before executing the VA op.
> > >
> > > Uff, that was not really planned in any way.
> > >
> > > Currently all VM operations are scheduled to run behind all CS
> > > operations and that is not easily changeable.
> > >
> > > In other words we planned that disable the VM->CS implicit sync, but not
> > > CS->VM.
> > >
> > > > 2) There are no mechanisms to disable implicit sync yet.
> > >
> > > Specifying a sync_obj will do that.
> > >
> > > > 3) in radv we typically signal multiple syncobj, so it would be nice
> > > > if we could have that capability here too. Technically we can use the
> > > > transfer ioctls, but it is kinda annoying.
> > >
> > > That shouldn't be much of a problem.
>
> Update: I don't really think this would help, and is not a big issue
> anyway. The overhead is only 1-5% on the remap operation and the
> alternative doesn't provide the savings I expected.
> > >
> > > > I had https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2FBNieuwenhuizen%2Flinux%2Fcommit%2Fd8a1cce0c4c5c87522ae8866faf4f67be7189ef0&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C6d4dec03a52b4ce510b208da15daf2fa%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637846327828400310%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=K6J%2FHb2NAL8NpoTkaqmpjq4xJS9ozpu2UJBZwSA8OIk%3D&amp;reserved=0
> > > > + radv implementation before we ended up in the situation of waits not
> > > > being a doable thing.
> > >
> > > Well I changed the TLB handling so that waits are doable now :)
> > >
> > > >
> > > > For (1) we can emulate in userspace by waiting for all syncobj to
> > > > signal first, but I'm concerned that will result in GPU bubbles due to
> > > > CPU work.  To test this out I'm starting to hook up more implicit sync
> > > > disabling stuff (starting with the submissions themselves, WIP at
> > > > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2FBNieuwenhuizen%2Flinux%2Ftree%2Fno-implicit-sync&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C6d4dec03a52b4ce510b208da15daf2fa%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637846327828400310%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=taf3fjRYd2OT9GR%2FgtBsCcoIOtOW0pYjdcsGAe%2BnJSw%3D&amp;reserved=0), which
> > > > is why you're seeing some random comments on your dma resv usage
> > > > series coming your way.
> > >
> > > Which is rather welcomed. That patch set certainly needs more eyes on it.
> > >
> > > Thanks,
> > > Christian.
> > >
> > > >
> > > > - Bas
> > > >
> > > >
> > > > On Thu, Mar 31, 2022 at 11:47 AM Christian König
> > > > <ckoenig.leichtzumerken@gmail.com> wrote:
> > > >> To improve synchronization of command submissions with page table updates RADV
> > > >> wants to manually wait for the updates to be completed without affecting
> > > >> parallel submissions.
> > > >>
> > > >> Implement this by allowing to specify a drm_sync_obj handle and a timeline
> > > >> point for the GEM_VA IOCTL.
> > > >>
> > > >> Signed-off-by: Christian König <christian.koenig@amd.com>
> > > >> ---
> > > >>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 79 ++++++++++++++++++++-----
> > > >>   include/uapi/drm/amdgpu_drm.h           |  5 +-
> > > >>   2 files changed, 67 insertions(+), 17 deletions(-)
> > > >>
> > > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> > > >> index 9cdfee67efeb..bf0092f629f9 100644
> > > >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> > > >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> > > >> @@ -33,6 +33,7 @@
> > > >>
> > > >>   #include <drm/amdgpu_drm.h>
> > > >>   #include <drm/drm_drv.h>
> > > >> +#include <drm/drm_syncobj.h>
> > > >>   #include <drm/drm_gem_ttm_helper.h>
> > > >>
> > > >>   #include "amdgpu.h"
> > > >> @@ -598,6 +599,7 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
> > > >>    * @vm: vm to update
> > > >>    * @bo_va: bo_va to update
> > > >>    * @operation: map, unmap or clear
> > > >> + * @last_update: optional pointer to a dma_fence for the last VM update
> > > >>    *
> > > >>    * Update the bo_va directly after setting its address. Errors are not
> > > >>    * vital here, so they are not reported back to userspace.
> > > >> @@ -605,20 +607,21 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
> > > >>   static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
> > > >>                                      struct amdgpu_vm *vm,
> > > >>                                      struct amdgpu_bo_va *bo_va,
> > > >> -                                   uint32_t operation)
> > > >> +                                   uint32_t operation,
> > > >> +                                   struct dma_fence **last_update)
> > > >>   {
> > > >>          int r;
> > > >>
> > > >>          if (!amdgpu_vm_ready(vm))
> > > >>                  return;
> > > >>
> > > >> -       r = amdgpu_vm_clear_freed(adev, vm, NULL);
> > > >> +       r = amdgpu_vm_clear_freed(adev, vm, last_update);
> > > >>          if (r)
> > > >>                  goto error;
> > > >>
> > > >>          if (operation == AMDGPU_VA_OP_MAP ||
> > > >>              operation == AMDGPU_VA_OP_REPLACE) {
> > > >> -               r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
> > > >> +               r = amdgpu_vm_bo_update(adev, bo_va, false, last_update);
> > > >>                  if (r)
> > > >>                          goto error;
>
> Two further issues here:
>
> 1) We're missing the fence in the pde update:
> https://github.com/BNieuwenhuizen/linux/commit/0762af558151136a74681944ccb2d5e815f5cc57
>
> 2) in a replace, we're not emitting the shrunk mappings of the old
> bo_va (e.g. the prt_va) in the ioctl, which ends up doing a mapping
> update at CS time with implicit sync. I needed
> https://github.com/BNieuwenhuizen/linux/commit/fe2559529f5ce3cafa4efb40b025d3deb8beab68
> to avoid this, but I'm not sure about the lack of  reservation story
> here (especially given you called something that looked like this
> problem very deadlock prone in one of our earlier threads).

also forgot: How do we trigger TLB flushes now?  I don't think we
update sync->last_vm_update anymore for these explicit fences, no?


>
>
> > > >>          }
> > > >> @@ -671,6 +674,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
> > > >>          struct drm_gem_object *gobj;
> > > >>          struct amdgpu_device *adev = drm_to_adev(dev);
> > > >>          struct amdgpu_fpriv *fpriv = filp->driver_priv;
> > > >> +       struct dma_fence *fence = dma_fence_get_stub();
> > > >> +       struct dma_fence_chain *chain = NULL;
> > > >> +       struct drm_syncobj *syncobj = NULL;
> > > >>          struct amdgpu_bo *abo;
> > > >>          struct amdgpu_bo_va *bo_va;
> > > >>          struct amdgpu_bo_list_entry vm_pd;
> > > >> @@ -714,17 +720,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
> > > >>                  return -EINVAL;
> > > >>          }
> > > >>
> > > >> -       switch (args->operation) {
> > > >> -       case AMDGPU_VA_OP_MAP:
> > > >> -       case AMDGPU_VA_OP_UNMAP:
> > > >> -       case AMDGPU_VA_OP_CLEAR:
> > > >> -       case AMDGPU_VA_OP_REPLACE:
> > > >> -               break;
> > > >> -       default:
> > > >> -               dev_dbg(dev->dev, "unsupported operation %d\n",
> > > >> -                       args->operation);
> > > >> -               return -EINVAL;
> > > >> -       }
> > > >> +       /* For debugging delay all VM updates till CS time */
> > > >> +       if (!amdgpu_vm_debug)
> > > >> +               args->flags |= AMDGPU_VM_DELAY_UPDATE;
> >
> > FWIW in my testing syncobj + AMDGPU_VM_DELAY_UPDATE seems to make the
> > explicit sync not work (because we don't execute the map operations,
> > and then the delayed operations don't know about the syncobj). So
> > besides deleting this test code, should we reject
> > AMDGPU_VM_DELAY_UPDATE + syncobj?
> >
> > With that I've had success getting some overlap (e.g.
> > https://drive.google.com/file/d/1ER1fmx6jmZi1yHDyn0gefitmqHUGM0iL/view?usp=sharing),
> > though I still got a bit more than expected implicit synchronization
> > going on due to creation of new buffers and their initial map
> > operations. Though that seems to be squarely a radv problem :)
> >
> > As a side note, are MAP/UNMAP operations without AMDGPU_VM_PAGE_PRT
> > valid or do we need to guard for them?
> >
> >
> > > >>
> > > >>          INIT_LIST_HEAD(&list);
> > > >>          INIT_LIST_HEAD(&duplicates);
> > > >> @@ -763,6 +761,30 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
> > > >>                  bo_va = NULL;
> > > >>          }
> > > >>
> > > >> +       if (args->syncobj) {
> > > >> +               syncobj = drm_syncobj_find(filp, args->syncobj);
> > > >> +               if (!syncobj) {
> > > >> +                       r = -EINVAL;
> > > >> +                       goto error_backoff;
> > > >> +               }
> > > >> +
> > > >> +               if (args->timeline_point) {
> > > >> +                       chain = dma_fence_chain_alloc();
> > > >> +                       if (!chain) {
> > > >> +                               r = -ENOMEM;
> > > >> +                               goto error_put_syncobj;
> > > >> +                       }
> > > >> +               }
> > > >> +
> > > >> +               /*
> > > >> +                * Update the VM once before to make sure there are no other
> > > >> +                * pending updates.
> > > >> +                */
> > > >> +               if (!(args->flags & AMDGPU_VM_DELAY_UPDATE))
> > > >> +                       amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
> > > >> +                                               args->operation, NULL);
> > > >> +       }
> > > >> +
> > > >>          switch (args->operation) {
> > > >>          case AMDGPU_VA_OP_MAP:
> > > >>                  va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
> > > >> @@ -786,17 +808,42 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
> > > >>                                               va_flags);
> > > >>                  break;
> > > >>          default:
> > > >> +               dev_dbg(dev->dev, "unsupported operation %d\n",
> > > >> +                       args->operation);
> > > >> +               r = -EINVAL;
> > > >>                  break;
> > > >>          }
> > > >> -       if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
> > > >> +       if (r)
> > > >> +               goto error_free_chain;
> > > >> +
> > > >> +       if (!(args->flags & AMDGPU_VM_DELAY_UPDATE))
> > > >>                  amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
> > > >> -                                       args->operation);
> > > >> +                                       args->operation, syncobj ?
> > > >> +                                       &fence : NULL);
> > > >> +
> > > >> +       if (syncobj) {
> > > >> +               if (chain) {
> > > >> +                       drm_syncobj_add_point(syncobj, chain, fence,
> > > >> +                                             args->timeline_point);
> > > >> +                       chain = NULL;
> > > >> +               } else {
> > > >> +                       drm_syncobj_replace_fence(syncobj, fence);
> > > >> +               }
> > > >> +       }
> > > >> +
> > > >> +error_free_chain:
> > > >> +       dma_fence_chain_free(chain);
> > > >> +
> > > >> +error_put_syncobj:
> > > >> +       if (syncobj)
> > > >> +               drm_syncobj_put(syncobj);
> > > >>
> > > >>   error_backoff:
> > > >>          ttm_eu_backoff_reservation(&ticket, &list);
> > > >>
> > > >>   error_unref:
> > > >>          drm_gem_object_put(gobj);
> > > >> +       dma_fence_put(fence);
> > > >>          return r;
> > > >>   }
> > > >>
> > > >> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> > > >> index 1d65c1fbc4ec..f84b5f2c817c 100644
> > > >> --- a/include/uapi/drm/amdgpu_drm.h
> > > >> +++ b/include/uapi/drm/amdgpu_drm.h
> > > >> @@ -533,7 +533,8 @@ struct drm_amdgpu_gem_op {
> > > >>   struct drm_amdgpu_gem_va {
> > > >>          /** GEM object handle */
> > > >>          __u32 handle;
> > > >> -       __u32 _pad;
> > > >> +       /** Optional DRM Syncobj to signal when operation completes */
> > > >> +       __u32 syncobj;
> > > >>          /** AMDGPU_VA_OP_* */
> > > >>          __u32 operation;
> > > >>          /** AMDGPU_VM_PAGE_* */
> > > >> @@ -544,6 +545,8 @@ struct drm_amdgpu_gem_va {
> > > >>          __u64 offset_in_bo;
> > > >>          /** Specify mapping size. Must be correctly aligned. */
> > > >>          __u64 map_size;
> > > >> +       /** Optional Syncobj timeline point to signal */
> > > >> +       __u64 timeline_point;
> > > >>   };
> > > >>
> > > >>   #define AMDGPU_HW_IP_GFX          0
> > > >> --
> > > >> 2.25.1
> > > >>
> > >

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2022-04-16 21:09 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-31  9:47 [PATCH 1/5] drm/amdgpu: cleanup freeing delayed mappings Christian König
2022-03-31  9:47 ` [PATCH 2/5] drm/amdgpu: add separate last_clear tracking Christian König
2022-03-31  9:47 ` [PATCH 3/5] drm/amdgpu: add last update fence to amdgpu_vm_bo_update Christian König
2022-03-31  9:47 ` [PATCH 4/5] drm/amdgpu: add last_update to amdgpu_vm_update_pdes Christian König
2022-04-06  2:26   ` Bas Nieuwenhuizen
2022-03-31  9:47 ` [PATCH 5/5] drm/amdgpu: allow specifying a syncobj for VM map operations Christian König
2022-04-04  1:32   ` Bas Nieuwenhuizen
2022-04-04  6:43     ` Christian König
2022-04-06  2:25       ` Bas Nieuwenhuizen
2022-04-16 18:43         ` Bas Nieuwenhuizen
2022-04-16 21:08           ` Bas Nieuwenhuizen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).