* [PATCH 00/13] Prep patches for 1 job per VM bind IOCTL
@ 2024-04-10  5:40 Matthew Brost
  2024-04-10  5:40 ` [PATCH 01/13] drm/xe: Lock all gpuva ops during " Matthew Brost
                   ` (13 more replies)
  0 siblings, 14 replies; 40+ messages in thread
From: Matthew Brost @ 2024-04-10  5:40 UTC (permalink / raw)
  To: intel-xe; +Cc: Matthew Brost

Patches were posted at [1] and have gone through a round of reviews.

Tested thoroughly on TGL with the IGT suite.

Matt

[1] https://patchwork.freedesktop.org/series/125608/

Matthew Brost (13):
  drm/xe: Lock all gpuva ops during VM bind IOCTL
  drm/xe: Add ops_execute function which returns a fence
  drm/xe: Move migrate to prefetch to op_lock_and_prep function
  drm/xe: Add struct xe_vma_ops abstraction
  drm/xe: Use xe_vma_ops to implement xe_vm_rebind
  drm/xe: Simplify VM bind IOCTL error handling and cleanup
  drm/xe: Use xe_vma_ops to implement page fault rebinds
  drm/xe: Add some members to xe_vma_ops
  drm/xe: Add vm_bind_ioctl_ops_fini helper
  drm/xe: Move ufence check to op_lock
  drm/xe: Move ufence add to vm_bind_ioctl_ops_fini
  drm/xe: Add xe_gt_tlb_invalidation_range and convert PT layer to use
    this
  drm/xe: Delete PT update selftest

 drivers/gpu/drm/xe/tests/xe_migrate.c       |  86 ---
 drivers/gpu/drm/xe/xe_gt_pagefault.c        |  16 +-
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c |  59 +-
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h |   3 +
 drivers/gpu/drm/xe/xe_pt.c                  |  25 +-
 drivers/gpu/drm/xe/xe_sync.c                |  15 +
 drivers/gpu/drm/xe/xe_sync.h                |   1 +
 drivers/gpu/drm/xe/xe_vm.c                  | 673 ++++++++++++--------
 drivers/gpu/drm/xe/xe_vm.h                  |   2 +
 drivers/gpu/drm/xe/xe_vm_types.h            |  22 +-
 10 files changed, 526 insertions(+), 376 deletions(-)

-- 
2.34.1



* [PATCH 01/13] drm/xe: Lock all gpuva ops during VM bind IOCTL
  2024-04-10  5:40 [PATCH 00/13] Prep patches for 1 job per VM bind IOCTL Matthew Brost
@ 2024-04-10  5:40 ` Matthew Brost
  2024-04-16 15:51   ` Zeng, Oak
  2024-04-10  5:40 ` [PATCH 02/13] drm/xe: Add ops_execute function which returns a fence Matthew Brost
                   ` (12 subsequent siblings)
  13 siblings, 1 reply; 40+ messages in thread
From: Matthew Brost @ 2024-04-10  5:40 UTC (permalink / raw)
  To: intel-xe; +Cc: Matthew Brost, Oak Zeng

Lock all BOs used in gpuva ops and validate all BOs in a single step
during the VM bind IOCTL.

This helps with the transition to making all gpuva ops in a VM bind IOCTL
a single atomic job, which is required for proper error handling.
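
For reference, a condensed sketch of the flow this patch moves towards
(simplified from the diff below, not the exact driver code; the
execute_all_ops() helper is a placeholder for the op execution loop):

  static int bind_ops_lock_validate_execute(struct xe_vm *vm,
                                            struct list_head *ops_list)
  {
      struct drm_exec exec;
      int err;

      drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
                    DRM_EXEC_IGNORE_DUPLICATES, 0);
      drm_exec_until_all_locked(&exec) {
          /*
           * Single step: lock the VM dma-resv and every BO the gpuva ops
           * reference, validating BOs as needed, before any op executes.
           */
          err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, ops_list);
          drm_exec_retry_on_contention(&exec);
          if (err)
              goto unlock;

          /* Everything is locked and validated; now run the ops. */
          err = execute_all_ops(vm, ops_list);
      }

  unlock:
      drm_exec_fini(&exec);
      return err;
  }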

v2:
 - Better commit message (Oak)
 - s/op_lock/op_lock_and_prep, few other renames too (Oak)
 - Use DRM_EXEC_IGNORE_DUPLICATES flag in drm_exec_init (local testing)
 - Do not reserve slots in locking step (direction based on series from Thomas)

Cc: Oak Zeng <oak.zeng@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 147 +++++++++++++++++++++++++++----------
 1 file changed, 107 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 66b70fd3d105..6375c136e21a 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -414,19 +414,23 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
 
 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
 
-static void xe_vm_kill(struct xe_vm *vm)
+static void xe_vm_kill(struct xe_vm *vm, bool unlocked)
 {
 	struct xe_exec_queue *q;
 
 	lockdep_assert_held(&vm->lock);
 
-	xe_vm_lock(vm, false);
+	if (unlocked)
+		xe_vm_lock(vm, false);
+
 	vm->flags |= XE_VM_FLAG_BANNED;
 	trace_xe_vm_kill(vm);
 
 	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
 		q->ops->kill(q);
-	xe_vm_unlock(vm);
+
+	if (unlocked)
+		xe_vm_unlock(vm);
 
 	/* TODO: Inform user the VM is banned */
 }
@@ -656,7 +660,7 @@ static void preempt_rebind_work_func(struct work_struct *w)
 
 	if (err) {
 		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
-		xe_vm_kill(vm);
+		xe_vm_kill(vm, true);
 	}
 	up_write(&vm->lock);
 
@@ -1876,17 +1880,9 @@ static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue
 		      u32 num_syncs, bool immediate, bool first_op,
 		      bool last_op)
 {
-	int err;
-
 	xe_vm_assert_held(vm);
 	xe_bo_assert_held(bo);
 
-	if (bo && immediate) {
-		err = xe_bo_validate(bo, vm, true);
-		if (err)
-			return err;
-	}
-
 	return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op,
 			    last_op);
 }
@@ -2539,17 +2535,13 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 	return 0;
 }
 
-static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
-		      struct xe_vma *vma, struct xe_vma_op *op)
+static int op_execute(struct xe_vm *vm, struct xe_vma *vma,
+		      struct xe_vma_op *op)
 {
 	int err;
 
 	lockdep_assert_held_write(&vm->lock);
 
-	err = xe_vm_lock_vma(exec, vma);
-	if (err)
-		return err;
-
 	xe_vm_assert_held(vm);
 	xe_bo_assert_held(xe_vma_bo(vma));
 
@@ -2630,19 +2622,10 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
 static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
 			       struct xe_vma_op *op)
 {
-	struct drm_exec exec;
 	int err;
 
 retry_userptr:
-	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
-	drm_exec_until_all_locked(&exec) {
-		err = op_execute(&exec, vm, vma, op);
-		drm_exec_retry_on_contention(&exec);
-		if (err)
-			break;
-	}
-	drm_exec_fini(&exec);
-
+	err = op_execute(vm, vma, op);
 	if (err == -EAGAIN) {
 		lockdep_assert_held_write(&vm->lock);
 
@@ -2807,29 +2790,113 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
 	}
 }
 
+static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
+				 bool validate)
+{
+	struct xe_bo *bo = xe_vma_bo(vma);
+	int err = 0;
+
+	if (bo) {
+		if (!bo->vm)
+			err = drm_exec_prepare_obj(exec, &bo->ttm.base, 0);
+		if (!err && validate)
+			err = xe_bo_validate(bo, xe_vma_vm(vma), true);
+	}
+
+	return err;
+}
+
+static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
+			    struct xe_vma_op *op)
+{
+	int err = 0;
+
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		err = vma_lock_and_validate(exec, op->map.vma,
+					    !xe_vm_in_fault_mode(vm));
+		break;
+	case DRM_GPUVA_OP_REMAP:
+		err = vma_lock_and_validate(exec,
+					    gpuva_to_vma(op->base.remap.unmap->va),
+					    false);
+		if (!err && op->remap.prev)
+			err = vma_lock_and_validate(exec, op->remap.prev, true);
+		if (!err && op->remap.next)
+			err = vma_lock_and_validate(exec, op->remap.next, true);
+		break;
+	case DRM_GPUVA_OP_UNMAP:
+		err = vma_lock_and_validate(exec,
+					    gpuva_to_vma(op->base.unmap.va),
+					    false);
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		err = vma_lock_and_validate(exec,
+					    gpuva_to_vma(op->base.prefetch.va), true);
+		break;
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+	}
+
+	return err;
+}
+
+static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
+					   struct xe_vm *vm,
+					   struct list_head *ops_list)
+{
+	struct xe_vma_op *op;
+	int err;
+
+	err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), 0);
+	if (err)
+		return err;
+
+	list_for_each_entry(op, ops_list, link) {
+		err = op_lock_and_prep(exec, vm, op);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
 				     struct list_head *ops_list)
 {
+	struct drm_exec exec;
 	struct xe_vma_op *op, *next;
 	int err;
 
 	lockdep_assert_held_write(&vm->lock);
 
-	list_for_each_entry_safe(op, next, ops_list, link) {
-		err = xe_vma_op_execute(vm, op);
-		if (err) {
-			drm_warn(&vm->xe->drm, "VM op(%d) failed with %d",
-				 op->base.op, err);
-			/*
-			 * FIXME: Killing VM rather than proper error handling
-			 */
-			xe_vm_kill(vm);
-			return -ENOSPC;
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+		      DRM_EXEC_IGNORE_DUPLICATES, 0);
+	drm_exec_until_all_locked(&exec) {
+		err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, ops_list);
+		drm_exec_retry_on_contention(&exec);
+		if (err)
+			goto unlock;
+
+		list_for_each_entry_safe(op, next, ops_list, link) {
+			err = xe_vma_op_execute(vm, op);
+			if (err) {
+				drm_warn(&vm->xe->drm, "VM op(%d) failed with %d",
+					 op->base.op, err);
+				/*
+				 * FIXME: Killing VM rather than proper error handling
+				 */
+				xe_vm_kill(vm, false);
+				err = -ENOSPC;
+				goto unlock;
+			}
+			xe_vma_op_cleanup(vm, op);
 		}
-		xe_vma_op_cleanup(vm, op);
 	}
 
-	return 0;
+unlock:
+	drm_exec_fini(&exec);
+	return err;
 }
 
 #define SUPPORTED_FLAGS	\
-- 
2.34.1



* [PATCH 02/13] drm/xe: Add ops_execute function which returns a fence
  2024-04-10  5:40 [PATCH 00/13] Prep patches for 1 job per VM bind IOCTL Matthew Brost
  2024-04-10  5:40 ` [PATCH 01/13] drm/xe: Lock all gpuva ops during " Matthew Brost
@ 2024-04-10  5:40 ` Matthew Brost
  2024-04-18 16:16   ` Zeng, Oak
  2024-04-10  5:40 ` [PATCH 03/13] drm/xe: Move migrate to prefetch to op_lock_and_prep function Matthew Brost
                   ` (11 subsequent siblings)
  13 siblings, 1 reply; 40+ messages in thread
From: Matthew Brost @ 2024-04-10  5:40 UTC (permalink / raw)
  To: intel-xe; +Cc: Matthew Brost, Oak Zeng

Add an ops_execute function which returns a fence. This will be helpful
for initiating all binds (VM bind IOCTL, rebinds in the exec IOCTL,
rebinds in the preempt rebind worker, and rebinds in page faults) via a
gpuva ops list. Returning a fence is needed in various paths.
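
For reference, a minimal sketch of how a caller consumes the returned
fence (the wrapper name is made up; condensed from the updated
vm_bind_ioctl_ops_execute() in the diff below, which assumes the locking
step from the previous patch has already run):

  static int execute_ops_and_consume_fence(struct xe_vm *vm,
                                           struct list_head *ops_list)
  {
      struct dma_fence *fence;

      /* VM lock and drm_exec BO locks are assumed to be held here. */
      fence = ops_execute(vm, ops_list, true);
      if (IS_ERR(fence))
          return PTR_ERR(fence);

      /*
       * Later patches use this fence to signal out-syncs and destroy
       * VMAs in one place; for now it is simply released.
       */
      dma_fence_put(fence);
      return 0;
  }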

v2:
 - Rebase

Cc: Oak Zeng <oak.zeng@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 211 +++++++++++++++++++------------------
 1 file changed, 111 insertions(+), 100 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 6375c136e21a..84c6b10b4b78 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1834,16 +1834,17 @@ find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
 	return NULL;
 }
 
-static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
-			struct xe_exec_queue *q, struct xe_sync_entry *syncs,
-			u32 num_syncs, bool immediate, bool first_op,
-			bool last_op)
+static struct dma_fence *
+xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
+	   struct xe_bo *bo, struct xe_sync_entry *syncs, u32 num_syncs,
+	   bool immediate, bool first_op, bool last_op)
 {
 	struct dma_fence *fence;
 	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
 	struct xe_user_fence *ufence;
 
 	xe_vm_assert_held(vm);
+	xe_bo_assert_held(bo);
 
 	ufence = find_ufence_get(syncs, num_syncs);
 	if (vma->ufence && ufence)
@@ -1855,7 +1856,7 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
 		fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op,
 				       last_op);
 		if (IS_ERR(fence))
-			return PTR_ERR(fence);
+			return fence;
 	} else {
 		int i;
 
@@ -1870,26 +1871,14 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
 
 	if (last_op)
 		xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
-	dma_fence_put(fence);
-
-	return 0;
-}
-
-static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
-		      struct xe_bo *bo, struct xe_sync_entry *syncs,
-		      u32 num_syncs, bool immediate, bool first_op,
-		      bool last_op)
-{
-	xe_vm_assert_held(vm);
-	xe_bo_assert_held(bo);
 
-	return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op,
-			    last_op);
+	return fence;
 }
 
-static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
-			struct xe_exec_queue *q, struct xe_sync_entry *syncs,
-			u32 num_syncs, bool first_op, bool last_op)
+static struct dma_fence *
+xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
+	     struct xe_exec_queue *q, struct xe_sync_entry *syncs,
+	     u32 num_syncs, bool first_op, bool last_op)
 {
 	struct dma_fence *fence;
 	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
@@ -1899,14 +1888,13 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
 
 	fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op);
 	if (IS_ERR(fence))
-		return PTR_ERR(fence);
+		return fence;
 
 	xe_vma_destroy(vma, fence);
 	if (last_op)
 		xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
-	dma_fence_put(fence);
 
-	return 0;
+	return fence;
 }
 
 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
@@ -2049,10 +2037,11 @@ static const u32 region_to_mem_type[] = {
 	XE_PL_VRAM1,
 };
 
-static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
-			  struct xe_exec_queue *q, u32 region,
-			  struct xe_sync_entry *syncs, u32 num_syncs,
-			  bool first_op, bool last_op)
+static struct dma_fence *
+xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
+	       struct xe_exec_queue *q, u32 region,
+	       struct xe_sync_entry *syncs, u32 num_syncs,
+	       bool first_op, bool last_op)
 {
 	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
 	int err;
@@ -2062,27 +2051,24 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 	if (!xe_vma_has_no_bo(vma)) {
 		err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]);
 		if (err)
-			return err;
+			return ERR_PTR(err);
 	}
 
 	if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated)) {
 		return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs,
 				  true, first_op, last_op);
 	} else {
+		struct dma_fence *fence =
+			xe_exec_queue_last_fence_get(wait_exec_queue, vm);
 		int i;
 
 		/* Nothing to do, signal fences now */
 		if (last_op) {
-			for (i = 0; i < num_syncs; i++) {
-				struct dma_fence *fence =
-					xe_exec_queue_last_fence_get(wait_exec_queue, vm);
-
+			for (i = 0; i < num_syncs; i++)
 				xe_sync_entry_signal(&syncs[i], fence);
-				dma_fence_put(fence);
-			}
 		}
 
-		return 0;
+		return fence;
 	}
 }
 
@@ -2535,10 +2521,10 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 	return 0;
 }
 
-static int op_execute(struct xe_vm *vm, struct xe_vma *vma,
-		      struct xe_vma_op *op)
+static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma,
+				    struct xe_vma_op *op)
 {
-	int err;
+	struct dma_fence *fence = NULL;
 
 	lockdep_assert_held_write(&vm->lock);
 
@@ -2547,11 +2533,11 @@ static int op_execute(struct xe_vm *vm, struct xe_vma *vma,
 
 	switch (op->base.op) {
 	case DRM_GPUVA_OP_MAP:
-		err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
-				 op->syncs, op->num_syncs,
-				 op->map.immediate || !xe_vm_in_fault_mode(vm),
-				 op->flags & XE_VMA_OP_FIRST,
-				 op->flags & XE_VMA_OP_LAST);
+		fence = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
+				   op->syncs, op->num_syncs,
+				   op->map.immediate || !xe_vm_in_fault_mode(vm),
+				   op->flags & XE_VMA_OP_FIRST,
+				   op->flags & XE_VMA_OP_LAST);
 		break;
 	case DRM_GPUVA_OP_REMAP:
 	{
@@ -2561,37 +2547,39 @@ static int op_execute(struct xe_vm *vm, struct xe_vma *vma,
 		if (!op->remap.unmap_done) {
 			if (prev || next)
 				vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
-			err = xe_vm_unbind(vm, vma, op->q, op->syncs,
-					   op->num_syncs,
-					   op->flags & XE_VMA_OP_FIRST,
-					   op->flags & XE_VMA_OP_LAST &&
-					   !prev && !next);
-			if (err)
+			fence = xe_vm_unbind(vm, vma, op->q, op->syncs,
+					     op->num_syncs,
+					     op->flags & XE_VMA_OP_FIRST,
+					     op->flags & XE_VMA_OP_LAST &&
+					     !prev && !next);
+			if (IS_ERR(fence))
 				break;
 			op->remap.unmap_done = true;
 		}
 
 		if (prev) {
 			op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND;
-			err = xe_vm_bind(vm, op->remap.prev, op->q,
-					 xe_vma_bo(op->remap.prev), op->syncs,
-					 op->num_syncs, true, false,
-					 op->flags & XE_VMA_OP_LAST && !next);
+			dma_fence_put(fence);
+			fence = xe_vm_bind(vm, op->remap.prev, op->q,
+					   xe_vma_bo(op->remap.prev), op->syncs,
+					   op->num_syncs, true, false,
+					   op->flags & XE_VMA_OP_LAST && !next);
 			op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND;
-			if (err)
+			if (IS_ERR(fence))
 				break;
 			op->remap.prev = NULL;
 		}
 
 		if (next) {
 			op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND;
-			err = xe_vm_bind(vm, op->remap.next, op->q,
-					 xe_vma_bo(op->remap.next),
-					 op->syncs, op->num_syncs,
-					 true, false,
-					 op->flags & XE_VMA_OP_LAST);
+			dma_fence_put(fence);
+			fence = xe_vm_bind(vm, op->remap.next, op->q,
+					   xe_vma_bo(op->remap.next),
+					   op->syncs, op->num_syncs,
+					   true, false,
+					   op->flags & XE_VMA_OP_LAST);
 			op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND;
-			if (err)
+			if (IS_ERR(fence))
 				break;
 			op->remap.next = NULL;
 		}
@@ -2599,34 +2587,36 @@ static int op_execute(struct xe_vm *vm, struct xe_vma *vma,
 		break;
 	}
 	case DRM_GPUVA_OP_UNMAP:
-		err = xe_vm_unbind(vm, vma, op->q, op->syncs,
-				   op->num_syncs, op->flags & XE_VMA_OP_FIRST,
-				   op->flags & XE_VMA_OP_LAST);
+		fence = xe_vm_unbind(vm, vma, op->q, op->syncs,
+				     op->num_syncs, op->flags & XE_VMA_OP_FIRST,
+				     op->flags & XE_VMA_OP_LAST);
 		break;
 	case DRM_GPUVA_OP_PREFETCH:
-		err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region,
-				     op->syncs, op->num_syncs,
-				     op->flags & XE_VMA_OP_FIRST,
-				     op->flags & XE_VMA_OP_LAST);
+		fence = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region,
+				       op->syncs, op->num_syncs,
+				       op->flags & XE_VMA_OP_FIRST,
+				       op->flags & XE_VMA_OP_LAST);
 		break;
 	default:
 		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
 	}
 
-	if (err)
+	if (IS_ERR(fence))
 		trace_xe_vma_fail(vma);
 
-	return err;
+	return fence;
 }
 
-static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
-			       struct xe_vma_op *op)
+static struct dma_fence *
+__xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
+		    struct xe_vma_op *op)
 {
+	struct dma_fence *fence;
 	int err;
 
 retry_userptr:
-	err = op_execute(vm, vma, op);
-	if (err == -EAGAIN) {
+	fence = op_execute(vm, vma, op);
+	if (IS_ERR(fence) && PTR_ERR(fence) == -EAGAIN) {
 		lockdep_assert_held_write(&vm->lock);
 
 		if (op->base.op == DRM_GPUVA_OP_REMAP) {
@@ -2643,22 +2633,24 @@ static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
 			if (!err)
 				goto retry_userptr;
 
+			fence = ERR_PTR(err);
 			trace_xe_vma_fail(vma);
 		}
 	}
 
-	return err;
+	return fence;
 }
 
-static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
+static struct dma_fence *
+xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
 {
-	int ret = 0;
+	struct dma_fence *fence = ERR_PTR(-ENOMEM);
 
 	lockdep_assert_held_write(&vm->lock);
 
 	switch (op->base.op) {
 	case DRM_GPUVA_OP_MAP:
-		ret = __xe_vma_op_execute(vm, op->map.vma, op);
+		fence = __xe_vma_op_execute(vm, op->map.vma, op);
 		break;
 	case DRM_GPUVA_OP_REMAP:
 	{
@@ -2671,23 +2663,23 @@ static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
 		else
 			vma = op->remap.next;
 
-		ret = __xe_vma_op_execute(vm, vma, op);
+		fence = __xe_vma_op_execute(vm, vma, op);
 		break;
 	}
 	case DRM_GPUVA_OP_UNMAP:
-		ret = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va),
-					  op);
+		fence = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va),
+					    op);
 		break;
 	case DRM_GPUVA_OP_PREFETCH:
-		ret = __xe_vma_op_execute(vm,
-					  gpuva_to_vma(op->base.prefetch.va),
-					  op);
+		fence = __xe_vma_op_execute(vm,
+					    gpuva_to_vma(op->base.prefetch.va),
+					    op);
 		break;
 	default:
 		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
 	}
 
-	return ret;
+	return fence;
 }
 
 static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
@@ -2861,11 +2853,35 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
 	return 0;
 }
 
+static struct dma_fence *ops_execute(struct xe_vm *vm,
+				     struct list_head *ops_list,
+				     bool cleanup)
+{
+	struct xe_vma_op *op, *next;
+	struct dma_fence *fence = NULL;
+
+	list_for_each_entry_safe(op, next, ops_list, link) {
+		if (!IS_ERR(fence)) {
+			dma_fence_put(fence);
+			fence = xe_vma_op_execute(vm, op);
+		}
+		if (IS_ERR(fence)) {
+			drm_warn(&vm->xe->drm, "VM op(%d) failed with %ld",
+				 op->base.op, PTR_ERR(fence));
+			fence = ERR_PTR(-ENOSPC);
+		}
+		if (cleanup)
+			xe_vma_op_cleanup(vm, op);
+	}
+
+	return fence;
+}
+
 static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
 				     struct list_head *ops_list)
 {
 	struct drm_exec exec;
-	struct xe_vma_op *op, *next;
+	struct dma_fence *fence;
 	int err;
 
 	lockdep_assert_held_write(&vm->lock);
@@ -2878,19 +2894,14 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
 		if (err)
 			goto unlock;
 
-		list_for_each_entry_safe(op, next, ops_list, link) {
-			err = xe_vma_op_execute(vm, op);
-			if (err) {
-				drm_warn(&vm->xe->drm, "VM op(%d) failed with %d",
-					 op->base.op, err);
-				/*
-				 * FIXME: Killing VM rather than proper error handling
-				 */
-				xe_vm_kill(vm, false);
-				err = -ENOSPC;
-				goto unlock;
-			}
-			xe_vma_op_cleanup(vm, op);
+		fence = ops_execute(vm, ops_list, true);
+		if (IS_ERR(fence)) {
+			err = PTR_ERR(fence);
+			/* FIXME: Killing VM rather than proper error handling */
+			xe_vm_kill(vm, false);
+			goto unlock;
+		} else {
+			dma_fence_put(fence);
 		}
 	}
 
-- 
2.34.1



* [PATCH 03/13] drm/xe: Move migrate to prefetch to op_lock_and_prep function
  2024-04-10  5:40 [PATCH 00/13] Prep patches for 1 job per VM bind IOCTL Matthew Brost
  2024-04-10  5:40 ` [PATCH 01/13] drm/xe: Lock all gpuva ops during " Matthew Brost
  2024-04-10  5:40 ` [PATCH 02/13] drm/xe: Add ops_execute function which returns a fence Matthew Brost
@ 2024-04-10  5:40 ` Matthew Brost
  2024-04-18 19:27   ` Zeng, Oak
  2024-04-10  5:40 ` [PATCH 04/13] drm/xe: Add struct xe_vma_ops abstraction Matthew Brost
                   ` (10 subsequent siblings)
  13 siblings, 1 reply; 40+ messages in thread
From: Matthew Brost @ 2024-04-10  5:40 UTC (permalink / raw)
  To: intel-xe; +Cc: Matthew Brost, Oak Zeng

All non-binding operations in a VM bind IOCTL should be done in the lock
and prepare step rather than the execution step. Move the prefetch BO
migration to conform to this pattern.
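
For reference, a condensed sketch of the new PREFETCH handling in the
prep step (mirrors the op_lock_and_prep() hunk in the diff below; the
helper name is illustrative):

  static int prefetch_lock_and_migrate(struct drm_exec *exec,
                                       struct xe_vm *vm,
                                       struct xe_vma_op *op)
  {
      struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
      u32 region = op->prefetch.region;
      int err;

      /* Lock the VMA's BO; no validation is needed for a prefetch. */
      err = vma_lock_and_validate(exec, vma, false);
      if (err)
          return err;

      /* The migration now happens at prep time, not at execution time. */
      if (!xe_vma_has_no_bo(vma))
          err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]);

      return err;
  }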

v2:
 - Rebase
 - New function names (Oak)
 - Update stale comment (Oak)

Cc: Oak Zeng <oak.zeng@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 84c6b10b4b78..2c0521573154 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2039,20 +2039,10 @@ static const u32 region_to_mem_type[] = {
 
 static struct dma_fence *
 xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
-	       struct xe_exec_queue *q, u32 region,
-	       struct xe_sync_entry *syncs, u32 num_syncs,
-	       bool first_op, bool last_op)
+	       struct xe_exec_queue *q, struct xe_sync_entry *syncs,
+	       u32 num_syncs, bool first_op, bool last_op)
 {
 	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
-	int err;
-
-	xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
-
-	if (!xe_vma_has_no_bo(vma)) {
-		err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]);
-		if (err)
-			return ERR_PTR(err);
-	}
 
 	if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated)) {
 		return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs,
@@ -2592,8 +2582,7 @@ static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma,
 				     op->flags & XE_VMA_OP_LAST);
 		break;
 	case DRM_GPUVA_OP_PREFETCH:
-		fence = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region,
-				       op->syncs, op->num_syncs,
+		fence = xe_vm_prefetch(vm, vma, op->q, op->syncs, op->num_syncs,
 				       op->flags & XE_VMA_OP_FIRST,
 				       op->flags & XE_VMA_OP_LAST);
 		break;
@@ -2823,9 +2812,20 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
 					    false);
 		break;
 	case DRM_GPUVA_OP_PREFETCH:
+	{
+		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
+		u32 region = op->prefetch.region;
+
+		xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type));
+
 		err = vma_lock_and_validate(exec,
-					    gpuva_to_vma(op->base.prefetch.va), true);
+					    gpuva_to_vma(op->base.prefetch.va),
+					    false);
+		if (!err && !xe_vma_has_no_bo(vma))
+			err = xe_bo_migrate(xe_vma_bo(vma),
+					    region_to_mem_type[region]);
 		break;
+	}
 	default:
 		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
 	}
-- 
2.34.1



* [PATCH 04/13] drm/xe: Add struct xe_vma_ops abstraction
  2024-04-10  5:40 [PATCH 00/13] Prep patches for 1 job per VM bind IOCTL Matthew Brost
                   ` (2 preceding siblings ...)
  2024-04-10  5:40 ` [PATCH 03/13] drm/xe: Move migrate to prefetch to op_lock_and_prep function Matthew Brost
@ 2024-04-10  5:40 ` Matthew Brost
  2024-04-10  5:40 ` [PATCH 05/13] drm/xe: Use xe_vma_ops to implement xe_vm_rebind Matthew Brost
                   ` (9 subsequent siblings)
  13 siblings, 0 replies; 40+ messages in thread
From: Matthew Brost @ 2024-04-10  5:40 UTC (permalink / raw)
  To: intel-xe; +Cc: Matthew Brost, Oak Zeng

Having a structure which encapsulates a list of VMA operations will help
enable 1 job for the entire list.
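
For reference, a minimal sketch of how the IOCTL now threads one
xe_vma_ops through parse and execute (condensed from the diff below; the
wrapper name is made up):

  static int bind_with_vma_ops(struct xe_vm *vm, struct xe_exec_queue *q,
                               struct drm_gpuva_ops *ops,
                               struct xe_sync_entry *syncs, u32 num_syncs)
  {
      struct xe_vma_ops vops;
      int err;

      xe_vma_ops_init(&vops);    /* just INIT_LIST_HEAD(&vops.list) for now */

      err = vm_bind_ioctl_ops_parse(vm, q, ops, syncs, num_syncs,
                                    &vops, true);
      if (err)
          return err;

      if (list_empty(&vops.list))
          return -ENODATA;       /* nothing to do */

      return vm_bind_ioctl_ops_execute(vm, &vops);
  }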

v2:
 - Rebase

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Oak Zeng <oak.zeng@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c       | 37 ++++++++++++++++++--------------
 drivers/gpu/drm/xe/xe_vm_types.h |  7 ++++++
 2 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 2c0521573154..4cd485d5bc0a 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2354,7 +2354,7 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 				   struct drm_gpuva_ops *ops,
 				   struct xe_sync_entry *syncs, u32 num_syncs,
-				   struct list_head *ops_list, bool last)
+				   struct xe_vma_ops *vops, bool last)
 {
 	struct xe_device *xe = vm->xe;
 	struct xe_vma_op *last_op = NULL;
@@ -2366,11 +2366,11 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 	drm_gpuva_for_each_op(__op, ops) {
 		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
 		struct xe_vma *vma;
-		bool first = list_empty(ops_list);
+		bool first = list_empty(&vops->list);
 		unsigned int flags = 0;
 
 		INIT_LIST_HEAD(&op->link);
-		list_add_tail(&op->link, ops_list);
+		list_add_tail(&op->link, &vops->list);
 
 		if (first) {
 			op->flags |= XE_VMA_OP_FIRST;
@@ -2496,7 +2496,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 	}
 
 	/* FIXME: Unhandled corner case */
-	XE_WARN_ON(!last_op && last && !list_empty(ops_list));
+	XE_WARN_ON(!last_op && last && !list_empty(&vops->list));
 
 	if (!last_op)
 		return 0;
@@ -2835,7 +2835,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
 
 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
 					   struct xe_vm *vm,
-					   struct list_head *ops_list)
+					   struct xe_vma_ops *vops)
 {
 	struct xe_vma_op *op;
 	int err;
@@ -2844,7 +2844,7 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
 	if (err)
 		return err;
 
-	list_for_each_entry(op, ops_list, link) {
+	list_for_each_entry(op, &vops->list, link) {
 		err = op_lock_and_prep(exec, vm, op);
 		if (err)
 			return err;
@@ -2854,13 +2854,13 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
 }
 
 static struct dma_fence *ops_execute(struct xe_vm *vm,
-				     struct list_head *ops_list,
+				     struct xe_vma_ops *vops,
 				     bool cleanup)
 {
 	struct xe_vma_op *op, *next;
 	struct dma_fence *fence = NULL;
 
-	list_for_each_entry_safe(op, next, ops_list, link) {
+	list_for_each_entry_safe(op, next, &vops->list, link) {
 		if (!IS_ERR(fence)) {
 			dma_fence_put(fence);
 			fence = xe_vma_op_execute(vm, op);
@@ -2878,7 +2878,7 @@ static struct dma_fence *ops_execute(struct xe_vm *vm,
 }
 
 static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
-				     struct list_head *ops_list)
+				     struct xe_vma_ops *vops)
 {
 	struct drm_exec exec;
 	struct dma_fence *fence;
@@ -2889,12 +2889,12 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
 	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
 		      DRM_EXEC_IGNORE_DUPLICATES, 0);
 	drm_exec_until_all_locked(&exec) {
-		err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, ops_list);
+		err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
 		drm_exec_retry_on_contention(&exec);
 		if (err)
 			goto unlock;
 
-		fence = ops_execute(vm, ops_list, true);
+		fence = ops_execute(vm, vops, true);
 		if (IS_ERR(fence)) {
 			err = PTR_ERR(fence);
 			/* FIXME: Killing VM rather than proper error handling */
@@ -3055,6 +3055,11 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
 	return err;
 }
 
+static void xe_vma_ops_init(struct xe_vma_ops *vops)
+{
+	INIT_LIST_HEAD(&vops->list);
+}
+
 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 {
 	struct xe_device *xe = to_xe_device(dev);
@@ -3068,7 +3073,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	u32 num_syncs, num_ufence = 0;
 	struct xe_sync_entry *syncs = NULL;
 	struct drm_xe_vm_bind_op *bind_ops;
-	LIST_HEAD(ops_list);
+	struct xe_vma_ops vops;
 	int err;
 	int i;
 
@@ -3219,6 +3224,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		goto free_syncs;
 	}
 
+	xe_vma_ops_init(&vops);
 	for (i = 0; i < args->num_binds; ++i) {
 		u64 range = bind_ops[i].range;
 		u64 addr = bind_ops[i].addr;
@@ -3238,14 +3244,13 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		}
 
 		err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs,
-					      &ops_list,
-					      i == args->num_binds - 1);
+					      &vops, i == args->num_binds - 1);
 		if (err)
 			goto unwind_ops;
 	}
 
 	/* Nothing to do */
-	if (list_empty(&ops_list)) {
+	if (list_empty(&vops.list)) {
 		err = -ENODATA;
 		goto unwind_ops;
 	}
@@ -3254,7 +3259,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (q)
 		xe_exec_queue_get(q);
 
-	err = vm_bind_ioctl_ops_execute(vm, &ops_list);
+	err = vm_bind_ioctl_ops_execute(vm, &vops);
 
 	up_write(&vm->lock);
 
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 0447c79c40a2..466b6c62d1f9 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -358,4 +358,11 @@ struct xe_vma_op {
 		struct xe_vma_op_prefetch prefetch;
 	};
 };
+
+/** struct xe_vma_ops - VMA operations */
+struct xe_vma_ops {
+	/** @list: list of VMA operations */
+	struct list_head list;
+};
+
 #endif
-- 
2.34.1



* [PATCH 05/13] drm/xe: Use xe_vma_ops to implement xe_vm_rebind
  2024-04-10  5:40 [PATCH 00/13] Prep patches for 1 job per VM bind IOCTL Matthew Brost
                   ` (3 preceding siblings ...)
  2024-04-10  5:40 ` [PATCH 04/13] drm/xe: Add struct xe_vma_ops abstraction Matthew Brost
@ 2024-04-10  5:40 ` Matthew Brost
  2024-04-19  3:43   ` Zeng, Oak
  2024-04-10  5:40 ` [PATCH 06/13] drm/xe: Simplify VM bind IOCTL error handling and cleanup Matthew Brost
                   ` (8 subsequent siblings)
  13 siblings, 1 reply; 40+ messages in thread
From: Matthew Brost @ 2024-04-10  5:40 UTC (permalink / raw)
  To: intel-xe; +Cc: Matthew Brost

All page table updates are moving to an xe_vma_ops interface to
implement 1 job per VM bind IOCTL. Convert xe_vm_rebind to use an
xe_vma_ops based interface.
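
For reference, a condensed sketch of the new rebind flow (simplified from
xe_vm_rebind() in the diff below; the error/cleanup handling of
already-added ops is trimmed):

  static int rebind_via_vma_ops(struct xe_vm *vm)
  {
      struct xe_vma_ops vops;
      struct xe_vma *vma;
      struct dma_fence *fence;
      int err;

      xe_vma_ops_init(&vops);

      /* Turn each VMA on the rebind list into a MAP op on the vops list. */
      list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
          err = xe_vm_ops_add_rebind(&vops, vma, vma->tile_present);
          if (err)
              return err;    /* the real code also frees the ops added so far */
      }

      /* Execute the whole list with one call; ops are kfree'd afterwards. */
      fence = ops_execute(vm, &vops, false);
      if (IS_ERR(fence))
          return PTR_ERR(fence);

      dma_fence_put(fence);
      return 0;
  }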

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 78 +++++++++++++++++++++++++++++++-------
 1 file changed, 64 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 4cd485d5bc0a..9d82396cf5d5 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -811,37 +811,87 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm)
 		list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
 }
 
-static struct dma_fence *
-xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
-	       struct xe_sync_entry *syncs, u32 num_syncs,
-	       bool first_op, bool last_op);
+static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
+				  u8 tile_mask)
+{
+	INIT_LIST_HEAD(&op->link);
+	op->base.op = DRM_GPUVA_OP_MAP;
+	op->base.map.va.addr = vma->gpuva.va.addr;
+	op->base.map.va.range = vma->gpuva.va.range;
+	op->base.map.gem.obj = vma->gpuva.gem.obj;
+	op->base.map.gem.offset = vma->gpuva.gem.offset;
+	op->map.vma = vma;
+	op->map.immediate = true;
+	op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE;
+	op->map.is_null = xe_vma_is_null(vma);
+}
+
+static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
+				u8 tile_mask)
+{
+	struct xe_vma_op *op;
+
+	op = kzalloc(sizeof(*op), GFP_KERNEL);
+	if (!op)
+		return -ENOMEM;
+
+	xe_vm_populate_rebind(op, vma, tile_mask);
+	list_add_tail(&op->link, &vops->list);
+
+	return 0;
+}
+
+static struct dma_fence *ops_execute(struct xe_vm *vm,
+				     struct xe_vma_ops *vops,
+				     bool cleanup);
+static void xe_vma_ops_init(struct xe_vma_ops *vops);
 
 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
 {
 	struct dma_fence *fence;
 	struct xe_vma *vma, *next;
+	struct xe_vma_ops vops;
+	struct xe_vma_op *op, *next_op;
+	int err;
 
 	lockdep_assert_held(&vm->lock);
-	if (xe_vm_in_lr_mode(vm) && !rebind_worker)
+	if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
+	    list_empty(&vm->rebind_list))
 		return 0;
 
+	xe_vma_ops_init(&vops);
+
 	xe_vm_assert_held(vm);
-	list_for_each_entry_safe(vma, next, &vm->rebind_list,
-				 combined_links.rebind) {
+	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
 		xe_assert(vm->xe, vma->tile_present);
 
-		list_del_init(&vma->combined_links.rebind);
 		if (rebind_worker)
 			trace_xe_vma_rebind_worker(vma);
 		else
 			trace_xe_vma_rebind_exec(vma);
-		fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
-		if (IS_ERR(fence))
-			return PTR_ERR(fence);
+
+		err = xe_vm_ops_add_rebind(&vops, vma,
+					   vma->tile_present);
+		if (err)
+			goto free_ops;
+	}
+
+	fence = ops_execute(vm, &vops, false);
+	if (IS_ERR(fence)) {
+		err = PTR_ERR(fence);
+	} else {
 		dma_fence_put(fence);
+		list_for_each_entry_safe(vma, next, &vm->rebind_list,
+					 combined_links.rebind)
+			list_del_init(&vma->combined_links.rebind);
+	}
+free_ops:
+	list_for_each_entry_safe(op, next_op, &vops.list, link) {
+		list_del(&op->link);
+		kfree(op);
 	}
 
-	return 0;
+	return err;
 }
 
 static void xe_vma_free(struct xe_vma *vma)
@@ -2516,7 +2566,7 @@ static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma,
 {
 	struct dma_fence *fence = NULL;
 
-	lockdep_assert_held_write(&vm->lock);
+	lockdep_assert_held(&vm->lock);
 
 	xe_vm_assert_held(vm);
 	xe_bo_assert_held(xe_vma_bo(vma));
@@ -2635,7 +2685,7 @@ xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
 {
 	struct dma_fence *fence = ERR_PTR(-ENOMEM);
 
-	lockdep_assert_held_write(&vm->lock);
+	lockdep_assert_held(&vm->lock);
 
 	switch (op->base.op) {
 	case DRM_GPUVA_OP_MAP:
-- 
2.34.1



* [PATCH 06/13] drm/xe: Simplify VM bind IOCTL error handling and cleanup
  2024-04-10  5:40 [PATCH 00/13] Prep patches for 1 job per VM bind IOCTL Matthew Brost
                   ` (4 preceding siblings ...)
  2024-04-10  5:40 ` [PATCH 05/13] drm/xe: Use xe_vma_ops to implement xe_vm_rebind Matthew Brost
@ 2024-04-10  5:40 ` Matthew Brost
  2024-04-19  4:19   ` Zeng, Oak
  2024-04-10  5:40 ` [PATCH 07/13] drm/xe: Use xe_vma_ops to implement page fault rebinds Matthew Brost
                   ` (7 subsequent siblings)
  13 siblings, 1 reply; 40+ messages in thread
From: Matthew Brost @ 2024-04-10  5:40 UTC (permalink / raw)
  To: intel-xe; +Cc: Matthew Brost, Oak Zeng

Clean up everything in the VM bind IOCTL in one path for both errors and
non-errors. Also move VM bind IOCTL cleanup from the ops (which are also
used by non-IOCTL binds) to the VM bind IOCTL.
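
For reference, a condensed excerpt of the unified exit path (taken from
the xe_vm_bind_ioctl() hunk in the diff below, with the earlier
success-only return removed):

      err = vm_bind_ioctl_ops_execute(vm, &vops);

      /* Single exit path from here on, for success and failure alike. */
  unwind_ops:
      if (err && err != -ENODATA)
          vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
      /* GPUVA ops are now always freed by the IOCTL, not by op cleanup. */
      for (i = args->num_binds - 1; i >= 0; --i)
          if (ops[i])
              drm_gpuva_ops_free(&vm->gpuvm, ops[i]);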

v2:
 - Break ops_execute on error (Oak)

Cc: Oak Zeng <oak.zeng@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c       | 67 ++++++--------------------------
 drivers/gpu/drm/xe/xe_vm_types.h |  5 ---
 2 files changed, 12 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 9d82396cf5d5..8f5b24c8f6cd 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -842,8 +842,7 @@ static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
 }
 
 static struct dma_fence *ops_execute(struct xe_vm *vm,
-				     struct xe_vma_ops *vops,
-				     bool cleanup);
+				     struct xe_vma_ops *vops);
 static void xe_vma_ops_init(struct xe_vma_ops *vops);
 
 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
@@ -876,7 +875,7 @@ int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
 			goto free_ops;
 	}
 
-	fence = ops_execute(vm, &vops, false);
+	fence = ops_execute(vm, &vops);
 	if (IS_ERR(fence)) {
 		err = PTR_ERR(fence);
 	} else {
@@ -2551,7 +2550,6 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 	if (!last_op)
 		return 0;
 
-	last_op->ops = ops;
 	if (last) {
 		last_op->flags |= XE_VMA_OP_LAST;
 		last_op->num_syncs = num_syncs;
@@ -2721,25 +2719,6 @@ xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
 	return fence;
 }
 
-static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
-{
-	bool last = op->flags & XE_VMA_OP_LAST;
-
-	if (last) {
-		while (op->num_syncs--)
-			xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
-		kfree(op->syncs);
-		if (op->q)
-			xe_exec_queue_put(op->q);
-	}
-	if (!list_empty(&op->link))
-		list_del(&op->link);
-	if (op->ops)
-		drm_gpuva_ops_free(&vm->gpuvm, op->ops);
-	if (last)
-		xe_vm_put(vm);
-}
-
 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
 			     bool post_commit, bool prev_post_commit,
 			     bool next_post_commit)
@@ -2816,8 +2795,6 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
 					 op->flags & XE_VMA_OP_PREV_COMMITTED,
 					 op->flags & XE_VMA_OP_NEXT_COMMITTED);
 		}
-
-		drm_gpuva_ops_free(&vm->gpuvm, __ops);
 	}
 }
 
@@ -2904,24 +2881,20 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
 }
 
 static struct dma_fence *ops_execute(struct xe_vm *vm,
-				     struct xe_vma_ops *vops,
-				     bool cleanup)
+				     struct xe_vma_ops *vops)
 {
 	struct xe_vma_op *op, *next;
 	struct dma_fence *fence = NULL;
 
 	list_for_each_entry_safe(op, next, &vops->list, link) {
-		if (!IS_ERR(fence)) {
-			dma_fence_put(fence);
-			fence = xe_vma_op_execute(vm, op);
-		}
+		dma_fence_put(fence);
+		fence = xe_vma_op_execute(vm, op);
 		if (IS_ERR(fence)) {
 			drm_warn(&vm->xe->drm, "VM op(%d) failed with %ld",
 				 op->base.op, PTR_ERR(fence));
 			fence = ERR_PTR(-ENOSPC);
+			break;
 		}
-		if (cleanup)
-			xe_vma_op_cleanup(vm, op);
 	}
 
 	return fence;
@@ -2944,7 +2917,7 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
 		if (err)
 			goto unlock;
 
-		fence = ops_execute(vm, vops, true);
+		fence = ops_execute(vm, vops);
 		if (IS_ERR(fence)) {
 			err = PTR_ERR(fence);
 			/* FIXME: Killing VM rather than proper error handling */
@@ -3305,30 +3278,14 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		goto unwind_ops;
 	}
 
-	xe_vm_get(vm);
-	if (q)
-		xe_exec_queue_get(q);
-
 	err = vm_bind_ioctl_ops_execute(vm, &vops);
 
-	up_write(&vm->lock);
-
-	if (q)
-		xe_exec_queue_put(q);
-	xe_vm_put(vm);
-
-	for (i = 0; bos && i < args->num_binds; ++i)
-		xe_bo_put(bos[i]);
-
-	kvfree(bos);
-	kvfree(ops);
-	if (args->num_binds > 1)
-		kvfree(bind_ops);
-
-	return err;
-
 unwind_ops:
-	vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
+	if (err && err != -ENODATA)
+		vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
+	for (i = args->num_binds - 1; i >= 0; --i)
+		if (ops[i])
+			drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
 free_syncs:
 	if (err == -ENODATA)
 		err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 466b6c62d1f9..149ab892967e 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -330,11 +330,6 @@ enum xe_vma_op_flags {
 struct xe_vma_op {
 	/** @base: GPUVA base operation */
 	struct drm_gpuva_op base;
-	/**
-	 * @ops: GPUVA ops, when set call drm_gpuva_ops_free after this
-	 * operations is processed
-	 */
-	struct drm_gpuva_ops *ops;
 	/** @q: exec queue for this operation */
 	struct xe_exec_queue *q;
 	/**
-- 
2.34.1



* [PATCH 07/13] drm/xe: Use xe_vma_ops to implement page fault rebinds
  2024-04-10  5:40 [PATCH 00/13] Prep patches for 1 job per VM bind IOCTL Matthew Brost
                   ` (5 preceding siblings ...)
  2024-04-10  5:40 ` [PATCH 06/13] drm/xe: Simplify VM bind IOCTL error handling and cleanup Matthew Brost
@ 2024-04-10  5:40 ` Matthew Brost
  2024-04-19 14:22   ` Zeng, Oak
  2024-04-10  5:40 ` [PATCH 08/13] drm/xe: Add some members to xe_vma_ops Matthew Brost
                   ` (6 subsequent siblings)
  13 siblings, 1 reply; 40+ messages in thread
From: Matthew Brost @ 2024-04-10  5:40 UTC (permalink / raw)
  To: intel-xe; +Cc: Matthew Brost

All page table updates are moving to an xe_vma_ops interface to
implement 1 job per VM bind IOCTL. Add an xe_vma_rebind function which is
implemented using the xe_vma_ops interface. Use xe_vma_rebind in the page
fault handler for rebinds.
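
For reference, a minimal sketch of the new page-fault usage (the wrapper
name is made up; the real handle_pagefault() hunk in the diff below holds
the required locks and keeps the fence around to wait on it):

  static int rebind_on_faulting_tile(struct xe_vm *vm, struct xe_vma *vma,
                                     struct xe_tile *tile)
  {
      struct dma_fence *fence;

      /* Builds a one-op xe_vma_ops list internally and executes it. */
      fence = xe_vma_rebind(vm, vma, BIT(tile->id));
      if (IS_ERR(fence))
          return PTR_ERR(fence);

      dma_fence_put(fence);
      return 0;
  }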

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_pagefault.c | 16 ++++----
 drivers/gpu/drm/xe/xe_vm.c           | 57 +++++++++++++++++++++++-----
 drivers/gpu/drm/xe/xe_vm.h           |  2 +
 drivers/gpu/drm/xe/xe_vm_types.h     |  2 +
 4 files changed, 58 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index fa9e9853c53b..040dd142c49c 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -19,7 +19,6 @@
 #include "xe_guc.h"
 #include "xe_guc_ct.h"
 #include "xe_migrate.h"
-#include "xe_pt.h"
 #include "xe_trace.h"
 #include "xe_vm.h"
 
@@ -204,15 +203,14 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
 		drm_exec_retry_on_contention(&exec);
 		if (ret)
 			goto unlock_dma_resv;
-	}
 
-	/* Bind VMA only to the GT that has faulted */
-	trace_xe_vma_pf_bind(vma);
-	fence = __xe_pt_bind_vma(tile, vma, xe_tile_migrate_engine(tile), NULL, 0,
-				 vma->tile_present & BIT(tile->id));
-	if (IS_ERR(fence)) {
-		ret = PTR_ERR(fence);
-		goto unlock_dma_resv;
+		/* Bind VMA only to the GT that has faulted */
+		trace_xe_vma_pf_bind(vma);
+		fence = xe_vma_rebind(vm, vma, BIT(tile->id));
+		if (IS_ERR(fence)) {
+			ret = PTR_ERR(fence);
+			goto unlock_dma_resv;
+		}
 	}
 
 	/*
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 8f5b24c8f6cd..54a69fbfbb00 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -815,6 +815,7 @@ static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
 				  u8 tile_mask)
 {
 	INIT_LIST_HEAD(&op->link);
+	op->tile_mask = tile_mask;
 	op->base.op = DRM_GPUVA_OP_MAP;
 	op->base.map.va.addr = vma->gpuva.va.addr;
 	op->base.map.va.range = vma->gpuva.va.range;
@@ -893,6 +894,33 @@ int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
 	return err;
 }
 
+struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
+{
+	struct dma_fence *fence = NULL;
+	struct xe_vma_ops vops;
+	struct xe_vma_op *op, *next_op;
+	int err;
+
+	lockdep_assert_held(&vm->lock);
+	xe_vm_assert_held(vm);
+	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
+
+	xe_vma_ops_init(&vops);
+
+	err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
+	if (err)
+		return ERR_PTR(err);
+
+	fence = ops_execute(vm, &vops);
+
+	list_for_each_entry_safe(op, next_op, &vops.list, link) {
+		list_del(&op->link);
+		kfree(op);
+	}
+
+	return fence;
+}
+
 static void xe_vma_free(struct xe_vma *vma)
 {
 	if (xe_vma_is_userptr(vma))
@@ -1796,7 +1824,7 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
 static struct dma_fence *
 xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
 	       struct xe_sync_entry *syncs, u32 num_syncs,
-	       bool first_op, bool last_op)
+	       u8 tile_mask, bool first_op, bool last_op)
 {
 	struct xe_tile *tile;
 	struct dma_fence *fence;
@@ -1804,7 +1832,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
 	struct dma_fence_array *cf = NULL;
 	struct xe_vm *vm = xe_vma_vm(vma);
 	int cur_fence = 0, i;
-	int number_tiles = hweight8(vma->tile_mask);
+	int number_tiles = hweight8(tile_mask);
 	int err;
 	u8 id;
 
@@ -1818,7 +1846,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
 	}
 
 	for_each_tile(tile, vm->xe, id) {
-		if (!(vma->tile_mask & BIT(id)))
+		if (!(tile_mask & BIT(id)))
 			goto next;
 
 		fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
@@ -1886,7 +1914,7 @@ find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
 static struct dma_fence *
 xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
 	   struct xe_bo *bo, struct xe_sync_entry *syncs, u32 num_syncs,
-	   bool immediate, bool first_op, bool last_op)
+	   u8 tile_mask, bool immediate, bool first_op, bool last_op)
 {
 	struct dma_fence *fence;
 	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
@@ -1902,8 +1930,8 @@ xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
 	vma->ufence = ufence ?: vma->ufence;
 
 	if (immediate) {
-		fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op,
-				       last_op);
+		fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, tile_mask,
+				       first_op, last_op);
 		if (IS_ERR(fence))
 			return fence;
 	} else {
@@ -2095,7 +2123,7 @@ xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 
 	if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated)) {
 		return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs,
-				  true, first_op, last_op);
+				  vma->tile_mask, true, first_op, last_op);
 	} else {
 		struct dma_fence *fence =
 			xe_exec_queue_last_fence_get(wait_exec_queue, vm);
@@ -2408,10 +2436,15 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 	struct xe_device *xe = vm->xe;
 	struct xe_vma_op *last_op = NULL;
 	struct drm_gpuva_op *__op;
+	struct xe_tile *tile;
+	u8 id, tile_mask = 0;
 	int err = 0;
 
 	lockdep_assert_held_write(&vm->lock);
 
+	for_each_tile(tile, vm->xe, id)
+		tile_mask |= 0x1 << id;
+
 	drm_gpuva_for_each_op(__op, ops) {
 		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
 		struct xe_vma *vma;
@@ -2428,6 +2461,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 		}
 
 		op->q = q;
+		op->tile_mask = tile_mask;
 
 		switch (op->base.op) {
 		case DRM_GPUVA_OP_MAP:
@@ -2574,6 +2608,7 @@ static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma,
 		fence = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
 				   op->syncs, op->num_syncs,
 				   op->map.immediate || !xe_vm_in_fault_mode(vm),
+				   op->tile_mask,
 				   op->flags & XE_VMA_OP_FIRST,
 				   op->flags & XE_VMA_OP_LAST);
 		break;
@@ -2600,7 +2635,9 @@ static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma,
 			dma_fence_put(fence);
 			fence = xe_vm_bind(vm, op->remap.prev, op->q,
 					   xe_vma_bo(op->remap.prev), op->syncs,
-					   op->num_syncs, true, false,
+					   op->num_syncs,
+					   op->remap.prev->tile_mask, true,
+					   false,
 					   op->flags & XE_VMA_OP_LAST && !next);
 			op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND;
 			if (IS_ERR(fence))
@@ -2614,8 +2651,8 @@ static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma,
 			fence = xe_vm_bind(vm, op->remap.next, op->q,
 					   xe_vma_bo(op->remap.next),
 					   op->syncs, op->num_syncs,
-					   true, false,
-					   op->flags & XE_VMA_OP_LAST);
+					   op->remap.next->tile_mask, true,
+					   false, op->flags & XE_VMA_OP_LAST);
 			op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND;
 			if (IS_ERR(fence))
 				break;
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 306cd0934a19..204a4ff63f88 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -208,6 +208,8 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm);
 int xe_vm_userptr_check_repin(struct xe_vm *vm);
 
 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
+struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma,
+				u8 tile_mask);
 
 int xe_vm_invalidate_vma(struct xe_vma *vma);
 
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 149ab892967e..e9cd6da6263a 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -343,6 +343,8 @@ struct xe_vma_op {
 	struct list_head link;
 	/** @flags: operation flags */
 	enum xe_vma_op_flags flags;
+	/** @tile_mask: Tile mask for operation */
+	u8 tile_mask;
 
 	union {
 		/** @map: VMA map operation specific data */
-- 
2.34.1



* [PATCH 08/13] drm/xe: Add some members to xe_vma_ops
  2024-04-10  5:40 [PATCH 00/13] Prep patches for 1 job per VM bind IOCTL Matthew Brost
                   ` (6 preceding siblings ...)
  2024-04-10  5:40 ` [PATCH 07/13] drm/xe: Use xe_vma_ops to implement page fault rebinds Matthew Brost
@ 2024-04-10  5:40 ` Matthew Brost
  2024-04-19 14:24   ` Zeng, Oak
  2024-04-10  5:40 ` [PATCH 09/13] drm/xe: Add vm_bind_ioctl_ops_fini helper Matthew Brost
                   ` (5 subsequent siblings)
  13 siblings, 1 reply; 40+ messages in thread
From: Matthew Brost @ 2024-04-10  5:40 UTC (permalink / raw)
  To: intel-xe; +Cc: Matthew Brost, Oak Zeng

This will help with moving to a single job for many bind operations.

v2:
 - Rebase

Cc: Oak Zeng <oak.zeng@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c       | 19 ++++++++++++++-----
 drivers/gpu/drm/xe/xe_vm_types.h |  8 ++++++++
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 54a69fbfbb00..09871538484b 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -844,7 +844,9 @@ static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
 
 static struct dma_fence *ops_execute(struct xe_vm *vm,
 				     struct xe_vma_ops *vops);
-static void xe_vma_ops_init(struct xe_vma_ops *vops);
+static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
+			    struct xe_exec_queue *q,
+			    struct xe_sync_entry *syncs, u32 num_syncs);
 
 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
 {
@@ -859,7 +861,7 @@ int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
 	    list_empty(&vm->rebind_list))
 		return 0;
 
-	xe_vma_ops_init(&vops);
+	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
 
 	xe_vm_assert_held(vm);
 	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
@@ -905,7 +907,7 @@ struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_ma
 	xe_vm_assert_held(vm);
 	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
 
-	xe_vma_ops_init(&vops);
+	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
 
 	err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
 	if (err)
@@ -3115,9 +3117,16 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
 	return err;
 }
 
-static void xe_vma_ops_init(struct xe_vma_ops *vops)
+static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
+			    struct xe_exec_queue *q,
+			    struct xe_sync_entry *syncs, u32 num_syncs)
 {
+	memset(vops, 0, sizeof(*vops));
 	INIT_LIST_HEAD(&vops->list);
+	vops->vm = vm;
+	vops->q = q;
+	vops->syncs = syncs;
+	vops->num_syncs = num_syncs;
 }
 
 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
@@ -3284,7 +3293,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		goto free_syncs;
 	}
 
-	xe_vma_ops_init(&vops);
+	xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
 	for (i = 0; i < args->num_binds; ++i) {
 		u64 range = bind_ops[i].range;
 		u64 addr = bind_ops[i].addr;
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index e9cd6da6263a..ce1a63a5e3e7 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -360,6 +360,14 @@ struct xe_vma_op {
 struct xe_vma_ops {
 	/** @list: list of VMA operations */
 	struct list_head list;
+	/** @vm: VM */
+	struct xe_vm *vm;
+	/** @q: exec queue these operations */
+	struct xe_exec_queue *q;
+	/** @syncs: syncs these operation */
+	struct xe_sync_entry *syncs;
+	/** @num_syncs: number of syncs */
+	u32 num_syncs;
 };
 
 #endif
-- 
2.34.1



* [PATCH 09/13] drm/xe: Add vm_bind_ioctl_ops_fini helper
  2024-04-10  5:40 [PATCH 00/13] Prep patches for 1 job per VM bind IOCTL Matthew Brost
                   ` (7 preceding siblings ...)
  2024-04-10  5:40 ` [PATCH 08/13] drm/xe: Add some members to xe_vma_ops Matthew Brost
@ 2024-04-10  5:40 ` Matthew Brost
  2024-04-19 14:51   ` Zeng, Oak
  2024-04-10  5:40 ` [PATCH 10/13] drm/xe: Move ufence check to op_lock Matthew Brost
                   ` (4 subsequent siblings)
  13 siblings, 1 reply; 40+ messages in thread
From: Matthew Brost @ 2024-04-10  5:40 UTC (permalink / raw)
  To: intel-xe; +Cc: Matthew Brost, Oak Zeng

Simplify VM bind code by signaling out-fences / destroying VMAs in a
single location. This will help with the transition to a single job for
many bind ops.
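
For reference, the shape of the new single finish point (condensed from
the vm_bind_ioctl_ops_fini() hunk in the diff below, with comments added):

  static void ops_fini_sketch(struct xe_vm *vm, struct xe_vma_ops *vops,
                              struct dma_fence *fence)
  {
      struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
      struct xe_vma_op *op;
      int i;

      /* Destroy VMAs removed by UNMAP/REMAP ops, keyed to the bind fence. */
      list_for_each_entry(op, &vops->list, link) {
          if (op->base.op == DRM_GPUVA_OP_UNMAP)
              xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
          else if (op->base.op == DRM_GPUVA_OP_REMAP)
              xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), fence);
      }

      /* Signal all out-fences and record the last fence in one place. */
      for (i = 0; i < vops->num_syncs; i++)
          xe_sync_entry_signal(vops->syncs + i, fence);
      xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
      dma_fence_put(fence);
  }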

v2:
 - s/vm_bind_ioctl_ops_install_fences/vm_bind_ioctl_ops_fini (Oak)
 - Set last fence in vm_bind_ioctl_ops_fini (Oak)

Cc: Oak Zeng <oak.zeng@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 62 +++++++++++++++-----------------------
 1 file changed, 24 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 09871538484b..97384c77f662 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1748,7 +1748,7 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
 	struct dma_fence *fence = NULL;
 	struct dma_fence **fences = NULL;
 	struct dma_fence_array *cf = NULL;
-	int cur_fence = 0, i;
+	int cur_fence = 0;
 	int number_tiles = hweight8(vma->tile_present);
 	int err;
 	u8 id;
@@ -1806,10 +1806,6 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
 
 	fence = cf ? &cf->base : !fence ?
 		xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence;
-	if (last_op) {
-		for (i = 0; i < num_syncs; i++)
-			xe_sync_entry_signal(&syncs[i], fence);
-	}
 
 	return fence;
 
@@ -1833,7 +1829,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
 	struct dma_fence **fences = NULL;
 	struct dma_fence_array *cf = NULL;
 	struct xe_vm *vm = xe_vma_vm(vma);
-	int cur_fence = 0, i;
+	int cur_fence = 0;
 	int number_tiles = hweight8(tile_mask);
 	int err;
 	u8 id;
@@ -1880,12 +1876,6 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
 		}
 	}
 
-	if (last_op) {
-		for (i = 0; i < num_syncs; i++)
-			xe_sync_entry_signal(&syncs[i],
-					     cf ? &cf->base : fence);
-	}
-
 	return cf ? &cf->base : fence;
 
 err_fences:
@@ -1937,20 +1927,11 @@ xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
 		if (IS_ERR(fence))
 			return fence;
 	} else {
-		int i;
-
 		xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
 
 		fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm);
-		if (last_op) {
-			for (i = 0; i < num_syncs; i++)
-				xe_sync_entry_signal(&syncs[i], fence);
-		}
 	}
 
-	if (last_op)
-		xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
-
 	return fence;
 }
 
@@ -1960,7 +1941,6 @@ xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
 	     u32 num_syncs, bool first_op, bool last_op)
 {
 	struct dma_fence *fence;
-	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
 
 	xe_vm_assert_held(vm);
 	xe_bo_assert_held(xe_vma_bo(vma));
@@ -1969,10 +1949,6 @@ xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
 	if (IS_ERR(fence))
 		return fence;
 
-	xe_vma_destroy(vma, fence);
-	if (last_op)
-		xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
-
 	return fence;
 }
 
@@ -2127,17 +2103,7 @@ xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 		return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs,
 				  vma->tile_mask, true, first_op, last_op);
 	} else {
-		struct dma_fence *fence =
-			xe_exec_queue_last_fence_get(wait_exec_queue, vm);
-		int i;
-
-		/* Nothing to do, signal fences now */
-		if (last_op) {
-			for (i = 0; i < num_syncs; i++)
-				xe_sync_entry_signal(&syncs[i], fence);
-		}
-
-		return fence;
+		return xe_exec_queue_last_fence_get(wait_exec_queue, vm);
 	}
 }
 
@@ -2939,6 +2905,26 @@ static struct dma_fence *ops_execute(struct xe_vm *vm,
 	return fence;
 }
 
+static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
+				   struct dma_fence *fence)
+{
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
+	struct xe_vma_op *op;
+	int i;
+
+	list_for_each_entry(op, &vops->list, link) {
+		if (op->base.op == DRM_GPUVA_OP_UNMAP)
+			xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
+		else if (op->base.op == DRM_GPUVA_OP_REMAP)
+			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
+				       fence);
+	}
+	for (i = 0; i < vops->num_syncs; i++)
+		xe_sync_entry_signal(vops->syncs + i, fence);
+	xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
+	dma_fence_put(fence);
+}
+
 static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
 				     struct xe_vma_ops *vops)
 {
@@ -2963,7 +2949,7 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
 			xe_vm_kill(vm, false);
 			goto unlock;
 		} else {
-			dma_fence_put(fence);
+			vm_bind_ioctl_ops_fini(vm, vops, fence);
 		}
 	}
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 10/13] drm/xe: Move ufence check to op_lock
  2024-04-10  5:40 [PATCH 00/13] Prep patches for 1 job per VM bind IOCTL Matthew Brost
                   ` (8 preceding siblings ...)
  2024-04-10  5:40 ` [PATCH 09/13] drm/xe: Add vm_bind_ioctl_ops_fini helper Matthew Brost
@ 2024-04-10  5:40 ` Matthew Brost
  2024-04-19 14:56   ` Zeng, Oak
  2024-04-10  5:40 ` [PATCH 11/13] drm/xe: Move ufence add to vm_bind_ioctl_ops_fini Matthew Brost
                   ` (3 subsequent siblings)
  13 siblings, 1 reply; 40+ messages in thread
From: Matthew Brost @ 2024-04-10  5:40 UTC (permalink / raw)
  To: intel-xe; +Cc: Matthew Brost

Rather than checking for an unsignaled ufence at unbind time, check for
this during the op_lock function. This will help with the transition to
1 job per VM bind IOCTL.

v2:
 - Rebase

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 97384c77f662..0319e70577fe 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1755,16 +1755,6 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
 
 	trace_xe_vma_unbind(vma);
 
-	if (vma->ufence) {
-		struct xe_user_fence * const f = vma->ufence;
-
-		if (!xe_sync_ufence_get_status(f))
-			return ERR_PTR(-EBUSY);
-
-		vma->ufence = NULL;
-		xe_sync_ufence_put(f);
-	}
-
 	if (number_tiles > 1) {
 		fences = kmalloc_array(number_tiles, sizeof(*fences),
 				       GFP_KERNEL);
@@ -2819,6 +2809,21 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
 	return err;
 }
 
+static int check_ufence(struct xe_vma *vma)
+{
+	if (vma->ufence) {
+		struct xe_user_fence * const f = vma->ufence;
+
+		if (!xe_sync_ufence_get_status(f))
+			return -EBUSY;
+
+		vma->ufence = NULL;
+		xe_sync_ufence_put(f);
+	}
+
+	return 0;
+}
+
 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
 			    struct xe_vma_op *op)
 {
@@ -2830,6 +2835,10 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
 					    !xe_vm_in_fault_mode(vm));
 		break;
 	case DRM_GPUVA_OP_REMAP:
+		err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
+		if (err)
+			break;
+
 		err = vma_lock_and_validate(exec,
 					    gpuva_to_vma(op->base.remap.unmap->va),
 					    false);
@@ -2839,6 +2848,10 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
 			err = vma_lock_and_validate(exec, op->remap.next, true);
 		break;
 	case DRM_GPUVA_OP_UNMAP:
+		err = check_ufence(gpuva_to_vma(op->base.unmap.va));
+		if (err)
+			break;
+
 		err = vma_lock_and_validate(exec,
 					    gpuva_to_vma(op->base.unmap.va),
 					    false);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 11/13] drm/xe: Move ufence add to vm_bind_ioctl_ops_fini
  2024-04-10  5:40 [PATCH 00/13] Prep patches for 1 job per VM bind IOCTL Matthew Brost
                   ` (9 preceding siblings ...)
  2024-04-10  5:40 ` [PATCH 10/13] drm/xe: Move ufence check to op_lock Matthew Brost
@ 2024-04-10  5:40 ` Matthew Brost
  2024-04-19 15:24   ` Zeng, Oak
  2024-04-10  5:40 ` [PATCH 12/13] drm/xe: Add xe_gt_tlb_invalidation_range and convert PT layer to use this Matthew Brost
                   ` (2 subsequent siblings)
  13 siblings, 1 reply; 40+ messages in thread
From: Matthew Brost @ 2024-04-10  5:40 UTC (permalink / raw)
  To: intel-xe; +Cc: Matthew Brost

Rather than adding a ufence to a VMA in the bind function, add the
ufence to all VMAs in the IOCTL that require binds in
vm_bind_ioctl_ops_fini. This will help with the transition to 1 job per
VM bind IOCTL.
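
The reference handling follows the usual get/put pattern: look the
ufence up once from the syncs, take one extra reference per VMA it gets
attached to, and drop the lookup reference when done (condensed sketch
of the hunks below):

	ufence = find_ufence_get(vops->syncs, vops->num_syncs);
	list_for_each_entry(op, &vops->list, link)
		if (ufence)
			op_add_ufence(vm, op, ufence);	/* takes a ref per VMA */
	if (ufence)
		xe_sync_ufence_put(ufence);		/* drops the lookup ref */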

v2:
 - Rebase

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_sync.c | 15 ++++++++++++
 drivers/gpu/drm/xe/xe_sync.h |  1 +
 drivers/gpu/drm/xe/xe_vm.c   | 44 ++++++++++++++++++++++++++++++------
 3 files changed, 53 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index 65f1f1628235..2883d9aca404 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -338,6 +338,21 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
 	return ERR_PTR(-ENOMEM);
 }
 
+/**
+ * __xe_sync_ufence_get() - Get user fence from user fence
+ * @ufence: input user fence
+ *
+ * Get a user fence reference from a user fence
+ *
+ * Return: xe_user_fence pointer with reference
+ */
+struct xe_user_fence *__xe_sync_ufence_get(struct xe_user_fence *ufence)
+{
+	user_fence_get(ufence);
+
+	return ufence;
+}
+
 /**
  * xe_sync_ufence_get() - Get user fence from sync
  * @sync: input sync
diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
index 3e03396af2c6..006dbf780793 100644
--- a/drivers/gpu/drm/xe/xe_sync.h
+++ b/drivers/gpu/drm/xe/xe_sync.h
@@ -37,6 +37,7 @@ static inline bool xe_sync_is_ufence(struct xe_sync_entry *sync)
 	return !!sync->ufence;
 }
 
+struct xe_user_fence *__xe_sync_ufence_get(struct xe_user_fence *ufence);
 struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync);
 void xe_sync_ufence_put(struct xe_user_fence *ufence);
 int xe_sync_ufence_get_status(struct xe_user_fence *ufence);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 0319e70577fe..1da68a03407b 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1900,17 +1900,10 @@ xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
 {
 	struct dma_fence *fence;
 	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
-	struct xe_user_fence *ufence;
 
 	xe_vm_assert_held(vm);
 	xe_bo_assert_held(bo);
 
-	ufence = find_ufence_get(syncs, num_syncs);
-	if (vma->ufence && ufence)
-		xe_sync_ufence_put(vma->ufence);
-
-	vma->ufence = ufence ?: vma->ufence;
-
 	if (immediate) {
 		fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, tile_mask,
 				       first_op, last_op);
@@ -2918,20 +2911,57 @@ static struct dma_fence *ops_execute(struct xe_vm *vm,
 	return fence;
 }
 
+static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
+{
+	if (vma->ufence)
+		xe_sync_ufence_put(vma->ufence);
+	vma->ufence = __xe_sync_ufence_get(ufence);
+}
+
+static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
+			  struct xe_user_fence *ufence)
+{
+	switch (op->base.op) {
+	case DRM_GPUVA_OP_MAP:
+		vma_add_ufence(op->map.vma, ufence);
+		break;
+	case DRM_GPUVA_OP_REMAP:
+		if (op->remap.prev)
+			vma_add_ufence(op->remap.prev, ufence);
+		if (op->remap.next)
+			vma_add_ufence(op->remap.next, ufence);
+		break;
+	case DRM_GPUVA_OP_UNMAP:
+		break;
+	case DRM_GPUVA_OP_PREFETCH:
+		vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
+		break;
+	default:
+		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
+	}
+}
+
 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
 				   struct dma_fence *fence)
 {
 	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
+	struct xe_user_fence *ufence;
 	struct xe_vma_op *op;
 	int i;
 
+	ufence = find_ufence_get(vops->syncs, vops->num_syncs);
 	list_for_each_entry(op, &vops->list, link) {
+		if (ufence)
+			op_add_ufence(vm, op, ufence);
+
 		if (op->base.op == DRM_GPUVA_OP_UNMAP)
 			xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
 		else if (op->base.op == DRM_GPUVA_OP_REMAP)
 			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
 				       fence);
 	}
+	if (ufence)
+		xe_sync_ufence_put(ufence);
 	for (i = 0; i < vops->num_syncs; i++)
 		xe_sync_entry_signal(vops->syncs + i, fence);
 	xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 12/13] drm/xe: Add xe_gt_tlb_invalidation_range and convert PT layer to use this
  2024-04-10  5:40 [PATCH 00/13] Prep patches for 1 job per VM bind IOCTL Matthew Brost
                   ` (10 preceding siblings ...)
  2024-04-10  5:40 ` [PATCH 11/13] drm/xe: Move ufence add to vm_bind_ioctl_ops_fini Matthew Brost
@ 2024-04-10  5:40 ` Matthew Brost
  2024-04-19 16:00   ` Zeng, Oak
  2024-04-10  5:40 ` [PATCH 13/13] drm/xe: Delete PT update selftest Matthew Brost
  2024-04-10  6:28 ` ✗ CI.Patch_applied: failure for Prep patches for 1 job per VM bind IOCTL Patchwork
  13 siblings, 1 reply; 40+ messages in thread
From: Matthew Brost @ 2024-04-10  5:40 UTC (permalink / raw)
  To: intel-xe; +Cc: Matthew Brost

xe_gt_tlb_invalidation_range accepts a start and end address rather than
a VMA. This will enable multiple VMAs to be invalidated in a single
invalidation. Update the PT layer to use this new function.
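
For example, a caller that has updated PTEs covering several contiguous
VMAs can now issue a single invalidation for the whole span instead of
one per VMA (sketch only; 'first' and 'last' are hypothetical VMAs
bounding the updated range, and the fence argument may be NULL as
before):

	ret = xe_gt_tlb_invalidation_range(gt, NULL,
					   xe_vma_start(first),
					   xe_vma_end(last),
					   xe_vma_vm(first)->usm.asid);
	if (ret >= 0)
		xe_gt_tlb_invalidation_wait(gt, ret);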

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 59 +++++++++++++++------
 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h |  3 ++
 drivers/gpu/drm/xe/xe_pt.c                  | 25 ++++++---
 3 files changed, 65 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 93df2d7969b3..65409f494f59 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -263,11 +263,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
 }
 
 /**
- * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
+ * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an
+ * address range
+ *
  * @gt: graphics tile
  * @fence: invalidation fence which will be signal on TLB invalidation
  * completion, can be NULL
- * @vma: VMA to invalidate
+ * @start: start address
+ * @end: end address
+ * @asid: address space id
  *
  * Issue a range based TLB invalidation if supported, if not fallback to a full
  * TLB invalidation. Completion of TLB is asynchronous and caller can either use
@@ -277,17 +281,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
  * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
  * negative error code on error.
  */
-int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
-			       struct xe_gt_tlb_invalidation_fence *fence,
-			       struct xe_vma *vma)
+int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
+				 struct xe_gt_tlb_invalidation_fence *fence,
+				 u64 start, u64 end, u32 asid)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 #define MAX_TLB_INVALIDATION_LEN	7
 	u32 action[MAX_TLB_INVALIDATION_LEN];
 	int len = 0;
 
-	xe_gt_assert(gt, vma);
-
 	/* Execlists not supported */
 	if (gt_to_xe(gt)->info.force_execlist) {
 		if (fence)
@@ -301,8 +303,8 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 	if (!xe->info.has_range_tlb_invalidation) {
 		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
 	} else {
-		u64 start = xe_vma_start(vma);
-		u64 length = xe_vma_size(vma);
+		u64 orig_start = start;
+		u64 length = end - start;
 		u64 align, end;
 
 		if (length < SZ_4K)
@@ -315,12 +317,12 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 		 * address mask covering the required range.
 		 */
 		align = roundup_pow_of_two(length);
-		start = ALIGN_DOWN(xe_vma_start(vma), align);
-		end = ALIGN(xe_vma_end(vma), align);
+		start = ALIGN_DOWN(start, align);
+		end = ALIGN(end, align);
 		length = align;
 		while (start + length < end) {
 			length <<= 1;
-			start = ALIGN_DOWN(xe_vma_start(vma), length);
+			start = ALIGN_DOWN(orig_start, length);
 		}
 
 		/*
@@ -329,16 +331,17 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 		 */
 		if (length >= SZ_2M) {
 			length = max_t(u64, SZ_16M, length);
-			start = ALIGN_DOWN(xe_vma_start(vma), length);
+			start = ALIGN_DOWN(orig_start, length);
 		}
 
 		xe_gt_assert(gt, length >= SZ_4K);
 		xe_gt_assert(gt, is_power_of_2(length));
-		xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1)));
+		xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1,
+						    ilog2(SZ_2M) + 1)));
 		xe_gt_assert(gt, IS_ALIGNED(start, length));
 
 		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
-		action[len++] = xe_vma_vm(vma)->usm.asid;
+		action[len++] = asid;
 		action[len++] = lower_32_bits(start);
 		action[len++] = upper_32_bits(start);
 		action[len++] = ilog2(length) - ilog2(SZ_4K);
@@ -349,6 +352,32 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 	return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
 }
 
+/**
+ * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
+ * @gt: graphics tile
+ * @fence: invalidation fence which will be signal on TLB invalidation
+ * completion, can be NULL
+ * @vma: VMA to invalidate
+ *
+ * Issue a range based TLB invalidation if supported, if not fallback to a full
+ * TLB invalidation. Completion of TLB is asynchronous and caller can either use
+ * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
+ * completion.
+ *
+ * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
+ * negative error code on error.
+ */
+int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
+			       struct xe_gt_tlb_invalidation_fence *fence,
+			       struct xe_vma *vma)
+{
+	xe_gt_assert(gt, vma);
+
+	return xe_gt_tlb_invalidation_range(gt, fence, xe_vma_start(vma),
+					    xe_vma_end(vma),
+					    xe_vma_vm(vma)->usm.asid);
+}
+
 /**
  * xe_gt_tlb_invalidation_wait - Wait for TLB to complete
  * @gt: graphics tile
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
index fbb743d80d2c..bf3bebd9f985 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
@@ -20,6 +20,9 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt);
 int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 			       struct xe_gt_tlb_invalidation_fence *fence,
 			       struct xe_vma *vma);
+int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
+				 struct xe_gt_tlb_invalidation_fence *fence,
+				 u64 start, u64 end, u32 asid);
 int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
 int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
 
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 5b7930f46cf3..8d3765d3351e 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1075,10 +1075,12 @@ static const struct xe_migrate_pt_update_ops userptr_bind_ops = {
 struct invalidation_fence {
 	struct xe_gt_tlb_invalidation_fence base;
 	struct xe_gt *gt;
-	struct xe_vma *vma;
 	struct dma_fence *fence;
 	struct dma_fence_cb cb;
 	struct work_struct work;
+	u64 start;
+	u64 end;
+	u32 asid;
 };
 
 static const char *
@@ -1121,13 +1123,14 @@ static void invalidation_fence_work_func(struct work_struct *w)
 		container_of(w, struct invalidation_fence, work);
 
 	trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base);
-	xe_gt_tlb_invalidation_vma(ifence->gt, &ifence->base, ifence->vma);
+	xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start,
+				     ifence->end, ifence->asid);
 }
 
 static int invalidation_fence_init(struct xe_gt *gt,
 				   struct invalidation_fence *ifence,
 				   struct dma_fence *fence,
-				   struct xe_vma *vma)
+				   u64 start, u64 end, u32 asid)
 {
 	int ret;
 
@@ -1144,7 +1147,9 @@ static int invalidation_fence_init(struct xe_gt *gt,
 	dma_fence_get(&ifence->base.base);	/* Ref for caller */
 	ifence->fence = fence;
 	ifence->gt = gt;
-	ifence->vma = vma;
+	ifence->start = start;
+	ifence->end = end;
+	ifence->asid = asid;
 
 	INIT_WORK(&ifence->work, invalidation_fence_work_func);
 	ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
@@ -1295,8 +1300,11 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
 
 		/* TLB invalidation must be done before signaling rebind */
 		if (ifence) {
-			int err = invalidation_fence_init(tile->primary_gt, ifence, fence,
-							  vma);
+			int err = invalidation_fence_init(tile->primary_gt,
+							  ifence, fence,
+							  xe_vma_start(vma),
+							  xe_vma_end(vma),
+							  xe_vma_vm(vma)->usm.asid);
 			if (err) {
 				dma_fence_put(fence);
 				kfree(ifence);
@@ -1641,7 +1649,10 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
 			dma_fence_wait(fence, false);
 
 		/* TLB invalidation must be done before signaling unbind */
-		err = invalidation_fence_init(tile->primary_gt, ifence, fence, vma);
+		err = invalidation_fence_init(tile->primary_gt, ifence, fence,
+					      xe_vma_start(vma),
+					      xe_vma_end(vma),
+					      xe_vma_vm(vma)->usm.asid);
 		if (err) {
 			dma_fence_put(fence);
 			kfree(ifence);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 13/13] drm/xe: Delete PT update selftest
  2024-04-10  5:40 [PATCH 00/13] Prep patches for 1 job per VM bind IOCTL Matthew Brost
                   ` (11 preceding siblings ...)
  2024-04-10  5:40 ` [PATCH 12/13] drm/xe: Add xe_gt_tlb_invalidation_range and convert PT layer to use this Matthew Brost
@ 2024-04-10  5:40 ` Matthew Brost
  2024-04-10  6:28 ` ✗ CI.Patch_applied: failure for Prep patches for 1 job per VM bind IOCTL Patchwork
  13 siblings, 0 replies; 40+ messages in thread
From: Matthew Brost @ 2024-04-10  5:40 UTC (permalink / raw)
  To: intel-xe; +Cc: Matthew Brost, Oak Zeng

IGTs (e.g. xe_vm) can provide the exact same coverage as the PT update
selftest. The PT update selftest depends on internal functions which can
change, so maintaining this test is costly and provides no extra
coverage. Delete this test.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Oak Zeng <oak.zeng@intel.com>
---
 drivers/gpu/drm/xe/tests/xe_migrate.c | 86 ---------------------------
 1 file changed, 86 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 1332832e2f97..35cd5183ce09 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -62,36 +62,6 @@ static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe,
 	return 0;
 }
 
-static void
-sanity_populate_cb(struct xe_migrate_pt_update *pt_update,
-		   struct xe_tile *tile, struct iosys_map *map, void *dst,
-		   u32 qword_ofs, u32 num_qwords,
-		   const struct xe_vm_pgtable_update *update)
-{
-	struct migrate_test_params *p =
-		to_migrate_test_params(xe_cur_kunit_priv(XE_TEST_LIVE_MIGRATE));
-	int i;
-	u64 *ptr = dst;
-	u64 value;
-
-	for (i = 0; i < num_qwords; i++) {
-		value = (qword_ofs + i - update->ofs) * 0x1111111111111111ULL;
-		if (map)
-			xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) *
-				  sizeof(u64), u64, value);
-		else
-			ptr[i] = value;
-	}
-
-	kunit_info(xe_cur_kunit(), "Used %s.\n", map ? "CPU" : "GPU");
-	if (p->force_gpu && map)
-		KUNIT_FAIL(xe_cur_kunit(), "GPU pagetable update used CPU.\n");
-}
-
-static const struct xe_migrate_pt_update_ops sanity_ops = {
-	.populate = sanity_populate_cb,
-};
-
 #define check(_retval, _expected, str, _test)				\
 	do { if ((_retval) != (_expected)) {				\
 			KUNIT_FAIL(_test, "Sanity check failed: " str	\
@@ -209,57 +179,6 @@ static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo,
 	test_copy(m, bo, test, region);
 }
 
-static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt,
-			   struct kunit *test, bool force_gpu)
-{
-	struct xe_device *xe = tile_to_xe(m->tile);
-	struct dma_fence *fence;
-	u64 retval, expected;
-	ktime_t then, now;
-	int i;
-
-	struct xe_vm_pgtable_update update = {
-		.ofs = 1,
-		.qwords = 0x10,
-		.pt_bo = pt,
-	};
-	struct xe_migrate_pt_update pt_update = {
-		.ops = &sanity_ops,
-	};
-	struct migrate_test_params p = {
-		.base.id = XE_TEST_LIVE_MIGRATE,
-		.force_gpu = force_gpu,
-	};
-
-	test->priv = &p;
-	/* Test xe_migrate_update_pgtables() updates the pagetable as expected */
-	expected = 0xf0f0f0f0f0f0f0f0ULL;
-	xe_map_memset(xe, &pt->vmap, 0, (u8)expected, pt->size);
-
-	then = ktime_get();
-	fence = xe_migrate_update_pgtables(m, m->q->vm, NULL, m->q, &update, 1,
-					   NULL, 0, &pt_update);
-	now = ktime_get();
-	if (sanity_fence_failed(xe, fence, "Migration pagetable update", test))
-		return;
-
-	kunit_info(test, "Updating without syncing took %llu us,\n",
-		   (unsigned long long)ktime_to_us(ktime_sub(now, then)));
-
-	dma_fence_put(fence);
-	retval = xe_map_rd(xe, &pt->vmap, 0, u64);
-	check(retval, expected, "PTE[0] must stay untouched", test);
-
-	for (i = 0; i < update.qwords; i++) {
-		retval = xe_map_rd(xe, &pt->vmap, (update.ofs + i) * 8, u64);
-		check(retval, i * 0x1111111111111111ULL, "PTE update", test);
-	}
-
-	retval = xe_map_rd(xe, &pt->vmap, 8 * (update.ofs + update.qwords),
-			   u64);
-	check(retval, expected, "PTE[0x11] must stay untouched", test);
-}
-
 static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 {
 	struct xe_tile *tile = m->tile;
@@ -398,11 +317,6 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 		test_copy_vram(m, big, test);
 	}
 
-	kunit_info(test, "Testing page table update using CPU if GPU idle.\n");
-	test_pt_update(m, pt, test, false);
-	kunit_info(test, "Testing page table update using GPU\n");
-	test_pt_update(m, pt, test, true);
-
 out:
 	xe_bb_free(bb, NULL);
 free_tiny:
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* ✗ CI.Patch_applied: failure for Prep patches for 1 job per VM bind IOCTL
  2024-04-10  5:40 [PATCH 00/13] Prep patches for 1 job per VM bind IOCTL Matthew Brost
                   ` (12 preceding siblings ...)
  2024-04-10  5:40 ` [PATCH 13/13] drm/xe: Delete PT update selftest Matthew Brost
@ 2024-04-10  6:28 ` Patchwork
  13 siblings, 0 replies; 40+ messages in thread
From: Patchwork @ 2024-04-10  6:28 UTC (permalink / raw)
  To: Matthew Brost; +Cc: intel-xe

== Series Details ==

Series: Prep patches for 1 job per VM bind IOCTL
URL   : https://patchwork.freedesktop.org/series/132246/
State : failure

== Summary ==

=== Applying kernel patches on branch 'drm-tip' with base: ===
Base commit: 057ec21a54cd drm-tip: 2024y-04m-09d-21h-23m-50s UTC integration manifest
=== git am output follows ===
error: patch failed: drivers/gpu/drm/xe/xe_vm.c:2547
error: drivers/gpu/drm/xe/xe_vm.c: patch does not apply
hint: Use 'git am --show-current-patch=diff' to see the failed patch
Applying: drm/xe: Lock all gpuva ops during VM bind IOCTL
Applying: drm/xe: Add ops_execute function which returns a fence
Patch failed at 0002 drm/xe: Add ops_execute function which returns a fence
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".



^ permalink raw reply	[flat|nested] 40+ messages in thread

* RE: [PATCH 01/13] drm/xe: Lock all gpuva ops during VM bind IOCTL
  2024-04-10  5:40 ` [PATCH 01/13] drm/xe: Lock all gpuva ops during " Matthew Brost
@ 2024-04-16 15:51   ` Zeng, Oak
  2024-04-16 17:02     ` Matthew Brost
  0 siblings, 1 reply; 40+ messages in thread
From: Zeng, Oak @ 2024-04-16 15:51 UTC (permalink / raw)
  To: Brost, Matthew, intel-xe



> -----Original Message-----
> From: Brost, Matthew <matthew.brost@intel.com>
> Sent: Wednesday, April 10, 2024 1:41 AM
> To: intel-xe@lists.freedesktop.org
> Cc: Brost, Matthew <matthew.brost@intel.com>; Zeng, Oak
> <oak.zeng@intel.com>
> Subject: [PATCH 01/13] drm/xe: Lock all gpuva ops during VM bind IOCTL
> 
> Lock all BOs used in gpuva ops and validate all BOs in a single step
> during the VM bind IOCTL.
> 
> This help with the transition to making all gpuva ops in a VM bind IOCTL
> a single atomic job which is required for proper error handling.
> 
> v2:
>  - Better commit message (Oak)
>  - s/op_lock/op_lock_and_prep, few other renames too (Oak)
>  - Use DRM_EXEC_IGNORE_DUPLICATES flag in drm_exec_init (local testing)
>  - Do not reserve slots in locking step (direction based on series from Thomas)
> 
> Cc: Oak Zeng <oak.zeng@intel.com>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_vm.c | 147 +++++++++++++++++++++++++++----------
>  1 file changed, 107 insertions(+), 40 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 66b70fd3d105..6375c136e21a 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -414,19 +414,23 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
> 
>  #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
> 
> -static void xe_vm_kill(struct xe_vm *vm)
> +static void xe_vm_kill(struct xe_vm *vm, bool unlocked)
>  {
>  	struct xe_exec_queue *q;
> 
>  	lockdep_assert_held(&vm->lock);
> 
> -	xe_vm_lock(vm, false);
> +	if (unlocked)
> +		xe_vm_lock(vm, false);
> +
>  	vm->flags |= XE_VM_FLAG_BANNED;
>  	trace_xe_vm_kill(vm);
> 
>  	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
>  		q->ops->kill(q);
> -	xe_vm_unlock(vm);
> +
> +	if (unlocked)
> +		xe_vm_unlock(vm);
> 
>  	/* TODO: Inform user the VM is banned */
>  }
> @@ -656,7 +660,7 @@ static void preempt_rebind_work_func(struct
> work_struct *w)
> 
>  	if (err) {
>  		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
> -		xe_vm_kill(vm);
> +		xe_vm_kill(vm, true);
>  	}
>  	up_write(&vm->lock);
> 
> @@ -1876,17 +1880,9 @@ static int xe_vm_bind(struct xe_vm *vm, struct
> xe_vma *vma, struct xe_exec_queue
>  		      u32 num_syncs, bool immediate, bool first_op,
>  		      bool last_op)
>  {
> -	int err;
> -
>  	xe_vm_assert_held(vm);
>  	xe_bo_assert_held(bo);
> 
> -	if (bo && immediate) {
> -		err = xe_bo_validate(bo, vm, true);

In the original code, the bo validate is conditional on the immediate bind flag, which comes from DRM_XE_VM_BIND_FLAG_IMMEDIATE. It seems that later in this patch the immediate flag is not used any more... can you explain? See below

> -		if (err)
> -			return err;
> -	}
> -
>  	return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate,
> first_op,
>  			    last_op);
>  }
> @@ -2539,17 +2535,13 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm
> *vm, struct xe_exec_queue *q,
>  	return 0;
>  }
> 
> -static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
> -		      struct xe_vma *vma, struct xe_vma_op *op)
> +static int op_execute(struct xe_vm *vm, struct xe_vma *vma,
> +		      struct xe_vma_op *op)
>  {
>  	int err;
> 
>  	lockdep_assert_held_write(&vm->lock);
> 
> -	err = xe_vm_lock_vma(exec, vma);
> -	if (err)
> -		return err;
> -
>  	xe_vm_assert_held(vm);
>  	xe_bo_assert_held(xe_vma_bo(vma));
> 
> @@ -2630,19 +2622,10 @@ static int op_execute(struct drm_exec *exec,
> struct xe_vm *vm,
>  static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
>  			       struct xe_vma_op *op)
>  {
> -	struct drm_exec exec;
>  	int err;
> 
>  retry_userptr:
> -	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
> -	drm_exec_until_all_locked(&exec) {
> -		err = op_execute(&exec, vm, vma, op);
> -		drm_exec_retry_on_contention(&exec);
> -		if (err)
> -			break;
> -	}
> -	drm_exec_fini(&exec);
> -
> +	err = op_execute(vm, vma, op);
>  	if (err == -EAGAIN) {
>  		lockdep_assert_held_write(&vm->lock);
> 
> @@ -2807,29 +2790,113 @@ static void vm_bind_ioctl_ops_unwind(struct
> xe_vm *vm,
>  	}
>  }
> 
> +static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
> +				 bool validate)
> +{
> +	struct xe_bo *bo = xe_vma_bo(vma);
> +	int err = 0;
> +
> +	if (bo) {
> +		if (!bo->vm)
> +			err = drm_exec_prepare_obj(exec, &bo->ttm.base, 0);
> +		if (!err && validate)
> +			err = xe_bo_validate(bo, xe_vma_vm(vma), true);
> +	}
> +
> +	return err;
> +}
> +
> +static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
> +			    struct xe_vma_op *op)
> +{
> +	int err = 0;
> +
> +	switch (op->base.op) {
> +	case DRM_GPUVA_OP_MAP:
> +		err = vma_lock_and_validate(exec, op->map.vma,
> +					    !xe_vm_in_fault_mode(vm));

What I meant here: should the last parameter be !xe_vm_in_fault_mode(vm) && the IMMEDIATE flag? Or maybe the IMMEDIATE flag was deleted? But I still see it in xe_vm.c:

2796 #define SUPPORTED_FLAGS \
2797     (DRM_XE_VM_BIND_FLAG_READONLY | \
2798      DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
2799      DRM_XE_VM_BIND_FLAG_NULL | \
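
i.e., should it be something like this (just a sketch of what I mean)?

	err = vma_lock_and_validate(exec, op->map.vma,
				    !xe_vm_in_fault_mode(vm) &&
				    op->map.immediate);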

Oak


> +		break;
> +	case DRM_GPUVA_OP_REMAP:
> +		err = vma_lock_and_validate(exec,
> +					    gpuva_to_vma(op-
> >base.remap.unmap->va),
> +					    false);
> +		if (!err && op->remap.prev)
> +			err = vma_lock_and_validate(exec, op->remap.prev,
> true);
> +		if (!err && op->remap.next)
> +			err = vma_lock_and_validate(exec, op->remap.next,
> true);
> +		break;
> +	case DRM_GPUVA_OP_UNMAP:
> +		err = vma_lock_and_validate(exec,
> +					    gpuva_to_vma(op->base.unmap.va),
> +					    false);
> +		break;
> +	case DRM_GPUVA_OP_PREFETCH:
> +		err = vma_lock_and_validate(exec,
> +					    gpuva_to_vma(op-
> >base.prefetch.va), true);
> +		break;
> +	default:
> +		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
> +	}
> +
> +	return err;
> +}
> +
> +static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
> +					   struct xe_vm *vm,
> +					   struct list_head *ops_list)
> +{
> +	struct xe_vma_op *op;
> +	int err;
> +
> +	err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), 0);
> +	if (err)
> +		return err;
> +
> +	list_for_each_entry(op, ops_list, link) {
> +		err = op_lock_and_prep(exec, vm, op);
> +		if (err)
> +			return err;
> +	}
> +
> +	return 0;
> +}
> +
>  static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
>  				     struct list_head *ops_list)
>  {
> +	struct drm_exec exec;
>  	struct xe_vma_op *op, *next;
>  	int err;
> 
>  	lockdep_assert_held_write(&vm->lock);
> 
> -	list_for_each_entry_safe(op, next, ops_list, link) {
> -		err = xe_vma_op_execute(vm, op);
> -		if (err) {
> -			drm_warn(&vm->xe->drm, "VM op(%d) failed with %d",
> -				 op->base.op, err);
> -			/*
> -			 * FIXME: Killing VM rather than proper error handling
> -			 */
> -			xe_vm_kill(vm);
> -			return -ENOSPC;
> +	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
> +		      DRM_EXEC_IGNORE_DUPLICATES, 0);
> +	drm_exec_until_all_locked(&exec) {
> +		err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, ops_list);
> +		drm_exec_retry_on_contention(&exec);
> +		if (err)
> +			goto unlock;
> +
> +		list_for_each_entry_safe(op, next, ops_list, link) {
> +			err = xe_vma_op_execute(vm, op);
> +			if (err) {
> +				drm_warn(&vm->xe->drm, "VM op(%d) failed
> with %d",
> +					 op->base.op, err);
> +				/*
> +				 * FIXME: Killing VM rather than proper error
> handling
> +				 */
> +				xe_vm_kill(vm, false);
> +				err = -ENOSPC;
> +				goto unlock;
> +			}
> +			xe_vma_op_cleanup(vm, op);
>  		}
> -		xe_vma_op_cleanup(vm, op);
>  	}
> 
> -	return 0;
> +unlock:
> +	drm_exec_fini(&exec);
> +	return err;
>  }
> 
>  #define SUPPORTED_FLAGS	\
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 01/13] drm/xe: Lock all gpuva ops during VM bind IOCTL
  2024-04-16 15:51   ` Zeng, Oak
@ 2024-04-16 17:02     ` Matthew Brost
  0 siblings, 0 replies; 40+ messages in thread
From: Matthew Brost @ 2024-04-16 17:02 UTC (permalink / raw)
  To: Zeng, Oak; +Cc: intel-xe

On Tue, Apr 16, 2024 at 09:51:24AM -0600, Zeng, Oak wrote:
> 
> 
> > -----Original Message-----
> > From: Brost, Matthew <matthew.brost@intel.com>
> > Sent: Wednesday, April 10, 2024 1:41 AM
> > To: intel-xe@lists.freedesktop.org
> > Cc: Brost, Matthew <matthew.brost@intel.com>; Zeng, Oak
> > <oak.zeng@intel.com>
> > Subject: [PATCH 01/13] drm/xe: Lock all gpuva ops during VM bind IOCTL
> > 
> > Lock all BOs used in gpuva ops and validate all BOs in a single step
> > during the VM bind IOCTL.
> > 
> > This help with the transition to making all gpuva ops in a VM bind IOCTL
> > a single atomic job which is required for proper error handling.
> > 
> > v2:
> >  - Better commit message (Oak)
> >  - s/op_lock/op_lock_and_prep, few other renames too (Oak)
> >  - Use DRM_EXEC_IGNORE_DUPLICATES flag in drm_exec_init (local testing)
> >  - Do not reserve slots in locking step (direction based on series from Thomas)
> > 
> > Cc: Oak Zeng <oak.zeng@intel.com>
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_vm.c | 147 +++++++++++++++++++++++++++----------
> >  1 file changed, 107 insertions(+), 40 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > index 66b70fd3d105..6375c136e21a 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -414,19 +414,23 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
> > 
> >  #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
> > 
> > -static void xe_vm_kill(struct xe_vm *vm)
> > +static void xe_vm_kill(struct xe_vm *vm, bool unlocked)
> >  {
> >  	struct xe_exec_queue *q;
> > 
> >  	lockdep_assert_held(&vm->lock);
> > 
> > -	xe_vm_lock(vm, false);
> > +	if (unlocked)
> > +		xe_vm_lock(vm, false);
> > +
> >  	vm->flags |= XE_VM_FLAG_BANNED;
> >  	trace_xe_vm_kill(vm);
> > 
> >  	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
> >  		q->ops->kill(q);
> > -	xe_vm_unlock(vm);
> > +
> > +	if (unlocked)
> > +		xe_vm_unlock(vm);
> > 
> >  	/* TODO: Inform user the VM is banned */
> >  }
> > @@ -656,7 +660,7 @@ static void preempt_rebind_work_func(struct
> > work_struct *w)
> > 
> >  	if (err) {
> >  		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
> > -		xe_vm_kill(vm);
> > +		xe_vm_kill(vm, true);
> >  	}
> >  	up_write(&vm->lock);
> > 
> > @@ -1876,17 +1880,9 @@ static int xe_vm_bind(struct xe_vm *vm, struct
> > xe_vma *vma, struct xe_exec_queue
> >  		      u32 num_syncs, bool immediate, bool first_op,
> >  		      bool last_op)
> >  {
> > -	int err;
> > -
> >  	xe_vm_assert_held(vm);
> >  	xe_bo_assert_held(bo);
> > 
> > -	if (bo && immediate) {
> > -		err = xe_bo_validate(bo, vm, true);
> 
> In the original code, the bo validate is conditional on the immediate bind flag, which comes from DRM_XE_VM_BIND_FLAG_IMMEDIATE. It seems that later in this patch the immediate flag is not used any more... can you explain? See below
> 

The immediate code just got pulled back in and this check was omitted in
the rebase. Will fix.

Matt

> > -		if (err)
> > -			return err;
> > -	}
> > -
> >  	return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate,
> > first_op,
> >  			    last_op);
> >  }
> > @@ -2539,17 +2535,13 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm
> > *vm, struct xe_exec_queue *q,
> >  	return 0;
> >  }
> > 
> > -static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
> > -		      struct xe_vma *vma, struct xe_vma_op *op)
> > +static int op_execute(struct xe_vm *vm, struct xe_vma *vma,
> > +		      struct xe_vma_op *op)
> >  {
> >  	int err;
> > 
> >  	lockdep_assert_held_write(&vm->lock);
> > 
> > -	err = xe_vm_lock_vma(exec, vma);
> > -	if (err)
> > -		return err;
> > -
> >  	xe_vm_assert_held(vm);
> >  	xe_bo_assert_held(xe_vma_bo(vma));
> > 
> > @@ -2630,19 +2622,10 @@ static int op_execute(struct drm_exec *exec,
> > struct xe_vm *vm,
> >  static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
> >  			       struct xe_vma_op *op)
> >  {
> > -	struct drm_exec exec;
> >  	int err;
> > 
> >  retry_userptr:
> > -	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
> > -	drm_exec_until_all_locked(&exec) {
> > -		err = op_execute(&exec, vm, vma, op);
> > -		drm_exec_retry_on_contention(&exec);
> > -		if (err)
> > -			break;
> > -	}
> > -	drm_exec_fini(&exec);
> > -
> > +	err = op_execute(vm, vma, op);
> >  	if (err == -EAGAIN) {
> >  		lockdep_assert_held_write(&vm->lock);
> > 
> > @@ -2807,29 +2790,113 @@ static void vm_bind_ioctl_ops_unwind(struct
> > xe_vm *vm,
> >  	}
> >  }
> > 
> > +static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
> > +				 bool validate)
> > +{
> > +	struct xe_bo *bo = xe_vma_bo(vma);
> > +	int err = 0;
> > +
> > +	if (bo) {
> > +		if (!bo->vm)
> > +			err = drm_exec_prepare_obj(exec, &bo->ttm.base, 0);
> > +		if (!err && validate)
> > +			err = xe_bo_validate(bo, xe_vma_vm(vma), true);
> > +	}
> > +
> > +	return err;
> > +}
> > +
> > +static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
> > +			    struct xe_vma_op *op)
> > +{
> > +	int err = 0;
> > +
> > +	switch (op->base.op) {
> > +	case DRM_GPUVA_OP_MAP:
> > +		err = vma_lock_and_validate(exec, op->map.vma,
> > +					    !xe_vm_in_fault_mode(vm));
> 
> What I meant here: should the last parameter be !xe_vm_in_fault_mode(vm) && the IMMEDIATE flag? Or maybe the IMMEDIATE flag was deleted? But I still see it in xe_vm.c:
> 
> 2796 #define SUPPORTED_FLAGS \
> 2797     (DRM_XE_VM_BIND_FLAG_READONLY | \
> 2798      DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
> 2799      DRM_XE_VM_BIND_FLAG_NULL | \
> 
> Oak
> 
> 
> > +		break;
> > +	case DRM_GPUVA_OP_REMAP:
> > +		err = vma_lock_and_validate(exec,
> > +					    gpuva_to_vma(op-
> > >base.remap.unmap->va),
> > +					    false);
> > +		if (!err && op->remap.prev)
> > +			err = vma_lock_and_validate(exec, op->remap.prev,
> > true);
> > +		if (!err && op->remap.next)
> > +			err = vma_lock_and_validate(exec, op->remap.next,
> > true);
> > +		break;
> > +	case DRM_GPUVA_OP_UNMAP:
> > +		err = vma_lock_and_validate(exec,
> > +					    gpuva_to_vma(op->base.unmap.va),
> > +					    false);
> > +		break;
> > +	case DRM_GPUVA_OP_PREFETCH:
> > +		err = vma_lock_and_validate(exec,
> > +					    gpuva_to_vma(op-
> > >base.prefetch.va), true);
> > +		break;
> > +	default:
> > +		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
> > +	}
> > +
> > +	return err;
> > +}
> > +
> > +static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
> > +					   struct xe_vm *vm,
> > +					   struct list_head *ops_list)
> > +{
> > +	struct xe_vma_op *op;
> > +	int err;
> > +
> > +	err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), 0);
> > +	if (err)
> > +		return err;
> > +
> > +	list_for_each_entry(op, ops_list, link) {
> > +		err = op_lock_and_prep(exec, vm, op);
> > +		if (err)
> > +			return err;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> >  static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
> >  				     struct list_head *ops_list)
> >  {
> > +	struct drm_exec exec;
> >  	struct xe_vma_op *op, *next;
> >  	int err;
> > 
> >  	lockdep_assert_held_write(&vm->lock);
> > 
> > -	list_for_each_entry_safe(op, next, ops_list, link) {
> > -		err = xe_vma_op_execute(vm, op);
> > -		if (err) {
> > -			drm_warn(&vm->xe->drm, "VM op(%d) failed with %d",
> > -				 op->base.op, err);
> > -			/*
> > -			 * FIXME: Killing VM rather than proper error handling
> > -			 */
> > -			xe_vm_kill(vm);
> > -			return -ENOSPC;
> > +	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
> > +		      DRM_EXEC_IGNORE_DUPLICATES, 0);
> > +	drm_exec_until_all_locked(&exec) {
> > +		err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, ops_list);
> > +		drm_exec_retry_on_contention(&exec);
> > +		if (err)
> > +			goto unlock;
> > +
> > +		list_for_each_entry_safe(op, next, ops_list, link) {
> > +			err = xe_vma_op_execute(vm, op);
> > +			if (err) {
> > +				drm_warn(&vm->xe->drm, "VM op(%d) failed
> > with %d",
> > +					 op->base.op, err);
> > +				/*
> > +				 * FIXME: Killing VM rather than proper error
> > handling
> > +				 */
> > +				xe_vm_kill(vm, false);
> > +				err = -ENOSPC;
> > +				goto unlock;
> > +			}
> > +			xe_vma_op_cleanup(vm, op);
> >  		}
> > -		xe_vma_op_cleanup(vm, op);
> >  	}
> > 
> > -	return 0;
> > +unlock:
> > +	drm_exec_fini(&exec);
> > +	return err;
> >  }
> > 
> >  #define SUPPORTED_FLAGS	\
> > --
> > 2.34.1
> 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* RE: [PATCH 02/13] drm/xe: Add ops_execute function which returns a fence
  2024-04-10  5:40 ` [PATCH 02/13] drm/xe: Add ops_execute function which returns a fence Matthew Brost
@ 2024-04-18 16:16   ` Zeng, Oak
  2024-04-18 19:36     ` Matthew Brost
  0 siblings, 1 reply; 40+ messages in thread
From: Zeng, Oak @ 2024-04-18 16:16 UTC (permalink / raw)
  To: Brost, Matthew, intel-xe



> -----Original Message-----
> From: Brost, Matthew <matthew.brost@intel.com>
> Sent: Wednesday, April 10, 2024 1:41 AM
> To: intel-xe@lists.freedesktop.org
> Cc: Brost, Matthew <matthew.brost@intel.com>; Zeng, Oak
> <oak.zeng@intel.com>
> Subject: [PATCH 02/13] drm/xe: Add ops_execute function which returns a
> fence
> 
> Add ops_execute function which returns a fence. This will be helpful to
> initiate all binds (VM bind IOCTL, rebinds in exec IOCTL, rebinds in
> preempt rebind worker, and rebinds in pagefaults) via a gpuva ops list.
> Returning a fence is needed in various paths.
> 
> v2:
>  - Rebase
> 
> Cc: Oak Zeng <oak.zeng@intel.com>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_vm.c | 211 +++++++++++++++++++------------------
>  1 file changed, 111 insertions(+), 100 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 6375c136e21a..84c6b10b4b78 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -1834,16 +1834,17 @@ find_ufence_get(struct xe_sync_entry *syncs,
> u32 num_syncs)
>  	return NULL;
>  }
> 
> -static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
> -			struct xe_exec_queue *q, struct xe_sync_entry
> *syncs,
> -			u32 num_syncs, bool immediate, bool first_op,
> -			bool last_op)
> +static struct dma_fence *
> +xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct
> xe_exec_queue *q,
> +	   struct xe_bo *bo, struct xe_sync_entry *syncs, u32 num_syncs,
> +	   bool immediate, bool first_op, bool last_op)
>  {
>  	struct dma_fence *fence;
>  	struct xe_exec_queue *wait_exec_queue =
> to_wait_exec_queue(vm, q);
>  	struct xe_user_fence *ufence;
> 
>  	xe_vm_assert_held(vm);
> +	xe_bo_assert_held(bo);
> 
>  	ufence = find_ufence_get(syncs, num_syncs);
>  	if (vma->ufence && ufence)
> @@ -1855,7 +1856,7 @@ static int __xe_vm_bind(struct xe_vm *vm, struct
> xe_vma *vma,
>  		fence = xe_vm_bind_vma(vma, q, syncs, num_syncs,
> first_op,
>  				       last_op);
>  		if (IS_ERR(fence))
> -			return PTR_ERR(fence);
> +			return fence;
>  	} else {
>  		int i;
> 
> @@ -1870,26 +1871,14 @@ static int __xe_vm_bind(struct xe_vm *vm,
> struct xe_vma *vma,
> 
>  	if (last_op)
>  		xe_exec_queue_last_fence_set(wait_exec_queue, vm,
> fence);
> -	dma_fence_put(fence);
> -
> -	return 0;
> -}
> -
> -static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct
> xe_exec_queue *q,
> -		      struct xe_bo *bo, struct xe_sync_entry *syncs,
> -		      u32 num_syncs, bool immediate, bool first_op,
> -		      bool last_op)
> -{
> -	xe_vm_assert_held(vm);
> -	xe_bo_assert_held(bo);
> 
> -	return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate,
> first_op,
> -			    last_op);
> +	return fence;
>  }
> 
> -static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
> -			struct xe_exec_queue *q, struct xe_sync_entry
> *syncs,
> -			u32 num_syncs, bool first_op, bool last_op)
> +static struct dma_fence *
> +xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
> +	     struct xe_exec_queue *q, struct xe_sync_entry *syncs,
> +	     u32 num_syncs, bool first_op, bool last_op)
>  {
>  	struct dma_fence *fence;
>  	struct xe_exec_queue *wait_exec_queue =
> to_wait_exec_queue(vm, q);
> @@ -1899,14 +1888,13 @@ static int xe_vm_unbind(struct xe_vm *vm,
> struct xe_vma *vma,
> 
>  	fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op,
> last_op);
>  	if (IS_ERR(fence))
> -		return PTR_ERR(fence);
> +		return fence;
> 
>  	xe_vma_destroy(vma, fence);
>  	if (last_op)
>  		xe_exec_queue_last_fence_set(wait_exec_queue, vm,
> fence);
> -	dma_fence_put(fence);
> 
> -	return 0;
> +	return fence;
>  }
> 
>  #define ALL_DRM_XE_VM_CREATE_FLAGS
> (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
> @@ -2049,10 +2037,11 @@ static const u32 region_to_mem_type[] = {
>  	XE_PL_VRAM1,
>  };
> 
> -static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
> -			  struct xe_exec_queue *q, u32 region,
> -			  struct xe_sync_entry *syncs, u32 num_syncs,
> -			  bool first_op, bool last_op)
> +static struct dma_fence *
> +xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
> +	       struct xe_exec_queue *q, u32 region,
> +	       struct xe_sync_entry *syncs, u32 num_syncs,
> +	       bool first_op, bool last_op)
>  {
>  	struct xe_exec_queue *wait_exec_queue =
> to_wait_exec_queue(vm, q);
>  	int err;
> @@ -2062,27 +2051,24 @@ static int xe_vm_prefetch(struct xe_vm *vm,
> struct xe_vma *vma,
>  	if (!xe_vma_has_no_bo(vma)) {
>  		err = xe_bo_migrate(xe_vma_bo(vma),
> region_to_mem_type[region]);
>  		if (err)
> -			return err;
> +			return ERR_PTR(err);
>  	}
> 
>  	if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated))
> {
>  		return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs,
> num_syncs,
>  				  true, first_op, last_op);
>  	} else {
> +		struct dma_fence *fence =
> +			xe_exec_queue_last_fence_get(wait_exec_queue,
> vm);
>  		int i;
> 
>  		/* Nothing to do, signal fences now */
>  		if (last_op) {
> -			for (i = 0; i < num_syncs; i++) {
> -				struct dma_fence *fence =
> -
> 	xe_exec_queue_last_fence_get(wait_exec_queue, vm);
> -
> +			for (i = 0; i < num_syncs; i++)
>  				xe_sync_entry_signal(&syncs[i], fence);
> -				dma_fence_put(fence);
> -			}
>  		}
> 
> -		return 0;
> +		return fence;
>  	}
>  }
> 
> @@ -2535,10 +2521,10 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm
> *vm, struct xe_exec_queue *q,
>  	return 0;
>  }
> 
> -static int op_execute(struct xe_vm *vm, struct xe_vma *vma,
> -		      struct xe_vma_op *op)
> +static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma
> *vma,
> +				    struct xe_vma_op *op)
>  {
> -	int err;
> +	struct dma_fence *fence = NULL;
> 
>  	lockdep_assert_held_write(&vm->lock);
> 
> @@ -2547,11 +2533,11 @@ static int op_execute(struct xe_vm *vm, struct
> xe_vma *vma,
> 
>  	switch (op->base.op) {
>  	case DRM_GPUVA_OP_MAP:
> -		err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
> -				 op->syncs, op->num_syncs,
> -				 op->map.immediate
> || !xe_vm_in_fault_mode(vm),
> -				 op->flags & XE_VMA_OP_FIRST,
> -				 op->flags & XE_VMA_OP_LAST);
> +		fence = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
> +				   op->syncs, op->num_syncs,
> +				   op->map.immediate
> || !xe_vm_in_fault_mode(vm),
> +				   op->flags & XE_VMA_OP_FIRST,
> +				   op->flags & XE_VMA_OP_LAST);
>  		break;
>  	case DRM_GPUVA_OP_REMAP:
>  	{
> @@ -2561,37 +2547,39 @@ static int op_execute(struct xe_vm *vm, struct
> xe_vma *vma,
>  		if (!op->remap.unmap_done) {
>  			if (prev || next)
>  				vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
> -			err = xe_vm_unbind(vm, vma, op->q, op->syncs,
> -					   op->num_syncs,
> -					   op->flags & XE_VMA_OP_FIRST,
> -					   op->flags & XE_VMA_OP_LAST &&
> -					   !prev && !next);
> -			if (err)
> +			fence = xe_vm_unbind(vm, vma, op->q, op->syncs,
> +					     op->num_syncs,
> +					     op->flags & XE_VMA_OP_FIRST,
> +					     op->flags & XE_VMA_OP_LAST &&
> +					     !prev && !next);
> +			if (IS_ERR(fence))
>  				break;
>  			op->remap.unmap_done = true;
>  		}
> 
>  		if (prev) {
>  			op->remap.prev->gpuva.flags |=
> XE_VMA_LAST_REBIND;
> -			err = xe_vm_bind(vm, op->remap.prev, op->q,
> -					 xe_vma_bo(op->remap.prev), op-
> >syncs,
> -					 op->num_syncs, true, false,
> -					 op->flags & XE_VMA_OP_LAST
> && !next);
> +			dma_fence_put(fence);
> +			fence = xe_vm_bind(vm, op->remap.prev, op->q,
> +					   xe_vma_bo(op->remap.prev), op-
> >syncs,
> +					   op->num_syncs, true, false,
> +					   op->flags & XE_VMA_OP_LAST
> && !next);
>  			op->remap.prev->gpuva.flags &=
> ~XE_VMA_LAST_REBIND;
> -			if (err)
> +			if (IS_ERR(fence))
>  				break;
>  			op->remap.prev = NULL;
>  		}
> 
>  		if (next) {
>  			op->remap.next->gpuva.flags |=
> XE_VMA_LAST_REBIND;
> -			err = xe_vm_bind(vm, op->remap.next, op->q,
> -					 xe_vma_bo(op->remap.next),
> -					 op->syncs, op->num_syncs,
> -					 true, false,
> -					 op->flags & XE_VMA_OP_LAST);
> +			dma_fence_put(fence);
> +			fence = xe_vm_bind(vm, op->remap.next, op->q,
> +					   xe_vma_bo(op->remap.next),
> +					   op->syncs, op->num_syncs,
> +					   true, false,
> +					   op->flags & XE_VMA_OP_LAST);
>  			op->remap.next->gpuva.flags &=
> ~XE_VMA_LAST_REBIND;
> -			if (err)
> +			if (IS_ERR(fence))
>  				break;
>  			op->remap.next = NULL;
>  		}
> @@ -2599,34 +2587,36 @@ static int op_execute(struct xe_vm *vm, struct
> xe_vma *vma,
>  		break;
>  	}
>  	case DRM_GPUVA_OP_UNMAP:
> -		err = xe_vm_unbind(vm, vma, op->q, op->syncs,
> -				   op->num_syncs, op->flags &
> XE_VMA_OP_FIRST,
> -				   op->flags & XE_VMA_OP_LAST);
> +		fence = xe_vm_unbind(vm, vma, op->q, op->syncs,
> +				     op->num_syncs, op->flags &
> XE_VMA_OP_FIRST,
> +				     op->flags & XE_VMA_OP_LAST);
>  		break;
>  	case DRM_GPUVA_OP_PREFETCH:
> -		err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region,
> -				     op->syncs, op->num_syncs,
> -				     op->flags & XE_VMA_OP_FIRST,
> -				     op->flags & XE_VMA_OP_LAST);
> +		fence = xe_vm_prefetch(vm, vma, op->q, op-
> >prefetch.region,
> +				       op->syncs, op->num_syncs,
> +				       op->flags & XE_VMA_OP_FIRST,
> +				       op->flags & XE_VMA_OP_LAST);
>  		break;
>  	default:
>  		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
>  	}
> 
> -	if (err)
> +	if (IS_ERR(fence))
>  		trace_xe_vma_fail(vma);
> 
> -	return err;
> +	return fence;
>  }
> 
> -static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
> -			       struct xe_vma_op *op)
> +static struct dma_fence *
> +__xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
> +		    struct xe_vma_op *op)
>  {
> +	struct dma_fence *fence;
>  	int err;
> 
>  retry_userptr:
> -	err = op_execute(vm, vma, op);
> -	if (err == -EAGAIN) {
> +	fence = op_execute(vm, vma, op);
> +	if (IS_ERR(fence) && PTR_ERR(fence) == -EAGAIN) {
>  		lockdep_assert_held_write(&vm->lock);
> 
>  		if (op->base.op == DRM_GPUVA_OP_REMAP) {
> @@ -2643,22 +2633,24 @@ static int __xe_vma_op_execute(struct xe_vm
> *vm, struct xe_vma *vma,
>  			if (!err)
>  				goto retry_userptr;
> 
> +			fence = ERR_PTR(err);
>  			trace_xe_vma_fail(vma);
>  		}
>  	}
> 
> -	return err;
> +	return fence;
>  }
> 
> -static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
> +static struct dma_fence *
> +xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
>  {
> -	int ret = 0;
> +	struct dma_fence *fence = ERR_PTR(-ENOMEM);
> 
>  	lockdep_assert_held_write(&vm->lock);
> 
>  	switch (op->base.op) {
>  	case DRM_GPUVA_OP_MAP:
> -		ret = __xe_vma_op_execute(vm, op->map.vma, op);
> +		fence = __xe_vma_op_execute(vm, op->map.vma, op);
>  		break;
>  	case DRM_GPUVA_OP_REMAP:
>  	{
> @@ -2671,23 +2663,23 @@ static int xe_vma_op_execute(struct xe_vm *vm,
> struct xe_vma_op *op)
>  		else
>  			vma = op->remap.next;
> 
> -		ret = __xe_vma_op_execute(vm, vma, op);
> +		fence = __xe_vma_op_execute(vm, vma, op);
>  		break;
>  	}
>  	case DRM_GPUVA_OP_UNMAP:
> -		ret = __xe_vma_op_execute(vm, gpuva_to_vma(op-
> >base.unmap.va),
> -					  op);
> +		fence = __xe_vma_op_execute(vm, gpuva_to_vma(op-
> >base.unmap.va),
> +					    op);
>  		break;
>  	case DRM_GPUVA_OP_PREFETCH:
> -		ret = __xe_vma_op_execute(vm,
> -					  gpuva_to_vma(op-
> >base.prefetch.va),
> -					  op);
> +		fence = __xe_vma_op_execute(vm,
> +					    gpuva_to_vma(op-
> >base.prefetch.va),
> +					    op);
>  		break;
>  	default:
>  		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
>  	}
> 
> -	return ret;
> +	return fence;
>  }
> 
>  static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
> @@ -2861,11 +2853,35 @@ static int
> vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
>  	return 0;
>  }
> 
> +static struct dma_fence *ops_execute(struct xe_vm *vm,
> +				     struct list_head *ops_list,
> +				     bool cleanup)
> +{
> +	struct xe_vma_op *op, *next;
> +	struct dma_fence *fence = NULL;
> +
> +	list_for_each_entry_safe(op, next, ops_list, link) {
> +		if (!IS_ERR(fence)) {
> +			dma_fence_put(fence);
> +			fence = xe_vma_op_execute(vm, op);
> +		}
> +		if (IS_ERR(fence)) {
> +			drm_warn(&vm->xe->drm, "VM op(%d) failed
> with %ld",
> +				 op->base.op, PTR_ERR(fence));
> +			fence = ERR_PTR(-ENOSPC);

There is an earlier comment that was not addressed. Copied below:


> > Once error happen for one operation, you seem to print the same error
> message for all the rest operations....because fence = xe_vma_op_execute(vm,
> op) is not called anymore after the first error
> >
> 
> Yes.

Is this problematic though? Let's say you have 2 ops in the list and op_execute failed on op1. You will print as below:

VM op1 failed with xxx
VM op1 failed with xxx
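
If that matters, one way to avoid the repeated warning would be to only
print on the iteration that actually called xe_vma_op_execute(), e.g.
(sketch only):

	list_for_each_entry_safe(op, next, ops_list, link) {
		if (!IS_ERR(fence)) {
			dma_fence_put(fence);
			fence = xe_vma_op_execute(vm, op);
			if (IS_ERR(fence)) {
				drm_warn(&vm->xe->drm, "VM op(%d) failed with %ld",
					 op->base.op, PTR_ERR(fence));
				fence = ERR_PTR(-ENOSPC);
			}
		}
		if (cleanup)
			xe_vma_op_cleanup(vm, op);
	}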



> +		}
> +		if (cleanup)
> +			xe_vma_op_cleanup(vm, op);
> +	}
> +
> +	return fence;
> +}
> +
>  static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
>  				     struct list_head *ops_list)
>  {
>  	struct drm_exec exec;
> -	struct xe_vma_op *op, *next;
> +	struct dma_fence *fence;
>  	int err;
> 
>  	lockdep_assert_held_write(&vm->lock);
> @@ -2878,19 +2894,14 @@ static int vm_bind_ioctl_ops_execute(struct
> xe_vm *vm,
>  		if (err)
>  			goto unlock;
> 
> -		list_for_each_entry_safe(op, next, ops_list, link) {
> -			err = xe_vma_op_execute(vm, op);
> -			if (err) {
> -				drm_warn(&vm->xe->drm, "VM op(%d)
> failed with %d",
> -					 op->base.op, err);
> -				/*
> -				 * FIXME: Killing VM rather than proper error
> handling
> -				 */
> -				xe_vm_kill(vm, false);
> -				err = -ENOSPC;
> -				goto unlock;
> -			}
> -			xe_vma_op_cleanup(vm, op);
> +		fence = ops_execute(vm, ops_list, true);
> +		if (IS_ERR(fence)) {
> +			err = PTR_ERR(fence);
> +			/* FIXME: Killing VM rather than proper error
> handling */
> +			xe_vm_kill(vm, false);
> +			goto unlock;
> +		} else {
> +			dma_fence_put(fence);

I don't get this part. You introduced the ops_execute function to return the last fence of all the operations. But you just put the fence here. Don't you intend to wait for this fence somehow? What is the point of returning a fence from ops_execute?

Oak


>  		}
>  	}
> 
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 40+ messages in thread

* RE: [PATCH 03/13] drm/xe: Move migrate to prefetch to op_lock_and_prep function
  2024-04-10  5:40 ` [PATCH 03/13] drm/xe: Move migrate to prefetch to op_lock_and_prep function Matthew Brost
@ 2024-04-18 19:27   ` Zeng, Oak
  2024-04-19 19:52     ` Matthew Brost
  0 siblings, 1 reply; 40+ messages in thread
From: Zeng, Oak @ 2024-04-18 19:27 UTC (permalink / raw)
  To: Brost, Matthew, intel-xe



> -----Original Message-----
> From: Brost, Matthew <matthew.brost@intel.com>
> Sent: Wednesday, April 10, 2024 1:41 AM
> To: intel-xe@lists.freedesktop.org
> Cc: Brost, Matthew <matthew.brost@intel.com>; Zeng, Oak
> <oak.zeng@intel.com>
> Subject: [PATCH 03/13] drm/xe: Move migrate to prefetch to
> op_lock_and_prep function
> 
> All non-binding operations in VM bind IOCTL should be in the lock and
> prepare step rather than the execution step. Move prefetch to conform to
> this pattern.
> 
> v2:
>  - Rebase
>  - New function names (Oak)
>  - Update stale comment (Oak)
> 
> Cc: Oak Zeng <oak.zeng@intel.com>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_vm.c | 30 +++++++++++++++---------------
>  1 file changed, 15 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 84c6b10b4b78..2c0521573154 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -2039,20 +2039,10 @@ static const u32 region_to_mem_type[] = {
> 
>  static struct dma_fence *
>  xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
> -	       struct xe_exec_queue *q, u32 region,
> -	       struct xe_sync_entry *syncs, u32 num_syncs,
> -	       bool first_op, bool last_op)
> +	       struct xe_exec_queue *q, struct xe_sync_entry *syncs,
> +	       u32 num_syncs, bool first_op, bool last_op)


I am wondering, do you still need this function? The original prefetch function is migration + vm_bind. Now that the migration has moved to the lock_and_prepare step, only the vm bind is left...

Even if you keep this function, we should change the name... it is not a prefetch anymore...

Oak

>  {
>  	struct xe_exec_queue *wait_exec_queue =
> to_wait_exec_queue(vm, q);
> -	int err;
> -
> -	xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
> -
> -	if (!xe_vma_has_no_bo(vma)) {
> -		err = xe_bo_migrate(xe_vma_bo(vma),
> region_to_mem_type[region]);
> -		if (err)
> -			return ERR_PTR(err);
> -	}
> 
>  	if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated))
> {
>  		return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs,
> num_syncs,
> @@ -2592,8 +2582,7 @@ static struct dma_fence *op_execute(struct xe_vm
> *vm, struct xe_vma *vma,
>  				     op->flags & XE_VMA_OP_LAST);
>  		break;
>  	case DRM_GPUVA_OP_PREFETCH:
> -		fence = xe_vm_prefetch(vm, vma, op->q, op-
> >prefetch.region,
> -				       op->syncs, op->num_syncs,
> +		fence = xe_vm_prefetch(vm, vma, op->q, op->syncs, op-
> >num_syncs,
>  				       op->flags & XE_VMA_OP_FIRST,
>  				       op->flags & XE_VMA_OP_LAST);
>  		break;
> @@ -2823,9 +2812,20 @@ static int op_lock_and_prep(struct drm_exec
> *exec, struct xe_vm *vm,
>  					    false);
>  		break;
>  	case DRM_GPUVA_OP_PREFETCH:
> +	{
> +		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> +		u32 region = op->prefetch.region;
> +
> +		xe_assert(vm->xe, region <=
> ARRAY_SIZE(region_to_mem_type));
> +
>  		err = vma_lock_and_validate(exec,
> -					    gpuva_to_vma(op-
> >base.prefetch.va), true);
> +					    gpuva_to_vma(op-
> >base.prefetch.va),
> +					    false);
> +		if (!err && !xe_vma_has_no_bo(vma))
> +			err = xe_bo_migrate(xe_vma_bo(vma),
> +					    region_to_mem_type[region]);
>  		break;
> +	}
>  	default:
>  		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
>  	}
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 02/13] drm/xe: Add ops_execute function which returns a fence
  2024-04-18 16:16   ` Zeng, Oak
@ 2024-04-18 19:36     ` Matthew Brost
  2024-04-23  3:09       ` Zeng, Oak
  0 siblings, 1 reply; 40+ messages in thread
From: Matthew Brost @ 2024-04-18 19:36 UTC (permalink / raw)
  To: Zeng, Oak; +Cc: intel-xe

On Thu, Apr 18, 2024 at 10:16:15AM -0600, Zeng, Oak wrote:
> 
> 
> > -----Original Message-----
> > From: Brost, Matthew <matthew.brost@intel.com>
> > Sent: Wednesday, April 10, 2024 1:41 AM
> > To: intel-xe@lists.freedesktop.org
> > Cc: Brost, Matthew <matthew.brost@intel.com>; Zeng, Oak
> > <oak.zeng@intel.com>
> > Subject: [PATCH 02/13] drm/xe: Add ops_execute function which returns a
> > fence
> > 
> > Add ops_execute function which returns a fence. This will be helpful to
> > initiate all binds (VM bind IOCTL, rebinds in exec IOCTL, rebinds in
> > preempt rebind worker, and rebinds in pagefaults) via a gpuva ops list.
> > Returning a fence is needed in various paths.
> > 
> > v2:
> >  - Rebase
> > 
> > Cc: Oak Zeng <oak.zeng@intel.com>
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_vm.c | 211 +++++++++++++++++++------------------
> >  1 file changed, 111 insertions(+), 100 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > index 6375c136e21a..84c6b10b4b78 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -1834,16 +1834,17 @@ find_ufence_get(struct xe_sync_entry *syncs,
> > u32 num_syncs)
> >  	return NULL;
> >  }
> > 
> > -static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
> > -			struct xe_exec_queue *q, struct xe_sync_entry
> > *syncs,
> > -			u32 num_syncs, bool immediate, bool first_op,
> > -			bool last_op)
> > +static struct dma_fence *
> > +xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct
> > xe_exec_queue *q,
> > +	   struct xe_bo *bo, struct xe_sync_entry *syncs, u32 num_syncs,
> > +	   bool immediate, bool first_op, bool last_op)
> >  {
> >  	struct dma_fence *fence;
> >  	struct xe_exec_queue *wait_exec_queue =
> > to_wait_exec_queue(vm, q);
> >  	struct xe_user_fence *ufence;
> > 
> >  	xe_vm_assert_held(vm);
> > +	xe_bo_assert_held(bo);
> > 
> >  	ufence = find_ufence_get(syncs, num_syncs);
> >  	if (vma->ufence && ufence)
> > @@ -1855,7 +1856,7 @@ static int __xe_vm_bind(struct xe_vm *vm, struct
> > xe_vma *vma,
> >  		fence = xe_vm_bind_vma(vma, q, syncs, num_syncs,
> > first_op,
> >  				       last_op);
> >  		if (IS_ERR(fence))
> > -			return PTR_ERR(fence);
> > +			return fence;
> >  	} else {
> >  		int i;
> > 
> > @@ -1870,26 +1871,14 @@ static int __xe_vm_bind(struct xe_vm *vm,
> > struct xe_vma *vma,
> > 
> >  	if (last_op)
> >  		xe_exec_queue_last_fence_set(wait_exec_queue, vm,
> > fence);
> > -	dma_fence_put(fence);
> > -
> > -	return 0;
> > -}
> > -
> > -static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct
> > xe_exec_queue *q,
> > -		      struct xe_bo *bo, struct xe_sync_entry *syncs,
> > -		      u32 num_syncs, bool immediate, bool first_op,
> > -		      bool last_op)
> > -{
> > -	xe_vm_assert_held(vm);
> > -	xe_bo_assert_held(bo);
> > 
> > -	return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate,
> > first_op,
> > -			    last_op);
> > +	return fence;
> >  }
> > 
> > -static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
> > -			struct xe_exec_queue *q, struct xe_sync_entry
> > *syncs,
> > -			u32 num_syncs, bool first_op, bool last_op)
> > +static struct dma_fence *
> > +xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
> > +	     struct xe_exec_queue *q, struct xe_sync_entry *syncs,
> > +	     u32 num_syncs, bool first_op, bool last_op)
> >  {
> >  	struct dma_fence *fence;
> >  	struct xe_exec_queue *wait_exec_queue =
> > to_wait_exec_queue(vm, q);
> > @@ -1899,14 +1888,13 @@ static int xe_vm_unbind(struct xe_vm *vm,
> > struct xe_vma *vma,
> > 
> >  	fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op,
> > last_op);
> >  	if (IS_ERR(fence))
> > -		return PTR_ERR(fence);
> > +		return fence;
> > 
> >  	xe_vma_destroy(vma, fence);
> >  	if (last_op)
> >  		xe_exec_queue_last_fence_set(wait_exec_queue, vm,
> > fence);
> > -	dma_fence_put(fence);
> > 
> > -	return 0;
> > +	return fence;
> >  }
> > 
> >  #define ALL_DRM_XE_VM_CREATE_FLAGS
> > (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
> > @@ -2049,10 +2037,11 @@ static const u32 region_to_mem_type[] = {
> >  	XE_PL_VRAM1,
> >  };
> > 
> > -static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
> > -			  struct xe_exec_queue *q, u32 region,
> > -			  struct xe_sync_entry *syncs, u32 num_syncs,
> > -			  bool first_op, bool last_op)
> > +static struct dma_fence *
> > +xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
> > +	       struct xe_exec_queue *q, u32 region,
> > +	       struct xe_sync_entry *syncs, u32 num_syncs,
> > +	       bool first_op, bool last_op)
> >  {
> >  	struct xe_exec_queue *wait_exec_queue =
> > to_wait_exec_queue(vm, q);
> >  	int err;
> > @@ -2062,27 +2051,24 @@ static int xe_vm_prefetch(struct xe_vm *vm,
> > struct xe_vma *vma,
> >  	if (!xe_vma_has_no_bo(vma)) {
> >  		err = xe_bo_migrate(xe_vma_bo(vma),
> > region_to_mem_type[region]);
> >  		if (err)
> > -			return err;
> > +			return ERR_PTR(err);
> >  	}
> > 
> >  	if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated))
> > {
> >  		return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs,
> > num_syncs,
> >  				  true, first_op, last_op);
> >  	} else {
> > +		struct dma_fence *fence =
> > +			xe_exec_queue_last_fence_get(wait_exec_queue,
> > vm);
> >  		int i;
> > 
> >  		/* Nothing to do, signal fences now */
> >  		if (last_op) {
> > -			for (i = 0; i < num_syncs; i++) {
> > -				struct dma_fence *fence =
> > -
> > 	xe_exec_queue_last_fence_get(wait_exec_queue, vm);
> > -
> > +			for (i = 0; i < num_syncs; i++)
> >  				xe_sync_entry_signal(&syncs[i], fence);
> > -				dma_fence_put(fence);
> > -			}
> >  		}
> > 
> > -		return 0;
> > +		return fence;
> >  	}
> >  }
> > 
> > @@ -2535,10 +2521,10 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm
> > *vm, struct xe_exec_queue *q,
> >  	return 0;
> >  }
> > 
> > -static int op_execute(struct xe_vm *vm, struct xe_vma *vma,
> > -		      struct xe_vma_op *op)
> > +static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma
> > *vma,
> > +				    struct xe_vma_op *op)
> >  {
> > -	int err;
> > +	struct dma_fence *fence = NULL;
> > 
> >  	lockdep_assert_held_write(&vm->lock);
> > 
> > @@ -2547,11 +2533,11 @@ static int op_execute(struct xe_vm *vm, struct
> > xe_vma *vma,
> > 
> >  	switch (op->base.op) {
> >  	case DRM_GPUVA_OP_MAP:
> > -		err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
> > -				 op->syncs, op->num_syncs,
> > -				 op->map.immediate
> > || !xe_vm_in_fault_mode(vm),
> > -				 op->flags & XE_VMA_OP_FIRST,
> > -				 op->flags & XE_VMA_OP_LAST);
> > +		fence = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
> > +				   op->syncs, op->num_syncs,
> > +				   op->map.immediate
> > || !xe_vm_in_fault_mode(vm),
> > +				   op->flags & XE_VMA_OP_FIRST,
> > +				   op->flags & XE_VMA_OP_LAST);
> >  		break;
> >  	case DRM_GPUVA_OP_REMAP:
> >  	{
> > @@ -2561,37 +2547,39 @@ static int op_execute(struct xe_vm *vm, struct
> > xe_vma *vma,
> >  		if (!op->remap.unmap_done) {
> >  			if (prev || next)
> >  				vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
> > -			err = xe_vm_unbind(vm, vma, op->q, op->syncs,
> > -					   op->num_syncs,
> > -					   op->flags & XE_VMA_OP_FIRST,
> > -					   op->flags & XE_VMA_OP_LAST &&
> > -					   !prev && !next);
> > -			if (err)
> > +			fence = xe_vm_unbind(vm, vma, op->q, op->syncs,
> > +					     op->num_syncs,
> > +					     op->flags & XE_VMA_OP_FIRST,
> > +					     op->flags & XE_VMA_OP_LAST &&
> > +					     !prev && !next);
> > +			if (IS_ERR(fence))
> >  				break;
> >  			op->remap.unmap_done = true;
> >  		}
> > 
> >  		if (prev) {
> >  			op->remap.prev->gpuva.flags |=
> > XE_VMA_LAST_REBIND;
> > -			err = xe_vm_bind(vm, op->remap.prev, op->q,
> > -					 xe_vma_bo(op->remap.prev), op-
> > >syncs,
> > -					 op->num_syncs, true, false,
> > -					 op->flags & XE_VMA_OP_LAST
> > && !next);
> > +			dma_fence_put(fence);
> > +			fence = xe_vm_bind(vm, op->remap.prev, op->q,
> > +					   xe_vma_bo(op->remap.prev), op-
> > >syncs,
> > +					   op->num_syncs, true, false,
> > +					   op->flags & XE_VMA_OP_LAST
> > && !next);
> >  			op->remap.prev->gpuva.flags &=
> > ~XE_VMA_LAST_REBIND;
> > -			if (err)
> > +			if (IS_ERR(fence))
> >  				break;
> >  			op->remap.prev = NULL;
> >  		}
> > 
> >  		if (next) {
> >  			op->remap.next->gpuva.flags |=
> > XE_VMA_LAST_REBIND;
> > -			err = xe_vm_bind(vm, op->remap.next, op->q,
> > -					 xe_vma_bo(op->remap.next),
> > -					 op->syncs, op->num_syncs,
> > -					 true, false,
> > -					 op->flags & XE_VMA_OP_LAST);
> > +			dma_fence_put(fence);
> > +			fence = xe_vm_bind(vm, op->remap.next, op->q,
> > +					   xe_vma_bo(op->remap.next),
> > +					   op->syncs, op->num_syncs,
> > +					   true, false,
> > +					   op->flags & XE_VMA_OP_LAST);
> >  			op->remap.next->gpuva.flags &=
> > ~XE_VMA_LAST_REBIND;
> > -			if (err)
> > +			if (IS_ERR(fence))
> >  				break;
> >  			op->remap.next = NULL;
> >  		}
> > @@ -2599,34 +2587,36 @@ static int op_execute(struct xe_vm *vm, struct
> > xe_vma *vma,
> >  		break;
> >  	}
> >  	case DRM_GPUVA_OP_UNMAP:
> > -		err = xe_vm_unbind(vm, vma, op->q, op->syncs,
> > -				   op->num_syncs, op->flags &
> > XE_VMA_OP_FIRST,
> > -				   op->flags & XE_VMA_OP_LAST);
> > +		fence = xe_vm_unbind(vm, vma, op->q, op->syncs,
> > +				     op->num_syncs, op->flags &
> > XE_VMA_OP_FIRST,
> > +				     op->flags & XE_VMA_OP_LAST);
> >  		break;
> >  	case DRM_GPUVA_OP_PREFETCH:
> > -		err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region,
> > -				     op->syncs, op->num_syncs,
> > -				     op->flags & XE_VMA_OP_FIRST,
> > -				     op->flags & XE_VMA_OP_LAST);
> > +		fence = xe_vm_prefetch(vm, vma, op->q, op-
> > >prefetch.region,
> > +				       op->syncs, op->num_syncs,
> > +				       op->flags & XE_VMA_OP_FIRST,
> > +				       op->flags & XE_VMA_OP_LAST);
> >  		break;
> >  	default:
> >  		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
> >  	}
> > 
> > -	if (err)
> > +	if (IS_ERR(fence))
> >  		trace_xe_vma_fail(vma);
> > 
> > -	return err;
> > +	return fence;
> >  }
> > 
> > -static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
> > -			       struct xe_vma_op *op)
> > +static struct dma_fence *
> > +__xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
> > +		    struct xe_vma_op *op)
> >  {
> > +	struct dma_fence *fence;
> >  	int err;
> > 
> >  retry_userptr:
> > -	err = op_execute(vm, vma, op);
> > -	if (err == -EAGAIN) {
> > +	fence = op_execute(vm, vma, op);
> > +	if (IS_ERR(fence) && PTR_ERR(fence) == -EAGAIN) {
> >  		lockdep_assert_held_write(&vm->lock);
> > 
> >  		if (op->base.op == DRM_GPUVA_OP_REMAP) {
> > @@ -2643,22 +2633,24 @@ static int __xe_vma_op_execute(struct xe_vm
> > *vm, struct xe_vma *vma,
> >  			if (!err)
> >  				goto retry_userptr;
> > 
> > +			fence = ERR_PTR(err);
> >  			trace_xe_vma_fail(vma);
> >  		}
> >  	}
> > 
> > -	return err;
> > +	return fence;
> >  }
> > 
> > -static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
> > +static struct dma_fence *
> > +xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
> >  {
> > -	int ret = 0;
> > +	struct dma_fence *fence = ERR_PTR(-ENOMEM);
> > 
> >  	lockdep_assert_held_write(&vm->lock);
> > 
> >  	switch (op->base.op) {
> >  	case DRM_GPUVA_OP_MAP:
> > -		ret = __xe_vma_op_execute(vm, op->map.vma, op);
> > +		fence = __xe_vma_op_execute(vm, op->map.vma, op);
> >  		break;
> >  	case DRM_GPUVA_OP_REMAP:
> >  	{
> > @@ -2671,23 +2663,23 @@ static int xe_vma_op_execute(struct xe_vm *vm,
> > struct xe_vma_op *op)
> >  		else
> >  			vma = op->remap.next;
> > 
> > -		ret = __xe_vma_op_execute(vm, vma, op);
> > +		fence = __xe_vma_op_execute(vm, vma, op);
> >  		break;
> >  	}
> >  	case DRM_GPUVA_OP_UNMAP:
> > -		ret = __xe_vma_op_execute(vm, gpuva_to_vma(op-
> > >base.unmap.va),
> > -					  op);
> > +		fence = __xe_vma_op_execute(vm, gpuva_to_vma(op-
> > >base.unmap.va),
> > +					    op);
> >  		break;
> >  	case DRM_GPUVA_OP_PREFETCH:
> > -		ret = __xe_vma_op_execute(vm,
> > -					  gpuva_to_vma(op-
> > >base.prefetch.va),
> > -					  op);
> > +		fence = __xe_vma_op_execute(vm,
> > +					    gpuva_to_vma(op-
> > >base.prefetch.va),
> > +					    op);
> >  		break;
> >  	default:
> >  		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
> >  	}
> > 
> > -	return ret;
> > +	return fence;
> >  }
> > 
> >  static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
> > @@ -2861,11 +2853,35 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
> >  	return 0;
> >  }
> > 
> > +static struct dma_fence *ops_execute(struct xe_vm *vm,
> > +				     struct list_head *ops_list,
> > +				     bool cleanup)
> > +{
> > +	struct xe_vma_op *op, *next;
> > +	struct dma_fence *fence = NULL;
> > +
> > +	list_for_each_entry_safe(op, next, ops_list, link) {
> > +		if (!IS_ERR(fence)) {
> > +			dma_fence_put(fence);
> > +			fence = xe_vma_op_execute(vm, op);
> > +		}
> > +		if (IS_ERR(fence)) {
> > +			drm_warn(&vm->xe->drm, "VM op(%d) failed with %ld",
> > +				 op->base.op, PTR_ERR(fence));
> > +			fence = ERR_PTR(-ENOSPC);
> 
> There is a comment before not addressed. Copy as below:
> 
> 
> > > Once an error happens for one operation, you seem to print the same error
> > > message for all the rest of the operations... because fence =
> > > xe_vma_op_execute(vm, op) is not called anymore after the first error
> > >
> > 
> > Yes.
> 
> Is this problematic though? Let's say you have 2 ops in the list and op_execute fails for op1. You will print as below:
> 
> VM op1 failed with xxx
> VM op1 failed with xxx
> 

I don't think that is a problem, and it changes later in the series once
xe_vma_op_cleanup is removed from this function.
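
For reference, after patch 6 in this series the loop breaks on the first
failure, so the warning can only be printed once per IOCTL. Reconstructed
from that patch's hunk (quoted later in this thread):

	list_for_each_entry_safe(op, next, &vops->list, link) {
		dma_fence_put(fence);	/* put the previous op's fence (NULL-safe) */
		fence = xe_vma_op_execute(vm, op);
		if (IS_ERR(fence)) {
			drm_warn(&vm->xe->drm, "VM op(%d) failed with %ld",
				 op->base.op, PTR_ERR(fence));
			fence = ERR_PTR(-ENOSPC);
			break;	/* no further ops are executed or warned about */
		}
	}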

> 
> 
> > +		}
> > +		if (cleanup)
> > +			xe_vma_op_cleanup(vm, op);
> > +	}
> > +
> > +	return fence;
> > +}
> > +
> >  static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
> >  				     struct list_head *ops_list)
> >  {
> >  	struct drm_exec exec;
> > -	struct xe_vma_op *op, *next;
> > +	struct dma_fence *fence;
> >  	int err;
> > 
> >  	lockdep_assert_held_write(&vm->lock);
> > @@ -2878,19 +2894,14 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
> >  		if (err)
> >  			goto unlock;
> > 
> > -		list_for_each_entry_safe(op, next, ops_list, link) {
> > -			err = xe_vma_op_execute(vm, op);
> > -			if (err) {
> > -				drm_warn(&vm->xe->drm, "VM op(%d) failed with %d",
> > -					 op->base.op, err);
> > -				/*
> > -				 * FIXME: Killing VM rather than proper error handling
> > -				 */
> > -				xe_vm_kill(vm, false);
> > -				err = -ENOSPC;
> > -				goto unlock;
> > -			}
> > -			xe_vma_op_cleanup(vm, op);
> > +		fence = ops_execute(vm, ops_list, true);
> > +		if (IS_ERR(fence)) {
> > +			err = PTR_ERR(fence);
> > +			/* FIXME: Killing VM rather than proper error handling */
> > +			xe_vm_kill(vm, false);
> > +			goto unlock;
> > +		} else {
> > +			dma_fence_put(fence);
> 
> I don't get it here. You introduced the ops_execute function to return the last fence of all the operations, but you just put the fence here. Don't you intend to wait for this fence somehow? What is the point of returning a fence from ops_execute?

It is used in patches #7 [1] and #9 [2] in this series.

In [1], the returned fence is used to wait on the ops to complete before
signaling page fault completion to the GuC.

In [2], the returned fence is used as an argument to
vm_bind_ioctl_ops_fini, which attaches VMA destruction to the fence,
installs the fence in the IOCTL out-syncs, and sets the last fence on the
exec queue.

Matt

[1] https://patchwork.freedesktop.org/patch/588594/?series=132246&rev=1
[2] https://patchwork.freedesktop.org/patch/588595/?series=132246&rev=1
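
A minimal sketch of those two consumers, stitched together from the hunks
in patches 7 and 9 -- treat the exact placement (e.g. where the
dma_fence_wait() sits relative to dropping the dma-resv locks) as
illustrative rather than the final code:

	/* Page-fault path (patch 7): only ack the fault to the GuC once the
	 * rebind has actually landed.
	 */
	fence = xe_vma_rebind(vm, vma, BIT(tile->id));
	if (IS_ERR(fence)) {
		ret = PTR_ERR(fence);
		goto unlock_dma_resv;
	}
	dma_fence_wait(fence, false);
	dma_fence_put(fence);

	/* Bind IOCTL path (patch 9): the same fence is what
	 * vm_bind_ioctl_ops_fini() hangs everything off -- VMA destruction,
	 * out-sync signaling, the exec queue's last fence -- and it also
	 * drops the reference.
	 */
	fence = ops_execute(vm, vops);
	if (IS_ERR(fence)) {
		err = PTR_ERR(fence);
		/* FIXME in the patches: proper error handling */
		xe_vm_kill(vm, false);
	} else {
		vm_bind_ioctl_ops_fini(vm, vops, fence);
	}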

> Oak
> 
> 
> >  		}
> >  	}
> > 
> > --
> > 2.34.1
> 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* RE: [PATCH 05/13] drm/xe: Use xe_vma_ops to implement xe_vm_rebind
  2024-04-10  5:40 ` [PATCH 05/13] drm/xe: Use xe_vma_ops to implement xe_vm_rebind Matthew Brost
@ 2024-04-19  3:43   ` Zeng, Oak
  2024-04-19  4:14     ` Matthew Brost
  0 siblings, 1 reply; 40+ messages in thread
From: Zeng, Oak @ 2024-04-19  3:43 UTC (permalink / raw)
  To: Brost, Matthew, intel-xe; +Cc: Brost, Matthew



> -----Original Message-----
> From: Intel-xe <intel-xe-bounces@lists.freedesktop.org> On Behalf Of
> Matthew Brost
> Sent: Wednesday, April 10, 2024 1:41 AM
> To: intel-xe@lists.freedesktop.org
> Cc: Brost, Matthew <matthew.brost@intel.com>
> Subject: [PATCH 05/13] drm/xe: Use xe_vma_ops to implement
> xe_vm_rebind
> 
> All page tables updates are moving to a xe_vma_ops interface to
> implement 1 job per VM bind IOCTL.

Just want to make sure I understand it correctly. So far after this patch, the rebind is still many jobs (one job per vma), right?


> Convert xe_vm_rebind to use a
> xe_vma_ops based interface.
> 
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_vm.c | 78 +++++++++++++++++++++++++++++++-
> ------
>  1 file changed, 64 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 4cd485d5bc0a..9d82396cf5d5 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -811,37 +811,87 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm)
>  		list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
>  }
> 
> -static struct dma_fence *
> -xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
> -	       struct xe_sync_entry *syncs, u32 num_syncs,
> -	       bool first_op, bool last_op);
> +static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma
> *vma,
> +				  u8 tile_mask)
> +{
> +	INIT_LIST_HEAD(&op->link);
> +	op->base.op = DRM_GPUVA_OP_MAP;
> +	op->base.map.va.addr = vma->gpuva.va.addr;
> +	op->base.map.va.range = vma->gpuva.va.range;
> +	op->base.map.gem.obj = vma->gpuva.gem.obj;
> +	op->base.map.gem.offset = vma->gpuva.gem.offset;
> +	op->map.vma = vma;
> +	op->map.immediate = true;
> +	op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE;
> +	op->map.is_null = xe_vma_is_null(vma);
> +}
> +
> +static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma
> *vma,
> +				u8 tile_mask)
> +{
> +	struct xe_vma_op *op;
> +
> +	op = kzalloc(sizeof(*op), GFP_KERNEL);
> +	if (!op)
> +		return -ENOMEM;
> +
> +	xe_vm_populate_rebind(op, vma, tile_mask);
> +	list_add_tail(&op->link, &vops->list);
> +
> +	return 0;
> +}
> +
> +static struct dma_fence *ops_execute(struct xe_vm *vm,
> +				     struct xe_vma_ops *vops,
> +				     bool cleanup);
> +static void xe_vma_ops_init(struct xe_vma_ops *vops);
> 
>  int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
>  {
>  	struct dma_fence *fence;
>  	struct xe_vma *vma, *next;
> +	struct xe_vma_ops vops;
> +	struct xe_vma_op *op, *next_op;
> +	int err;
> 
>  	lockdep_assert_held(&vm->lock);
> -	if (xe_vm_in_lr_mode(vm) && !rebind_worker)
> +	if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
> +	    list_empty(&vm->rebind_list))
>  		return 0;
> 
> +	xe_vma_ops_init(&vops);
> +
>  	xe_vm_assert_held(vm);
> -	list_for_each_entry_safe(vma, next, &vm->rebind_list,
> -				 combined_links.rebind) {
> +	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind)
> {
>  		xe_assert(vm->xe, vma->tile_present);
> 
> -		list_del_init(&vma->combined_links.rebind);
>  		if (rebind_worker)
>  			trace_xe_vma_rebind_worker(vma);
>  		else
>  			trace_xe_vma_rebind_exec(vma);
> -		fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
> -		if (IS_ERR(fence))
> -			return PTR_ERR(fence);
> +
> +		err = xe_vm_ops_add_rebind(&vops, vma,
> +					   vma->tile_present);
> +		if (err)
> +			goto free_ops;
> +	}
> +
> +	fence = ops_execute(vm, &vops, false);
> +	if (IS_ERR(fence)) {
> +		err = PTR_ERR(fence);

So here, if the above ops_execute partially succeeds (some vma binds fail, some succeed), the vmas which were successfully bound are kept in the vm's rebind_list. Is this the correct behavior? Next time we will rebind them again...


Oak


> +	} else {
>  		dma_fence_put(fence);
> +		list_for_each_entry_safe(vma, next, &vm->rebind_list,
> +					 combined_links.rebind)
> +			list_del_init(&vma->combined_links.rebind);
> +	}
> +free_ops:
> +	list_for_each_entry_safe(op, next_op, &vops.list, link) {
> +		list_del(&op->link);
> +		kfree(op);
>  	}
> 
> -	return 0;
> +	return err;
>  }
> 
>  static void xe_vma_free(struct xe_vma *vma)
> @@ -2516,7 +2566,7 @@ static struct dma_fence *op_execute(struct xe_vm
> *vm, struct xe_vma *vma,
>  {
>  	struct dma_fence *fence = NULL;
> 
> -	lockdep_assert_held_write(&vm->lock);
> +	lockdep_assert_held(&vm->lock);
> 
>  	xe_vm_assert_held(vm);
>  	xe_bo_assert_held(xe_vma_bo(vma));
> @@ -2635,7 +2685,7 @@ xe_vma_op_execute(struct xe_vm *vm, struct
> xe_vma_op *op)
>  {
>  	struct dma_fence *fence = ERR_PTR(-ENOMEM);
> 
> -	lockdep_assert_held_write(&vm->lock);
> +	lockdep_assert_held(&vm->lock);
> 
>  	switch (op->base.op) {
>  	case DRM_GPUVA_OP_MAP:
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 05/13] drm/xe: Use xe_vma_ops to implement xe_vm_rebind
  2024-04-19  3:43   ` Zeng, Oak
@ 2024-04-19  4:14     ` Matthew Brost
  2024-04-23  3:17       ` Zeng, Oak
  0 siblings, 1 reply; 40+ messages in thread
From: Matthew Brost @ 2024-04-19  4:14 UTC (permalink / raw)
  To: Zeng, Oak; +Cc: intel-xe

On Thu, Apr 18, 2024 at 09:43:06PM -0600, Zeng, Oak wrote:
> 
> 
> > -----Original Message-----
> > From: Intel-xe <intel-xe-bounces@lists.freedesktop.org> On Behalf Of
> > Matthew Brost
> > Sent: Wednesday, April 10, 2024 1:41 AM
> > To: intel-xe@lists.freedesktop.org
> > Cc: Brost, Matthew <matthew.brost@intel.com>
> > Subject: [PATCH 05/13] drm/xe: Use xe_vma_ops to implement
> > xe_vm_rebind
> > 
> > All page tables updates are moving to a xe_vma_ops interface to
> > implement 1 job per VM bind IOCTL.
> 
> Just want to make sure I understand it correctly. So far after this patch, the rebind is still many jobs (one job per vma), right?
> 

Yes. A follow on series will convert to 1 job for all of the rebind list.

> 
> > Convert xe_vm_rebind to use a
> > xe_vma_ops based interface.
> > 
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_vm.c | 78 +++++++++++++++++++++++++++++++-
> > ------
> >  1 file changed, 64 insertions(+), 14 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > index 4cd485d5bc0a..9d82396cf5d5 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -811,37 +811,87 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm)
> >  		list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
> >  }
> > 
> > -static struct dma_fence *
> > -xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
> > -	       struct xe_sync_entry *syncs, u32 num_syncs,
> > -	       bool first_op, bool last_op);
> > +static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma
> > *vma,
> > +				  u8 tile_mask)
> > +{
> > +	INIT_LIST_HEAD(&op->link);
> > +	op->base.op = DRM_GPUVA_OP_MAP;
> > +	op->base.map.va.addr = vma->gpuva.va.addr;
> > +	op->base.map.va.range = vma->gpuva.va.range;
> > +	op->base.map.gem.obj = vma->gpuva.gem.obj;
> > +	op->base.map.gem.offset = vma->gpuva.gem.offset;
> > +	op->map.vma = vma;
> > +	op->map.immediate = true;
> > +	op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE;
> > +	op->map.is_null = xe_vma_is_null(vma);
> > +}
> > +
> > +static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma
> > *vma,
> > +				u8 tile_mask)
> > +{
> > +	struct xe_vma_op *op;
> > +
> > +	op = kzalloc(sizeof(*op), GFP_KERNEL);
> > +	if (!op)
> > +		return -ENOMEM;
> > +
> > +	xe_vm_populate_rebind(op, vma, tile_mask);
> > +	list_add_tail(&op->link, &vops->list);
> > +
> > +	return 0;
> > +}
> > +
> > +static struct dma_fence *ops_execute(struct xe_vm *vm,
> > +				     struct xe_vma_ops *vops,
> > +				     bool cleanup);
> > +static void xe_vma_ops_init(struct xe_vma_ops *vops);
> > 
> >  int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
> >  {
> >  	struct dma_fence *fence;
> >  	struct xe_vma *vma, *next;
> > +	struct xe_vma_ops vops;
> > +	struct xe_vma_op *op, *next_op;
> > +	int err;
> > 
> >  	lockdep_assert_held(&vm->lock);
> > -	if (xe_vm_in_lr_mode(vm) && !rebind_worker)
> > +	if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
> > +	    list_empty(&vm->rebind_list))
> >  		return 0;
> > 
> > +	xe_vma_ops_init(&vops);
> > +
> >  	xe_vm_assert_held(vm);
> > -	list_for_each_entry_safe(vma, next, &vm->rebind_list,
> > -				 combined_links.rebind) {
> > +	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind)
> > {
> >  		xe_assert(vm->xe, vma->tile_present);
> > 
> > -		list_del_init(&vma->combined_links.rebind);
> >  		if (rebind_worker)
> >  			trace_xe_vma_rebind_worker(vma);
> >  		else
> >  			trace_xe_vma_rebind_exec(vma);
> > -		fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
> > -		if (IS_ERR(fence))
> > -			return PTR_ERR(fence);
> > +
> > +		err = xe_vm_ops_add_rebind(&vops, vma,
> > +					   vma->tile_present);
> > +		if (err)
> > +			goto free_ops;
> > +	}
> > +
> > +	fence = ops_execute(vm, &vops, false);
> > +	if (IS_ERR(fence)) {
> > +		err = PTR_ERR(fence);
> 
> So here, if the above ops_execute partially succeeds (some vma binds fail, some succeed), the vmas which were successfully bound are kept in the vm's rebind_list. Is this the correct behavior? Next time we will rebind them again...
> 

The VM is killed if any VMA op fails, so it doesn't really matter; also,
it is safe to issue a rebind twice.

In the follow-up series, once we have 1 job for the entire rebind list, we
can cope with errors and not kill the VM. In that case we must leave
everything on the rebind list.

So this patch is correct now and for the follow on series.

Matt

> 
> Oak
> 
> 
> > +	} else {
> >  		dma_fence_put(fence);
> > +		list_for_each_entry_safe(vma, next, &vm->rebind_list,
> > +					 combined_links.rebind)
> > +			list_del_init(&vma->combined_links.rebind);
> > +	}
> > +free_ops:
> > +	list_for_each_entry_safe(op, next_op, &vops.list, link) {
> > +		list_del(&op->link);
> > +		kfree(op);
> >  	}
> > 
> > -	return 0;
> > +	return err;
> >  }
> > 
> >  static void xe_vma_free(struct xe_vma *vma)
> > @@ -2516,7 +2566,7 @@ static struct dma_fence *op_execute(struct xe_vm
> > *vm, struct xe_vma *vma,
> >  {
> >  	struct dma_fence *fence = NULL;
> > 
> > -	lockdep_assert_held_write(&vm->lock);
> > +	lockdep_assert_held(&vm->lock);
> > 
> >  	xe_vm_assert_held(vm);
> >  	xe_bo_assert_held(xe_vma_bo(vma));
> > @@ -2635,7 +2685,7 @@ xe_vma_op_execute(struct xe_vm *vm, struct
> > xe_vma_op *op)
> >  {
> >  	struct dma_fence *fence = ERR_PTR(-ENOMEM);
> > 
> > -	lockdep_assert_held_write(&vm->lock);
> > +	lockdep_assert_held(&vm->lock);
> > 
> >  	switch (op->base.op) {
> >  	case DRM_GPUVA_OP_MAP:
> > --
> > 2.34.1
> 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* RE: [PATCH 06/13] drm/xe: Simplify VM bind IOCTL error handling and cleanup
  2024-04-10  5:40 ` [PATCH 06/13] drm/xe: Simplify VM bind IOCTL error handling and cleanup Matthew Brost
@ 2024-04-19  4:19   ` Zeng, Oak
  2024-04-19 19:16     ` Matthew Brost
  0 siblings, 1 reply; 40+ messages in thread
From: Zeng, Oak @ 2024-04-19  4:19 UTC (permalink / raw)
  To: Brost, Matthew, intel-xe

It is a nice cleanup. See one question inline.

> -----Original Message-----
> From: Brost, Matthew <matthew.brost@intel.com>
> Sent: Wednesday, April 10, 2024 1:41 AM
> To: intel-xe@lists.freedesktop.org
> Cc: Brost, Matthew <matthew.brost@intel.com>; Zeng, Oak
> <oak.zeng@intel.com>
> Subject: [PATCH 06/13] drm/xe: Simplify VM bind IOCTL error handling and
> cleanup
> 
> Clean up everything in VM bind IOCTL in 1 path for both errors and
> non-errors. Also move VM bind IOCTL cleanup from ops (also used by
> non-IOCTL binds) to the VM bind IOCTL.
> 
> v2:
>  - Break ops_execute on error (Oak)
> 
> Cc: Oak Zeng <oak.zeng@intel.com>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_vm.c       | 67 ++++++--------------------------
>  drivers/gpu/drm/xe/xe_vm_types.h |  5 ---
>  2 files changed, 12 insertions(+), 60 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 9d82396cf5d5..8f5b24c8f6cd 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -842,8 +842,7 @@ static int xe_vm_ops_add_rebind(struct xe_vma_ops
> *vops, struct xe_vma *vma,
>  }
> 
>  static struct dma_fence *ops_execute(struct xe_vm *vm,
> -				     struct xe_vma_ops *vops,
> -				     bool cleanup);
> +				     struct xe_vma_ops *vops);
>  static void xe_vma_ops_init(struct xe_vma_ops *vops);
> 
>  int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
> @@ -876,7 +875,7 @@ int xe_vm_rebind(struct xe_vm *vm, bool
> rebind_worker)
>  			goto free_ops;
>  	}
> 
> -	fence = ops_execute(vm, &vops, false);
> +	fence = ops_execute(vm, &vops);
>  	if (IS_ERR(fence)) {
>  		err = PTR_ERR(fence);
>  	} else {
> @@ -2551,7 +2550,6 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm
> *vm, struct xe_exec_queue *q,
>  	if (!last_op)
>  		return 0;
> 
> -	last_op->ops = ops;
>  	if (last) {
>  		last_op->flags |= XE_VMA_OP_LAST;
>  		last_op->num_syncs = num_syncs;
> @@ -2721,25 +2719,6 @@ xe_vma_op_execute(struct xe_vm *vm, struct
> xe_vma_op *op)
>  	return fence;
>  }
> 
> -static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
> -{
> -	bool last = op->flags & XE_VMA_OP_LAST;
> -
> -	if (last) {
> -		while (op->num_syncs--)
> -			xe_sync_entry_cleanup(&op->syncs[op-
> >num_syncs]);

I understand all the other parts of this function are not needed anymore, but I can't figure out why xe_sync_entry_cleanup is not needed. You still have syncs, don't you? They are allocated in the bind_ioctl function and it seems you didn't touch that in this patch... Can you explain?

Oak
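
My guess, to be confirmed: the syncs array is owned by xe_vm_bind_ioctl()
itself, so with the per-op cleanup gone the teardown would happen once in
the IOCTL's own unwind path rather than per op. Only the free_syncs label
and the vm_bind_ioctl_signal_fences() call are visible in the hunks in this
thread, so the rest below is an assumption, roughly:

	free_syncs:
		if (err == -ENODATA)
			err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
		while (num_syncs--)
			xe_sync_entry_cleanup(&syncs[num_syncs]);

		kfree(syncs);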


> -		kfree(op->syncs);
> -		if (op->q)
> -			xe_exec_queue_put(op->q);
> -	}
> -	if (!list_empty(&op->link))
> -		list_del(&op->link);
> -	if (op->ops)
> -		drm_gpuva_ops_free(&vm->gpuvm, op->ops);
> -	if (last)
> -		xe_vm_put(vm);
> -}
> -
>  static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
>  			     bool post_commit, bool prev_post_commit,
>  			     bool next_post_commit)
> @@ -2816,8 +2795,6 @@ static void vm_bind_ioctl_ops_unwind(struct
> xe_vm *vm,
>  					 op->flags &
> XE_VMA_OP_PREV_COMMITTED,
>  					 op->flags &
> XE_VMA_OP_NEXT_COMMITTED);
>  		}
> -
> -		drm_gpuva_ops_free(&vm->gpuvm, __ops);
>  	}
>  }
> 
> @@ -2904,24 +2881,20 @@ static int
> vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
>  }
> 
>  static struct dma_fence *ops_execute(struct xe_vm *vm,
> -				     struct xe_vma_ops *vops,
> -				     bool cleanup)
> +				     struct xe_vma_ops *vops)
>  {
>  	struct xe_vma_op *op, *next;
>  	struct dma_fence *fence = NULL;
> 
>  	list_for_each_entry_safe(op, next, &vops->list, link) {
> -		if (!IS_ERR(fence)) {
> -			dma_fence_put(fence);
> -			fence = xe_vma_op_execute(vm, op);
> -		}
> +		dma_fence_put(fence);
> +		fence = xe_vma_op_execute(vm, op);
>  		if (IS_ERR(fence)) {
>  			drm_warn(&vm->xe->drm, "VM op(%d) failed with %ld",
>  				 op->base.op, PTR_ERR(fence));
>  			fence = ERR_PTR(-ENOSPC);
> +			break;
>  		}
> -		if (cleanup)
> -			xe_vma_op_cleanup(vm, op);
>  	}
> 
>  	return fence;
> @@ -2944,7 +2917,7 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm
> *vm,
>  		if (err)
>  			goto unlock;
> 
> -		fence = ops_execute(vm, vops, true);
> +		fence = ops_execute(vm, vops);
>  		if (IS_ERR(fence)) {
>  			err = PTR_ERR(fence);
>  			/* FIXME: Killing VM rather than proper error
> handling */
> @@ -3305,30 +3278,14 @@ int xe_vm_bind_ioctl(struct drm_device *dev,
> void *data, struct drm_file *file)
>  		goto unwind_ops;
>  	}
> 
> -	xe_vm_get(vm);
> -	if (q)
> -		xe_exec_queue_get(q);
> -
>  	err = vm_bind_ioctl_ops_execute(vm, &vops);
> 
> -	up_write(&vm->lock);
> -
> -	if (q)
> -		xe_exec_queue_put(q);
> -	xe_vm_put(vm);
> -
> -	for (i = 0; bos && i < args->num_binds; ++i)
> -		xe_bo_put(bos[i]);
> -
> -	kvfree(bos);
> -	kvfree(ops);
> -	if (args->num_binds > 1)
> -		kvfree(bind_ops);
> -
> -	return err;
> -
>  unwind_ops:
> -	vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
> +	if (err && err != -ENODATA)
> +		vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
> +	for (i = args->num_binds - 1; i >= 0; --i)
> +		if (ops[i])
> +			drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
>  free_syncs:
>  	if (err == -ENODATA)
>  		err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h
> b/drivers/gpu/drm/xe/xe_vm_types.h
> index 466b6c62d1f9..149ab892967e 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -330,11 +330,6 @@ enum xe_vma_op_flags {
>  struct xe_vma_op {
>  	/** @base: GPUVA base operation */
>  	struct drm_gpuva_op base;
> -	/**
> -	 * @ops: GPUVA ops, when set call drm_gpuva_ops_free after this
> -	 * operations is processed
> -	 */
> -	struct drm_gpuva_ops *ops;
>  	/** @q: exec queue for this operation */
>  	struct xe_exec_queue *q;
>  	/**
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 40+ messages in thread

* RE: [PATCH 07/13] drm/xe: Use xe_vma_ops to implement page fault rebinds
  2024-04-10  5:40 ` [PATCH 07/13] drm/xe: Use xe_vma_ops to implement page fault rebinds Matthew Brost
@ 2024-04-19 14:22   ` Zeng, Oak
  2024-04-19 19:33     ` Matthew Brost
  0 siblings, 1 reply; 40+ messages in thread
From: Zeng, Oak @ 2024-04-19 14:22 UTC (permalink / raw)
  To: Brost, Matthew, intel-xe; +Cc: Brost, Matthew



> -----Original Message-----
> From: Intel-xe <intel-xe-bounces@lists.freedesktop.org> On Behalf Of
> Matthew Brost
> Sent: Wednesday, April 10, 2024 1:41 AM
> To: intel-xe@lists.freedesktop.org
> Cc: Brost, Matthew <matthew.brost@intel.com>
> Subject: [PATCH 07/13] drm/xe: Use xe_vma_ops to implement page fault
> rebinds
> 
> All page tables updates are moving to a xe_vma_ops interface to
> implement 1 job per VM bind IOCTL.

Can you explain why using the xe_vma_ops interface is necessary even to bind one vma? I understand it makes sense to use this interface to bind multiple vmas. See also below.


> Add xe_vma_rebind function which is
> implemented using xe_vma_ops interface. Use xe_vma_rebind in page
> faults for rebinds.
> 
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_gt_pagefault.c | 16 ++++----
>  drivers/gpu/drm/xe/xe_vm.c           | 57 +++++++++++++++++++++++-----
>  drivers/gpu/drm/xe/xe_vm.h           |  2 +
>  drivers/gpu/drm/xe/xe_vm_types.h     |  2 +
>  4 files changed, 58 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> index fa9e9853c53b..040dd142c49c 100644
> --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> @@ -19,7 +19,6 @@
>  #include "xe_guc.h"
>  #include "xe_guc_ct.h"
>  #include "xe_migrate.h"
> -#include "xe_pt.h"
>  #include "xe_trace.h"
>  #include "xe_vm.h"
> 
> @@ -204,15 +203,14 @@ static int handle_pagefault(struct xe_gt *gt, struct
> pagefault *pf)
>  		drm_exec_retry_on_contention(&exec);
>  		if (ret)
>  			goto unlock_dma_resv;
> -	}
> 
> -	/* Bind VMA only to the GT that has faulted */
> -	trace_xe_vma_pf_bind(vma);
> -	fence = __xe_pt_bind_vma(tile, vma, xe_tile_migrate_engine(tile),
> NULL, 0,
> -				 vma->tile_present & BIT(tile->id));
> -	if (IS_ERR(fence)) {
> -		ret = PTR_ERR(fence);
> -		goto unlock_dma_resv;
> +		/* Bind VMA only to the GT that has faulted */
> +		trace_xe_vma_pf_bind(vma);
> +		fence = xe_vma_rebind(vm, vma, BIT(tile->id));
> +		if (IS_ERR(fence)) {
> +			ret = PTR_ERR(fence);
> +			goto unlock_dma_resv;
> +		}
>  	}
> 
>  	/*
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 8f5b24c8f6cd..54a69fbfbb00 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -815,6 +815,7 @@ static void xe_vm_populate_rebind(struct
> xe_vma_op *op, struct xe_vma *vma,
>  				  u8 tile_mask)
>  {
>  	INIT_LIST_HEAD(&op->link);
> +	op->tile_mask = tile_mask;
>  	op->base.op = DRM_GPUVA_OP_MAP;
>  	op->base.map.va.addr = vma->gpuva.va.addr;
>  	op->base.map.va.range = vma->gpuva.va.range;
> @@ -893,6 +894,33 @@ int xe_vm_rebind(struct xe_vm *vm, bool
> rebind_worker)
>  	return err;
>  }
> 
> +struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma,
> u8 tile_mask)


I am trying to figure out why this function is necessary. We are only binding one vma here, so why do we need to create an xe_vma_ops list? We are only adding one vma to this list...

Oak

> +{
> +	struct dma_fence *fence = NULL;
> +	struct xe_vma_ops vops;
> +	struct xe_vma_op *op, *next_op;
> +	int err;
> +
> +	lockdep_assert_held(&vm->lock);
> +	xe_vm_assert_held(vm);
> +	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
> +
> +	xe_vma_ops_init(&vops);
> +
> +	err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
> +	if (err)
> +		return ERR_PTR(err);
> +
> +	fence = ops_execute(vm, &vops);
> +
> +	list_for_each_entry_safe(op, next_op, &vops.list, link) {
> +		list_del(&op->link);
> +		kfree(op);
> +	}
> +
> +	return fence;
> +}
> +
>  static void xe_vma_free(struct xe_vma *vma)
>  {
>  	if (xe_vma_is_userptr(vma))
> @@ -1796,7 +1824,7 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct
> xe_exec_queue *q,
>  static struct dma_fence *
>  xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
>  	       struct xe_sync_entry *syncs, u32 num_syncs,
> -	       bool first_op, bool last_op)
> +	       u8 tile_mask, bool first_op, bool last_op)
>  {
>  	struct xe_tile *tile;
>  	struct dma_fence *fence;
> @@ -1804,7 +1832,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct
> xe_exec_queue *q,
>  	struct dma_fence_array *cf = NULL;
>  	struct xe_vm *vm = xe_vma_vm(vma);
>  	int cur_fence = 0, i;
> -	int number_tiles = hweight8(vma->tile_mask);
> +	int number_tiles = hweight8(tile_mask);
>  	int err;
>  	u8 id;
> 
> @@ -1818,7 +1846,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct
> xe_exec_queue *q,
>  	}
> 
>  	for_each_tile(tile, vm->xe, id) {
> -		if (!(vma->tile_mask & BIT(id)))
> +		if (!(tile_mask & BIT(id)))
>  			goto next;
> 
>  		fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
> @@ -1886,7 +1914,7 @@ find_ufence_get(struct xe_sync_entry *syncs, u32
> num_syncs)
>  static struct dma_fence *
>  xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct
> xe_exec_queue *q,
>  	   struct xe_bo *bo, struct xe_sync_entry *syncs, u32 num_syncs,
> -	   bool immediate, bool first_op, bool last_op)
> +	   u8 tile_mask, bool immediate, bool first_op, bool last_op)
>  {
>  	struct dma_fence *fence;
>  	struct xe_exec_queue *wait_exec_queue =
> to_wait_exec_queue(vm, q);
> @@ -1902,8 +1930,8 @@ xe_vm_bind(struct xe_vm *vm, struct xe_vma
> *vma, struct xe_exec_queue *q,
>  	vma->ufence = ufence ?: vma->ufence;
> 
>  	if (immediate) {
> -		fence = xe_vm_bind_vma(vma, q, syncs, num_syncs,
> first_op,
> -				       last_op);
> +		fence = xe_vm_bind_vma(vma, q, syncs, num_syncs,
> tile_mask,
> +				       first_op, last_op);
>  		if (IS_ERR(fence))
>  			return fence;
>  	} else {
> @@ -2095,7 +2123,7 @@ xe_vm_prefetch(struct xe_vm *vm, struct xe_vma
> *vma,
> 
>  	if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated))
> {
>  		return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs,
> num_syncs,
> -				  true, first_op, last_op);
> +				  vma->tile_mask, true, first_op, last_op);
>  	} else {
>  		struct dma_fence *fence =
>  			xe_exec_queue_last_fence_get(wait_exec_queue,
> vm);
> @@ -2408,10 +2436,15 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm
> *vm, struct xe_exec_queue *q,
>  	struct xe_device *xe = vm->xe;
>  	struct xe_vma_op *last_op = NULL;
>  	struct drm_gpuva_op *__op;
> +	struct xe_tile *tile;
> +	u8 id, tile_mask = 0;
>  	int err = 0;
> 
>  	lockdep_assert_held_write(&vm->lock);
> 
> +	for_each_tile(tile, vm->xe, id)
> +		tile_mask |= 0x1 << id;
> +
>  	drm_gpuva_for_each_op(__op, ops) {
>  		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
>  		struct xe_vma *vma;
> @@ -2428,6 +2461,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm
> *vm, struct xe_exec_queue *q,
>  		}
> 
>  		op->q = q;
> +		op->tile_mask = tile_mask;
> 
>  		switch (op->base.op) {
>  		case DRM_GPUVA_OP_MAP:
> @@ -2574,6 +2608,7 @@ static struct dma_fence *op_execute(struct xe_vm
> *vm, struct xe_vma *vma,
>  		fence = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
>  				   op->syncs, op->num_syncs,
>  				   op->map.immediate
> || !xe_vm_in_fault_mode(vm),
> +				   op->tile_mask,
>  				   op->flags & XE_VMA_OP_FIRST,
>  				   op->flags & XE_VMA_OP_LAST);
>  		break;
> @@ -2600,7 +2635,9 @@ static struct dma_fence *op_execute(struct xe_vm
> *vm, struct xe_vma *vma,
>  			dma_fence_put(fence);
>  			fence = xe_vm_bind(vm, op->remap.prev, op->q,
>  					   xe_vma_bo(op->remap.prev), op-
> >syncs,
> -					   op->num_syncs, true, false,
> +					   op->num_syncs,
> +					   op->remap.prev->tile_mask, true,
> +					   false,
>  					   op->flags & XE_VMA_OP_LAST
> && !next);
>  			op->remap.prev->gpuva.flags &=
> ~XE_VMA_LAST_REBIND;
>  			if (IS_ERR(fence))
> @@ -2614,8 +2651,8 @@ static struct dma_fence *op_execute(struct xe_vm
> *vm, struct xe_vma *vma,
>  			fence = xe_vm_bind(vm, op->remap.next, op->q,
>  					   xe_vma_bo(op->remap.next),
>  					   op->syncs, op->num_syncs,
> -					   true, false,
> -					   op->flags & XE_VMA_OP_LAST);
> +					   op->remap.next->tile_mask, true,
> +					   false, op->flags &
> XE_VMA_OP_LAST);
>  			op->remap.next->gpuva.flags &=
> ~XE_VMA_LAST_REBIND;
>  			if (IS_ERR(fence))
>  				break;
> diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
> index 306cd0934a19..204a4ff63f88 100644
> --- a/drivers/gpu/drm/xe/xe_vm.h
> +++ b/drivers/gpu/drm/xe/xe_vm.h
> @@ -208,6 +208,8 @@ int __xe_vm_userptr_needs_repin(struct xe_vm
> *vm);
>  int xe_vm_userptr_check_repin(struct xe_vm *vm);
> 
>  int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
> +struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma,
> +				u8 tile_mask);
> 
>  int xe_vm_invalidate_vma(struct xe_vma *vma);
> 
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h
> b/drivers/gpu/drm/xe/xe_vm_types.h
> index 149ab892967e..e9cd6da6263a 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -343,6 +343,8 @@ struct xe_vma_op {
>  	struct list_head link;
>  	/** @flags: operation flags */
>  	enum xe_vma_op_flags flags;
> +	/** @tile_mask: Tile mask for operation */
> +	u8 tile_mask;
> 
>  	union {
>  		/** @map: VMA map operation specific data */
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 40+ messages in thread

* RE: [PATCH 08/13] drm/xe: Add some members to xe_vma_ops
  2024-04-10  5:40 ` [PATCH 08/13] drm/xe: Add some members to xe_vma_ops Matthew Brost
@ 2024-04-19 14:24   ` Zeng, Oak
  0 siblings, 0 replies; 40+ messages in thread
From: Zeng, Oak @ 2024-04-19 14:24 UTC (permalink / raw)
  To: Brost, Matthew, intel-xe

This patch is:

Reviewed-by: Oak Zeng <oak.zeng@intel.com>

> -----Original Message-----
> From: Brost, Matthew <matthew.brost@intel.com>
> Sent: Wednesday, April 10, 2024 1:41 AM
> To: intel-xe@lists.freedesktop.org
> Cc: Brost, Matthew <matthew.brost@intel.com>; Zeng, Oak
> <oak.zeng@intel.com>
> Subject: [PATCH 08/13] drm/xe: Add some members to xe_vma_ops
> 
> This will help with moving to single jobs for many bind operations.
> 
> v2:
>  - Rebase
> 
> Cc: Oak Zeng <oak.zeng@intel.com>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_vm.c       | 19 ++++++++++++++-----
>  drivers/gpu/drm/xe/xe_vm_types.h |  8 ++++++++
>  2 files changed, 22 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 54a69fbfbb00..09871538484b 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -844,7 +844,9 @@ static int xe_vm_ops_add_rebind(struct xe_vma_ops
> *vops, struct xe_vma *vma,
> 
>  static struct dma_fence *ops_execute(struct xe_vm *vm,
>  				     struct xe_vma_ops *vops);
> -static void xe_vma_ops_init(struct xe_vma_ops *vops);
> +static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
> +			    struct xe_exec_queue *q,
> +			    struct xe_sync_entry *syncs, u32 num_syncs);
> 
>  int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
>  {
> @@ -859,7 +861,7 @@ int xe_vm_rebind(struct xe_vm *vm, bool
> rebind_worker)
>  	    list_empty(&vm->rebind_list))
>  		return 0;
> 
> -	xe_vma_ops_init(&vops);
> +	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
> 
>  	xe_vm_assert_held(vm);
>  	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind)
> {
> @@ -905,7 +907,7 @@ struct dma_fence *xe_vma_rebind(struct xe_vm
> *vm, struct xe_vma *vma, u8 tile_ma
>  	xe_vm_assert_held(vm);
>  	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
> 
> -	xe_vma_ops_init(&vops);
> +	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
> 
>  	err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
>  	if (err)
> @@ -3115,9 +3117,16 @@ static int vm_bind_ioctl_signal_fences(struct
> xe_vm *vm,
>  	return err;
>  }
> 
> -static void xe_vma_ops_init(struct xe_vma_ops *vops)
> +static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
> +			    struct xe_exec_queue *q,
> +			    struct xe_sync_entry *syncs, u32 num_syncs)
>  {
> +	memset(vops, 0, sizeof(*vops));
>  	INIT_LIST_HEAD(&vops->list);
> +	vops->vm = vm;
> +	vops->q = q;
> +	vops->syncs = syncs;
> +	vops->num_syncs = num_syncs;
>  }
> 
>  int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file
> *file)
> @@ -3284,7 +3293,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void
> *data, struct drm_file *file)
>  		goto free_syncs;
>  	}
> 
> -	xe_vma_ops_init(&vops);
> +	xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
>  	for (i = 0; i < args->num_binds; ++i) {
>  		u64 range = bind_ops[i].range;
>  		u64 addr = bind_ops[i].addr;
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h
> b/drivers/gpu/drm/xe/xe_vm_types.h
> index e9cd6da6263a..ce1a63a5e3e7 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -360,6 +360,14 @@ struct xe_vma_op {
>  struct xe_vma_ops {
>  	/** @list: list of VMA operations */
>  	struct list_head list;
> +	/** @vm: VM */
> +	struct xe_vm *vm;
> +	/** @q: exec queue these operations */
> +	struct xe_exec_queue *q;
> +	/** @syncs: syncs these operation */
> +	struct xe_sync_entry *syncs;
> +	/** @num_syncs: number of syncs */
> +	u32 num_syncs;
>  };
> 
>  #endif
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 40+ messages in thread

* RE: [PATCH 09/13] drm/xe: Add vm_bind_ioctl_ops_fini helper
  2024-04-10  5:40 ` [PATCH 09/13] drm/xe: Add vm_bind_ioctl_ops_fini helper Matthew Brost
@ 2024-04-19 14:51   ` Zeng, Oak
  0 siblings, 0 replies; 40+ messages in thread
From: Zeng, Oak @ 2024-04-19 14:51 UTC (permalink / raw)
  To: Brost, Matthew, intel-xe

It is a nice cleanup. Patch is:

Reviewed-by: Oak Zeng <oak.zeng@intel.com>

> -----Original Message-----
> From: Brost, Matthew <matthew.brost@intel.com>
> Sent: Wednesday, April 10, 2024 1:41 AM
> To: intel-xe@lists.freedesktop.org
> Cc: Brost, Matthew <matthew.brost@intel.com>; Zeng, Oak
> <oak.zeng@intel.com>
> Subject: [PATCH 09/13] drm/xe: Add vm_bind_ioctl_ops_fini helper
> 
> Simplify VM bind code by signaling out-fences / destroying VMAs in a
> single location. Will help with transition single job for many bind ops.
> 
> v2:
>  - s/vm_bind_ioctl_ops_install_fences/vm_bind_ioctl_ops_fini (Oak)
>  - Set last fence in vm_bind_ioctl_ops_fini (Oak)
> 
> Cc: Oak Zeng <oak.zeng@intel.com>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_vm.c | 62 +++++++++++++++-----------------------
>  1 file changed, 24 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 09871538484b..97384c77f662 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -1748,7 +1748,7 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct
> xe_exec_queue *q,
>  	struct dma_fence *fence = NULL;
>  	struct dma_fence **fences = NULL;
>  	struct dma_fence_array *cf = NULL;
> -	int cur_fence = 0, i;
> +	int cur_fence = 0;
>  	int number_tiles = hweight8(vma->tile_present);
>  	int err;
>  	u8 id;
> @@ -1806,10 +1806,6 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct
> xe_exec_queue *q,
> 
>  	fence = cf ? &cf->base : !fence ?
>  		xe_exec_queue_last_fence_get(wait_exec_queue, vm) :
> fence;
> -	if (last_op) {
> -		for (i = 0; i < num_syncs; i++)
> -			xe_sync_entry_signal(&syncs[i], fence);
> -	}
> 
>  	return fence;
> 
> @@ -1833,7 +1829,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct
> xe_exec_queue *q,
>  	struct dma_fence **fences = NULL;
>  	struct dma_fence_array *cf = NULL;
>  	struct xe_vm *vm = xe_vma_vm(vma);
> -	int cur_fence = 0, i;
> +	int cur_fence = 0;
>  	int number_tiles = hweight8(tile_mask);
>  	int err;
>  	u8 id;
> @@ -1880,12 +1876,6 @@ xe_vm_bind_vma(struct xe_vma *vma, struct
> xe_exec_queue *q,
>  		}
>  	}
> 
> -	if (last_op) {
> -		for (i = 0; i < num_syncs; i++)
> -			xe_sync_entry_signal(&syncs[i],
> -					     cf ? &cf->base : fence);
> -	}
> -
>  	return cf ? &cf->base : fence;
> 
>  err_fences:
> @@ -1937,20 +1927,11 @@ xe_vm_bind(struct xe_vm *vm, struct xe_vma
> *vma, struct xe_exec_queue *q,
>  		if (IS_ERR(fence))
>  			return fence;
>  	} else {
> -		int i;
> -
>  		xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
> 
>  		fence = xe_exec_queue_last_fence_get(wait_exec_queue,
> vm);
> -		if (last_op) {
> -			for (i = 0; i < num_syncs; i++)
> -				xe_sync_entry_signal(&syncs[i], fence);
> -		}
>  	}
> 
> -	if (last_op)
> -		xe_exec_queue_last_fence_set(wait_exec_queue, vm,
> fence);
> -
>  	return fence;
>  }
> 
> @@ -1960,7 +1941,6 @@ xe_vm_unbind(struct xe_vm *vm, struct xe_vma
> *vma,
>  	     u32 num_syncs, bool first_op, bool last_op)
>  {
>  	struct dma_fence *fence;
> -	struct xe_exec_queue *wait_exec_queue =
> to_wait_exec_queue(vm, q);
> 
>  	xe_vm_assert_held(vm);
>  	xe_bo_assert_held(xe_vma_bo(vma));
> @@ -1969,10 +1949,6 @@ xe_vm_unbind(struct xe_vm *vm, struct xe_vma
> *vma,
>  	if (IS_ERR(fence))
>  		return fence;
> 
> -	xe_vma_destroy(vma, fence);
> -	if (last_op)
> -		xe_exec_queue_last_fence_set(wait_exec_queue, vm,
> fence);
> -
>  	return fence;
>  }
> 
> @@ -2127,17 +2103,7 @@ xe_vm_prefetch(struct xe_vm *vm, struct
> xe_vma *vma,
>  		return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs,
> num_syncs,
>  				  vma->tile_mask, true, first_op, last_op);
>  	} else {
> -		struct dma_fence *fence =
> -			xe_exec_queue_last_fence_get(wait_exec_queue,
> vm);
> -		int i;
> -
> -		/* Nothing to do, signal fences now */
> -		if (last_op) {
> -			for (i = 0; i < num_syncs; i++)
> -				xe_sync_entry_signal(&syncs[i], fence);
> -		}
> -
> -		return fence;
> +		return xe_exec_queue_last_fence_get(wait_exec_queue,
> vm);
>  	}
>  }
> 
> @@ -2939,6 +2905,26 @@ static struct dma_fence *ops_execute(struct
> xe_vm *vm,
>  	return fence;
>  }
> 
> +static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops
> *vops,
> +				   struct dma_fence *fence)
> +{
> +	struct xe_exec_queue *wait_exec_queue =
> to_wait_exec_queue(vm, vops->q);
> +	struct xe_vma_op *op;
> +	int i;
> +
> +	list_for_each_entry(op, &vops->list, link) {
> +		if (op->base.op == DRM_GPUVA_OP_UNMAP)
> +			xe_vma_destroy(gpuva_to_vma(op-
> >base.unmap.va), fence);
> +		else if (op->base.op == DRM_GPUVA_OP_REMAP)
> +			xe_vma_destroy(gpuva_to_vma(op-
> >base.remap.unmap->va),
> +				       fence);
> +	}
> +	for (i = 0; i < vops->num_syncs; i++)
> +		xe_sync_entry_signal(vops->syncs + i, fence);
> +	xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
> +	dma_fence_put(fence);
> +}
> +
>  static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
>  				     struct xe_vma_ops *vops)
>  {
> @@ -2963,7 +2949,7 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm
> *vm,
>  			xe_vm_kill(vm, false);
>  			goto unlock;
>  		} else {
> -			dma_fence_put(fence);
> +			vm_bind_ioctl_ops_fini(vm, vops, fence);
>  		}
>  	}
> 
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 40+ messages in thread

* RE: [PATCH 10/13] drm/xe: Move ufence check to op_lock
  2024-04-10  5:40 ` [PATCH 10/13] drm/xe: Move ufence check to op_lock Matthew Brost
@ 2024-04-19 14:56   ` Zeng, Oak
  2024-04-19 19:34     ` Matthew Brost
  0 siblings, 1 reply; 40+ messages in thread
From: Zeng, Oak @ 2024-04-19 14:56 UTC (permalink / raw)
  To: Brost, Matthew, intel-xe; +Cc: Brost, Matthew

There is a typo in the commit message. Other than that, the patch looks good.

Reviewed-by: Oak Zeng <oak.zeng@intel.com>

> -----Original Message-----
> From: Intel-xe <intel-xe-bounces@lists.freedesktop.org> On Behalf Of
> Matthew Brost
> Sent: Wednesday, April 10, 2024 1:41 AM
> To: intel-xe@lists.freedesktop.org
> Cc: Brost, Matthew <matthew.brost@intel.com>
> Subject: [PATCH 10/13] drm/xe: Move ufence check to op_lock
> 
> Rather than checking for an unsignaled ufence ay

At

Oak


> unbind time, check for
> this during the op_lock function. This will help with the transition to
> job 1 per VM bind IOCTL.
> 
> v2:
>  - Rebase
> 
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_vm.c | 33 +++++++++++++++++++++++----------
>  1 file changed, 23 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 97384c77f662..0319e70577fe 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -1755,16 +1755,6 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct
> xe_exec_queue *q,
> 
>  	trace_xe_vma_unbind(vma);
> 
> -	if (vma->ufence) {
> -		struct xe_user_fence * const f = vma->ufence;
> -
> -		if (!xe_sync_ufence_get_status(f))
> -			return ERR_PTR(-EBUSY);
> -
> -		vma->ufence = NULL;
> -		xe_sync_ufence_put(f);
> -	}
> -
>  	if (number_tiles > 1) {
>  		fences = kmalloc_array(number_tiles, sizeof(*fences),
>  				       GFP_KERNEL);
> @@ -2819,6 +2809,21 @@ static int vma_lock_and_validate(struct drm_exec
> *exec, struct xe_vma *vma,
>  	return err;
>  }
> 
> +static int check_ufence(struct xe_vma *vma)
> +{
> +	if (vma->ufence) {
> +		struct xe_user_fence * const f = vma->ufence;
> +
> +		if (!xe_sync_ufence_get_status(f))
> +			return -EBUSY;
> +
> +		vma->ufence = NULL;
> +		xe_sync_ufence_put(f);
> +	}
> +
> +	return 0;
> +}
> +
>  static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
>  			    struct xe_vma_op *op)
>  {
> @@ -2830,6 +2835,10 @@ static int op_lock_and_prep(struct drm_exec
> *exec, struct xe_vm *vm,
>  					    !xe_vm_in_fault_mode(vm));
>  		break;
>  	case DRM_GPUVA_OP_REMAP:
> +		err = check_ufence(gpuva_to_vma(op->base.remap.unmap-
> >va));
> +		if (err)
> +			break;
> +
>  		err = vma_lock_and_validate(exec,
>  					    gpuva_to_vma(op-
> >base.remap.unmap->va),
>  					    false);
> @@ -2839,6 +2848,10 @@ static int op_lock_and_prep(struct drm_exec
> *exec, struct xe_vm *vm,
>  			err = vma_lock_and_validate(exec, op->remap.next,
> true);
>  		break;
>  	case DRM_GPUVA_OP_UNMAP:
> +		err = check_ufence(gpuva_to_vma(op->base.unmap.va));
> +		if (err)
> +			break;
> +
>  		err = vma_lock_and_validate(exec,
>  					    gpuva_to_vma(op-
> >base.unmap.va),
>  					    false);
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 40+ messages in thread

* RE: [PATCH 11/13] drm/xe: Move ufence add to vm_bind_ioctl_ops_fini
  2024-04-10  5:40 ` [PATCH 11/13] drm/xe: Move ufence add to vm_bind_ioctl_ops_fini Matthew Brost
@ 2024-04-19 15:24   ` Zeng, Oak
  2024-04-19 19:45     ` Matthew Brost
  0 siblings, 1 reply; 40+ messages in thread
From: Zeng, Oak @ 2024-04-19 15:24 UTC (permalink / raw)
  To: Brost, Matthew, intel-xe; +Cc: Brost, Matthew



> -----Original Message-----
> From: Intel-xe <intel-xe-bounces@lists.freedesktop.org> On Behalf Of
> Matthew Brost
> Sent: Wednesday, April 10, 2024 1:41 AM
> To: intel-xe@lists.freedesktop.org
> Cc: Brost, Matthew <matthew.brost@intel.com>
> Subject: [PATCH 11/13] drm/xe: Move ufence add to vm_bind_ioctl_ops_fini
> 
> Rather than adding a ufence to a VMA in the bind function, add the
> ufence to all VMAs in the IOCTL that require binds in
> vm_bind_ioctl_ops_install_fences.

This is a typo, right? From the code, it should be vm_bind_ioctl_ops_fini

I also want to make sure I understand here: so the ufence added to vma is *only* used to make sure last vma bind has been completed upon vma unbind time. So even though it is more natural to set ufence at bind function, it is safe to set it after all operations are submitted (vm_bind_ioctl_ops_fini func). No vm_bind ioctl (and the vma unbind triggered by ioctl) can go through *before* the last vm bind ioctl's ops finish, right?

Oak



 This will help with the transition to
> job 1 per VM bind IOCTL.
> 
> v2:
>  - Rebase
> 
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_sync.c | 15 ++++++++++++
>  drivers/gpu/drm/xe/xe_sync.h |  1 +
>  drivers/gpu/drm/xe/xe_vm.c   | 44 ++++++++++++++++++++++++++++++--
> ----
>  3 files changed, 53 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
> index 65f1f1628235..2883d9aca404 100644
> --- a/drivers/gpu/drm/xe/xe_sync.c
> +++ b/drivers/gpu/drm/xe/xe_sync.c
> @@ -338,6 +338,21 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync,
> int num_sync,
>  	return ERR_PTR(-ENOMEM);
>  }
> 
> +/**
> + * __xe_sync_ufence_get() - Get user fence from user fence
> + * @ufence: input user fence
> + *
> + * Get a user fence reference from user fence
> + *
> + * Return: xe_user_fence pointer with reference
> + */
> +struct xe_user_fence *__xe_sync_ufence_get(struct xe_user_fence
> *ufence)
> +{
> +	user_fence_get(ufence);
> +
> +	return ufence;
> +}
> +
>  /**
>   * xe_sync_ufence_get() - Get user fence from sync
>   * @sync: input sync
> diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
> index 3e03396af2c6..006dbf780793 100644
> --- a/drivers/gpu/drm/xe/xe_sync.h
> +++ b/drivers/gpu/drm/xe/xe_sync.h
> @@ -37,6 +37,7 @@ static inline bool xe_sync_is_ufence(struct
> xe_sync_entry *sync)
>  	return !!sync->ufence;
>  }
> 
> +struct xe_user_fence *__xe_sync_ufence_get(struct xe_user_fence
> *ufence);
>  struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync);
>  void xe_sync_ufence_put(struct xe_user_fence *ufence);
>  int xe_sync_ufence_get_status(struct xe_user_fence *ufence);
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 0319e70577fe..1da68a03407b 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -1900,17 +1900,10 @@ xe_vm_bind(struct xe_vm *vm, struct xe_vma
> *vma, struct xe_exec_queue *q,
>  {
>  	struct dma_fence *fence;
>  	struct xe_exec_queue *wait_exec_queue =
> to_wait_exec_queue(vm, q);
> -	struct xe_user_fence *ufence;
> 
>  	xe_vm_assert_held(vm);
>  	xe_bo_assert_held(bo);
> 
> -	ufence = find_ufence_get(syncs, num_syncs);
> -	if (vma->ufence && ufence)
> -		xe_sync_ufence_put(vma->ufence);
> -
> -	vma->ufence = ufence ?: vma->ufence;
> -
>  	if (immediate) {
>  		fence = xe_vm_bind_vma(vma, q, syncs, num_syncs,
> tile_mask,
>  				       first_op, last_op);
> @@ -2918,20 +2911,57 @@ static struct dma_fence *ops_execute(struct
> xe_vm *vm,
>  	return fence;
>  }
> 
> +static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence
> *ufence)
> +{
> +	if (vma->ufence)
> +		xe_sync_ufence_put(vma->ufence);
> +	vma->ufence = __xe_sync_ufence_get(ufence);
> +}
> +
> +static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
> +			  struct xe_user_fence *ufence)
> +{
> +	switch (op->base.op) {
> +	case DRM_GPUVA_OP_MAP:
> +		vma_add_ufence(op->map.vma, ufence);
> +		break;
> +	case DRM_GPUVA_OP_REMAP:
> +		if (op->remap.prev)
> +			vma_add_ufence(op->remap.prev, ufence);
> +		if (op->remap.next)
> +			vma_add_ufence(op->remap.next, ufence);
> +		break;
> +	case DRM_GPUVA_OP_UNMAP:
> +		break;
> +	case DRM_GPUVA_OP_PREFETCH:
> +		vma_add_ufence(gpuva_to_vma(op->base.prefetch.va),
> ufence);
> +		break;
> +	default:
> +		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
> +	}
> +}
> +
>  static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops
> *vops,
>  				   struct dma_fence *fence)
>  {
>  	struct xe_exec_queue *wait_exec_queue =
> to_wait_exec_queue(vm, vops->q);
> +	struct xe_user_fence *ufence;
>  	struct xe_vma_op *op;
>  	int i;
> 
> +	ufence = find_ufence_get(vops->syncs, vops->num_syncs);
>  	list_for_each_entry(op, &vops->list, link) {
> +		if (ufence)
> +			op_add_ufence(vm, op, ufence);
> +
>  		if (op->base.op == DRM_GPUVA_OP_UNMAP)
>  			xe_vma_destroy(gpuva_to_vma(op-
> >base.unmap.va), fence);
>  		else if (op->base.op == DRM_GPUVA_OP_REMAP)
>  			xe_vma_destroy(gpuva_to_vma(op-
> >base.remap.unmap->va),
>  				       fence);
>  	}
> +	if (ufence)
> +		xe_sync_ufence_put(ufence);
>  	for (i = 0; i < vops->num_syncs; i++)
>  		xe_sync_entry_signal(vops->syncs + i, fence);
>  	xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 40+ messages in thread

* RE: [PATCH 12/13] drm/xe: Add xe_gt_tlb_invalidation_range and convert PT layer to use this
  2024-04-10  5:40 ` [PATCH 12/13] drm/xe: Add xe_gt_tlb_invalidation_range and convert PT layer to use this Matthew Brost
@ 2024-04-19 16:00   ` Zeng, Oak
  0 siblings, 0 replies; 40+ messages in thread
From: Zeng, Oak @ 2024-04-19 16:00 UTC (permalink / raw)
  To: Brost, Matthew, intel-xe; +Cc: Brost, Matthew

Reviewed-by: Oak Zeng <oak.zeng@intel.com>

> -----Original Message-----
> From: Intel-xe <intel-xe-bounces@lists.freedesktop.org> On Behalf Of
> Matthew Brost
> Sent: Wednesday, April 10, 2024 1:41 AM
> To: intel-xe@lists.freedesktop.org
> Cc: Brost, Matthew <matthew.brost@intel.com>
> Subject: [PATCH 12/13] drm/xe: Add xe_gt_tlb_invalidation_range and
> convert PT layer to use this
> 
> xe_gt_tlb_invalidation_range accepts a start and end address rather than
> a VMA. This will enable multiple VMAs to be invalidated in a single
> invalidation. Update the PT layer to use this new function.
> 
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 59 +++++++++++++++------
>  drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h |  3 ++
>  drivers/gpu/drm/xe/xe_pt.c                  | 25 ++++++---
>  3 files changed, 65 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> index 93df2d7969b3..65409f494f59 100644
> --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> @@ -263,11 +263,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
>  }
> 
>  /**
> - * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
> + * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an
> + * address range
> + *
>   * @gt: graphics tile
>   * @fence: invalidation fence which will be signal on TLB invalidation
>   * completion, can be NULL
> - * @vma: VMA to invalidate
> + * @start: start address
> + * @end: end address
> + * @asid: address space id
>   *
>   * Issue a range based TLB invalidation if supported, if not fallback to a full
>   * TLB invalidation. Completion of TLB is asynchronous and caller can either
> use
> @@ -277,17 +281,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
>   * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on
> success,
>   * negative error code on error.
>   */
> -int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
> -			       struct xe_gt_tlb_invalidation_fence *fence,
> -			       struct xe_vma *vma)
> +int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
> +				 struct xe_gt_tlb_invalidation_fence *fence,
> +				 u64 start, u64 end, u32 asid)
>  {
>  	struct xe_device *xe = gt_to_xe(gt);
>  #define MAX_TLB_INVALIDATION_LEN	7
>  	u32 action[MAX_TLB_INVALIDATION_LEN];
>  	int len = 0;
> 
> -	xe_gt_assert(gt, vma);
> -
>  	/* Execlists not supported */
>  	if (gt_to_xe(gt)->info.force_execlist) {
>  		if (fence)
> @@ -301,8 +303,8 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
>  	if (!xe->info.has_range_tlb_invalidation) {
>  		action[len++] =
> MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
>  	} else {
> -		u64 start = xe_vma_start(vma);
> -		u64 length = xe_vma_size(vma);
> +		u64 orig_start = start;
> +		u64 length = end - start;
>  		u64 align, end;
> 
>  		if (length < SZ_4K)
> @@ -315,12 +317,12 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
>  		 * address mask covering the required range.
>  		 */
>  		align = roundup_pow_of_two(length);
> -		start = ALIGN_DOWN(xe_vma_start(vma), align);
> -		end = ALIGN(xe_vma_end(vma), align);
> +		start = ALIGN_DOWN(start, align);
> +		end = ALIGN(end, align);
>  		length = align;
>  		while (start + length < end) {
>  			length <<= 1;
> -			start = ALIGN_DOWN(xe_vma_start(vma), length);
> +			start = ALIGN_DOWN(orig_start, length);
>  		}
> 
>  		/*
> @@ -329,16 +331,17 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
>  		 */
>  		if (length >= SZ_2M) {
>  			length = max_t(u64, SZ_16M, length);
> -			start = ALIGN_DOWN(xe_vma_start(vma), length);
> +			start = ALIGN_DOWN(orig_start, length);
>  		}
> 
>  		xe_gt_assert(gt, length >= SZ_4K);
>  		xe_gt_assert(gt, is_power_of_2(length));
> -		xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1,
> ilog2(SZ_2M) + 1)));
> +		xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1,
> +						    ilog2(SZ_2M) + 1)));
>  		xe_gt_assert(gt, IS_ALIGNED(start, length));
> 
>  		action[len++] =
> MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
> -		action[len++] = xe_vma_vm(vma)->usm.asid;
> +		action[len++] = asid;
>  		action[len++] = lower_32_bits(start);
>  		action[len++] = upper_32_bits(start);
>  		action[len++] = ilog2(length) - ilog2(SZ_4K);
> @@ -349,6 +352,32 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
>  	return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
>  }
> 
> +/**
> + * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a
> VMA
> + * @gt: graphics tile
> + * @fence: invalidation fence which will be signal on TLB invalidation
> + * completion, can be NULL
> + * @vma: VMA to invalidate
> + *
> + * Issue a range based TLB invalidation if supported, if not fallback to a full
> + * TLB invalidation. Completion of TLB is asynchronous and caller can either
> use
> + * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
> + * completion.
> + *
> + * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on
> success,
> + * negative error code on error.
> + */
> +int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
> +			       struct xe_gt_tlb_invalidation_fence *fence,
> +			       struct xe_vma *vma)
> +{
> +	xe_gt_assert(gt, vma);
> +
> +	return xe_gt_tlb_invalidation_range(gt, fence, xe_vma_start(vma),
> +					    xe_vma_end(vma),
> +					    xe_vma_vm(vma)->usm.asid);
> +}
> +
>  /**
>   * xe_gt_tlb_invalidation_wait - Wait for TLB to complete
>   * @gt: graphics tile
> diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> index fbb743d80d2c..bf3bebd9f985 100644
> --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> @@ -20,6 +20,9 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt);
>  int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
>  			       struct xe_gt_tlb_invalidation_fence *fence,
>  			       struct xe_vma *vma);
> +int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
> +				 struct xe_gt_tlb_invalidation_fence *fence,
> +				 u64 start, u64 end, u32 asid);
>  int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
>  int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg,
> u32 len);
> 
> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> index 5b7930f46cf3..8d3765d3351e 100644
> --- a/drivers/gpu/drm/xe/xe_pt.c
> +++ b/drivers/gpu/drm/xe/xe_pt.c
> @@ -1075,10 +1075,12 @@ static const struct xe_migrate_pt_update_ops
> userptr_bind_ops = {
>  struct invalidation_fence {
>  	struct xe_gt_tlb_invalidation_fence base;
>  	struct xe_gt *gt;
> -	struct xe_vma *vma;
>  	struct dma_fence *fence;
>  	struct dma_fence_cb cb;
>  	struct work_struct work;
> +	u64 start;
> +	u64 end;
> +	u32 asid;
>  };
> 
>  static const char *
> @@ -1121,13 +1123,14 @@ static void invalidation_fence_work_func(struct
> work_struct *w)
>  		container_of(w, struct invalidation_fence, work);
> 
>  	trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base);
> -	xe_gt_tlb_invalidation_vma(ifence->gt, &ifence->base, ifence-
> >vma);
> +	xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence-
> >start,
> +				     ifence->end, ifence->asid);
>  }
> 
>  static int invalidation_fence_init(struct xe_gt *gt,
>  				   struct invalidation_fence *ifence,
>  				   struct dma_fence *fence,
> -				   struct xe_vma *vma)
> +				   u64 start, u64 end, u32 asid)
>  {
>  	int ret;
> 
> @@ -1144,7 +1147,9 @@ static int invalidation_fence_init(struct xe_gt *gt,
>  	dma_fence_get(&ifence->base.base);	/* Ref for caller */
>  	ifence->fence = fence;
>  	ifence->gt = gt;
> -	ifence->vma = vma;
> +	ifence->start = start;
> +	ifence->end = end;
> +	ifence->asid = asid;
> 
>  	INIT_WORK(&ifence->work, invalidation_fence_work_func);
>  	ret = dma_fence_add_callback(fence, &ifence->cb,
> invalidation_fence_cb);
> @@ -1295,8 +1300,11 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct
> xe_vma *vma, struct xe_exec_queue
> 
>  		/* TLB invalidation must be done before signaling rebind */
>  		if (ifence) {
> -			int err = invalidation_fence_init(tile->primary_gt,
> ifence, fence,
> -							  vma);
> +			int err = invalidation_fence_init(tile->primary_gt,
> +							  ifence, fence,
> +							  xe_vma_start(vma),
> +							  xe_vma_end(vma),
> +							  xe_vma_vm(vma)-
> >usm.asid);
>  			if (err) {
>  				dma_fence_put(fence);
>  				kfree(ifence);
> @@ -1641,7 +1649,10 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct
> xe_vma *vma, struct xe_exec_queu
>  			dma_fence_wait(fence, false);
> 
>  		/* TLB invalidation must be done before signaling unbind */
> -		err = invalidation_fence_init(tile->primary_gt, ifence, fence,
> vma);
> +		err = invalidation_fence_init(tile->primary_gt, ifence, fence,
> +					      xe_vma_start(vma),
> +					      xe_vma_end(vma),
> +					      xe_vma_vm(vma)->usm.asid);
>  		if (err) {
>  			dma_fence_put(fence);
>  			kfree(ifence);
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 06/13] drm/xe: Simplify VM bind IOCTL error handling and cleanup
  2024-04-19  4:19   ` Zeng, Oak
@ 2024-04-19 19:16     ` Matthew Brost
  2024-04-23  3:22       ` Zeng, Oak
  0 siblings, 1 reply; 40+ messages in thread
From: Matthew Brost @ 2024-04-19 19:16 UTC (permalink / raw)
  To: Zeng, Oak; +Cc: intel-xe

On Thu, Apr 18, 2024 at 10:19:04PM -0600, Zeng, Oak wrote:
> It is a nice clean up. See one question inline
> 
> > -----Original Message-----
> > From: Brost, Matthew <matthew.brost@intel.com>
> > Sent: Wednesday, April 10, 2024 1:41 AM
> > To: intel-xe@lists.freedesktop.org
> > Cc: Brost, Matthew <matthew.brost@intel.com>; Zeng, Oak
> > <oak.zeng@intel.com>
> > Subject: [PATCH 06/13] drm/xe: Simplify VM bind IOCTL error handling and
> > cleanup
> > 
> > Clean up everything in VM bind IOCTL in 1 path for both errors and
> > non-errors. Also move VM bind IOCTL cleanup from ops (also used by
> > non-IOCTL binds) to the VM bind IOCTL.
> > 
> > v2:
> >  - Break ops_execute on error (Oak)
> > 
> > Cc: Oak Zeng <oak.zeng@intel.com>
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_vm.c       | 67 ++++++--------------------------
> >  drivers/gpu/drm/xe/xe_vm_types.h |  5 ---
> >  2 files changed, 12 insertions(+), 60 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > index 9d82396cf5d5..8f5b24c8f6cd 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -842,8 +842,7 @@ static int xe_vm_ops_add_rebind(struct xe_vma_ops
> > *vops, struct xe_vma *vma,
> >  }
> > 
> >  static struct dma_fence *ops_execute(struct xe_vm *vm,
> > -				     struct xe_vma_ops *vops,
> > -				     bool cleanup);
> > +				     struct xe_vma_ops *vops);
> >  static void xe_vma_ops_init(struct xe_vma_ops *vops);
> > 
> >  int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
> > @@ -876,7 +875,7 @@ int xe_vm_rebind(struct xe_vm *vm, bool
> > rebind_worker)
> >  			goto free_ops;
> >  	}
> > 
> > -	fence = ops_execute(vm, &vops, false);
> > +	fence = ops_execute(vm, &vops);
> >  	if (IS_ERR(fence)) {
> >  		err = PTR_ERR(fence);
> >  	} else {
> > @@ -2551,7 +2550,6 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm
> > *vm, struct xe_exec_queue *q,
> >  	if (!last_op)
> >  		return 0;
> > 
> > -	last_op->ops = ops;
> >  	if (last) {
> >  		last_op->flags |= XE_VMA_OP_LAST;
> >  		last_op->num_syncs = num_syncs;
> > @@ -2721,25 +2719,6 @@ xe_vma_op_execute(struct xe_vm *vm, struct
> > xe_vma_op *op)
> >  	return fence;
> >  }
> > 
> > -static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
> > -{
> > -	bool last = op->flags & XE_VMA_OP_LAST;
> > -
> > -	if (last) {
> > -		while (op->num_syncs--)
> > -			xe_sync_entry_cleanup(&op->syncs[op-
> > >num_syncs]);
> 
> I understand all the other parts of this function are not needed anymore. But I didn't figure out why sync_entry_cleanup is not needed. You still have syncs, don't you? They are allocated in the bind_ioctl function and it seems you didn't touch that in this patch... Can you explain?
> 

It is called in the main IOCTL code (xe_vm_bind_ioctl) now.

See below.

> Oak
> 
> 
> > -		kfree(op->syncs);
> > -		if (op->q)
> > -			xe_exec_queue_put(op->q);
> > -	}
> > -	if (!list_empty(&op->link))
> > -		list_del(&op->link);
> > -	if (op->ops)
> > -		drm_gpuva_ops_free(&vm->gpuvm, op->ops);
> > -	if (last)
> > -		xe_vm_put(vm);
> > -}
> > -
> >  static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
> >  			     bool post_commit, bool prev_post_commit,
> >  			     bool next_post_commit)
> > @@ -2816,8 +2795,6 @@ static void vm_bind_ioctl_ops_unwind(struct
> > xe_vm *vm,
> >  					 op->flags &
> > XE_VMA_OP_PREV_COMMITTED,
> >  					 op->flags &
> > XE_VMA_OP_NEXT_COMMITTED);
> >  		}
> > -
> > -		drm_gpuva_ops_free(&vm->gpuvm, __ops);
> >  	}
> >  }
> > 
> > @@ -2904,24 +2881,20 @@ static int
> > vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
> >  }
> > 
> >  static struct dma_fence *ops_execute(struct xe_vm *vm,
> > -				     struct xe_vma_ops *vops,
> > -				     bool cleanup)
> > +				     struct xe_vma_ops *vops)
> >  {
> >  	struct xe_vma_op *op, *next;
> >  	struct dma_fence *fence = NULL;
> > 
> >  	list_for_each_entry_safe(op, next, &vops->list, link) {
> > -		if (!IS_ERR(fence)) {
> > -			dma_fence_put(fence);
> > -			fence = xe_vma_op_execute(vm, op);
> > -		}
> > +		dma_fence_put(fence);
> > +		fence = xe_vma_op_execute(vm, op);
> >  		if (IS_ERR(fence)) {
> >  			drm_warn(&vm->xe->drm, "VM op(%d) failed
> > with %ld",
> >  				 op->base.op, PTR_ERR(fence));
> >  			fence = ERR_PTR(-ENOSPC);
> > +			break;
> >  		}
> > -		if (cleanup)
> > -			xe_vma_op_cleanup(vm, op);
> >  	}
> > 
> >  	return fence;
> > @@ -2944,7 +2917,7 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm
> > *vm,
> >  		if (err)
> >  			goto unlock;
> > 
> > -		fence = ops_execute(vm, vops, true);
> > +		fence = ops_execute(vm, vops);
> >  		if (IS_ERR(fence)) {
> >  			err = PTR_ERR(fence);
> >  			/* FIXME: Killing VM rather than proper error
> > handling */
> > @@ -3305,30 +3278,14 @@ int xe_vm_bind_ioctl(struct drm_device *dev,
> > void *data, struct drm_file *file)
> >  		goto unwind_ops;
> >  	}
> > 
> > -	xe_vm_get(vm);
> > -	if (q)
> > -		xe_exec_queue_get(q);
> > -
> >  	err = vm_bind_ioctl_ops_execute(vm, &vops);
> > 
> > -	up_write(&vm->lock);
> > -
> > -	if (q)
> > -		xe_exec_queue_put(q);
> > -	xe_vm_put(vm);
> > -
> > -	for (i = 0; bos && i < args->num_binds; ++i)
> > -		xe_bo_put(bos[i]);
> > -
> > -	kvfree(bos);
> > -	kvfree(ops);
> > -	if (args->num_binds > 1)
> > -		kvfree(bind_ops);
> > -
> > -	return err;
> > -

We now fall through to the cleanup in both success and error paths...

> >  unwind_ops:
> > -	vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
> > +	if (err && err != -ENODATA)
> > +		vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
> > +	for (i = args->num_binds - 1; i >= 0; --i)
> > +		if (ops[i])
> > +			drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
> >  free_syncs:
> >  	if (err == -ENODATA)
> >  		err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);

The next few lines of code call xe_sync_entry_cleanup.
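
Compressed into a self-contained sketch, the exit path now looks roughly
like this (the helper names here are hypothetical stand-ins; only the
control flow mirrors the patch):

#include <stdio.h>
#include <errno.h>

/* hypothetical stand-ins for the real helpers */
static int execute_binds(void)         { return 0; /* or a negative errno */ }
static void unwind_gpuva_state(void)   { printf("unwind gpuva state\n"); }
static void free_gpuva_ops(void)       { printf("free gpuva ops\n"); }
static void cleanup_sync_entries(void) { printf("cleanup sync entries\n"); }

/* toy version of the single exit path in xe_vm_bind_ioctl() after 06/13 */
static int bind_ioctl_exit_path(void)
{
	int err = execute_binds();	/* success and failure both fall through */

	if (err && err != -ENODATA)	/* only a real error is unwound */
		unwind_gpuva_state();
	free_gpuva_ops();		/* gpuva ops freed on every path now */
	cleanup_sync_entries();		/* xe_sync_entry_cleanup() happens here */
	return err;
}

int main(void)
{
	return bind_ioctl_exit_path() ? 1 : 0;
}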

Matt

> > diff --git a/drivers/gpu/drm/xe/xe_vm_types.h
> > b/drivers/gpu/drm/xe/xe_vm_types.h
> > index 466b6c62d1f9..149ab892967e 100644
> > --- a/drivers/gpu/drm/xe/xe_vm_types.h
> > +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> > @@ -330,11 +330,6 @@ enum xe_vma_op_flags {
> >  struct xe_vma_op {
> >  	/** @base: GPUVA base operation */
> >  	struct drm_gpuva_op base;
> > -	/**
> > -	 * @ops: GPUVA ops, when set call drm_gpuva_ops_free after this
> > -	 * operations is processed
> > -	 */
> > -	struct drm_gpuva_ops *ops;
> >  	/** @q: exec queue for this operation */
> >  	struct xe_exec_queue *q;
> >  	/**
> > --
> > 2.34.1
> 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 07/13] drm/xe: Use xe_vma_ops to implement page fault rebinds
  2024-04-19 14:22   ` Zeng, Oak
@ 2024-04-19 19:33     ` Matthew Brost
  2024-04-23  3:27       ` Zeng, Oak
  0 siblings, 1 reply; 40+ messages in thread
From: Matthew Brost @ 2024-04-19 19:33 UTC (permalink / raw)
  To: Zeng, Oak; +Cc: intel-xe

On Fri, Apr 19, 2024 at 08:22:29AM -0600, Zeng, Oak wrote:
> 
> 
> > -----Original Message-----
> > From: Intel-xe <intel-xe-bounces@lists.freedesktop.org> On Behalf Of
> > Matthew Brost
> > Sent: Wednesday, April 10, 2024 1:41 AM
> > To: intel-xe@lists.freedesktop.org
> > Cc: Brost, Matthew <matthew.brost@intel.com>
> > Subject: [PATCH 07/13] drm/xe: Use xe_vma_ops to implement page fault
> > rebinds
> > 
> > All page tables updates are moving to a xe_vma_ops interface to
> > implement 1 job per VM bind IOCTL.
> 
> Can you explain why using the xe_vma_ops interface is necessary even to bind one vma? I understand it makes sense to use this interface to bind multiple vmas. See also below
> 

Essentially once we switch to 1 job per VM bind IOCTL [1], xe_vma_ops is
passed around throughout all the layers. The xe_vma_ops list is a single
atomic unit for updating the GPUVA state, internal PT state, and GPU page
tables. If at any point something fails, xe_vma_ops can be unwound,
restoring all the original state.

i.e. __xe_pt_bind_vma will be deleted and replaced with a function that
accepts an xe_vma_ops list; ops_execute() is the correct place to hook
into the software pipeline as we already have the locks and only the
internal PT state and GPU page tables need to be updated.

[1] https://patchwork.freedesktop.org/patch/582024/?series=125608&rev=5
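
To make that concrete, here is a minimal, self-contained toy model of the
pattern: every op of an IOCTL is committed as part of one list, and any
failure unwinds the already-committed ops in reverse. The names (toy_op,
commit_op, unwind_op) are hypothetical stand-ins for xe_vma_op,
vm_bind_ioctl_ops_parse()/ops_execute() and vm_bind_ioctl_ops_unwind();
this is an illustration of the idea only, not the driver code.

#include <stdio.h>

#define NUM_OPS 4

/* toy stand-in for struct xe_vma_op */
struct toy_op {
	int id;
	int committed;
};

/* stand-in for the parse/commit step; pretend the third op fails */
static int commit_op(struct toy_op *op)
{
	if (op->id == 3)
		return -1;
	op->committed = 1;
	printf("committed op %d\n", op->id);
	return 0;
}

/* stand-in for the unwind step restoring the original state */
static void unwind_op(struct toy_op *op)
{
	printf("unwound op %d\n", op->id);
	op->committed = 0;
}

int main(void)
{
	struct toy_op ops[NUM_OPS] = {
		{ .id = 1 }, { .id = 2 }, { .id = 3 }, { .id = 4 },
	};
	int i, err = 0;

	/* commit all ops of the IOCTL as one unit ... */
	for (i = 0; i < NUM_OPS; i++) {
		err = commit_op(&ops[i]);
		if (err)
			break;
	}

	/* ... and on any failure unwind the committed ones in reverse */
	if (err)
		for (i = NUM_OPS - 1; i >= 0; i--)
			if (ops[i].committed)
				unwind_op(&ops[i]);

	return err ? 1 : 0;
}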

> 
>  Add xe_vma_rebind function which is
> > implemented using xe_vma_ops interface. Use xe_vma_rebind in page
> > faults
> > for rebinds.
> > 
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_gt_pagefault.c | 16 ++++----
> >  drivers/gpu/drm/xe/xe_vm.c           | 57 +++++++++++++++++++++++-----
> >  drivers/gpu/drm/xe/xe_vm.h           |  2 +
> >  drivers/gpu/drm/xe/xe_vm_types.h     |  2 +
> >  4 files changed, 58 insertions(+), 19 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > index fa9e9853c53b..040dd142c49c 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > @@ -19,7 +19,6 @@
> >  #include "xe_guc.h"
> >  #include "xe_guc_ct.h"
> >  #include "xe_migrate.h"
> > -#include "xe_pt.h"
> >  #include "xe_trace.h"
> >  #include "xe_vm.h"
> > 
> > @@ -204,15 +203,14 @@ static int handle_pagefault(struct xe_gt *gt, struct
> > pagefault *pf)
> >  		drm_exec_retry_on_contention(&exec);
> >  		if (ret)
> >  			goto unlock_dma_resv;
> > -	}
> > 
> > -	/* Bind VMA only to the GT that has faulted */
> > -	trace_xe_vma_pf_bind(vma);
> > -	fence = __xe_pt_bind_vma(tile, vma, xe_tile_migrate_engine(tile),
> > NULL, 0,
> > -				 vma->tile_present & BIT(tile->id));
> > -	if (IS_ERR(fence)) {
> > -		ret = PTR_ERR(fence);
> > -		goto unlock_dma_resv;
> > +		/* Bind VMA only to the GT that has faulted */
> > +		trace_xe_vma_pf_bind(vma);
> > +		fence = xe_vma_rebind(vm, vma, BIT(tile->id));
> > +		if (IS_ERR(fence)) {
> > +			ret = PTR_ERR(fence);
> > +			goto unlock_dma_resv;
> > +		}
> >  	}
> > 
> >  	/*
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > index 8f5b24c8f6cd..54a69fbfbb00 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -815,6 +815,7 @@ static void xe_vm_populate_rebind(struct
> > xe_vma_op *op, struct xe_vma *vma,
> >  				  u8 tile_mask)
> >  {
> >  	INIT_LIST_HEAD(&op->link);
> > +	op->tile_mask = tile_mask;
> >  	op->base.op = DRM_GPUVA_OP_MAP;
> >  	op->base.map.va.addr = vma->gpuva.va.addr;
> >  	op->base.map.va.range = vma->gpuva.va.range;
> > @@ -893,6 +894,33 @@ int xe_vm_rebind(struct xe_vm *vm, bool
> > rebind_worker)
> >  	return err;
> >  }
> > 
> > +struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma,
> > u8 tile_mask)
> 
> 
> I am trying to figure out why this function is necessary. We are only binding one vma here. Why do we need to create an xe_vma_ops list? We are only adding one vma to this list...
> 

See above: the ability to directly modify page tables without an
xe_vma_ops list will be removed.

Matt

> Oak
> 
> > +{
> > +	struct dma_fence *fence = NULL;
> > +	struct xe_vma_ops vops;
> > +	struct xe_vma_op *op, *next_op;
> > +	int err;
> > +
> > +	lockdep_assert_held(&vm->lock);
> > +	xe_vm_assert_held(vm);
> > +	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
> > +
> > +	xe_vma_ops_init(&vops);
> > +
> > +	err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
> > +	if (err)
> > +		return ERR_PTR(err);
> > +
> > +	fence = ops_execute(vm, &vops);
> > +
> > +	list_for_each_entry_safe(op, next_op, &vops.list, link) {
> > +		list_del(&op->link);
> > +		kfree(op);
> > +	}
> > +
> > +	return fence;
> > +}
> > +
> >  static void xe_vma_free(struct xe_vma *vma)
> >  {
> >  	if (xe_vma_is_userptr(vma))
> > @@ -1796,7 +1824,7 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct
> > xe_exec_queue *q,
> >  static struct dma_fence *
> >  xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
> >  	       struct xe_sync_entry *syncs, u32 num_syncs,
> > -	       bool first_op, bool last_op)
> > +	       u8 tile_mask, bool first_op, bool last_op)
> >  {
> >  	struct xe_tile *tile;
> >  	struct dma_fence *fence;
> > @@ -1804,7 +1832,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct
> > xe_exec_queue *q,
> >  	struct dma_fence_array *cf = NULL;
> >  	struct xe_vm *vm = xe_vma_vm(vma);
> >  	int cur_fence = 0, i;
> > -	int number_tiles = hweight8(vma->tile_mask);
> > +	int number_tiles = hweight8(tile_mask);
> >  	int err;
> >  	u8 id;
> > 
> > @@ -1818,7 +1846,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct
> > xe_exec_queue *q,
> >  	}
> > 
> >  	for_each_tile(tile, vm->xe, id) {
> > -		if (!(vma->tile_mask & BIT(id)))
> > +		if (!(tile_mask & BIT(id)))
> >  			goto next;
> > 
> >  		fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
> > @@ -1886,7 +1914,7 @@ find_ufence_get(struct xe_sync_entry *syncs, u32
> > num_syncs)
> >  static struct dma_fence *
> >  xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct
> > xe_exec_queue *q,
> >  	   struct xe_bo *bo, struct xe_sync_entry *syncs, u32 num_syncs,
> > -	   bool immediate, bool first_op, bool last_op)
> > +	   u8 tile_mask, bool immediate, bool first_op, bool last_op)
> >  {
> >  	struct dma_fence *fence;
> >  	struct xe_exec_queue *wait_exec_queue =
> > to_wait_exec_queue(vm, q);
> > @@ -1902,8 +1930,8 @@ xe_vm_bind(struct xe_vm *vm, struct xe_vma
> > *vma, struct xe_exec_queue *q,
> >  	vma->ufence = ufence ?: vma->ufence;
> > 
> >  	if (immediate) {
> > -		fence = xe_vm_bind_vma(vma, q, syncs, num_syncs,
> > first_op,
> > -				       last_op);
> > +		fence = xe_vm_bind_vma(vma, q, syncs, num_syncs,
> > tile_mask,
> > +				       first_op, last_op);
> >  		if (IS_ERR(fence))
> >  			return fence;
> >  	} else {
> > @@ -2095,7 +2123,7 @@ xe_vm_prefetch(struct xe_vm *vm, struct xe_vma
> > *vma,
> > 
> >  	if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated))
> > {
> >  		return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs,
> > num_syncs,
> > -				  true, first_op, last_op);
> > +				  vma->tile_mask, true, first_op, last_op);
> >  	} else {
> >  		struct dma_fence *fence =
> >  			xe_exec_queue_last_fence_get(wait_exec_queue,
> > vm);
> > @@ -2408,10 +2436,15 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm
> > *vm, struct xe_exec_queue *q,
> >  	struct xe_device *xe = vm->xe;
> >  	struct xe_vma_op *last_op = NULL;
> >  	struct drm_gpuva_op *__op;
> > +	struct xe_tile *tile;
> > +	u8 id, tile_mask = 0;
> >  	int err = 0;
> > 
> >  	lockdep_assert_held_write(&vm->lock);
> > 
> > +	for_each_tile(tile, vm->xe, id)
> > +		tile_mask |= 0x1 << id;
> > +
> >  	drm_gpuva_for_each_op(__op, ops) {
> >  		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
> >  		struct xe_vma *vma;
> > @@ -2428,6 +2461,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm
> > *vm, struct xe_exec_queue *q,
> >  		}
> > 
> >  		op->q = q;
> > +		op->tile_mask = tile_mask;
> > 
> >  		switch (op->base.op) {
> >  		case DRM_GPUVA_OP_MAP:
> > @@ -2574,6 +2608,7 @@ static struct dma_fence *op_execute(struct xe_vm
> > *vm, struct xe_vma *vma,
> >  		fence = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
> >  				   op->syncs, op->num_syncs,
> >  				   op->map.immediate
> > || !xe_vm_in_fault_mode(vm),
> > +				   op->tile_mask,
> >  				   op->flags & XE_VMA_OP_FIRST,
> >  				   op->flags & XE_VMA_OP_LAST);
> >  		break;
> > @@ -2600,7 +2635,9 @@ static struct dma_fence *op_execute(struct xe_vm
> > *vm, struct xe_vma *vma,
> >  			dma_fence_put(fence);
> >  			fence = xe_vm_bind(vm, op->remap.prev, op->q,
> >  					   xe_vma_bo(op->remap.prev), op-
> > >syncs,
> > -					   op->num_syncs, true, false,
> > +					   op->num_syncs,
> > +					   op->remap.prev->tile_mask, true,
> > +					   false,
> >  					   op->flags & XE_VMA_OP_LAST
> > && !next);
> >  			op->remap.prev->gpuva.flags &=
> > ~XE_VMA_LAST_REBIND;
> >  			if (IS_ERR(fence))
> > @@ -2614,8 +2651,8 @@ static struct dma_fence *op_execute(struct xe_vm
> > *vm, struct xe_vma *vma,
> >  			fence = xe_vm_bind(vm, op->remap.next, op->q,
> >  					   xe_vma_bo(op->remap.next),
> >  					   op->syncs, op->num_syncs,
> > -					   true, false,
> > -					   op->flags & XE_VMA_OP_LAST);
> > +					   op->remap.next->tile_mask, true,
> > +					   false, op->flags &
> > XE_VMA_OP_LAST);
> >  			op->remap.next->gpuva.flags &=
> > ~XE_VMA_LAST_REBIND;
> >  			if (IS_ERR(fence))
> >  				break;
> > diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
> > index 306cd0934a19..204a4ff63f88 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.h
> > +++ b/drivers/gpu/drm/xe/xe_vm.h
> > @@ -208,6 +208,8 @@ int __xe_vm_userptr_needs_repin(struct xe_vm
> > *vm);
> >  int xe_vm_userptr_check_repin(struct xe_vm *vm);
> > 
> >  int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
> > +struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma,
> > +				u8 tile_mask);
> > 
> >  int xe_vm_invalidate_vma(struct xe_vma *vma);
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_vm_types.h
> > b/drivers/gpu/drm/xe/xe_vm_types.h
> > index 149ab892967e..e9cd6da6263a 100644
> > --- a/drivers/gpu/drm/xe/xe_vm_types.h
> > +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> > @@ -343,6 +343,8 @@ struct xe_vma_op {
> >  	struct list_head link;
> >  	/** @flags: operation flags */
> >  	enum xe_vma_op_flags flags;
> > +	/** @tile_mask: Tile mask for operation */
> > +	u8 tile_mask;
> > 
> >  	union {
> >  		/** @map: VMA map operation specific data */
> > --
> > 2.34.1
> 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 10/13] drm/xe: Move ufence check to op_lock
  2024-04-19 14:56   ` Zeng, Oak
@ 2024-04-19 19:34     ` Matthew Brost
  0 siblings, 0 replies; 40+ messages in thread
From: Matthew Brost @ 2024-04-19 19:34 UTC (permalink / raw)
  To: Zeng, Oak; +Cc: intel-xe

On Fri, Apr 19, 2024 at 08:56:57AM -0600, Zeng, Oak wrote:
> There is a typo in the commit message. Other than that, the patch looks good.
> 

Noticed that while sending it. Will fix in the next rev.

Matt

> Reviewed-by: Oak Zeng <oak.zeng@intel.com>
> 
> > -----Original Message-----
> > From: Intel-xe <intel-xe-bounces@lists.freedesktop.org> On Behalf Of
> > Matthew Brost
> > Sent: Wednesday, April 10, 2024 1:41 AM
> > To: intel-xe@lists.freedesktop.org
> > Cc: Brost, Matthew <matthew.brost@intel.com>
> > Subject: [PATCH 10/13] drm/xe: Move ufence check to op_lock
> > 
> > Rather than checking for an unsignaled ufence ay
> 
> At
> 
> Oak
> 
> 
>  unbind time, check for
> > this during the op_lock function. This will help with the transition to
> > job 1 per VM bind IOCTL.
> > 
> > v2:
> >  - Rebase
> > 
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_vm.c | 33 +++++++++++++++++++++++----------
> >  1 file changed, 23 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > index 97384c77f662..0319e70577fe 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -1755,16 +1755,6 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct
> > xe_exec_queue *q,
> > 
> >  	trace_xe_vma_unbind(vma);
> > 
> > -	if (vma->ufence) {
> > -		struct xe_user_fence * const f = vma->ufence;
> > -
> > -		if (!xe_sync_ufence_get_status(f))
> > -			return ERR_PTR(-EBUSY);
> > -
> > -		vma->ufence = NULL;
> > -		xe_sync_ufence_put(f);
> > -	}
> > -
> >  	if (number_tiles > 1) {
> >  		fences = kmalloc_array(number_tiles, sizeof(*fences),
> >  				       GFP_KERNEL);
> > @@ -2819,6 +2809,21 @@ static int vma_lock_and_validate(struct drm_exec
> > *exec, struct xe_vma *vma,
> >  	return err;
> >  }
> > 
> > +static int check_ufence(struct xe_vma *vma)
> > +{
> > +	if (vma->ufence) {
> > +		struct xe_user_fence * const f = vma->ufence;
> > +
> > +		if (!xe_sync_ufence_get_status(f))
> > +			return -EBUSY;
> > +
> > +		vma->ufence = NULL;
> > +		xe_sync_ufence_put(f);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> >  static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
> >  			    struct xe_vma_op *op)
> >  {
> > @@ -2830,6 +2835,10 @@ static int op_lock_and_prep(struct drm_exec
> > *exec, struct xe_vm *vm,
> >  					    !xe_vm_in_fault_mode(vm));
> >  		break;
> >  	case DRM_GPUVA_OP_REMAP:
> > +		err = check_ufence(gpuva_to_vma(op->base.remap.unmap-
> > >va));
> > +		if (err)
> > +			break;
> > +
> >  		err = vma_lock_and_validate(exec,
> >  					    gpuva_to_vma(op-
> > >base.remap.unmap->va),
> >  					    false);
> > @@ -2839,6 +2848,10 @@ static int op_lock_and_prep(struct drm_exec
> > *exec, struct xe_vm *vm,
> >  			err = vma_lock_and_validate(exec, op->remap.next,
> > true);
> >  		break;
> >  	case DRM_GPUVA_OP_UNMAP:
> > +		err = check_ufence(gpuva_to_vma(op->base.unmap.va));
> > +		if (err)
> > +			break;
> > +
> >  		err = vma_lock_and_validate(exec,
> >  					    gpuva_to_vma(op-
> > >base.unmap.va),
> >  					    false);
> > --
> > 2.34.1
> 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 11/13] drm/xe: Move ufence add to vm_bind_ioctl_ops_fini
  2024-04-19 15:24   ` Zeng, Oak
@ 2024-04-19 19:45     ` Matthew Brost
  2024-04-23  3:36       ` Zeng, Oak
  0 siblings, 1 reply; 40+ messages in thread
From: Matthew Brost @ 2024-04-19 19:45 UTC (permalink / raw)
  To: Zeng, Oak; +Cc: intel-xe

On Fri, Apr 19, 2024 at 09:24:18AM -0600, Zeng, Oak wrote:
> 
> 
> > -----Original Message-----
> > From: Intel-xe <intel-xe-bounces@lists.freedesktop.org> On Behalf Of
> > Matthew Brost
> > Sent: Wednesday, April 10, 2024 1:41 AM
> > To: intel-xe@lists.freedesktop.org
> > Cc: Brost, Matthew <matthew.brost@intel.com>
> > Subject: [PATCH 11/13] drm/xe: Move ufence add to vm_bind_ioctl_ops_fini
> > 
> > Rather than adding a ufence to a VMA in the bind function, add the
> > ufence to all VMAs in the IOCTL that require binds in
> > vm_bind_ioctl_ops_install_fences.
> 
> This is a typo, right? From the code, it should be vm_bind_ioctl_ops_fini
> 

Yes, typo. Will fix in next rev.

> I also want to make sure I understand here: so the ufence added to vma is *only* used to make sure last vma bind has been completed upon vma unbind time. So even though it is more natural to set ufence at bind function, it is safe to set it after all operations are submitted (vm_bind_ioctl_ops_fini func). No vm_bind ioctl (and the vma unbind triggered by ioctl) can go through *before* the last vm bind ioctl's ops finish, right?
>

The ufence is attached to all VMAs being bound in an IOCTL. It prevents
any of those VMAs from being unbound until the attached ufence has
signaled (binding operation complete).

It is safe (and correct) to attach the ufence *after* the operations are
submitted because we are past the point of failure and are under the
VM->lock. The ufence could be signaled or unsignaled when attached to
the VMA. It is safe to attach signaled ufences due to ref counting.
Future IOCTLs return -EBUSY if trying to unbind a VMA which has an
unsignaled ufence.
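
A small self-contained sketch of that ordering, with toy_vma/toy_ufence
as hypothetical stand-ins for the real structures (it only models the
attach-after-submit and -EBUSY-on-unbind behaviour described above, not
the actual locking):

#include <stdio.h>
#include <errno.h>
#include <stdbool.h>

/* toy stand-ins for struct xe_user_fence and struct xe_vma */
struct toy_ufence { bool signaled; int refcount; };
struct toy_vma    { struct toy_ufence *ufence; };

static void ufence_get(struct toy_ufence *f) { f->refcount++; }
static void ufence_put(struct toy_ufence *f) { f->refcount--; }

/* attach after the binds are submitted; mirrors vma_add_ufence() */
static void vma_add_ufence(struct toy_vma *vma, struct toy_ufence *f)
{
	if (vma->ufence)
		ufence_put(vma->ufence);
	ufence_get(f);
	vma->ufence = f;
}

/* later unbind path; mirrors check_ufence() from patch 10/13 */
static int check_ufence(struct toy_vma *vma)
{
	if (vma->ufence) {
		if (!vma->ufence->signaled)
			return -EBUSY;	/* previous bind not yet complete */
		ufence_put(vma->ufence);
		vma->ufence = NULL;
	}
	return 0;
}

int main(void)
{
	struct toy_ufence f = { .signaled = false, .refcount = 1 };
	struct toy_vma vma = { NULL };

	vma_add_ufence(&vma, &f);	/* done at the end of the IOCTL */
	printf("unbind before signal: %d\n", check_ufence(&vma)); /* -EBUSY */
	f.signaled = true;
	printf("unbind after signal:  %d\n", check_ufence(&vma)); /* 0 */
	return 0;
}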

Matt
 
> Oak
> 
> 
> 
>  This will help with the transition to
> > job 1 per VM bind IOCTL.
> > 
> > v2:
> >  - Rebase
> > 
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_sync.c | 15 ++++++++++++
> >  drivers/gpu/drm/xe/xe_sync.h |  1 +
> >  drivers/gpu/drm/xe/xe_vm.c   | 44 ++++++++++++++++++++++++++++++--
> > ----
> >  3 files changed, 53 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
> > index 65f1f1628235..2883d9aca404 100644
> > --- a/drivers/gpu/drm/xe/xe_sync.c
> > +++ b/drivers/gpu/drm/xe/xe_sync.c
> > @@ -338,6 +338,21 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync,
> > int num_sync,
> >  	return ERR_PTR(-ENOMEM);
> >  }
> > 
> > +/**
> > + * __xe_sync_ufence_get() - Get user fence from user fence
> > + * @ufence: input user fence
> > + *
> > + * Get a user fence reference from user fence
> > + *
> > + * Return: xe_user_fence pointer with reference
> > + */
> > +struct xe_user_fence *__xe_sync_ufence_get(struct xe_user_fence
> > *ufence)
> > +{
> > +	user_fence_get(ufence);
> > +
> > +	return ufence;
> > +}
> > +
> >  /**
> >   * xe_sync_ufence_get() - Get user fence from sync
> >   * @sync: input sync
> > diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
> > index 3e03396af2c6..006dbf780793 100644
> > --- a/drivers/gpu/drm/xe/xe_sync.h
> > +++ b/drivers/gpu/drm/xe/xe_sync.h
> > @@ -37,6 +37,7 @@ static inline bool xe_sync_is_ufence(struct
> > xe_sync_entry *sync)
> >  	return !!sync->ufence;
> >  }
> > 
> > +struct xe_user_fence *__xe_sync_ufence_get(struct xe_user_fence
> > *ufence);
> >  struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync);
> >  void xe_sync_ufence_put(struct xe_user_fence *ufence);
> >  int xe_sync_ufence_get_status(struct xe_user_fence *ufence);
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > index 0319e70577fe..1da68a03407b 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -1900,17 +1900,10 @@ xe_vm_bind(struct xe_vm *vm, struct xe_vma
> > *vma, struct xe_exec_queue *q,
> >  {
> >  	struct dma_fence *fence;
> >  	struct xe_exec_queue *wait_exec_queue =
> > to_wait_exec_queue(vm, q);
> > -	struct xe_user_fence *ufence;
> > 
> >  	xe_vm_assert_held(vm);
> >  	xe_bo_assert_held(bo);
> > 
> > -	ufence = find_ufence_get(syncs, num_syncs);
> > -	if (vma->ufence && ufence)
> > -		xe_sync_ufence_put(vma->ufence);
> > -
> > -	vma->ufence = ufence ?: vma->ufence;
> > -
> >  	if (immediate) {
> >  		fence = xe_vm_bind_vma(vma, q, syncs, num_syncs,
> > tile_mask,
> >  				       first_op, last_op);
> > @@ -2918,20 +2911,57 @@ static struct dma_fence *ops_execute(struct
> > xe_vm *vm,
> >  	return fence;
> >  }
> > 
> > +static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence
> > *ufence)
> > +{
> > +	if (vma->ufence)
> > +		xe_sync_ufence_put(vma->ufence);
> > +	vma->ufence = __xe_sync_ufence_get(ufence);
> > +}
> > +
> > +static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
> > +			  struct xe_user_fence *ufence)
> > +{
> > +	switch (op->base.op) {
> > +	case DRM_GPUVA_OP_MAP:
> > +		vma_add_ufence(op->map.vma, ufence);
> > +		break;
> > +	case DRM_GPUVA_OP_REMAP:
> > +		if (op->remap.prev)
> > +			vma_add_ufence(op->remap.prev, ufence);
> > +		if (op->remap.next)
> > +			vma_add_ufence(op->remap.next, ufence);
> > +		break;
> > +	case DRM_GPUVA_OP_UNMAP:
> > +		break;
> > +	case DRM_GPUVA_OP_PREFETCH:
> > +		vma_add_ufence(gpuva_to_vma(op->base.prefetch.va),
> > ufence);
> > +		break;
> > +	default:
> > +		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
> > +	}
> > +}
> > +
> >  static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops
> > *vops,
> >  				   struct dma_fence *fence)
> >  {
> >  	struct xe_exec_queue *wait_exec_queue =
> > to_wait_exec_queue(vm, vops->q);
> > +	struct xe_user_fence *ufence;
> >  	struct xe_vma_op *op;
> >  	int i;
> > 
> > +	ufence = find_ufence_get(vops->syncs, vops->num_syncs);
> >  	list_for_each_entry(op, &vops->list, link) {
> > +		if (ufence)
> > +			op_add_ufence(vm, op, ufence);
> > +
> >  		if (op->base.op == DRM_GPUVA_OP_UNMAP)
> >  			xe_vma_destroy(gpuva_to_vma(op-
> > >base.unmap.va), fence);
> >  		else if (op->base.op == DRM_GPUVA_OP_REMAP)
> >  			xe_vma_destroy(gpuva_to_vma(op-
> > >base.remap.unmap->va),
> >  				       fence);
> >  	}
> > +	if (ufence)
> > +		xe_sync_ufence_put(ufence);
> >  	for (i = 0; i < vops->num_syncs; i++)
> >  		xe_sync_entry_signal(vops->syncs + i, fence);
> >  	xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
> > --
> > 2.34.1
> 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 03/13] drm/xe: Move migrate to prefetch to op_lock_and_prep function
  2024-04-18 19:27   ` Zeng, Oak
@ 2024-04-19 19:52     ` Matthew Brost
  2024-04-23  3:32       ` Zeng, Oak
  0 siblings, 1 reply; 40+ messages in thread
From: Matthew Brost @ 2024-04-19 19:52 UTC (permalink / raw)
  To: Zeng, Oak; +Cc: intel-xe

On Thu, Apr 18, 2024 at 01:27:13PM -0600, Zeng, Oak wrote:
> 
> 
> > -----Original Message-----
> > From: Brost, Matthew <matthew.brost@intel.com>
> > Sent: Wednesday, April 10, 2024 1:41 AM
> > To: intel-xe@lists.freedesktop.org
> > Cc: Brost, Matthew <matthew.brost@intel.com>; Zeng, Oak
> > <oak.zeng@intel.com>
> > Subject: [PATCH 03/13] drm/xe: Move migrate to prefetch to
> > op_lock_and_prep function
> > 
> > All non-binding operations in VM bind IOCTL should be in the lock and
> > prepare step rather than the execution step. Move prefetch to conform to
> > this pattern.
> > 
> > v2:
> >  - Rebase
> >  - New function names (Oak)
> >  - Update stale comment (Oak)
> > 
> > Cc: Oak Zeng <oak.zeng@intel.com>
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_vm.c | 30 +++++++++++++++---------------
> >  1 file changed, 15 insertions(+), 15 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > index 84c6b10b4b78..2c0521573154 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -2039,20 +2039,10 @@ static const u32 region_to_mem_type[] = {
> > 
> >  static struct dma_fence *
> >  xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
> > -	       struct xe_exec_queue *q, u32 region,
> > -	       struct xe_sync_entry *syncs, u32 num_syncs,
> > -	       bool first_op, bool last_op)
> > +	       struct xe_exec_queue *q, struct xe_sync_entry *syncs,
> > +	       u32 num_syncs, bool first_op, bool last_op)
> 
> 
> I am wondering, do you still need this function? The original prefetch function is migration + vm_bind. Now you moved the migration to lock_and_prepare step, only vm bind left...
> 
> Even if you keep this function, we should change the name... it is not a prefetch anymore...
> 

I'd rather leave it as is for the following reasons:

1. The code is slightly different and skips the bind under certain
conditions (see the sketch below).
2. It still implements the prefetch op, so the name applies.
3. This is just a staging patch and this function gets deleted once a
version of [1] is merged; I'd rather not squabble / nitpick code that
is temporary. The goal is to not regress behavior while making progress
towards [1].

Matt

[1] https://patchwork.freedesktop.org/patch/582024/?series=125608&rev=5
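
For reference, a tiny self-contained sketch of the "skips the bind under
certain conditions" check from reason 1; the bitmask test is lifted from
the quoted xe_vm_prefetch(), everything else here is a hypothetical
stand-in:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/*
 * The prefetch op only (re)binds when some requested tile does not
 * already hold a valid binding; otherwise the last fence is returned.
 */
static bool prefetch_needs_bind(uint8_t tile_mask, uint8_t tile_present,
				uint8_t tile_invalidated)
{
	return tile_mask != (uint8_t)(tile_present & ~tile_invalidated);
}

int main(void)
{
	/* both tiles requested, both bound and valid: bind is skipped */
	printf("%d\n", prefetch_needs_bind(0x3, 0x3, 0x0));	/* 0 */
	/* tile 1's binding was invalidated: bind is needed */
	printf("%d\n", prefetch_needs_bind(0x3, 0x3, 0x2));	/* 1 */
	return 0;
}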

> Oak
> 
> >  {
> >  	struct xe_exec_queue *wait_exec_queue =
> > to_wait_exec_queue(vm, q);
> > -	int err;
> > -
> > -	xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
> > -
> > -	if (!xe_vma_has_no_bo(vma)) {
> > -		err = xe_bo_migrate(xe_vma_bo(vma),
> > region_to_mem_type[region]);
> > -		if (err)
> > -			return ERR_PTR(err);
> > -	}
> > 
> >  	if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated))
> > {
> >  		return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs,
> > num_syncs,
> > @@ -2592,8 +2582,7 @@ static struct dma_fence *op_execute(struct xe_vm
> > *vm, struct xe_vma *vma,
> >  				     op->flags & XE_VMA_OP_LAST);
> >  		break;
> >  	case DRM_GPUVA_OP_PREFETCH:
> > -		fence = xe_vm_prefetch(vm, vma, op->q, op-
> > >prefetch.region,
> > -				       op->syncs, op->num_syncs,
> > +		fence = xe_vm_prefetch(vm, vma, op->q, op->syncs, op-
> > >num_syncs,
> >  				       op->flags & XE_VMA_OP_FIRST,
> >  				       op->flags & XE_VMA_OP_LAST);
> >  		break;
> > @@ -2823,9 +2812,20 @@ static int op_lock_and_prep(struct drm_exec
> > *exec, struct xe_vm *vm,
> >  					    false);
> >  		break;
> >  	case DRM_GPUVA_OP_PREFETCH:
> > +	{
> > +		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> > +		u32 region = op->prefetch.region;
> > +
> > +		xe_assert(vm->xe, region <=
> > ARRAY_SIZE(region_to_mem_type));
> > +
> >  		err = vma_lock_and_validate(exec,
> > -					    gpuva_to_vma(op-
> > >base.prefetch.va), true);
> > +					    gpuva_to_vma(op-
> > >base.prefetch.va),
> > +					    false);
> > +		if (!err && !xe_vma_has_no_bo(vma))
> > +			err = xe_bo_migrate(xe_vma_bo(vma),
> > +					    region_to_mem_type[region]);
> >  		break;
> > +	}
> >  	default:
> >  		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
> >  	}
> > --
> > 2.34.1
> 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* RE: [PATCH 02/13] drm/xe: Add ops_execute function which returns a fence
  2024-04-18 19:36     ` Matthew Brost
@ 2024-04-23  3:09       ` Zeng, Oak
  0 siblings, 0 replies; 40+ messages in thread
From: Zeng, Oak @ 2024-04-23  3:09 UTC (permalink / raw)
  To: Brost, Matthew; +Cc: intel-xe



> -----Original Message-----
> From: Brost, Matthew <matthew.brost@intel.com>
> Sent: Thursday, April 18, 2024 3:37 PM
> To: Zeng, Oak <oak.zeng@intel.com>
> Cc: intel-xe@lists.freedesktop.org
> Subject: Re: [PATCH 02/13] drm/xe: Add ops_execute function which returns
> a fence
> 
> On Thu, Apr 18, 2024 at 10:16:15AM -0600, Zeng, Oak wrote:
> >
> >
> > > -----Original Message-----
> > > From: Brost, Matthew <matthew.brost@intel.com>
> > > Sent: Wednesday, April 10, 2024 1:41 AM
> > > To: intel-xe@lists.freedesktop.org
> > > Cc: Brost, Matthew <matthew.brost@intel.com>; Zeng, Oak
> > > <oak.zeng@intel.com>
> > > Subject: [PATCH 02/13] drm/xe: Add ops_execute function which returns
> a
> > > fence
> > >
> > > Add ops_execute function which returns a fence. This will be helpful to
> > > initiate all binds (VM bind IOCTL, rebinds in exec IOCTL, rebinds in
> > > preempt rebind worker, and rebinds in pagefaults) via a gpuva ops list.
> > > Returning a fence is needed in various paths.
> > >
> > > v2:
> > >  - Rebase
> > >
> > > Cc: Oak Zeng <oak.zeng@intel.com>
> > > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > > ---
> > >  drivers/gpu/drm/xe/xe_vm.c | 211 +++++++++++++++++++---------------
> ---
> > >  1 file changed, 111 insertions(+), 100 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > > index 6375c136e21a..84c6b10b4b78 100644
> > > --- a/drivers/gpu/drm/xe/xe_vm.c
> > > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > > @@ -1834,16 +1834,17 @@ find_ufence_get(struct xe_sync_entry
> *syncs,
> > > u32 num_syncs)
> > >  	return NULL;
> > >  }
> > >
> > > -static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
> > > -			struct xe_exec_queue *q, struct xe_sync_entry
> > > *syncs,
> > > -			u32 num_syncs, bool immediate, bool first_op,
> > > -			bool last_op)
> > > +static struct dma_fence *
> > > +xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct
> > > xe_exec_queue *q,
> > > +	   struct xe_bo *bo, struct xe_sync_entry *syncs, u32 num_syncs,
> > > +	   bool immediate, bool first_op, bool last_op)
> > >  {
> > >  	struct dma_fence *fence;
> > >  	struct xe_exec_queue *wait_exec_queue =
> > > to_wait_exec_queue(vm, q);
> > >  	struct xe_user_fence *ufence;
> > >
> > >  	xe_vm_assert_held(vm);
> > > +	xe_bo_assert_held(bo);
> > >
> > >  	ufence = find_ufence_get(syncs, num_syncs);
> > >  	if (vma->ufence && ufence)
> > > @@ -1855,7 +1856,7 @@ static int __xe_vm_bind(struct xe_vm *vm,
> struct
> > > xe_vma *vma,
> > >  		fence = xe_vm_bind_vma(vma, q, syncs, num_syncs,
> > > first_op,
> > >  				       last_op);
> > >  		if (IS_ERR(fence))
> > > -			return PTR_ERR(fence);
> > > +			return fence;
> > >  	} else {
> > >  		int i;
> > >
> > > @@ -1870,26 +1871,14 @@ static int __xe_vm_bind(struct xe_vm *vm,
> > > struct xe_vma *vma,
> > >
> > >  	if (last_op)
> > >  		xe_exec_queue_last_fence_set(wait_exec_queue, vm,
> > > fence);
> > > -	dma_fence_put(fence);
> > > -
> > > -	return 0;
> > > -}
> > > -
> > > -static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct
> > > xe_exec_queue *q,
> > > -		      struct xe_bo *bo, struct xe_sync_entry *syncs,
> > > -		      u32 num_syncs, bool immediate, bool first_op,
> > > -		      bool last_op)
> > > -{
> > > -	xe_vm_assert_held(vm);
> > > -	xe_bo_assert_held(bo);
> > >
> > > -	return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate,
> > > first_op,
> > > -			    last_op);
> > > +	return fence;
> > >  }
> > >
> > > -static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
> > > -			struct xe_exec_queue *q, struct xe_sync_entry
> > > *syncs,
> > > -			u32 num_syncs, bool first_op, bool last_op)
> > > +static struct dma_fence *
> > > +xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
> > > +	     struct xe_exec_queue *q, struct xe_sync_entry *syncs,
> > > +	     u32 num_syncs, bool first_op, bool last_op)
> > >  {
> > >  	struct dma_fence *fence;
> > >  	struct xe_exec_queue *wait_exec_queue =
> > > to_wait_exec_queue(vm, q);
> > > @@ -1899,14 +1888,13 @@ static int xe_vm_unbind(struct xe_vm *vm,
> > > struct xe_vma *vma,
> > >
> > >  	fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op,
> > > last_op);
> > >  	if (IS_ERR(fence))
> > > -		return PTR_ERR(fence);
> > > +		return fence;
> > >
> > >  	xe_vma_destroy(vma, fence);
> > >  	if (last_op)
> > >  		xe_exec_queue_last_fence_set(wait_exec_queue, vm,
> > > fence);
> > > -	dma_fence_put(fence);
> > >
> > > -	return 0;
> > > +	return fence;
> > >  }
> > >
> > >  #define ALL_DRM_XE_VM_CREATE_FLAGS
> > > (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
> > > @@ -2049,10 +2037,11 @@ static const u32 region_to_mem_type[] = {
> > >  	XE_PL_VRAM1,
> > >  };
> > >
> > > -static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
> > > -			  struct xe_exec_queue *q, u32 region,
> > > -			  struct xe_sync_entry *syncs, u32 num_syncs,
> > > -			  bool first_op, bool last_op)
> > > +static struct dma_fence *
> > > +xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
> > > +	       struct xe_exec_queue *q, u32 region,
> > > +	       struct xe_sync_entry *syncs, u32 num_syncs,
> > > +	       bool first_op, bool last_op)
> > >  {
> > >  	struct xe_exec_queue *wait_exec_queue =
> > > to_wait_exec_queue(vm, q);
> > >  	int err;
> > > @@ -2062,27 +2051,24 @@ static int xe_vm_prefetch(struct xe_vm *vm,
> > > struct xe_vma *vma,
> > >  	if (!xe_vma_has_no_bo(vma)) {
> > >  		err = xe_bo_migrate(xe_vma_bo(vma),
> > > region_to_mem_type[region]);
> > >  		if (err)
> > > -			return err;
> > > +			return ERR_PTR(err);
> > >  	}
> > >
> > >  	if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated))
> > > {
> > >  		return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs,
> > > num_syncs,
> > >  				  true, first_op, last_op);
> > >  	} else {
> > > +		struct dma_fence *fence =
> > > +			xe_exec_queue_last_fence_get(wait_exec_queue,
> > > vm);
> > >  		int i;
> > >
> > >  		/* Nothing to do, signal fences now */
> > >  		if (last_op) {
> > > -			for (i = 0; i < num_syncs; i++) {
> > > -				struct dma_fence *fence =
> > > -
> > > 	xe_exec_queue_last_fence_get(wait_exec_queue, vm);
> > > -
> > > +			for (i = 0; i < num_syncs; i++)
> > >  				xe_sync_entry_signal(&syncs[i], fence);
> > > -				dma_fence_put(fence);
> > > -			}
> > >  		}
> > >
> > > -		return 0;
> > > +		return fence;
> > >  	}
> > >  }
> > >
> > > @@ -2535,10 +2521,10 @@ static int vm_bind_ioctl_ops_parse(struct
> xe_vm
> > > *vm, struct xe_exec_queue *q,
> > >  	return 0;
> > >  }
> > >
> > > -static int op_execute(struct xe_vm *vm, struct xe_vma *vma,
> > > -		      struct xe_vma_op *op)
> > > +static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma
> > > *vma,
> > > +				    struct xe_vma_op *op)
> > >  {
> > > -	int err;
> > > +	struct dma_fence *fence = NULL;
> > >
> > >  	lockdep_assert_held_write(&vm->lock);
> > >
> > > @@ -2547,11 +2533,11 @@ static int op_execute(struct xe_vm *vm,
> struct
> > > xe_vma *vma,
> > >
> > >  	switch (op->base.op) {
> > >  	case DRM_GPUVA_OP_MAP:
> > > -		err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
> > > -				 op->syncs, op->num_syncs,
> > > -				 op->map.immediate
> > > || !xe_vm_in_fault_mode(vm),
> > > -				 op->flags & XE_VMA_OP_FIRST,
> > > -				 op->flags & XE_VMA_OP_LAST);
> > > +		fence = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
> > > +				   op->syncs, op->num_syncs,
> > > +				   op->map.immediate
> > > || !xe_vm_in_fault_mode(vm),
> > > +				   op->flags & XE_VMA_OP_FIRST,
> > > +				   op->flags & XE_VMA_OP_LAST);
> > >  		break;
> > >  	case DRM_GPUVA_OP_REMAP:
> > >  	{
> > > @@ -2561,37 +2547,39 @@ static int op_execute(struct xe_vm *vm,
> struct
> > > xe_vma *vma,
> > >  		if (!op->remap.unmap_done) {
> > >  			if (prev || next)
> > >  				vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
> > > -			err = xe_vm_unbind(vm, vma, op->q, op->syncs,
> > > -					   op->num_syncs,
> > > -					   op->flags & XE_VMA_OP_FIRST,
> > > -					   op->flags & XE_VMA_OP_LAST &&
> > > -					   !prev && !next);
> > > -			if (err)
> > > +			fence = xe_vm_unbind(vm, vma, op->q, op->syncs,
> > > +					     op->num_syncs,
> > > +					     op->flags & XE_VMA_OP_FIRST,
> > > +					     op->flags & XE_VMA_OP_LAST &&
> > > +					     !prev && !next);
> > > +			if (IS_ERR(fence))
> > >  				break;
> > >  			op->remap.unmap_done = true;
> > >  		}
> > >
> > >  		if (prev) {
> > >  			op->remap.prev->gpuva.flags |=
> > > XE_VMA_LAST_REBIND;
> > > -			err = xe_vm_bind(vm, op->remap.prev, op->q,
> > > -					 xe_vma_bo(op->remap.prev), op-
> > > >syncs,
> > > -					 op->num_syncs, true, false,
> > > -					 op->flags & XE_VMA_OP_LAST
> > > && !next);
> > > +			dma_fence_put(fence);
> > > +			fence = xe_vm_bind(vm, op->remap.prev, op->q,
> > > +					   xe_vma_bo(op->remap.prev), op-
> > > >syncs,
> > > +					   op->num_syncs, true, false,
> > > +					   op->flags & XE_VMA_OP_LAST
> > > && !next);
> > >  			op->remap.prev->gpuva.flags &=
> > > ~XE_VMA_LAST_REBIND;
> > > -			if (err)
> > > +			if (IS_ERR(fence))
> > >  				break;
> > >  			op->remap.prev = NULL;
> > >  		}
> > >
> > >  		if (next) {
> > >  			op->remap.next->gpuva.flags |=
> > > XE_VMA_LAST_REBIND;
> > > -			err = xe_vm_bind(vm, op->remap.next, op->q,
> > > -					 xe_vma_bo(op->remap.next),
> > > -					 op->syncs, op->num_syncs,
> > > -					 true, false,
> > > -					 op->flags & XE_VMA_OP_LAST);
> > > +			dma_fence_put(fence);
> > > +			fence = xe_vm_bind(vm, op->remap.next, op->q,
> > > +					   xe_vma_bo(op->remap.next),
> > > +					   op->syncs, op->num_syncs,
> > > +					   true, false,
> > > +					   op->flags & XE_VMA_OP_LAST);
> > >  			op->remap.next->gpuva.flags &=
> > > ~XE_VMA_LAST_REBIND;
> > > -			if (err)
> > > +			if (IS_ERR(fence))
> > >  				break;
> > >  			op->remap.next = NULL;
> > >  		}
> > > @@ -2599,34 +2587,36 @@ static int op_execute(struct xe_vm *vm,
> struct
> > > xe_vma *vma,
> > >  		break;
> > >  	}
> > >  	case DRM_GPUVA_OP_UNMAP:
> > > -		err = xe_vm_unbind(vm, vma, op->q, op->syncs,
> > > -				   op->num_syncs, op->flags &
> > > XE_VMA_OP_FIRST,
> > > -				   op->flags & XE_VMA_OP_LAST);
> > > +		fence = xe_vm_unbind(vm, vma, op->q, op->syncs,
> > > +				     op->num_syncs, op->flags &
> > > XE_VMA_OP_FIRST,
> > > +				     op->flags & XE_VMA_OP_LAST);
> > >  		break;
> > >  	case DRM_GPUVA_OP_PREFETCH:
> > > -		err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region,
> > > -				     op->syncs, op->num_syncs,
> > > -				     op->flags & XE_VMA_OP_FIRST,
> > > -				     op->flags & XE_VMA_OP_LAST);
> > > +		fence = xe_vm_prefetch(vm, vma, op->q, op-
> > > >prefetch.region,
> > > +				       op->syncs, op->num_syncs,
> > > +				       op->flags & XE_VMA_OP_FIRST,
> > > +				       op->flags & XE_VMA_OP_LAST);
> > >  		break;
> > >  	default:
> > >  		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
> > >  	}
> > >
> > > -	if (err)
> > > +	if (IS_ERR(fence))
> > >  		trace_xe_vma_fail(vma);
> > >
> > > -	return err;
> > > +	return fence;
> > >  }
> > >
> > > -static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma
> *vma,
> > > -			       struct xe_vma_op *op)
> > > +static struct dma_fence *
> > > +__xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
> > > +		    struct xe_vma_op *op)
> > >  {
> > > +	struct dma_fence *fence;
> > >  	int err;
> > >
> > >  retry_userptr:
> > > -	err = op_execute(vm, vma, op);
> > > -	if (err == -EAGAIN) {
> > > +	fence = op_execute(vm, vma, op);
> > > +	if (IS_ERR(fence) && PTR_ERR(fence) == -EAGAIN) {
> > >  		lockdep_assert_held_write(&vm->lock);
> > >
> > >  		if (op->base.op == DRM_GPUVA_OP_REMAP) {
> > > @@ -2643,22 +2633,24 @@ static int __xe_vma_op_execute(struct
> xe_vm
> > > *vm, struct xe_vma *vma,
> > >  			if (!err)
> > >  				goto retry_userptr;
> > >
> > > +			fence = ERR_PTR(err);
> > >  			trace_xe_vma_fail(vma);
> > >  		}
> > >  	}
> > >
> > > -	return err;
> > > +	return fence;
> > >  }
> > >
> > > -static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op
> *op)
> > > +static struct dma_fence *
> > > +xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
> > >  {
> > > -	int ret = 0;
> > > +	struct dma_fence *fence = ERR_PTR(-ENOMEM);
> > >
> > >  	lockdep_assert_held_write(&vm->lock);
> > >
> > >  	switch (op->base.op) {
> > >  	case DRM_GPUVA_OP_MAP:
> > > -		ret = __xe_vma_op_execute(vm, op->map.vma, op);
> > > +		fence = __xe_vma_op_execute(vm, op->map.vma, op);
> > >  		break;
> > >  	case DRM_GPUVA_OP_REMAP:
> > >  	{
> > > @@ -2671,23 +2663,23 @@ static int xe_vma_op_execute(struct xe_vm
> *vm,
> > > struct xe_vma_op *op)
> > >  		else
> > >  			vma = op->remap.next;
> > >
> > > -		ret = __xe_vma_op_execute(vm, vma, op);
> > > +		fence = __xe_vma_op_execute(vm, vma, op);
> > >  		break;
> > >  	}
> > >  	case DRM_GPUVA_OP_UNMAP:
> > > -		ret = __xe_vma_op_execute(vm, gpuva_to_vma(op-
> > > >base.unmap.va),
> > > -					  op);
> > > +		fence = __xe_vma_op_execute(vm, gpuva_to_vma(op-
> > > >base.unmap.va),
> > > +					    op);
> > >  		break;
> > >  	case DRM_GPUVA_OP_PREFETCH:
> > > -		ret = __xe_vma_op_execute(vm,
> > > -					  gpuva_to_vma(op-
> > > >base.prefetch.va),
> > > -					  op);
> > > +		fence = __xe_vma_op_execute(vm,
> > > +					    gpuva_to_vma(op-
> > > >base.prefetch.va),
> > > +					    op);
> > >  		break;
> > >  	default:
> > >  		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
> > >  	}
> > >
> > > -	return ret;
> > > +	return fence;
> > >  }
> > >
> > >  static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op
> *op)
> > > @@ -2861,11 +2853,35 @@ static int
> > > vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
> > >  	return 0;
> > >  }
> > >
> > > +static struct dma_fence *ops_execute(struct xe_vm *vm,
> > > +				     struct list_head *ops_list,
> > > +				     bool cleanup)
> > > +{
> > > +	struct xe_vma_op *op, *next;
> > > +	struct dma_fence *fence = NULL;
> > > +
> > > +	list_for_each_entry_safe(op, next, ops_list, link) {
> > > +		if (!IS_ERR(fence)) {
> > > +			dma_fence_put(fence);
> > > +			fence = xe_vma_op_execute(vm, op);
> > > +		}
> > > +		if (IS_ERR(fence)) {
> > > +			drm_warn(&vm->xe->drm, "VM op(%d) failed
> > > with %ld",
> > > +				 op->base.op, PTR_ERR(fence));
> > > +			fence = ERR_PTR(-ENOSPC);
> >
> > There is an earlier comment that was not addressed. Copied below:
> >
> >
> > > > Once an error happens for one operation, you seem to print the same error
> > > > message for all the rest of the operations.... because fence =
> > > > xe_vma_op_execute(vm, op) is not called anymore after the first error
> > > >
> > >
> > > Yes.
> >
> > Is this problematic though? Let's say you have 2 ops in the list and
> > op_execute fails on op1. You will print as below:
> >
> > VM op1 failed with xxx
> > VM op1 failed with xxx
> >
> 
> I don't think that is a problem, and this changes later in the series once
> xe_vma_op_cleanup is removed from this function.

Right.

> 
> >
> >
> > > +		}
> > > +		if (cleanup)
> > > +			xe_vma_op_cleanup(vm, op);
> > > +	}
> > > +
> > > +	return fence;
> > > +}
> > > +
> > >  static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
> > >  				     struct list_head *ops_list)
> > >  {
> > >  	struct drm_exec exec;
> > > -	struct xe_vma_op *op, *next;
> > > +	struct dma_fence *fence;
> > >  	int err;
> > >
> > >  	lockdep_assert_held_write(&vm->lock);
> > > @@ -2878,19 +2894,14 @@ static int vm_bind_ioctl_ops_execute(struct
> > > xe_vm *vm,
> > >  		if (err)
> > >  			goto unlock;
> > >
> > > -		list_for_each_entry_safe(op, next, ops_list, link) {
> > > -			err = xe_vma_op_execute(vm, op);
> > > -			if (err) {
> > > -				drm_warn(&vm->xe->drm, "VM op(%d)
> > > failed with %d",
> > > -					 op->base.op, err);
> > > -				/*
> > > -				 * FIXME: Killing VM rather than proper error
> > > handling
> > > -				 */
> > > -				xe_vm_kill(vm, false);
> > > -				err = -ENOSPC;
> > > -				goto unlock;
> > > -			}
> > > -			xe_vma_op_cleanup(vm, op);
> > > +		fence = ops_execute(vm, ops_list, true);
> > > +		if (IS_ERR(fence)) {
> > > +			err = PTR_ERR(fence);
> > > +			/* FIXME: Killing VM rather than proper error
> > > handling */
> > > +			xe_vm_kill(vm, false);
> > > +			goto unlock;
> > > +		} else {
> > > +			dma_fence_put(fence);
> >
> > I don't get it here. You introduced the function ops_execute to return the
> > last fence of all the operations, but you just put the fence here. Don't you
> > intend to wait for this fence somehow? What is the point of returning a fence
> > from ops_execute?
> 
> It is used in patches #7 [1] and #9 [2] in this series.
> 
> In [1], the returned fence is used to wait for the ops to complete before
> signaling page fault completion to the GuC.
> 
> In [2], the returned fence is used as an argument to
> vm_bind_ioctl_ops_fini, which attaches VMA destruction to the fence, installs
> the fence in the IOCTL out-syncs, and sets the last fence on the exec queue.
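> 
> For illustration only, the consumers look roughly like this (a sketch pieced
> together from the hunks quoted in those patches, not a verbatim quote):
> 
> 	/* [1] page fault handler: wait for the rebind before acking the fault */
> 	fence = xe_vma_rebind(vm, vma, BIT(tile->id));
> 	if (IS_ERR(fence))
> 		return PTR_ERR(fence);
> 	dma_fence_wait(fence, false);
> 	dma_fence_put(fence);
> 
> 	/* [2] vm_bind_ioctl_ops_fini: install fence in out-syncs, set last fence */
> 	for (i = 0; i < vops->num_syncs; i++)
> 		xe_sync_entry_signal(vops->syncs + i, fence);
> 	xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
> 	dma_fence_put(fence);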

I see. Thanks for explaining. Patch is:

Reviewed-by: Oak Zeng <oak.zeng@intel.com>

> 
> Matt
> 
> [1] https://patchwork.freedesktop.org/patch/588594/?series=132246&rev=1
> [2] https://patchwork.freedesktop.org/patch/588595/?series=132246&rev=1
> 
> > Oak
> >
> >
> > >  		}
> > >  	}
> > >
> > > --
> > > 2.34.1
> >

^ permalink raw reply	[flat|nested] 40+ messages in thread

* RE: [PATCH 05/13] drm/xe: Use xe_vma_ops to implement xe_vm_rebind
  2024-04-19  4:14     ` Matthew Brost
@ 2024-04-23  3:17       ` Zeng, Oak
  0 siblings, 0 replies; 40+ messages in thread
From: Zeng, Oak @ 2024-04-23  3:17 UTC (permalink / raw)
  To: Brost, Matthew; +Cc: intel-xe



> -----Original Message-----
> From: Brost, Matthew <matthew.brost@intel.com>
> Sent: Friday, April 19, 2024 12:14 AM
> To: Zeng, Oak <oak.zeng@intel.com>
> Cc: intel-xe@lists.freedesktop.org
> Subject: Re: [PATCH 05/13] drm/xe: Use xe_vma_ops to implement
> xe_vm_rebind
> 
> On Thu, Apr 18, 2024 at 09:43:06PM -0600, Zeng, Oak wrote:
> >
> >
> > > -----Original Message-----
> > > From: Intel-xe <intel-xe-bounces@lists.freedesktop.org> On Behalf Of
> > > Matthew Brost
> > > Sent: Wednesday, April 10, 2024 1:41 AM
> > > To: intel-xe@lists.freedesktop.org
> > > Cc: Brost, Matthew <matthew.brost@intel.com>
> > > Subject: [PATCH 05/13] drm/xe: Use xe_vma_ops to implement
> > > xe_vm_rebind
> > >
> > > All page tables updates are moving to a xe_vma_ops interface to
> > > implement 1 job per VM bind IOCTL.
> >
> > Just want to make sure I understand it correctly: so far, after this patch,
> > the rebind is still many jobs (one job per vma), right?
> >
> 
> Yes. A follow-on series will convert this to 1 job for the entire rebind list.
> 
> >
> >  Convert xe_vm_rebind to use a
> > > xe_vma_ops based interface.
> > >
> > > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > > ---
> > >  drivers/gpu/drm/xe/xe_vm.c | 78
> +++++++++++++++++++++++++++++++-
> > > ------
> > >  1 file changed, 64 insertions(+), 14 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > > index 4cd485d5bc0a..9d82396cf5d5 100644
> > > --- a/drivers/gpu/drm/xe/xe_vm.c
> > > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > > @@ -811,37 +811,87 @@ int xe_vm_userptr_check_repin(struct xe_vm
> *vm)
> > >  		list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
> > >  }
> > >
> > > -static struct dma_fence *
> > > -xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
> > > -	       struct xe_sync_entry *syncs, u32 num_syncs,
> > > -	       bool first_op, bool last_op);
> > > +static void xe_vm_populate_rebind(struct xe_vma_op *op, struct
> xe_vma
> > > *vma,
> > > +				  u8 tile_mask)
> > > +{
> > > +	INIT_LIST_HEAD(&op->link);
> > > +	op->base.op = DRM_GPUVA_OP_MAP;
> > > +	op->base.map.va.addr = vma->gpuva.va.addr;
> > > +	op->base.map.va.range = vma->gpuva.va.range;
> > > +	op->base.map.gem.obj = vma->gpuva.gem.obj;
> > > +	op->base.map.gem.offset = vma->gpuva.gem.offset;
> > > +	op->map.vma = vma;
> > > +	op->map.immediate = true;
> > > +	op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE;
> > > +	op->map.is_null = xe_vma_is_null(vma);
> > > +}
> > > +
> > > +static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct
> xe_vma
> > > *vma,
> > > +				u8 tile_mask)
> > > +{
> > > +	struct xe_vma_op *op;
> > > +
> > > +	op = kzalloc(sizeof(*op), GFP_KERNEL);
> > > +	if (!op)
> > > +		return -ENOMEM;
> > > +
> > > +	xe_vm_populate_rebind(op, vma, tile_mask);
> > > +	list_add_tail(&op->link, &vops->list);
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static struct dma_fence *ops_execute(struct xe_vm *vm,
> > > +				     struct xe_vma_ops *vops,
> > > +				     bool cleanup);
> > > +static void xe_vma_ops_init(struct xe_vma_ops *vops);
> > >
> > >  int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
> > >  {
> > >  	struct dma_fence *fence;
> > >  	struct xe_vma *vma, *next;
> > > +	struct xe_vma_ops vops;
> > > +	struct xe_vma_op *op, *next_op;
> > > +	int err;
> > >
> > >  	lockdep_assert_held(&vm->lock);
> > > -	if (xe_vm_in_lr_mode(vm) && !rebind_worker)
> > > +	if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
> > > +	    list_empty(&vm->rebind_list))
> > >  		return 0;
> > >
> > > +	xe_vma_ops_init(&vops);
> > > +
> > >  	xe_vm_assert_held(vm);
> > > -	list_for_each_entry_safe(vma, next, &vm->rebind_list,
> > > -				 combined_links.rebind) {
> > > +	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind)
> > > {
> > >  		xe_assert(vm->xe, vma->tile_present);
> > >
> > > -		list_del_init(&vma->combined_links.rebind);
> > >  		if (rebind_worker)
> > >  			trace_xe_vma_rebind_worker(vma);
> > >  		else
> > >  			trace_xe_vma_rebind_exec(vma);
> > > -		fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
> > > -		if (IS_ERR(fence))
> > > -			return PTR_ERR(fence);
> > > +
> > > +		err = xe_vm_ops_add_rebind(&vops, vma,
> > > +					   vma->tile_present);
> > > +		if (err)
> > > +			goto free_ops;
> > > +	}
> > > +
> > > +	fence = ops_execute(vm, &vops, false);
> > > +	if (IS_ERR(fence)) {
> > > +		err = PTR_ERR(fence);
> >
> > So here, if the ops_execute above partially succeeds (some vma binds failed,
> > some succeeded), the vmas that were successfully bound are kept in the vm's
> > rebind_list. Is this the correct behavior? Next time we will rebind them
> > again....
> >
> 
> The VM is killed if any VMA op fails, so it doesn't really matter; also,
> it is safe to issue a rebind twice.
> 
> In the follow-up series, once we have 1 job for the entire rebind list, we
> can cope with errors and not kill the VM. In that case we must leave
> everything on the rebind list.
> 
> So this patch is correct both now and for the follow-on series.

I see. Patch is:

Reviewed-by: Oak Zeng <oak.zeng@intel.com>


> 
> Matt
> 
> >
> > Oak
> >
> >
> > > +	} else {
> > >  		dma_fence_put(fence);
> > > +		list_for_each_entry_safe(vma, next, &vm->rebind_list,
> > > +					 combined_links.rebind)
> > > +			list_del_init(&vma->combined_links.rebind);
> > > +	}
> > > +free_ops:
> > > +	list_for_each_entry_safe(op, next_op, &vops.list, link) {
> > > +		list_del(&op->link);
> > > +		kfree(op);
> > >  	}
> > >
> > > -	return 0;
> > > +	return err;
> > >  }
> > >
> > >  static void xe_vma_free(struct xe_vma *vma)
> > > @@ -2516,7 +2566,7 @@ static struct dma_fence *op_execute(struct
> xe_vm
> > > *vm, struct xe_vma *vma,
> > >  {
> > >  	struct dma_fence *fence = NULL;
> > >
> > > -	lockdep_assert_held_write(&vm->lock);
> > > +	lockdep_assert_held(&vm->lock);
> > >
> > >  	xe_vm_assert_held(vm);
> > >  	xe_bo_assert_held(xe_vma_bo(vma));
> > > @@ -2635,7 +2685,7 @@ xe_vma_op_execute(struct xe_vm *vm, struct
> > > xe_vma_op *op)
> > >  {
> > >  	struct dma_fence *fence = ERR_PTR(-ENOMEM);
> > >
> > > -	lockdep_assert_held_write(&vm->lock);
> > > +	lockdep_assert_held(&vm->lock);
> > >
> > >  	switch (op->base.op) {
> > >  	case DRM_GPUVA_OP_MAP:
> > > --
> > > 2.34.1
> >

^ permalink raw reply	[flat|nested] 40+ messages in thread

* RE: [PATCH 06/13] drm/xe: Simplify VM bind IOCTL error handling and cleanup
  2024-04-19 19:16     ` Matthew Brost
@ 2024-04-23  3:22       ` Zeng, Oak
  0 siblings, 0 replies; 40+ messages in thread
From: Zeng, Oak @ 2024-04-23  3:22 UTC (permalink / raw)
  To: Brost, Matthew; +Cc: intel-xe



> -----Original Message-----
> From: Brost, Matthew <matthew.brost@intel.com>
> Sent: Friday, April 19, 2024 3:16 PM
> To: Zeng, Oak <oak.zeng@intel.com>
> Cc: intel-xe@lists.freedesktop.org
> Subject: Re: [PATCH 06/13] drm/xe: Simplify VM bind IOCTL error handling
> and cleanup
> 
> On Thu, Apr 18, 2024 at 10:19:04PM -0600, Zeng, Oak wrote:
> > It is a nice cleanup. See one question inline.
> >
> > > -----Original Message-----
> > > From: Brost, Matthew <matthew.brost@intel.com>
> > > Sent: Wednesday, April 10, 2024 1:41 AM
> > > To: intel-xe@lists.freedesktop.org
> > > Cc: Brost, Matthew <matthew.brost@intel.com>; Zeng, Oak
> > > <oak.zeng@intel.com>
> > > Subject: [PATCH 06/13] drm/xe: Simplify VM bind IOCTL error handling
> and
> > > cleanup
> > >
> > > Clean up everything in VM bind IOCTL in 1 path for both errors and
> > > non-errors. Also move VM bind IOCTL cleanup from ops (also used by
> > > non-IOCTL binds) to the VM bind IOCTL.
> > >
> > > v2:
> > >  - Break ops_execute on error (Oak)
> > >
> > > Cc: Oak Zeng <oak.zeng@intel.com>
> > > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > > ---
> > >  drivers/gpu/drm/xe/xe_vm.c       | 67 ++++++--------------------------
> > >  drivers/gpu/drm/xe/xe_vm_types.h |  5 ---
> > >  2 files changed, 12 insertions(+), 60 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > > index 9d82396cf5d5..8f5b24c8f6cd 100644
> > > --- a/drivers/gpu/drm/xe/xe_vm.c
> > > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > > @@ -842,8 +842,7 @@ static int xe_vm_ops_add_rebind(struct
> xe_vma_ops
> > > *vops, struct xe_vma *vma,
> > >  }
> > >
> > >  static struct dma_fence *ops_execute(struct xe_vm *vm,
> > > -				     struct xe_vma_ops *vops,
> > > -				     bool cleanup);
> > > +				     struct xe_vma_ops *vops);
> > >  static void xe_vma_ops_init(struct xe_vma_ops *vops);
> > >
> > >  int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
> > > @@ -876,7 +875,7 @@ int xe_vm_rebind(struct xe_vm *vm, bool
> > > rebind_worker)
> > >  			goto free_ops;
> > >  	}
> > >
> > > -	fence = ops_execute(vm, &vops, false);
> > > +	fence = ops_execute(vm, &vops);
> > >  	if (IS_ERR(fence)) {
> > >  		err = PTR_ERR(fence);
> > >  	} else {
> > > @@ -2551,7 +2550,6 @@ static int vm_bind_ioctl_ops_parse(struct
> xe_vm
> > > *vm, struct xe_exec_queue *q,
> > >  	if (!last_op)
> > >  		return 0;
> > >
> > > -	last_op->ops = ops;
> > >  	if (last) {
> > >  		last_op->flags |= XE_VMA_OP_LAST;
> > >  		last_op->num_syncs = num_syncs;
> > > @@ -2721,25 +2719,6 @@ xe_vma_op_execute(struct xe_vm *vm,
> struct
> > > xe_vma_op *op)
> > >  	return fence;
> > >  }
> > >
> > > -static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op
> *op)
> > > -{
> > > -	bool last = op->flags & XE_VMA_OP_LAST;
> > > -
> > > -	if (last) {
> > > -		while (op->num_syncs--)
> > > -			xe_sync_entry_cleanup(&op->syncs[op-
> > > >num_syncs]);
> >
> > I understand all the other parts of this function are not needed anymore,
> > but I couldn't figure out why sync_entry_cleanup is not needed. You still
> > have syncs, don't you? They are allocated in the bind_ioctl function and it
> > seems you didn't touch that in this patch.... Can you explain?
> >
> 
> It is called in the main IOCTL code (xe_vm_bind_ioctl) now.
> 
> See below.
> 
> > Oak
> >
> >
> > > -		kfree(op->syncs);
> > > -		if (op->q)
> > > -			xe_exec_queue_put(op->q);
> > > -	}
> > > -	if (!list_empty(&op->link))
> > > -		list_del(&op->link);
> > > -	if (op->ops)
> > > -		drm_gpuva_ops_free(&vm->gpuvm, op->ops);
> > > -	if (last)
> > > -		xe_vm_put(vm);
> > > -}
> > > -
> > >  static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op
> *op,
> > >  			     bool post_commit, bool prev_post_commit,
> > >  			     bool next_post_commit)
> > > @@ -2816,8 +2795,6 @@ static void vm_bind_ioctl_ops_unwind(struct
> > > xe_vm *vm,
> > >  					 op->flags &
> > > XE_VMA_OP_PREV_COMMITTED,
> > >  					 op->flags &
> > > XE_VMA_OP_NEXT_COMMITTED);
> > >  		}
> > > -
> > > -		drm_gpuva_ops_free(&vm->gpuvm, __ops);
> > >  	}
> > >  }
> > >
> > > @@ -2904,24 +2881,20 @@ static int
> > > vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
> > >  }
> > >
> > >  static struct dma_fence *ops_execute(struct xe_vm *vm,
> > > -				     struct xe_vma_ops *vops,
> > > -				     bool cleanup)
> > > +				     struct xe_vma_ops *vops)
> > >  {
> > >  	struct xe_vma_op *op, *next;
> > >  	struct dma_fence *fence = NULL;
> > >
> > >  	list_for_each_entry_safe(op, next, &vops->list, link) {
> > > -		if (!IS_ERR(fence)) {
> > > -			dma_fence_put(fence);
> > > -			fence = xe_vma_op_execute(vm, op);
> > > -		}
> > > +		dma_fence_put(fence);
> > > +		fence = xe_vma_op_execute(vm, op);
> > >  		if (IS_ERR(fence)) {
> > >  			drm_warn(&vm->xe->drm, "VM op(%d) failed
> > > with %ld",
> > >  				 op->base.op, PTR_ERR(fence));
> > >  			fence = ERR_PTR(-ENOSPC);
> > > +			break;
> > >  		}
> > > -		if (cleanup)
> > > -			xe_vma_op_cleanup(vm, op);
> > >  	}
> > >
> > >  	return fence;
> > > @@ -2944,7 +2917,7 @@ static int vm_bind_ioctl_ops_execute(struct
> xe_vm
> > > *vm,
> > >  		if (err)
> > >  			goto unlock;
> > >
> > > -		fence = ops_execute(vm, vops, true);
> > > +		fence = ops_execute(vm, vops);
> > >  		if (IS_ERR(fence)) {
> > >  			err = PTR_ERR(fence);
> > >  			/* FIXME: Killing VM rather than proper error
> > > handling */
> > > @@ -3305,30 +3278,14 @@ int xe_vm_bind_ioctl(struct drm_device *dev,
> > > void *data, struct drm_file *file)
> > >  		goto unwind_ops;
> > >  	}
> > >
> > > -	xe_vm_get(vm);
> > > -	if (q)
> > > -		xe_exec_queue_get(q);
> > > -
> > >  	err = vm_bind_ioctl_ops_execute(vm, &vops);
> > >
> > > -	up_write(&vm->lock);
> > > -
> > > -	if (q)
> > > -		xe_exec_queue_put(q);
> > > -	xe_vm_put(vm);
> > > -
> > > -	for (i = 0; bos && i < args->num_binds; ++i)
> > > -		xe_bo_put(bos[i]);
> > > -
> > > -	kvfree(bos);
> > > -	kvfree(ops);
> > > -	if (args->num_binds > 1)
> > > -		kvfree(bind_ops);
> > > -
> > > -	return err;
> > > -
> 
> We now fall through to the cleanup in both the success and error paths...
> 
> > >  unwind_ops:
> > > -	vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
> > > +	if (err && err != -ENODATA)
> > > +		vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
> > > +	for (i = args->num_binds - 1; i >= 0; --i)
> > > +		if (ops[i])
> > > +			drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
> > >  free_syncs:
> > >  	if (err == -ENODATA)
> > >  		err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
> 
> The next few lines of code call xe_sync_entry_cleanup.
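> 
> Roughly, the tail of xe_vm_bind_ioctl then reads (a sketch, not a verbatim
> quote of the final code):
> 
> free_syncs:
> 	if (err == -ENODATA)
> 		err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
> 	while (num_syncs--)
> 		xe_sync_entry_cleanup(&syncs[num_syncs]);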


Got it. Patch is:

Reviewed-by: Oak Zeng <oak.zeng@intel.com>
> 
> Matt
> 
> > > diff --git a/drivers/gpu/drm/xe/xe_vm_types.h
> > > b/drivers/gpu/drm/xe/xe_vm_types.h
> > > index 466b6c62d1f9..149ab892967e 100644
> > > --- a/drivers/gpu/drm/xe/xe_vm_types.h
> > > +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> > > @@ -330,11 +330,6 @@ enum xe_vma_op_flags {
> > >  struct xe_vma_op {
> > >  	/** @base: GPUVA base operation */
> > >  	struct drm_gpuva_op base;
> > > -	/**
> > > -	 * @ops: GPUVA ops, when set call drm_gpuva_ops_free after this
> > > -	 * operations is processed
> > > -	 */
> > > -	struct drm_gpuva_ops *ops;
> > >  	/** @q: exec queue for this operation */
> > >  	struct xe_exec_queue *q;
> > >  	/**
> > > --
> > > 2.34.1
> >

^ permalink raw reply	[flat|nested] 40+ messages in thread

* RE: [PATCH 07/13] drm/xe: Use xe_vma_ops to implement page fault rebinds
  2024-04-19 19:33     ` Matthew Brost
@ 2024-04-23  3:27       ` Zeng, Oak
  0 siblings, 0 replies; 40+ messages in thread
From: Zeng, Oak @ 2024-04-23  3:27 UTC (permalink / raw)
  To: Brost, Matthew; +Cc: intel-xe



> -----Original Message-----
> From: Brost, Matthew <matthew.brost@intel.com>
> Sent: Friday, April 19, 2024 3:34 PM
> To: Zeng, Oak <oak.zeng@intel.com>
> Cc: intel-xe@lists.freedesktop.org
> Subject: Re: [PATCH 07/13] drm/xe: Use xe_vma_ops to implement page
> fault rebinds
> 
> On Fri, Apr 19, 2024 at 08:22:29AM -0600, Zeng, Oak wrote:
> >
> >
> > > -----Original Message-----
> > > From: Intel-xe <intel-xe-bounces@lists.freedesktop.org> On Behalf Of
> > > Matthew Brost
> > > Sent: Wednesday, April 10, 2024 1:41 AM
> > > To: intel-xe@lists.freedesktop.org
> > > Cc: Brost, Matthew <matthew.brost@intel.com>
> > > Subject: [PATCH 07/13] drm/xe: Use xe_vma_ops to implement page
> fault
> > > rebinds
> > >
> > > All page tables updates are moving to a xe_vma_ops interface to
> > > implement 1 job per VM bind IOCTL.
> >
> > Can you explain why using the xe_vma_ops interface is necessary even to bind
> > one vma? I understand it makes sense to use this interface to bind multiple
> > vmas. See also below.
> >
> 
> Essentially, once we switch to 1 job per bind IOCTL [1], xe_vma_ops is passed
> around throughout all the layers. The xe_vma_ops list is a single atomic
> unit for updating the GPUVA state, internal PT state, and GPU page tables. If
> at any point something fails, xe_vma_ops can be unwound, restoring all the
> original state.


Ok, that makes sense to me. Maybe explain this a little in the commit message? The current commit message doesn't say why we are moving to the xe_vma_ops interface. Anyway, patch is:

Reviewed-by: Oak Zeng <oak.zeng@intel.com>

> 
> i.e. __xe_pt_bind_vma will be deleted and replaced with a function
> that accepts an xe_vma_ops list; ops_execute() is the correct place to
> hook into the software pipeline, as we already have the locks and only the
> internal PT state and GPU page tables need to be updated.
> 
> [1] https://patchwork.freedesktop.org/patch/582024/?series=125608&rev=5
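> 
> Purely as an illustration of that direction (the helper names below are
> placeholders, not the actual code in [1]), the execute step would collapse
> to something like:
> 
> 	/* one job covering every op in vops; all-or-nothing */
> 	fence = xe_pt_update_run(tile, vops);	/* placeholder name */
> 	if (IS_ERR(fence)) {
> 		xe_vma_ops_unwind(vops);	/* placeholder: restore GPUVA/PT state */
> 		return PTR_ERR(fence);
> 	}
> 	vm_bind_ioctl_ops_fini(vm, vops, fence);	/* patch #9 in this series */
> 	return 0;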
> 
> >
> >  Add xe_vma_rebind function which is
> > > implemented using xe_vma_ops interface. Use xe_vma_rebind in page
> > > faults
> > > for rebinds.
> > >
> > > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > > ---
> > >  drivers/gpu/drm/xe/xe_gt_pagefault.c | 16 ++++----
> > >  drivers/gpu/drm/xe/xe_vm.c           | 57 +++++++++++++++++++++++----
> -
> > >  drivers/gpu/drm/xe/xe_vm.h           |  2 +
> > >  drivers/gpu/drm/xe/xe_vm_types.h     |  2 +
> > >  4 files changed, 58 insertions(+), 19 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > index fa9e9853c53b..040dd142c49c 100644
> > > --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > @@ -19,7 +19,6 @@
> > >  #include "xe_guc.h"
> > >  #include "xe_guc_ct.h"
> > >  #include "xe_migrate.h"
> > > -#include "xe_pt.h"
> > >  #include "xe_trace.h"
> > >  #include "xe_vm.h"
> > >
> > > @@ -204,15 +203,14 @@ static int handle_pagefault(struct xe_gt *gt,
> struct
> > > pagefault *pf)
> > >  		drm_exec_retry_on_contention(&exec);
> > >  		if (ret)
> > >  			goto unlock_dma_resv;
> > > -	}
> > >
> > > -	/* Bind VMA only to the GT that has faulted */
> > > -	trace_xe_vma_pf_bind(vma);
> > > -	fence = __xe_pt_bind_vma(tile, vma, xe_tile_migrate_engine(tile),
> > > NULL, 0,
> > > -				 vma->tile_present & BIT(tile->id));
> > > -	if (IS_ERR(fence)) {
> > > -		ret = PTR_ERR(fence);
> > > -		goto unlock_dma_resv;
> > > +		/* Bind VMA only to the GT that has faulted */
> > > +		trace_xe_vma_pf_bind(vma);
> > > +		fence = xe_vma_rebind(vm, vma, BIT(tile->id));
> > > +		if (IS_ERR(fence)) {
> > > +			ret = PTR_ERR(fence);
> > > +			goto unlock_dma_resv;
> > > +		}
> > >  	}
> > >
> > >  	/*
> > > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > > index 8f5b24c8f6cd..54a69fbfbb00 100644
> > > --- a/drivers/gpu/drm/xe/xe_vm.c
> > > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > > @@ -815,6 +815,7 @@ static void xe_vm_populate_rebind(struct
> > > xe_vma_op *op, struct xe_vma *vma,
> > >  				  u8 tile_mask)
> > >  {
> > >  	INIT_LIST_HEAD(&op->link);
> > > +	op->tile_mask = tile_mask;
> > >  	op->base.op = DRM_GPUVA_OP_MAP;
> > >  	op->base.map.va.addr = vma->gpuva.va.addr;
> > >  	op->base.map.va.range = vma->gpuva.va.range;
> > > @@ -893,6 +894,33 @@ int xe_vm_rebind(struct xe_vm *vm, bool
> > > rebind_worker)
> > >  	return err;
> > >  }
> > >
> > > +struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma
> *vma,
> > > u8 tile_mask)
> >
> >
> > I'm trying to figure out why this function is necessary. We are only binding
> > one vma here. Why do we need to create an xe_vma_ops list? We are only adding
> > one vma to this list....
> >
> 
> See above; the ability to directly modify page tables without an xe_vma_ops
> list will be removed.
> 
> Matt
> 
> > Oak
> >
> > > +{
> > > +	struct dma_fence *fence = NULL;
> > > +	struct xe_vma_ops vops;
> > > +	struct xe_vma_op *op, *next_op;
> > > +	int err;
> > > +
> > > +	lockdep_assert_held(&vm->lock);
> > > +	xe_vm_assert_held(vm);
> > > +	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
> > > +
> > > +	xe_vma_ops_init(&vops);
> > > +
> > > +	err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
> > > +	if (err)
> > > +		return ERR_PTR(err);
> > > +
> > > +	fence = ops_execute(vm, &vops);
> > > +
> > > +	list_for_each_entry_safe(op, next_op, &vops.list, link) {
> > > +		list_del(&op->link);
> > > +		kfree(op);
> > > +	}
> > > +
> > > +	return fence;
> > > +}
> > > +
> > >  static void xe_vma_free(struct xe_vma *vma)
> > >  {
> > >  	if (xe_vma_is_userptr(vma))
> > > @@ -1796,7 +1824,7 @@ xe_vm_unbind_vma(struct xe_vma *vma,
> struct
> > > xe_exec_queue *q,
> > >  static struct dma_fence *
> > >  xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
> > >  	       struct xe_sync_entry *syncs, u32 num_syncs,
> > > -	       bool first_op, bool last_op)
> > > +	       u8 tile_mask, bool first_op, bool last_op)
> > >  {
> > >  	struct xe_tile *tile;
> > >  	struct dma_fence *fence;
> > > @@ -1804,7 +1832,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct
> > > xe_exec_queue *q,
> > >  	struct dma_fence_array *cf = NULL;
> > >  	struct xe_vm *vm = xe_vma_vm(vma);
> > >  	int cur_fence = 0, i;
> > > -	int number_tiles = hweight8(vma->tile_mask);
> > > +	int number_tiles = hweight8(tile_mask);
> > >  	int err;
> > >  	u8 id;
> > >
> > > @@ -1818,7 +1846,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct
> > > xe_exec_queue *q,
> > >  	}
> > >
> > >  	for_each_tile(tile, vm->xe, id) {
> > > -		if (!(vma->tile_mask & BIT(id)))
> > > +		if (!(tile_mask & BIT(id)))
> > >  			goto next;
> > >
> > >  		fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
> > > @@ -1886,7 +1914,7 @@ find_ufence_get(struct xe_sync_entry *syncs,
> u32
> > > num_syncs)
> > >  static struct dma_fence *
> > >  xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct
> > > xe_exec_queue *q,
> > >  	   struct xe_bo *bo, struct xe_sync_entry *syncs, u32 num_syncs,
> > > -	   bool immediate, bool first_op, bool last_op)
> > > +	   u8 tile_mask, bool immediate, bool first_op, bool last_op)
> > >  {
> > >  	struct dma_fence *fence;
> > >  	struct xe_exec_queue *wait_exec_queue =
> > > to_wait_exec_queue(vm, q);
> > > @@ -1902,8 +1930,8 @@ xe_vm_bind(struct xe_vm *vm, struct xe_vma
> > > *vma, struct xe_exec_queue *q,
> > >  	vma->ufence = ufence ?: vma->ufence;
> > >
> > >  	if (immediate) {
> > > -		fence = xe_vm_bind_vma(vma, q, syncs, num_syncs,
> > > first_op,
> > > -				       last_op);
> > > +		fence = xe_vm_bind_vma(vma, q, syncs, num_syncs,
> > > tile_mask,
> > > +				       first_op, last_op);
> > >  		if (IS_ERR(fence))
> > >  			return fence;
> > >  	} else {
> > > @@ -2095,7 +2123,7 @@ xe_vm_prefetch(struct xe_vm *vm, struct
> xe_vma
> > > *vma,
> > >
> > >  	if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated))
> > > {
> > >  		return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs,
> > > num_syncs,
> > > -				  true, first_op, last_op);
> > > +				  vma->tile_mask, true, first_op, last_op);
> > >  	} else {
> > >  		struct dma_fence *fence =
> > >  			xe_exec_queue_last_fence_get(wait_exec_queue,
> > > vm);
> > > @@ -2408,10 +2436,15 @@ static int vm_bind_ioctl_ops_parse(struct
> xe_vm
> > > *vm, struct xe_exec_queue *q,
> > >  	struct xe_device *xe = vm->xe;
> > >  	struct xe_vma_op *last_op = NULL;
> > >  	struct drm_gpuva_op *__op;
> > > +	struct xe_tile *tile;
> > > +	u8 id, tile_mask = 0;
> > >  	int err = 0;
> > >
> > >  	lockdep_assert_held_write(&vm->lock);
> > >
> > > +	for_each_tile(tile, vm->xe, id)
> > > +		tile_mask |= 0x1 << id;
> > > +
> > >  	drm_gpuva_for_each_op(__op, ops) {
> > >  		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
> > >  		struct xe_vma *vma;
> > > @@ -2428,6 +2461,7 @@ static int vm_bind_ioctl_ops_parse(struct
> xe_vm
> > > *vm, struct xe_exec_queue *q,
> > >  		}
> > >
> > >  		op->q = q;
> > > +		op->tile_mask = tile_mask;
> > >
> > >  		switch (op->base.op) {
> > >  		case DRM_GPUVA_OP_MAP:
> > > @@ -2574,6 +2608,7 @@ static struct dma_fence *op_execute(struct
> xe_vm
> > > *vm, struct xe_vma *vma,
> > >  		fence = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
> > >  				   op->syncs, op->num_syncs,
> > >  				   op->map.immediate
> > > || !xe_vm_in_fault_mode(vm),
> > > +				   op->tile_mask,
> > >  				   op->flags & XE_VMA_OP_FIRST,
> > >  				   op->flags & XE_VMA_OP_LAST);
> > >  		break;
> > > @@ -2600,7 +2635,9 @@ static struct dma_fence *op_execute(struct
> xe_vm
> > > *vm, struct xe_vma *vma,
> > >  			dma_fence_put(fence);
> > >  			fence = xe_vm_bind(vm, op->remap.prev, op->q,
> > >  					   xe_vma_bo(op->remap.prev), op-
> > > >syncs,
> > > -					   op->num_syncs, true, false,
> > > +					   op->num_syncs,
> > > +					   op->remap.prev->tile_mask, true,
> > > +					   false,
> > >  					   op->flags & XE_VMA_OP_LAST
> > > && !next);
> > >  			op->remap.prev->gpuva.flags &=
> > > ~XE_VMA_LAST_REBIND;
> > >  			if (IS_ERR(fence))
> > > @@ -2614,8 +2651,8 @@ static struct dma_fence *op_execute(struct
> xe_vm
> > > *vm, struct xe_vma *vma,
> > >  			fence = xe_vm_bind(vm, op->remap.next, op->q,
> > >  					   xe_vma_bo(op->remap.next),
> > >  					   op->syncs, op->num_syncs,
> > > -					   true, false,
> > > -					   op->flags & XE_VMA_OP_LAST);
> > > +					   op->remap.next->tile_mask, true,
> > > +					   false, op->flags &
> > > XE_VMA_OP_LAST);
> > >  			op->remap.next->gpuva.flags &=
> > > ~XE_VMA_LAST_REBIND;
> > >  			if (IS_ERR(fence))
> > >  				break;
> > > diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
> > > index 306cd0934a19..204a4ff63f88 100644
> > > --- a/drivers/gpu/drm/xe/xe_vm.h
> > > +++ b/drivers/gpu/drm/xe/xe_vm.h
> > > @@ -208,6 +208,8 @@ int __xe_vm_userptr_needs_repin(struct xe_vm
> > > *vm);
> > >  int xe_vm_userptr_check_repin(struct xe_vm *vm);
> > >
> > >  int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
> > > +struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma
> *vma,
> > > +				u8 tile_mask);
> > >
> > >  int xe_vm_invalidate_vma(struct xe_vma *vma);
> > >
> > > diff --git a/drivers/gpu/drm/xe/xe_vm_types.h
> > > b/drivers/gpu/drm/xe/xe_vm_types.h
> > > index 149ab892967e..e9cd6da6263a 100644
> > > --- a/drivers/gpu/drm/xe/xe_vm_types.h
> > > +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> > > @@ -343,6 +343,8 @@ struct xe_vma_op {
> > >  	struct list_head link;
> > >  	/** @flags: operation flags */
> > >  	enum xe_vma_op_flags flags;
> > > +	/** @tile_mask: Tile mask for operation */
> > > +	u8 tile_mask;
> > >
> > >  	union {
> > >  		/** @map: VMA map operation specific data */
> > > --
> > > 2.34.1
> >

^ permalink raw reply	[flat|nested] 40+ messages in thread

* RE: [PATCH 03/13] drm/xe: Move migrate to prefetch to op_lock_and_prep function
  2024-04-19 19:52     ` Matthew Brost
@ 2024-04-23  3:32       ` Zeng, Oak
  0 siblings, 0 replies; 40+ messages in thread
From: Zeng, Oak @ 2024-04-23  3:32 UTC (permalink / raw)
  To: Brost, Matthew; +Cc: intel-xe



> -----Original Message-----
> From: Brost, Matthew <matthew.brost@intel.com>
> Sent: Friday, April 19, 2024 3:53 PM
> To: Zeng, Oak <oak.zeng@intel.com>
> Cc: intel-xe@lists.freedesktop.org
> Subject: Re: [PATCH 03/13] drm/xe: Move migrate to prefetch to
> op_lock_and_prep function
> 
> On Thu, Apr 18, 2024 at 01:27:13PM -0600, Zeng, Oak wrote:
> >
> >
> > > -----Original Message-----
> > > From: Brost, Matthew <matthew.brost@intel.com>
> > > Sent: Wednesday, April 10, 2024 1:41 AM
> > > To: intel-xe@lists.freedesktop.org
> > > Cc: Brost, Matthew <matthew.brost@intel.com>; Zeng, Oak
> > > <oak.zeng@intel.com>
> > > Subject: [PATCH 03/13] drm/xe: Move migrate to prefetch to
> > > op_lock_and_prep function
> > >
> > > All non-binding operations in VM bind IOCTL should be in the lock and
> > > prepare step rather than the execution step. Move prefetch to conform
> to
> > > this pattern.
> > >
> > > v2:
> > >  - Rebase
> > >  - New function names (Oak)
> > >  - Update stale comment (Oak)
> > >
> > > Cc: Oak Zeng <oak.zeng@intel.com>
> > > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > > ---
> > >  drivers/gpu/drm/xe/xe_vm.c | 30 +++++++++++++++---------------
> > >  1 file changed, 15 insertions(+), 15 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > > index 84c6b10b4b78..2c0521573154 100644
> > > --- a/drivers/gpu/drm/xe/xe_vm.c
> > > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > > @@ -2039,20 +2039,10 @@ static const u32 region_to_mem_type[] = {
> > >
> > >  static struct dma_fence *
> > >  xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
> > > -	       struct xe_exec_queue *q, u32 region,
> > > -	       struct xe_sync_entry *syncs, u32 num_syncs,
> > > -	       bool first_op, bool last_op)
> > > +	       struct xe_exec_queue *q, struct xe_sync_entry *syncs,
> > > +	       u32 num_syncs, bool first_op, bool last_op)
> >
> >
> > I am wondering, do you still need this function? The original prefetch
> > function is migration + vm_bind. Now that you have moved the migration to the
> > lock_and_prepare step, only the vm bind is left...
> >
> > Even if you keep this function, we should change the name... it is not a
> > prefetch anymore...
> >
> 
> I'd rather leave it as is for the following reasons:
> 
> 1. The code is slightly different and skips the bind under certain conditions.
> 2. It still implements the prefetch op, so the name applies.
> 3. This is just a staging patch and this function gets deleted once a
> version of [1] is merged; I'd rather not squabble / nitpick code that
> is temporary. The goal is to not regress behavior while making progress
> towards [1].


Yah, I eventually found that this function is deleted in the series below... Patch is:

Reviewed-by: Oak Zeng <oak.zeng@intel.com>
> 
> Matt
> 
> [1] https://patchwork.freedesktop.org/patch/582024/?series=125608&rev=5
> 
> > Oak
> >
> > >  {
> > >  	struct xe_exec_queue *wait_exec_queue =
> > > to_wait_exec_queue(vm, q);
> > > -	int err;
> > > -
> > > -	xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
> > > -
> > > -	if (!xe_vma_has_no_bo(vma)) {
> > > -		err = xe_bo_migrate(xe_vma_bo(vma),
> > > region_to_mem_type[region]);
> > > -		if (err)
> > > -			return ERR_PTR(err);
> > > -	}
> > >
> > >  	if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated))
> > > {
> > >  		return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs,
> > > num_syncs,
> > > @@ -2592,8 +2582,7 @@ static struct dma_fence *op_execute(struct
> xe_vm
> > > *vm, struct xe_vma *vma,
> > >  				     op->flags & XE_VMA_OP_LAST);
> > >  		break;
> > >  	case DRM_GPUVA_OP_PREFETCH:
> > > -		fence = xe_vm_prefetch(vm, vma, op->q, op-
> > > >prefetch.region,
> > > -				       op->syncs, op->num_syncs,
> > > +		fence = xe_vm_prefetch(vm, vma, op->q, op->syncs, op-
> > > >num_syncs,
> > >  				       op->flags & XE_VMA_OP_FIRST,
> > >  				       op->flags & XE_VMA_OP_LAST);
> > >  		break;
> > > @@ -2823,9 +2812,20 @@ static int op_lock_and_prep(struct drm_exec
> > > *exec, struct xe_vm *vm,
> > >  					    false);
> > >  		break;
> > >  	case DRM_GPUVA_OP_PREFETCH:
> > > +	{
> > > +		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> > > +		u32 region = op->prefetch.region;
> > > +
> > > +		xe_assert(vm->xe, region <=
> > > ARRAY_SIZE(region_to_mem_type));
> > > +
> > >  		err = vma_lock_and_validate(exec,
> > > -					    gpuva_to_vma(op-
> > > >base.prefetch.va), true);
> > > +					    gpuva_to_vma(op-
> > > >base.prefetch.va),
> > > +					    false);
> > > +		if (!err && !xe_vma_has_no_bo(vma))
> > > +			err = xe_bo_migrate(xe_vma_bo(vma),
> > > +					    region_to_mem_type[region]);
> > >  		break;
> > > +	}
> > >  	default:
> > >  		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
> > >  	}
> > > --
> > > 2.34.1
> >

^ permalink raw reply	[flat|nested] 40+ messages in thread

* RE: [PATCH 11/13] drm/xe: Move ufence add to vm_bind_ioctl_ops_fini
  2024-04-19 19:45     ` Matthew Brost
@ 2024-04-23  3:36       ` Zeng, Oak
  0 siblings, 0 replies; 40+ messages in thread
From: Zeng, Oak @ 2024-04-23  3:36 UTC (permalink / raw)
  To: Brost, Matthew; +Cc: intel-xe



> -----Original Message-----
> From: Brost, Matthew <matthew.brost@intel.com>
> Sent: Friday, April 19, 2024 3:45 PM
> To: Zeng, Oak <oak.zeng@intel.com>
> Cc: intel-xe@lists.freedesktop.org
> Subject: Re: [PATCH 11/13] drm/xe: Move ufence add to
> vm_bind_ioctl_ops_fini
> 
> On Fri, Apr 19, 2024 at 09:24:18AM -0600, Zeng, Oak wrote:
> >
> >
> > > -----Original Message-----
> > > From: Intel-xe <intel-xe-bounces@lists.freedesktop.org> On Behalf Of
> > > Matthew Brost
> > > Sent: Wednesday, April 10, 2024 1:41 AM
> > > To: intel-xe@lists.freedesktop.org
> > > Cc: Brost, Matthew <matthew.brost@intel.com>
> > > Subject: [PATCH 11/13] drm/xe: Move ufence add to
> vm_bind_ioctl_ops_fini
> > >
> > > Rather than adding a ufence to a VMA in the bind function, add the
> > > ufence to all VMAs in the IOCTL that require binds in
> > > vm_bind_ioctl_ops_install_fences.
> >
> > This is a typo, right? From the code, it should be vm_bind_ioctl_ops_fini.
> >
> 
> Yes, typo. Will fix in next rev.
> 
> > I also want to make sure I understand here: the ufence added to a vma is
> > *only* used to make sure the last vma bind has completed by the time the vma
> > is unbound. So even though it is more natural to set the ufence in the bind
> > function, it is safe to set it after all operations are submitted (in
> > vm_bind_ioctl_ops_fini). No vm_bind ioctl (and no vma unbind triggered by an
> > ioctl) can go through *before* the last vm bind ioctl's ops finish, right?
> >
> 
> The ufence is attached to all VMAs being bound in an IOCTL. It prevents
> any of those VMAs from being unbound until the attached ufence has
> signaled (binding operation complete).
> 
> It is safe (and correct) to attach the ufence *after* the operations are
> submitted because we are past the point of failure and are under the
> VM->lock. The ufence could be signaled or unsignaled when attached to
> the VMA; attaching an already-signaled ufence is safe due to ref counting.
> Future IOCTLs return -EBUSY if trying to unbind a VMA which has an unsignaled
> ufence.
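> 
> For reference, the unbind-side check is roughly (a sketch using the helpers
> from this series, not a verbatim quote):
> 
> 	if (vma->ufence) {
> 		if (!xe_sync_ufence_get_status(vma->ufence))
> 			return -EBUSY;
> 
> 		xe_sync_ufence_put(vma->ufence);
> 		vma->ufence = NULL;
> 	}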

Thanks for explaining. Patch LGTM:

Reviewed-by: Oak Zeng <oak.zeng@intel.com>

> 
> Matt
> 
> > Oak
> >
> >
> >
> >  This will help with the transition to
> > > job 1 per VM bind IOCTL.
> > >
> > > v2:
> > >  - Rebase
> > >
> > > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > > ---
> > >  drivers/gpu/drm/xe/xe_sync.c | 15 ++++++++++++
> > >  drivers/gpu/drm/xe/xe_sync.h |  1 +
> > >  drivers/gpu/drm/xe/xe_vm.c   | 44
> ++++++++++++++++++++++++++++++--
> > > ----
> > >  3 files changed, 53 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/xe/xe_sync.c
> b/drivers/gpu/drm/xe/xe_sync.c
> > > index 65f1f1628235..2883d9aca404 100644
> > > --- a/drivers/gpu/drm/xe/xe_sync.c
> > > +++ b/drivers/gpu/drm/xe/xe_sync.c
> > > @@ -338,6 +338,21 @@ xe_sync_in_fence_get(struct xe_sync_entry
> *sync,
> > > int num_sync,
> > >  	return ERR_PTR(-ENOMEM);
> > >  }
> > >
> > > +/**
> > > + * __xe_sync_ufence_get() - Get user fence from user fence
> > > + * @ufence: input user fence
> > > + *
> > > + * Get a user fence reference from user fence
> > > + *
> > > + * Return: xe_user_fence pointer with reference
> > > + */
> > > +struct xe_user_fence *__xe_sync_ufence_get(struct xe_user_fence
> > > *ufence)
> > > +{
> > > +	user_fence_get(ufence);
> > > +
> > > +	return ufence;
> > > +}
> > > +
> > >  /**
> > >   * xe_sync_ufence_get() - Get user fence from sync
> > >   * @sync: input sync
> > > diff --git a/drivers/gpu/drm/xe/xe_sync.h
> b/drivers/gpu/drm/xe/xe_sync.h
> > > index 3e03396af2c6..006dbf780793 100644
> > > --- a/drivers/gpu/drm/xe/xe_sync.h
> > > +++ b/drivers/gpu/drm/xe/xe_sync.h
> > > @@ -37,6 +37,7 @@ static inline bool xe_sync_is_ufence(struct
> > > xe_sync_entry *sync)
> > >  	return !!sync->ufence;
> > >  }
> > >
> > > +struct xe_user_fence *__xe_sync_ufence_get(struct xe_user_fence
> > > *ufence);
> > >  struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry
> *sync);
> > >  void xe_sync_ufence_put(struct xe_user_fence *ufence);
> > >  int xe_sync_ufence_get_status(struct xe_user_fence *ufence);
> > > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > > index 0319e70577fe..1da68a03407b 100644
> > > --- a/drivers/gpu/drm/xe/xe_vm.c
> > > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > > @@ -1900,17 +1900,10 @@ xe_vm_bind(struct xe_vm *vm, struct
> xe_vma
> > > *vma, struct xe_exec_queue *q,
> > >  {
> > >  	struct dma_fence *fence;
> > >  	struct xe_exec_queue *wait_exec_queue =
> > > to_wait_exec_queue(vm, q);
> > > -	struct xe_user_fence *ufence;
> > >
> > >  	xe_vm_assert_held(vm);
> > >  	xe_bo_assert_held(bo);
> > >
> > > -	ufence = find_ufence_get(syncs, num_syncs);
> > > -	if (vma->ufence && ufence)
> > > -		xe_sync_ufence_put(vma->ufence);
> > > -
> > > -	vma->ufence = ufence ?: vma->ufence;
> > > -
> > >  	if (immediate) {
> > >  		fence = xe_vm_bind_vma(vma, q, syncs, num_syncs,
> > > tile_mask,
> > >  				       first_op, last_op);
> > > @@ -2918,20 +2911,57 @@ static struct dma_fence *ops_execute(struct
> > > xe_vm *vm,
> > >  	return fence;
> > >  }
> > >
> > > +static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence
> > > *ufence)
> > > +{
> > > +	if (vma->ufence)
> > > +		xe_sync_ufence_put(vma->ufence);
> > > +	vma->ufence = __xe_sync_ufence_get(ufence);
> > > +}
> > > +
> > > +static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
> > > +			  struct xe_user_fence *ufence)
> > > +{
> > > +	switch (op->base.op) {
> > > +	case DRM_GPUVA_OP_MAP:
> > > +		vma_add_ufence(op->map.vma, ufence);
> > > +		break;
> > > +	case DRM_GPUVA_OP_REMAP:
> > > +		if (op->remap.prev)
> > > +			vma_add_ufence(op->remap.prev, ufence);
> > > +		if (op->remap.next)
> > > +			vma_add_ufence(op->remap.next, ufence);
> > > +		break;
> > > +	case DRM_GPUVA_OP_UNMAP:
> > > +		break;
> > > +	case DRM_GPUVA_OP_PREFETCH:
> > > +		vma_add_ufence(gpuva_to_vma(op->base.prefetch.va),
> > > ufence);
> > > +		break;
> > > +	default:
> > > +		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
> > > +	}
> > > +}
> > > +
> > >  static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct
> xe_vma_ops
> > > *vops,
> > >  				   struct dma_fence *fence)
> > >  {
> > >  	struct xe_exec_queue *wait_exec_queue =
> > > to_wait_exec_queue(vm, vops->q);
> > > +	struct xe_user_fence *ufence;
> > >  	struct xe_vma_op *op;
> > >  	int i;
> > >
> > > +	ufence = find_ufence_get(vops->syncs, vops->num_syncs);
> > >  	list_for_each_entry(op, &vops->list, link) {
> > > +		if (ufence)
> > > +			op_add_ufence(vm, op, ufence);
> > > +
> > >  		if (op->base.op == DRM_GPUVA_OP_UNMAP)
> > >  			xe_vma_destroy(gpuva_to_vma(op-
> > > >base.unmap.va), fence);
> > >  		else if (op->base.op == DRM_GPUVA_OP_REMAP)
> > >  			xe_vma_destroy(gpuva_to_vma(op-
> > > >base.remap.unmap->va),
> > >  				       fence);
> > >  	}
> > > +	if (ufence)
> > > +		xe_sync_ufence_put(ufence);
> > >  	for (i = 0; i < vops->num_syncs; i++)
> > >  		xe_sync_entry_signal(vops->syncs + i, fence);
> > >  	xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
> > > --
> > > 2.34.1
> >

^ permalink raw reply	[flat|nested] 40+ messages in thread

end of thread, other threads:[~2024-04-23  3:36 UTC | newest]

Thread overview: 40+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-10  5:40 [PATCH 00/13] Prep patches for 1 job per VM bind IOCTL Matthew Brost
2024-04-10  5:40 ` [PATCH 01/13] drm/xe: Lock all gpuva ops during " Matthew Brost
2024-04-16 15:51   ` Zeng, Oak
2024-04-16 17:02     ` Matthew Brost
2024-04-10  5:40 ` [PATCH 02/13] drm/xe: Add ops_execute function which returns a fence Matthew Brost
2024-04-18 16:16   ` Zeng, Oak
2024-04-18 19:36     ` Matthew Brost
2024-04-23  3:09       ` Zeng, Oak
2024-04-10  5:40 ` [PATCH 03/13] drm/xe: Move migrate to prefetch to op_lock_and_prep function Matthew Brost
2024-04-18 19:27   ` Zeng, Oak
2024-04-19 19:52     ` Matthew Brost
2024-04-23  3:32       ` Zeng, Oak
2024-04-10  5:40 ` [PATCH 04/13] drm/xe: Add struct xe_vma_ops abstraction Matthew Brost
2024-04-10  5:40 ` [PATCH 05/13] drm/xe: Use xe_vma_ops to implement xe_vm_rebind Matthew Brost
2024-04-19  3:43   ` Zeng, Oak
2024-04-19  4:14     ` Matthew Brost
2024-04-23  3:17       ` Zeng, Oak
2024-04-10  5:40 ` [PATCH 06/13] drm/xe: Simplify VM bind IOCTL error handling and cleanup Matthew Brost
2024-04-19  4:19   ` Zeng, Oak
2024-04-19 19:16     ` Matthew Brost
2024-04-23  3:22       ` Zeng, Oak
2024-04-10  5:40 ` [PATCH 07/13] drm/xe: Use xe_vma_ops to implement page fault rebinds Matthew Brost
2024-04-19 14:22   ` Zeng, Oak
2024-04-19 19:33     ` Matthew Brost
2024-04-23  3:27       ` Zeng, Oak
2024-04-10  5:40 ` [PATCH 08/13] drm/xe: Add some members to xe_vma_ops Matthew Brost
2024-04-19 14:24   ` Zeng, Oak
2024-04-10  5:40 ` [PATCH 09/13] drm/xe: Add vm_bind_ioctl_ops_fini helper Matthew Brost
2024-04-19 14:51   ` Zeng, Oak
2024-04-10  5:40 ` [PATCH 10/13] drm/xe: Move ufence check to op_lock Matthew Brost
2024-04-19 14:56   ` Zeng, Oak
2024-04-19 19:34     ` Matthew Brost
2024-04-10  5:40 ` [PATCH 11/13] drm/xe: Move ufence add to vm_bind_ioctl_ops_fini Matthew Brost
2024-04-19 15:24   ` Zeng, Oak
2024-04-19 19:45     ` Matthew Brost
2024-04-23  3:36       ` Zeng, Oak
2024-04-10  5:40 ` [PATCH 12/13] drm/xe: Add xe_gt_tlb_invalidation_range and convert PT layer to use this Matthew Brost
2024-04-19 16:00   ` Zeng, Oak
2024-04-10  5:40 ` [PATCH 13/13] drm/xe: Delete PT update selftest Matthew Brost
2024-04-10  6:28 ` ✗ CI.Patch_applied: failure for Prep patches for 1 job per VM bind IOCTL Patchwork
