All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/i915: Remove asynchronous vma binding
@ 2021-04-20 10:34 ` Maarten Lankhorst
  0 siblings, 0 replies; 9+ messages in thread
From: Maarten Lankhorst @ 2021-04-20 10:34 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, dri-devel

Commit e3793468b466 ("drm/i915: Use the async worker to avoid reclaim
tainting the ggtt->mutex") moves the vma binding to dma_fence_work,
but dma_fence_work has has signalling fence semantics.

On braswell, we can call stop_machine inside fence_work, causing a splat
because memory allocation inside stop_machine is allowed.

This patch does not fix that lockdep splat yet, but will allow the next
patch to remove it.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c |   3 -
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c       |   1 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c       |   1 -
 drivers/gpu/drm/i915/gt/intel_ggtt.c       |   4 -
 drivers/gpu/drm/i915/gt/intel_gtt.h        |   2 -
 drivers/gpu/drm/i915/i915_gem.c            |   6 -
 drivers/gpu/drm/i915/i915_vma.c            | 174 +++------------------
 drivers/gpu/drm/i915/i915_vma.h            |   5 +-
 drivers/gpu/drm/i915/i915_vma_types.h      |   3 -
 9 files changed, 21 insertions(+), 178 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index b0597de206de..ec04df0a3b89 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -505,9 +505,6 @@ static void dbg_poison(struct i915_ggtt *ggtt,
 	if (!drm_mm_node_allocated(&ggtt->error_capture))
 		return;
 
-	if (ggtt->vm.bind_async_flags & I915_VMA_GLOBAL_BIND)
-		return; /* beware stop_machine() inversion */
-
 	GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
 
 	mutex_lock(&ggtt->error_mutex);
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index e08dff376339..0c5a9a767849 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -436,7 +436,6 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
 	ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
 	ppgtt->base.vm.top = 1;
 
-	ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
 	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
 	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
 	ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 176c19633412..92f8a23e66cc 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -736,7 +736,6 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
 			goto err_free_pd;
 	}
 
-	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
 	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
 	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
 	ppgtt->vm.clear_range = gen8_ppgtt_clear;
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 670c1271e7d5..e1ec6edae1fb 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -127,7 +127,6 @@ void i915_ggtt_suspend(struct i915_ggtt *ggtt)
 
 	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
 		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-		i915_vma_wait_for_bind(vma);
 
 		if (i915_vma_is_pinned(vma))
 			continue;
@@ -671,7 +670,6 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
 	ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);
 
 	ggtt->alias = ppgtt;
-	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
 
 	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
 	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
@@ -911,8 +909,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 	    IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
 		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
 		ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
-		ggtt->vm.bind_async_flags =
-			I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
 	}
 
 	ggtt->invalidate = gen8_ggtt_invalidate;
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index e67e34e17913..d9d2ca8b4b61 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -230,8 +230,6 @@ struct i915_address_space {
 	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
 	u64 reserved;		/* size addr space reserved */
 
-	unsigned int bind_async_flags;
-
 	/*
 	 * Each active user context has its own address space (in full-ppgtt).
 	 * Since the vm may be shared between multiple contexts, we count how
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b23f58e94cfb..4639c47c038b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -946,12 +946,6 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
 		mutex_unlock(&ggtt->vm.mutex);
 	}
 
-	ret = i915_vma_wait_for_bind(vma);
-	if (ret) {
-		i915_vma_unpin(vma);
-		return ERR_PTR(ret);
-	}
-
 	return vma;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 07490db51cdc..e4b2590d41ce 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -289,79 +289,6 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
 	return vma;
 }
 
-struct i915_vma_work {
-	struct dma_fence_work base;
-	struct i915_address_space *vm;
-	struct i915_vm_pt_stash stash;
-	struct i915_vma *vma;
-	struct drm_i915_gem_object *pinned;
-	struct i915_sw_dma_fence_cb cb;
-	enum i915_cache_level cache_level;
-	unsigned int flags;
-};
-
-static int __vma_bind(struct dma_fence_work *work)
-{
-	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
-	struct i915_vma *vma = vw->vma;
-
-	vma->ops->bind_vma(vw->vm, &vw->stash,
-			   vma, vw->cache_level, vw->flags);
-	return 0;
-}
-
-static void __vma_release(struct dma_fence_work *work)
-{
-	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
-
-	if (vw->pinned) {
-		__i915_gem_object_unpin_pages(vw->pinned);
-		i915_gem_object_put(vw->pinned);
-	}
-
-	i915_vm_free_pt_stash(vw->vm, &vw->stash);
-	i915_vm_put(vw->vm);
-}
-
-static const struct dma_fence_work_ops bind_ops = {
-	.name = "bind",
-	.work = __vma_bind,
-	.release = __vma_release,
-};
-
-struct i915_vma_work *i915_vma_work(void)
-{
-	struct i915_vma_work *vw;
-
-	vw = kzalloc(sizeof(*vw), GFP_KERNEL);
-	if (!vw)
-		return NULL;
-
-	dma_fence_work_init(&vw->base, &bind_ops);
-	vw->base.dma.error = -EAGAIN; /* disable the worker by default */
-
-	return vw;
-}
-
-int i915_vma_wait_for_bind(struct i915_vma *vma)
-{
-	int err = 0;
-
-	if (rcu_access_pointer(vma->active.excl.fence)) {
-		struct dma_fence *fence;
-
-		rcu_read_lock();
-		fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
-		rcu_read_unlock();
-		if (fence) {
-			err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT);
-			dma_fence_put(fence);
-		}
-	}
-
-	return err;
-}
-
 /**
  * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
  * @vma: VMA to map
@@ -375,8 +302,7 @@ int i915_vma_wait_for_bind(struct i915_vma *vma)
  */
 int i915_vma_bind(struct i915_vma *vma,
 		  enum i915_cache_level cache_level,
-		  u32 flags,
-		  struct i915_vma_work *work)
+		  u32 flags, struct i915_vm_pt_stash *stash)
 {
 	u32 bind_flags;
 	u32 vma_flags;
@@ -405,39 +331,7 @@ int i915_vma_bind(struct i915_vma *vma,
 	GEM_BUG_ON(!vma->pages);
 
 	trace_i915_vma_bind(vma, bind_flags);
-	if (work && bind_flags & vma->vm->bind_async_flags) {
-		struct dma_fence *prev;
-
-		work->vma = vma;
-		work->cache_level = cache_level;
-		work->flags = bind_flags;
-
-		/*
-		 * Note we only want to chain up to the migration fence on
-		 * the pages (not the object itself). As we don't track that,
-		 * yet, we have to use the exclusive fence instead.
-		 *
-		 * Also note that we do not want to track the async vma as
-		 * part of the obj->resv->excl_fence as it only affects
-		 * execution and not content or object's backing store lifetime.
-		 */
-		prev = i915_active_set_exclusive(&vma->active, &work->base.dma);
-		if (prev) {
-			__i915_sw_fence_await_dma_fence(&work->base.chain,
-							prev,
-							&work->cb);
-			dma_fence_put(prev);
-		}
-
-		work->base.dma.error = 0; /* enable the queue_work() */
-
-		if (vma->obj) {
-			__i915_gem_object_pin_pages(vma->obj);
-			work->pinned = i915_gem_object_get(vma->obj);
-		}
-	} else {
-		vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
-	}
+	vma->ops->bind_vma(vma->vm, stash, vma, cache_level, bind_flags);
 
 	atomic_or(bind_flags, &vma->flags);
 	return 0;
@@ -531,9 +425,6 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
 	if (!drm_mm_node_allocated(&vma->node))
 		return false;
 
-	if (test_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma)))
-		return true;
-
 	if (vma->node.size < size)
 		return true;
 
@@ -752,7 +643,7 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
 		if (unlikely(flags & ~bound))
 			return false;
 
-		if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR)))
+		if (unlikely(bound & I915_VMA_OVERFLOW))
 			return false;
 
 		if (!(bound & I915_VMA_PIN_MASK))
@@ -770,7 +661,7 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
 	 */
 	mutex_lock(&vma->vm->mutex);
 	do {
-		if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) {
+		if (unlikely(bound & I915_VMA_OVERFLOW)) {
 			pinned = false;
 			break;
 		}
@@ -857,10 +748,10 @@ static void vma_unbind_pages(struct i915_vma *vma)
 int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 		    u64 size, u64 alignment, u64 flags)
 {
-	struct i915_vma_work *work = NULL;
 	intel_wakeref_t wakeref = 0;
 	unsigned int bound;
 	int err;
+	struct i915_vm_pt_stash stash = {};
 
 #ifdef CONFIG_PROVE_LOCKING
 	if (debug_locks && !WARN_ON(!ww) && vma->resv)
@@ -883,33 +774,21 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	if (flags & PIN_GLOBAL)
 		wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
 
-	if (flags & vma->vm->bind_async_flags) {
-		/* lock VM */
-		err = i915_vm_lock_objects(vma->vm, ww);
+	if (vma->vm->allocate_va_range) {
+		err = i915_vm_alloc_pt_stash(vma->vm,
+					     &stash,
+					     vma->size);
 		if (err)
 			goto err_rpm;
 
-		work = i915_vma_work();
-		if (!work) {
-			err = -ENOMEM;
+		err = i915_vm_lock_objects(vma->vm, ww);
+		if (err)
 			goto err_rpm;
-		}
 
-		work->vm = i915_vm_get(vma->vm);
-
-		/* Allocate enough page directories to used PTE */
-		if (vma->vm->allocate_va_range) {
-			err = i915_vm_alloc_pt_stash(vma->vm,
-						     &work->stash,
-						     vma->size);
-			if (err)
-				goto err_fence;
-
-			err = i915_vm_pin_pt_stash(vma->vm,
-						   &work->stash);
-			if (err)
-				goto err_fence;
-		}
+		err = i915_vm_pin_pt_stash(vma->vm,
+					   &stash);
+		if (err)
+			goto err_rpm;
 	}
 
 	/*
@@ -932,7 +811,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	err = mutex_lock_interruptible_nested(&vma->vm->mutex,
 					      !(flags & PIN_GLOBAL));
 	if (err)
-		goto err_fence;
+		goto err_rpm;
 
 	/* No more allocations allowed now we hold vm->mutex */
 
@@ -942,11 +821,6 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	}
 
 	bound = atomic_read(&vma->flags);
-	if (unlikely(bound & I915_VMA_ERROR)) {
-		err = -ENOMEM;
-		goto err_unlock;
-	}
-
 	if (unlikely(!((bound + 1) & I915_VMA_PIN_MASK))) {
 		err = -EAGAIN; /* pins are meant to be fairly temporary */
 		goto err_unlock;
@@ -973,7 +847,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	GEM_BUG_ON(!vma->pages);
 	err = i915_vma_bind(vma,
 			    vma->obj ? vma->obj->cache_level : 0,
-			    flags, work);
+			    flags, &stash);
 	if (err)
 		goto err_remove;
 
@@ -996,10 +870,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	i915_active_release(&vma->active);
 err_unlock:
 	mutex_unlock(&vma->vm->mutex);
-err_fence:
-	if (work)
-		dma_fence_work_commit_imm(&work->base);
 err_rpm:
+	i915_vm_free_pt_stash(vma->vm, &stash);
 	if (wakeref)
 		intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
 	vma_put_pages(vma);
@@ -1034,14 +906,8 @@ int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 			err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL);
 		else
 			err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
-		if (err != -ENOSPC) {
-			if (!err) {
-				err = i915_vma_wait_for_bind(vma);
-				if (err)
-					i915_vma_unpin(vma);
-			}
+		if (err != -ENOSPC)
 			return err;
-		}
 
 		/* Unlike i915_vma_pin, we don't take no for an answer! */
 		flush_idle_contexts(vm->gt);
@@ -1306,7 +1172,7 @@ void __i915_vma_evict(struct i915_vma *vma)
 		trace_i915_vma_unbind(vma);
 		vma->ops->unbind_vma(vma->vm, vma);
 	}
-	atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE),
+	atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_GGTT_WRITE),
 		   &vma->flags);
 
 	i915_vma_detach(vma);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 8df784a026d2..d1d0fc76ef99 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -195,11 +195,10 @@ i915_vma_compare(struct i915_vma *vma,
 	return memcmp(&vma->ggtt_view.partial, &view->partial, view->type);
 }
 
-struct i915_vma_work *i915_vma_work(void);
 int i915_vma_bind(struct i915_vma *vma,
 		  enum i915_cache_level cache_level,
 		  u32 flags,
-		  struct i915_vma_work *work);
+		  struct i915_vm_pt_stash *stash);
 
 bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color);
 bool i915_vma_misplaced(const struct i915_vma *vma,
@@ -418,8 +417,6 @@ struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma);
 void i915_vma_make_shrinkable(struct i915_vma *vma);
 void i915_vma_make_purgeable(struct i915_vma *vma);
 
-int i915_vma_wait_for_bind(struct i915_vma *vma);
-
 static inline int i915_vma_sync(struct i915_vma *vma)
 {
 	/* Wait for the asynchronous bindings and pending GPU reads */
diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
index 6b1bfa230b82..82868a755a96 100644
--- a/drivers/gpu/drm/i915/i915_vma_types.h
+++ b/drivers/gpu/drm/i915/i915_vma_types.h
@@ -240,9 +240,6 @@ struct i915_vma {
 
 #define I915_VMA_ALLOC_BIT	12
 
-#define I915_VMA_ERROR_BIT	13
-#define I915_VMA_ERROR		((int)BIT(I915_VMA_ERROR_BIT))
-
 #define I915_VMA_GGTT_BIT	14
 #define I915_VMA_CAN_FENCE_BIT	15
 #define I915_VMA_USERFAULT_BIT	16
-- 
2.31.0

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Intel-gfx] [PATCH 1/2] drm/i915: Remove asynchronous vma binding
@ 2021-04-20 10:34 ` Maarten Lankhorst
  0 siblings, 0 replies; 9+ messages in thread
From: Maarten Lankhorst @ 2021-04-20 10:34 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, dri-devel

Commit e3793468b466 ("drm/i915: Use the async worker to avoid reclaim
tainting the ggtt->mutex") moves the vma binding to dma_fence_work,
but dma_fence_work has has signalling fence semantics.

On braswell, we can call stop_machine inside fence_work, causing a splat
because memory allocation inside stop_machine is allowed.

This patch does not fix that lockdep splat yet, but will allow the next
patch to remove it.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c |   3 -
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c       |   1 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c       |   1 -
 drivers/gpu/drm/i915/gt/intel_ggtt.c       |   4 -
 drivers/gpu/drm/i915/gt/intel_gtt.h        |   2 -
 drivers/gpu/drm/i915/i915_gem.c            |   6 -
 drivers/gpu/drm/i915/i915_vma.c            | 174 +++------------------
 drivers/gpu/drm/i915/i915_vma.h            |   5 +-
 drivers/gpu/drm/i915/i915_vma_types.h      |   3 -
 9 files changed, 21 insertions(+), 178 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index b0597de206de..ec04df0a3b89 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -505,9 +505,6 @@ static void dbg_poison(struct i915_ggtt *ggtt,
 	if (!drm_mm_node_allocated(&ggtt->error_capture))
 		return;
 
-	if (ggtt->vm.bind_async_flags & I915_VMA_GLOBAL_BIND)
-		return; /* beware stop_machine() inversion */
-
 	GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
 
 	mutex_lock(&ggtt->error_mutex);
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index e08dff376339..0c5a9a767849 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -436,7 +436,6 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
 	ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
 	ppgtt->base.vm.top = 1;
 
-	ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
 	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
 	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
 	ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 176c19633412..92f8a23e66cc 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -736,7 +736,6 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
 			goto err_free_pd;
 	}
 
-	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
 	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
 	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
 	ppgtt->vm.clear_range = gen8_ppgtt_clear;
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 670c1271e7d5..e1ec6edae1fb 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -127,7 +127,6 @@ void i915_ggtt_suspend(struct i915_ggtt *ggtt)
 
 	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
 		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-		i915_vma_wait_for_bind(vma);
 
 		if (i915_vma_is_pinned(vma))
 			continue;
@@ -671,7 +670,6 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
 	ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);
 
 	ggtt->alias = ppgtt;
-	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
 
 	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
 	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
@@ -911,8 +909,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 	    IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
 		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
 		ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
-		ggtt->vm.bind_async_flags =
-			I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
 	}
 
 	ggtt->invalidate = gen8_ggtt_invalidate;
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index e67e34e17913..d9d2ca8b4b61 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -230,8 +230,6 @@ struct i915_address_space {
 	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
 	u64 reserved;		/* size addr space reserved */
 
-	unsigned int bind_async_flags;
-
 	/*
 	 * Each active user context has its own address space (in full-ppgtt).
 	 * Since the vm may be shared between multiple contexts, we count how
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b23f58e94cfb..4639c47c038b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -946,12 +946,6 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
 		mutex_unlock(&ggtt->vm.mutex);
 	}
 
-	ret = i915_vma_wait_for_bind(vma);
-	if (ret) {
-		i915_vma_unpin(vma);
-		return ERR_PTR(ret);
-	}
-
 	return vma;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 07490db51cdc..e4b2590d41ce 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -289,79 +289,6 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
 	return vma;
 }
 
-struct i915_vma_work {
-	struct dma_fence_work base;
-	struct i915_address_space *vm;
-	struct i915_vm_pt_stash stash;
-	struct i915_vma *vma;
-	struct drm_i915_gem_object *pinned;
-	struct i915_sw_dma_fence_cb cb;
-	enum i915_cache_level cache_level;
-	unsigned int flags;
-};
-
-static int __vma_bind(struct dma_fence_work *work)
-{
-	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
-	struct i915_vma *vma = vw->vma;
-
-	vma->ops->bind_vma(vw->vm, &vw->stash,
-			   vma, vw->cache_level, vw->flags);
-	return 0;
-}
-
-static void __vma_release(struct dma_fence_work *work)
-{
-	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
-
-	if (vw->pinned) {
-		__i915_gem_object_unpin_pages(vw->pinned);
-		i915_gem_object_put(vw->pinned);
-	}
-
-	i915_vm_free_pt_stash(vw->vm, &vw->stash);
-	i915_vm_put(vw->vm);
-}
-
-static const struct dma_fence_work_ops bind_ops = {
-	.name = "bind",
-	.work = __vma_bind,
-	.release = __vma_release,
-};
-
-struct i915_vma_work *i915_vma_work(void)
-{
-	struct i915_vma_work *vw;
-
-	vw = kzalloc(sizeof(*vw), GFP_KERNEL);
-	if (!vw)
-		return NULL;
-
-	dma_fence_work_init(&vw->base, &bind_ops);
-	vw->base.dma.error = -EAGAIN; /* disable the worker by default */
-
-	return vw;
-}
-
-int i915_vma_wait_for_bind(struct i915_vma *vma)
-{
-	int err = 0;
-
-	if (rcu_access_pointer(vma->active.excl.fence)) {
-		struct dma_fence *fence;
-
-		rcu_read_lock();
-		fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
-		rcu_read_unlock();
-		if (fence) {
-			err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT);
-			dma_fence_put(fence);
-		}
-	}
-
-	return err;
-}
-
 /**
  * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
  * @vma: VMA to map
@@ -375,8 +302,7 @@ int i915_vma_wait_for_bind(struct i915_vma *vma)
  */
 int i915_vma_bind(struct i915_vma *vma,
 		  enum i915_cache_level cache_level,
-		  u32 flags,
-		  struct i915_vma_work *work)
+		  u32 flags, struct i915_vm_pt_stash *stash)
 {
 	u32 bind_flags;
 	u32 vma_flags;
@@ -405,39 +331,7 @@ int i915_vma_bind(struct i915_vma *vma,
 	GEM_BUG_ON(!vma->pages);
 
 	trace_i915_vma_bind(vma, bind_flags);
-	if (work && bind_flags & vma->vm->bind_async_flags) {
-		struct dma_fence *prev;
-
-		work->vma = vma;
-		work->cache_level = cache_level;
-		work->flags = bind_flags;
-
-		/*
-		 * Note we only want to chain up to the migration fence on
-		 * the pages (not the object itself). As we don't track that,
-		 * yet, we have to use the exclusive fence instead.
-		 *
-		 * Also note that we do not want to track the async vma as
-		 * part of the obj->resv->excl_fence as it only affects
-		 * execution and not content or object's backing store lifetime.
-		 */
-		prev = i915_active_set_exclusive(&vma->active, &work->base.dma);
-		if (prev) {
-			__i915_sw_fence_await_dma_fence(&work->base.chain,
-							prev,
-							&work->cb);
-			dma_fence_put(prev);
-		}
-
-		work->base.dma.error = 0; /* enable the queue_work() */
-
-		if (vma->obj) {
-			__i915_gem_object_pin_pages(vma->obj);
-			work->pinned = i915_gem_object_get(vma->obj);
-		}
-	} else {
-		vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
-	}
+	vma->ops->bind_vma(vma->vm, stash, vma, cache_level, bind_flags);
 
 	atomic_or(bind_flags, &vma->flags);
 	return 0;
@@ -531,9 +425,6 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
 	if (!drm_mm_node_allocated(&vma->node))
 		return false;
 
-	if (test_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma)))
-		return true;
-
 	if (vma->node.size < size)
 		return true;
 
@@ -752,7 +643,7 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
 		if (unlikely(flags & ~bound))
 			return false;
 
-		if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR)))
+		if (unlikely(bound & I915_VMA_OVERFLOW))
 			return false;
 
 		if (!(bound & I915_VMA_PIN_MASK))
@@ -770,7 +661,7 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
 	 */
 	mutex_lock(&vma->vm->mutex);
 	do {
-		if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) {
+		if (unlikely(bound & I915_VMA_OVERFLOW)) {
 			pinned = false;
 			break;
 		}
@@ -857,10 +748,10 @@ static void vma_unbind_pages(struct i915_vma *vma)
 int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 		    u64 size, u64 alignment, u64 flags)
 {
-	struct i915_vma_work *work = NULL;
 	intel_wakeref_t wakeref = 0;
 	unsigned int bound;
 	int err;
+	struct i915_vm_pt_stash stash = {};
 
 #ifdef CONFIG_PROVE_LOCKING
 	if (debug_locks && !WARN_ON(!ww) && vma->resv)
@@ -883,33 +774,21 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	if (flags & PIN_GLOBAL)
 		wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
 
-	if (flags & vma->vm->bind_async_flags) {
-		/* lock VM */
-		err = i915_vm_lock_objects(vma->vm, ww);
+	if (vma->vm->allocate_va_range) {
+		err = i915_vm_alloc_pt_stash(vma->vm,
+					     &stash,
+					     vma->size);
 		if (err)
 			goto err_rpm;
 
-		work = i915_vma_work();
-		if (!work) {
-			err = -ENOMEM;
+		err = i915_vm_lock_objects(vma->vm, ww);
+		if (err)
 			goto err_rpm;
-		}
 
-		work->vm = i915_vm_get(vma->vm);
-
-		/* Allocate enough page directories to used PTE */
-		if (vma->vm->allocate_va_range) {
-			err = i915_vm_alloc_pt_stash(vma->vm,
-						     &work->stash,
-						     vma->size);
-			if (err)
-				goto err_fence;
-
-			err = i915_vm_pin_pt_stash(vma->vm,
-						   &work->stash);
-			if (err)
-				goto err_fence;
-		}
+		err = i915_vm_pin_pt_stash(vma->vm,
+					   &stash);
+		if (err)
+			goto err_rpm;
 	}
 
 	/*
@@ -932,7 +811,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	err = mutex_lock_interruptible_nested(&vma->vm->mutex,
 					      !(flags & PIN_GLOBAL));
 	if (err)
-		goto err_fence;
+		goto err_rpm;
 
 	/* No more allocations allowed now we hold vm->mutex */
 
@@ -942,11 +821,6 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	}
 
 	bound = atomic_read(&vma->flags);
-	if (unlikely(bound & I915_VMA_ERROR)) {
-		err = -ENOMEM;
-		goto err_unlock;
-	}
-
 	if (unlikely(!((bound + 1) & I915_VMA_PIN_MASK))) {
 		err = -EAGAIN; /* pins are meant to be fairly temporary */
 		goto err_unlock;
@@ -973,7 +847,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	GEM_BUG_ON(!vma->pages);
 	err = i915_vma_bind(vma,
 			    vma->obj ? vma->obj->cache_level : 0,
-			    flags, work);
+			    flags, &stash);
 	if (err)
 		goto err_remove;
 
@@ -996,10 +870,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	i915_active_release(&vma->active);
 err_unlock:
 	mutex_unlock(&vma->vm->mutex);
-err_fence:
-	if (work)
-		dma_fence_work_commit_imm(&work->base);
 err_rpm:
+	i915_vm_free_pt_stash(vma->vm, &stash);
 	if (wakeref)
 		intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
 	vma_put_pages(vma);
@@ -1034,14 +906,8 @@ int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 			err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL);
 		else
 			err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
-		if (err != -ENOSPC) {
-			if (!err) {
-				err = i915_vma_wait_for_bind(vma);
-				if (err)
-					i915_vma_unpin(vma);
-			}
+		if (err != -ENOSPC)
 			return err;
-		}
 
 		/* Unlike i915_vma_pin, we don't take no for an answer! */
 		flush_idle_contexts(vm->gt);
@@ -1306,7 +1172,7 @@ void __i915_vma_evict(struct i915_vma *vma)
 		trace_i915_vma_unbind(vma);
 		vma->ops->unbind_vma(vma->vm, vma);
 	}
-	atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE),
+	atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_GGTT_WRITE),
 		   &vma->flags);
 
 	i915_vma_detach(vma);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 8df784a026d2..d1d0fc76ef99 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -195,11 +195,10 @@ i915_vma_compare(struct i915_vma *vma,
 	return memcmp(&vma->ggtt_view.partial, &view->partial, view->type);
 }
 
-struct i915_vma_work *i915_vma_work(void);
 int i915_vma_bind(struct i915_vma *vma,
 		  enum i915_cache_level cache_level,
 		  u32 flags,
-		  struct i915_vma_work *work);
+		  struct i915_vm_pt_stash *stash);
 
 bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color);
 bool i915_vma_misplaced(const struct i915_vma *vma,
@@ -418,8 +417,6 @@ struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma);
 void i915_vma_make_shrinkable(struct i915_vma *vma);
 void i915_vma_make_purgeable(struct i915_vma *vma);
 
-int i915_vma_wait_for_bind(struct i915_vma *vma);
-
 static inline int i915_vma_sync(struct i915_vma *vma)
 {
 	/* Wait for the asynchronous bindings and pending GPU reads */
diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
index 6b1bfa230b82..82868a755a96 100644
--- a/drivers/gpu/drm/i915/i915_vma_types.h
+++ b/drivers/gpu/drm/i915/i915_vma_types.h
@@ -240,9 +240,6 @@ struct i915_vma {
 
 #define I915_VMA_ALLOC_BIT	12
 
-#define I915_VMA_ERROR_BIT	13
-#define I915_VMA_ERROR		((int)BIT(I915_VMA_ERROR_BIT))
-
 #define I915_VMA_GGTT_BIT	14
 #define I915_VMA_CAN_FENCE_BIT	15
 #define I915_VMA_USERFAULT_BIT	16
-- 
2.31.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/2] drm/i915: Use trylock in shrinker for ggtt on bsw vt-d and bxt, v2.
  2021-04-20 10:34 ` [Intel-gfx] " Maarten Lankhorst
@ 2021-04-20 10:34   ` Maarten Lankhorst
  -1 siblings, 0 replies; 9+ messages in thread
From: Maarten Lankhorst @ 2021-04-20 10:34 UTC (permalink / raw)
  To: intel-gfx; +Cc: dri-devel

The stop_machine() lock may allocate memory, but is called inside
vm->mutex, which is taken in the shrinker. This will cause a lockdep
splat, as can be seen below:

<4>[  462.585762] ======================================================
<4>[  462.585768] WARNING: possible circular locking dependency detected
<4>[  462.585773] 5.12.0-rc5-CI-Trybot_7644+ #1 Tainted: G     U
<4>[  462.585779] ------------------------------------------------------
<4>[  462.585783] i915_selftest/5540 is trying to acquire lock:
<4>[  462.585788] ffffffff826440b0 (cpu_hotplug_lock){++++}-{0:0}, at: stop_machine+0x12/0x30
<4>[  462.585814]
                  but task is already holding lock:
<4>[  462.585818] ffff888125369c70 (&vm->mutex/1){+.+.}-{3:3}, at: i915_vma_pin_ww+0x38e/0xb40 [i915]
<4>[  462.586301]
                  which lock already depends on the new lock.

<4>[  462.586305]
                  the existing dependency chain (in reverse order) is:
<4>[  462.586309]
                  -> #2 (&vm->mutex/1){+.+.}-{3:3}:
<4>[  462.586323]        i915_gem_shrinker_taints_mutex+0x2d/0x50 [i915]
<4>[  462.586719]        i915_address_space_init+0x12d/0x130 [i915]
<4>[  462.587092]        ppgtt_init+0x4e/0x80 [i915]
<4>[  462.587467]        gen8_ppgtt_create+0x3e/0x5c0 [i915]
<4>[  462.587828]        i915_ppgtt_create+0x28/0xf0 [i915]
<4>[  462.588203]        intel_gt_init+0x123/0x370 [i915]
<4>[  462.588572]        i915_gem_init+0x129/0x1f0 [i915]
<4>[  462.588971]        i915_driver_probe+0x753/0xd80 [i915]
<4>[  462.589320]        i915_pci_probe+0x43/0x1d0 [i915]
<4>[  462.589671]        pci_device_probe+0x9e/0x110
<4>[  462.589680]        really_probe+0xea/0x410
<4>[  462.589690]        driver_probe_device+0xd9/0x140
<4>[  462.589697]        device_driver_attach+0x4a/0x50
<4>[  462.589704]        __driver_attach+0x83/0x140
<4>[  462.589711]        bus_for_each_dev+0x75/0xc0
<4>[  462.589718]        bus_add_driver+0x14b/0x1f0
<4>[  462.589724]        driver_register+0x66/0xb0
<4>[  462.589731]        i915_init+0x70/0x87 [i915]
<4>[  462.590053]        do_one_initcall+0x56/0x2e0
<4>[  462.590061]        do_init_module+0x55/0x200
<4>[  462.590068]        load_module+0x2703/0x2990
<4>[  462.590074]        __do_sys_finit_module+0xad/0x110
<4>[  462.590080]        do_syscall_64+0x33/0x80
<4>[  462.590089]        entry_SYSCALL_64_after_hwframe+0x44/0xae
<4>[  462.590096]
                  -> #1 (fs_reclaim){+.+.}-{0:0}:
<4>[  462.590109]        fs_reclaim_acquire+0x9f/0xd0
<4>[  462.590118]        kmem_cache_alloc_trace+0x3d/0x430
<4>[  462.590126]        intel_cpuc_prepare+0x3b/0x1b0
<4>[  462.590133]        cpuhp_invoke_callback+0x9e/0x890
<4>[  462.590141]        _cpu_up+0xa4/0x130
<4>[  462.590147]        cpu_up+0x82/0x90
<4>[  462.590153]        bringup_nonboot_cpus+0x4a/0x60
<4>[  462.590159]        smp_init+0x21/0x5c
<4>[  462.590167]        kernel_init_freeable+0x8a/0x1b7
<4>[  462.590175]        kernel_init+0x5/0xff
<4>[  462.590181]        ret_from_fork+0x22/0x30
<4>[  462.590187]
                  -> #0 (cpu_hotplug_lock){++++}-{0:0}:
<4>[  462.590199]        __lock_acquire+0x1520/0x2590
<4>[  462.590207]        lock_acquire+0xd1/0x3d0
<4>[  462.590213]        cpus_read_lock+0x39/0xc0
<4>[  462.590219]        stop_machine+0x12/0x30
<4>[  462.590226]        bxt_vtd_ggtt_insert_entries__BKL+0x36/0x50 [i915]
<4>[  462.590601]        ggtt_bind_vma+0x5d/0x80 [i915]
<4>[  462.590970]        i915_vma_bind+0xdc/0x1c0 [i915]
<4>[  462.591374]        i915_vma_pin_ww+0x435/0xb40 [i915]
<4>[  462.591779]        make_obj_busy+0xcb/0x330 [i915]
<4>[  462.592170]        igt_mmap_offset_exhaustion+0x45f/0x4c0 [i915]
<4>[  462.592562]        __i915_subtests.cold.7+0x42/0x92 [i915]
<4>[  462.592995]        __run_selftests.part.3+0x10d/0x172 [i915]
<4>[  462.593428]        i915_live_selftests.cold.5+0x1f/0x47 [i915]
<4>[  462.593860]        i915_pci_probe+0x93/0x1d0 [i915]
<4>[  462.594210]        pci_device_probe+0x9e/0x110
<4>[  462.594217]        really_probe+0xea/0x410
<4>[  462.594226]        driver_probe_device+0xd9/0x140
<4>[  462.594233]        device_driver_attach+0x4a/0x50
<4>[  462.594240]        __driver_attach+0x83/0x140
<4>[  462.594247]        bus_for_each_dev+0x75/0xc0
<4>[  462.594254]        bus_add_driver+0x14b/0x1f0
<4>[  462.594260]        driver_register+0x66/0xb0
<4>[  462.594267]        i915_init+0x70/0x87 [i915]
<4>[  462.594586]        do_one_initcall+0x56/0x2e0
<4>[  462.594592]        do_init_module+0x55/0x200
<4>[  462.594599]        load_module+0x2703/0x2990
<4>[  462.594605]        __do_sys_finit_module+0xad/0x110
<4>[  462.594612]        do_syscall_64+0x33/0x80
<4>[  462.594618]        entry_SYSCALL_64_after_hwframe+0x44/0xae
<4>[  462.594625]
                  other info that might help us debug this:

<4>[  462.594629] Chain exists of:
                    cpu_hotplug_lock --> fs_reclaim --> &vm->mutex/1

<4>[  462.594645]  Possible unsafe locking scenario:

<4>[  462.594648]        CPU0                    CPU1
<4>[  462.594652]        ----                    ----
<4>[  462.594655]   lock(&vm->mutex/1);
<4>[  462.594664]                                lock(fs_reclaim);
<4>[  462.594671]                                lock(&vm->mutex/1);
<4>[  462.594679]   lock(cpu_hotplug_lock);
<4>[  462.594686]
                   *** DEADLOCK ***

<4>[  462.594690] 4 locks held by i915_selftest/5540:
<4>[  462.594696]  #0: ffff888100fbc240 (&dev->mutex){....}-{3:3}, at: device_driver_attach+0x18/0x50
<4>[  462.594715]  #1: ffffc900006cb9a0 (reservation_ww_class_acquire){+.+.}-{0:0}, at: make_obj_busy+0x81/0x330 [i915]
<4>[  462.595118]  #2: ffff88812a6081e8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: make_obj_busy+0x21f/0x330 [i915]
<4>[  462.595519]  #3: ffff888125369c70 (&vm->mutex/1){+.+.}-{3:3}, at: i915_vma_pin_ww+0x38e/0xb40 [i915]
<4>[  462.595934]
                  stack backtrace:
<4>[  462.595939] CPU: 0 PID: 5540 Comm: i915_selftest Tainted: G     U            5.12.0-rc5-CI-Trybot_7644+ #1
<4>[  462.595947] Hardware name: GOOGLE Kefka/Kefka, BIOS MrChromebox 02/04/2018
<4>[  462.595952] Call Trace:
<4>[  462.595961]  dump_stack+0x7f/0xad
<4>[  462.595974]  check_noncircular+0x12e/0x150
<4>[  462.595982]  ? save_stack.isra.17+0x3f/0x70
<4>[  462.595991]  ? drm_mm_insert_node_in_range+0x34a/0x5b0
<4>[  462.596000]  ? i915_vma_pin_ww+0x9ec/0xb40 [i915]
<4>[  462.596410]  __lock_acquire+0x1520/0x2590
<4>[  462.596419]  ? do_init_module+0x55/0x200
<4>[  462.596429]  lock_acquire+0xd1/0x3d0
<4>[  462.596435]  ? stop_machine+0x12/0x30
<4>[  462.596445]  ? gen8_ggtt_insert_entries+0xf0/0xf0 [i915]
<4>[  462.596816]  cpus_read_lock+0x39/0xc0
<4>[  462.596824]  ? stop_machine+0x12/0x30
<4>[  462.596831]  stop_machine+0x12/0x30
<4>[  462.596839]  bxt_vtd_ggtt_insert_entries__BKL+0x36/0x50 [i915]
<4>[  462.597210]  ggtt_bind_vma+0x5d/0x80 [i915]
<4>[  462.597580]  i915_vma_bind+0xdc/0x1c0 [i915]
<4>[  462.597986]  i915_vma_pin_ww+0x435/0xb40 [i915]
<4>[  462.598395]  ? make_obj_busy+0xcb/0x330 [i915]
<4>[  462.598786]  make_obj_busy+0xcb/0x330 [i915]
<4>[  462.599180]  ? 0xffffffff81000000
<4>[  462.599187]  ? debug_mutex_unlock+0x50/0xa0
<4>[  462.599198]  igt_mmap_offset_exhaustion+0x45f/0x4c0 [i915]
<4>[  462.599592]  __i915_subtests.cold.7+0x42/0x92 [i915]
<4>[  462.600026]  ? i915_perf_selftests+0x20/0x20 [i915]
<4>[  462.600422]  ? __i915_nop_setup+0x10/0x10 [i915]
<4>[  462.600820]  __run_selftests.part.3+0x10d/0x172 [i915]
<4>[  462.601253]  i915_live_selftests.cold.5+0x1f/0x47 [i915]
<4>[  462.601686]  i915_pci_probe+0x93/0x1d0 [i915]
<4>[  462.602037]  ? _raw_spin_unlock_irqrestore+0x3d/0x60
<4>[  462.602047]  pci_device_probe+0x9e/0x110
<4>[  462.602057]  really_probe+0xea/0x410
<4>[  462.602067]  driver_probe_device+0xd9/0x140
<4>[  462.602075]  device_driver_attach+0x4a/0x50
<4>[  462.602084]  __driver_attach+0x83/0x140
<4>[  462.602091]  ? device_driver_attach+0x50/0x50
<4>[  462.602099]  ? device_driver_attach+0x50/0x50
<4>[  462.602107]  bus_for_each_dev+0x75/0xc0
<4>[  462.602116]  bus_add_driver+0x14b/0x1f0
<4>[  462.602124]  driver_register+0x66/0xb0
<4>[  462.602133]  i915_init+0x70/0x87 [i915]
<4>[  462.602453]  ? 0xffffffffa0606000
<4>[  462.602458]  do_one_initcall+0x56/0x2e0
<4>[  462.602466]  ? kmem_cache_alloc_trace+0x374/0x430
<4>[  462.602476]  do_init_module+0x55/0x200
<4>[  462.602484]  load_module+0x2703/0x2990
<4>[  462.602500]  ? __do_sys_finit_module+0xad/0x110
<4>[  462.602507]  __do_sys_finit_module+0xad/0x110
<4>[  462.602519]  do_syscall_64+0x33/0x80
<4>[  462.602527]  entry_SYSCALL_64_after_hwframe+0x44/0xae
<4>[  462.602535] RIP: 0033:0x7fab69d8d89d

Changes since v1:
- Add lockdep annotations during init, to ensure that lockdep is primed.
  This also fixes a false positive when reading /proc/lockdep_stats
  during module reload.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 13 +++++++++----
 drivers/gpu/drm/i915/gt/intel_ggtt.c         |  8 +++++---
 drivers/gpu/drm/i915/gt/intel_gtt.c          | 17 ++++++++++++++++-
 drivers/gpu/drm/i915/i915_drv.h              | 11 +++++++++--
 drivers/gpu/drm/i915/i915_gem.c              | 15 +++++++++++++--
 5 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index 3e248d3bd869..7545ddd83659 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -38,15 +38,17 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
 }
 
 static bool unsafe_drop_pages(struct drm_i915_gem_object *obj,
-			      unsigned long shrink)
+			      unsigned long shrink, bool trylock_vm)
 {
 	unsigned long flags;
 
 	flags = 0;
 	if (shrink & I915_SHRINK_ACTIVE)
-		flags = I915_GEM_OBJECT_UNBIND_ACTIVE;
+		flags |= I915_GEM_OBJECT_UNBIND_ACTIVE;
 	if (!(shrink & I915_SHRINK_BOUND))
-		flags = I915_GEM_OBJECT_UNBIND_TEST;
+		flags |= I915_GEM_OBJECT_UNBIND_TEST;
+	if (trylock_vm)
+		flags |= I915_GEM_OBJECT_UNBIND_VM_TRYLOCK;
 
 	if (i915_gem_object_unbind(obj, flags) == 0)
 		return true;
@@ -116,6 +118,9 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww,
 	unsigned long scanned = 0;
 	int err;
 
+	/* CHV + VTD workaround use stop_machine(); need to trylock vm->mutex */
+	bool trylock_vm = !ww && intel_vm_no_concurrent_access_wa(i915);
+
 	trace_i915_gem_shrink(i915, target, shrink);
 
 	/*
@@ -203,7 +208,7 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww,
 			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 
 			err = 0;
-			if (unsafe_drop_pages(obj, shrink)) {
+			if (unsafe_drop_pages(obj, shrink, trylock_vm)) {
 				/* May arrive from get_pages on another bo */
 				if (!ww) {
 					if (!i915_gem_object_trylock(obj))
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index e1ec6edae1fb..e6e6efacab87 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -904,9 +904,11 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 
 	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
 
-	/* Serialize GTT updates with aperture access on BXT if VT-d is on. */
-	if (intel_ggtt_update_needs_vtd_wa(i915) ||
-	    IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
+	/*
+	 * Serialize GTT updates with aperture access on BXT if VT-d is on,
+	 * and always on CHV.
+	 */
+	if (intel_vm_no_concurrent_access_wa(i915)) {
 		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
 		ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
 	}
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 941f8af016d6..a994234f2bd4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -132,7 +132,22 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
 	 */
 	mutex_init(&vm->mutex);
 	lockdep_set_subclass(&vm->mutex, subclass);
-	i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
+
+	if (!intel_vm_no_concurrent_access_wa(vm->i915)) {
+		i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
+	} else {
+		/*
+		 * CHV + BXT VTD workaround use stop_machine(),
+		 * which is allowed to allocate memory. This means &vm->mutex
+		 * is the outer lock, and in theory we can allocate memory inside
+		 * it through stop_machine().
+		 *
+		 * Add the annotation for this, we use trylock in shrinker.
+		 */
+		mutex_acquire(&vm->mutex.dep_map, 0, 0, _THIS_IP_);
+		might_alloc(GFP_KERNEL);
+		mutex_release(&vm->mutex.dep_map, _THIS_IP_);
+	}
 	dma_resv_init(&vm->resv);
 
 	GEM_BUG_ON(!vm->total);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e20294e9227a..4ef4f45dfb49 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1712,9 +1712,15 @@ static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
 }
 
 static inline bool
-intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *dev_priv)
+intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *i915)
 {
-	return IS_BROXTON(dev_priv) && intel_vtd_active();
+	return IS_BROXTON(i915) && intel_vtd_active();
+}
+
+static inline bool
+intel_vm_no_concurrent_access_wa(struct drm_i915_private *i915)
+{
+	return IS_CHERRYVIEW(i915) || intel_ggtt_update_needs_vtd_wa(i915);
 }
 
 /* i915_drv.c */
@@ -1794,6 +1800,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
 #define I915_GEM_OBJECT_UNBIND_ACTIVE BIT(0)
 #define I915_GEM_OBJECT_UNBIND_BARRIER BIT(1)
 #define I915_GEM_OBJECT_UNBIND_TEST BIT(2)
+#define I915_GEM_OBJECT_UNBIND_VM_TRYLOCK BIT(3)
 
 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4639c47c038b..dcf91a12172f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -157,8 +157,19 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
 		if (vma) {
 			ret = -EBUSY;
 			if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
-			    !i915_vma_is_active(vma))
-				ret = i915_vma_unbind(vma);
+			    !i915_vma_is_active(vma)) {
+				if (i915_is_ggtt(vma->vm) &&
+				    flags & I915_GEM_OBJECT_UNBIND_VM_TRYLOCK) {
+					if (mutex_trylock(&vma->vm->mutex)) {
+						ret = __i915_vma_unbind(vma);
+						mutex_unlock(&vma->vm->mutex);
+					} else {
+						ret = -EBUSY;
+					}
+				} else {
+					ret = i915_vma_unbind(vma);
+				}
+			}
 
 			__i915_vma_put(vma);
 		}
-- 
2.31.0

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Intel-gfx] [PATCH 2/2] drm/i915: Use trylock in shrinker for ggtt on bsw vt-d and bxt, v2.
@ 2021-04-20 10:34   ` Maarten Lankhorst
  0 siblings, 0 replies; 9+ messages in thread
From: Maarten Lankhorst @ 2021-04-20 10:34 UTC (permalink / raw)
  To: intel-gfx; +Cc: dri-devel

The stop_machine() lock may allocate memory, but is called inside
vm->mutex, which is taken in the shrinker. This will cause a lockdep
splat, as can be seen below:

<4>[  462.585762] ======================================================
<4>[  462.585768] WARNING: possible circular locking dependency detected
<4>[  462.585773] 5.12.0-rc5-CI-Trybot_7644+ #1 Tainted: G     U
<4>[  462.585779] ------------------------------------------------------
<4>[  462.585783] i915_selftest/5540 is trying to acquire lock:
<4>[  462.585788] ffffffff826440b0 (cpu_hotplug_lock){++++}-{0:0}, at: stop_machine+0x12/0x30
<4>[  462.585814]
                  but task is already holding lock:
<4>[  462.585818] ffff888125369c70 (&vm->mutex/1){+.+.}-{3:3}, at: i915_vma_pin_ww+0x38e/0xb40 [i915]
<4>[  462.586301]
                  which lock already depends on the new lock.

<4>[  462.586305]
                  the existing dependency chain (in reverse order) is:
<4>[  462.586309]
                  -> #2 (&vm->mutex/1){+.+.}-{3:3}:
<4>[  462.586323]        i915_gem_shrinker_taints_mutex+0x2d/0x50 [i915]
<4>[  462.586719]        i915_address_space_init+0x12d/0x130 [i915]
<4>[  462.587092]        ppgtt_init+0x4e/0x80 [i915]
<4>[  462.587467]        gen8_ppgtt_create+0x3e/0x5c0 [i915]
<4>[  462.587828]        i915_ppgtt_create+0x28/0xf0 [i915]
<4>[  462.588203]        intel_gt_init+0x123/0x370 [i915]
<4>[  462.588572]        i915_gem_init+0x129/0x1f0 [i915]
<4>[  462.588971]        i915_driver_probe+0x753/0xd80 [i915]
<4>[  462.589320]        i915_pci_probe+0x43/0x1d0 [i915]
<4>[  462.589671]        pci_device_probe+0x9e/0x110
<4>[  462.589680]        really_probe+0xea/0x410
<4>[  462.589690]        driver_probe_device+0xd9/0x140
<4>[  462.589697]        device_driver_attach+0x4a/0x50
<4>[  462.589704]        __driver_attach+0x83/0x140
<4>[  462.589711]        bus_for_each_dev+0x75/0xc0
<4>[  462.589718]        bus_add_driver+0x14b/0x1f0
<4>[  462.589724]        driver_register+0x66/0xb0
<4>[  462.589731]        i915_init+0x70/0x87 [i915]
<4>[  462.590053]        do_one_initcall+0x56/0x2e0
<4>[  462.590061]        do_init_module+0x55/0x200
<4>[  462.590068]        load_module+0x2703/0x2990
<4>[  462.590074]        __do_sys_finit_module+0xad/0x110
<4>[  462.590080]        do_syscall_64+0x33/0x80
<4>[  462.590089]        entry_SYSCALL_64_after_hwframe+0x44/0xae
<4>[  462.590096]
                  -> #1 (fs_reclaim){+.+.}-{0:0}:
<4>[  462.590109]        fs_reclaim_acquire+0x9f/0xd0
<4>[  462.590118]        kmem_cache_alloc_trace+0x3d/0x430
<4>[  462.590126]        intel_cpuc_prepare+0x3b/0x1b0
<4>[  462.590133]        cpuhp_invoke_callback+0x9e/0x890
<4>[  462.590141]        _cpu_up+0xa4/0x130
<4>[  462.590147]        cpu_up+0x82/0x90
<4>[  462.590153]        bringup_nonboot_cpus+0x4a/0x60
<4>[  462.590159]        smp_init+0x21/0x5c
<4>[  462.590167]        kernel_init_freeable+0x8a/0x1b7
<4>[  462.590175]        kernel_init+0x5/0xff
<4>[  462.590181]        ret_from_fork+0x22/0x30
<4>[  462.590187]
                  -> #0 (cpu_hotplug_lock){++++}-{0:0}:
<4>[  462.590199]        __lock_acquire+0x1520/0x2590
<4>[  462.590207]        lock_acquire+0xd1/0x3d0
<4>[  462.590213]        cpus_read_lock+0x39/0xc0
<4>[  462.590219]        stop_machine+0x12/0x30
<4>[  462.590226]        bxt_vtd_ggtt_insert_entries__BKL+0x36/0x50 [i915]
<4>[  462.590601]        ggtt_bind_vma+0x5d/0x80 [i915]
<4>[  462.590970]        i915_vma_bind+0xdc/0x1c0 [i915]
<4>[  462.591374]        i915_vma_pin_ww+0x435/0xb40 [i915]
<4>[  462.591779]        make_obj_busy+0xcb/0x330 [i915]
<4>[  462.592170]        igt_mmap_offset_exhaustion+0x45f/0x4c0 [i915]
<4>[  462.592562]        __i915_subtests.cold.7+0x42/0x92 [i915]
<4>[  462.592995]        __run_selftests.part.3+0x10d/0x172 [i915]
<4>[  462.593428]        i915_live_selftests.cold.5+0x1f/0x47 [i915]
<4>[  462.593860]        i915_pci_probe+0x93/0x1d0 [i915]
<4>[  462.594210]        pci_device_probe+0x9e/0x110
<4>[  462.594217]        really_probe+0xea/0x410
<4>[  462.594226]        driver_probe_device+0xd9/0x140
<4>[  462.594233]        device_driver_attach+0x4a/0x50
<4>[  462.594240]        __driver_attach+0x83/0x140
<4>[  462.594247]        bus_for_each_dev+0x75/0xc0
<4>[  462.594254]        bus_add_driver+0x14b/0x1f0
<4>[  462.594260]        driver_register+0x66/0xb0
<4>[  462.594267]        i915_init+0x70/0x87 [i915]
<4>[  462.594586]        do_one_initcall+0x56/0x2e0
<4>[  462.594592]        do_init_module+0x55/0x200
<4>[  462.594599]        load_module+0x2703/0x2990
<4>[  462.594605]        __do_sys_finit_module+0xad/0x110
<4>[  462.594612]        do_syscall_64+0x33/0x80
<4>[  462.594618]        entry_SYSCALL_64_after_hwframe+0x44/0xae
<4>[  462.594625]
                  other info that might help us debug this:

<4>[  462.594629] Chain exists of:
                    cpu_hotplug_lock --> fs_reclaim --> &vm->mutex/1

<4>[  462.594645]  Possible unsafe locking scenario:

<4>[  462.594648]        CPU0                    CPU1
<4>[  462.594652]        ----                    ----
<4>[  462.594655]   lock(&vm->mutex/1);
<4>[  462.594664]                                lock(fs_reclaim);
<4>[  462.594671]                                lock(&vm->mutex/1);
<4>[  462.594679]   lock(cpu_hotplug_lock);
<4>[  462.594686]
                   *** DEADLOCK ***

<4>[  462.594690] 4 locks held by i915_selftest/5540:
<4>[  462.594696]  #0: ffff888100fbc240 (&dev->mutex){....}-{3:3}, at: device_driver_attach+0x18/0x50
<4>[  462.594715]  #1: ffffc900006cb9a0 (reservation_ww_class_acquire){+.+.}-{0:0}, at: make_obj_busy+0x81/0x330 [i915]
<4>[  462.595118]  #2: ffff88812a6081e8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: make_obj_busy+0x21f/0x330 [i915]
<4>[  462.595519]  #3: ffff888125369c70 (&vm->mutex/1){+.+.}-{3:3}, at: i915_vma_pin_ww+0x38e/0xb40 [i915]
<4>[  462.595934]
                  stack backtrace:
<4>[  462.595939] CPU: 0 PID: 5540 Comm: i915_selftest Tainted: G     U            5.12.0-rc5-CI-Trybot_7644+ #1
<4>[  462.595947] Hardware name: GOOGLE Kefka/Kefka, BIOS MrChromebox 02/04/2018
<4>[  462.595952] Call Trace:
<4>[  462.595961]  dump_stack+0x7f/0xad
<4>[  462.595974]  check_noncircular+0x12e/0x150
<4>[  462.595982]  ? save_stack.isra.17+0x3f/0x70
<4>[  462.595991]  ? drm_mm_insert_node_in_range+0x34a/0x5b0
<4>[  462.596000]  ? i915_vma_pin_ww+0x9ec/0xb40 [i915]
<4>[  462.596410]  __lock_acquire+0x1520/0x2590
<4>[  462.596419]  ? do_init_module+0x55/0x200
<4>[  462.596429]  lock_acquire+0xd1/0x3d0
<4>[  462.596435]  ? stop_machine+0x12/0x30
<4>[  462.596445]  ? gen8_ggtt_insert_entries+0xf0/0xf0 [i915]
<4>[  462.596816]  cpus_read_lock+0x39/0xc0
<4>[  462.596824]  ? stop_machine+0x12/0x30
<4>[  462.596831]  stop_machine+0x12/0x30
<4>[  462.596839]  bxt_vtd_ggtt_insert_entries__BKL+0x36/0x50 [i915]
<4>[  462.597210]  ggtt_bind_vma+0x5d/0x80 [i915]
<4>[  462.597580]  i915_vma_bind+0xdc/0x1c0 [i915]
<4>[  462.597986]  i915_vma_pin_ww+0x435/0xb40 [i915]
<4>[  462.598395]  ? make_obj_busy+0xcb/0x330 [i915]
<4>[  462.598786]  make_obj_busy+0xcb/0x330 [i915]
<4>[  462.599180]  ? 0xffffffff81000000
<4>[  462.599187]  ? debug_mutex_unlock+0x50/0xa0
<4>[  462.599198]  igt_mmap_offset_exhaustion+0x45f/0x4c0 [i915]
<4>[  462.599592]  __i915_subtests.cold.7+0x42/0x92 [i915]
<4>[  462.600026]  ? i915_perf_selftests+0x20/0x20 [i915]
<4>[  462.600422]  ? __i915_nop_setup+0x10/0x10 [i915]
<4>[  462.600820]  __run_selftests.part.3+0x10d/0x172 [i915]
<4>[  462.601253]  i915_live_selftests.cold.5+0x1f/0x47 [i915]
<4>[  462.601686]  i915_pci_probe+0x93/0x1d0 [i915]
<4>[  462.602037]  ? _raw_spin_unlock_irqrestore+0x3d/0x60
<4>[  462.602047]  pci_device_probe+0x9e/0x110
<4>[  462.602057]  really_probe+0xea/0x410
<4>[  462.602067]  driver_probe_device+0xd9/0x140
<4>[  462.602075]  device_driver_attach+0x4a/0x50
<4>[  462.602084]  __driver_attach+0x83/0x140
<4>[  462.602091]  ? device_driver_attach+0x50/0x50
<4>[  462.602099]  ? device_driver_attach+0x50/0x50
<4>[  462.602107]  bus_for_each_dev+0x75/0xc0
<4>[  462.602116]  bus_add_driver+0x14b/0x1f0
<4>[  462.602124]  driver_register+0x66/0xb0
<4>[  462.602133]  i915_init+0x70/0x87 [i915]
<4>[  462.602453]  ? 0xffffffffa0606000
<4>[  462.602458]  do_one_initcall+0x56/0x2e0
<4>[  462.602466]  ? kmem_cache_alloc_trace+0x374/0x430
<4>[  462.602476]  do_init_module+0x55/0x200
<4>[  462.602484]  load_module+0x2703/0x2990
<4>[  462.602500]  ? __do_sys_finit_module+0xad/0x110
<4>[  462.602507]  __do_sys_finit_module+0xad/0x110
<4>[  462.602519]  do_syscall_64+0x33/0x80
<4>[  462.602527]  entry_SYSCALL_64_after_hwframe+0x44/0xae
<4>[  462.602535] RIP: 0033:0x7fab69d8d89d

Changes since v1:
- Add lockdep annotations during init, to ensure that lockdep is primed.
  This also fixes a false positive when reading /proc/lockdep_stats
  during module reload.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 13 +++++++++----
 drivers/gpu/drm/i915/gt/intel_ggtt.c         |  8 +++++---
 drivers/gpu/drm/i915/gt/intel_gtt.c          | 17 ++++++++++++++++-
 drivers/gpu/drm/i915/i915_drv.h              | 11 +++++++++--
 drivers/gpu/drm/i915/i915_gem.c              | 15 +++++++++++++--
 5 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index 3e248d3bd869..7545ddd83659 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -38,15 +38,17 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
 }
 
 static bool unsafe_drop_pages(struct drm_i915_gem_object *obj,
-			      unsigned long shrink)
+			      unsigned long shrink, bool trylock_vm)
 {
 	unsigned long flags;
 
 	flags = 0;
 	if (shrink & I915_SHRINK_ACTIVE)
-		flags = I915_GEM_OBJECT_UNBIND_ACTIVE;
+		flags |= I915_GEM_OBJECT_UNBIND_ACTIVE;
 	if (!(shrink & I915_SHRINK_BOUND))
-		flags = I915_GEM_OBJECT_UNBIND_TEST;
+		flags |= I915_GEM_OBJECT_UNBIND_TEST;
+	if (trylock_vm)
+		flags |= I915_GEM_OBJECT_UNBIND_VM_TRYLOCK;
 
 	if (i915_gem_object_unbind(obj, flags) == 0)
 		return true;
@@ -116,6 +118,9 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww,
 	unsigned long scanned = 0;
 	int err;
 
+	/* CHV + VTD workaround use stop_machine(); need to trylock vm->mutex */
+	bool trylock_vm = !ww && intel_vm_no_concurrent_access_wa(i915);
+
 	trace_i915_gem_shrink(i915, target, shrink);
 
 	/*
@@ -203,7 +208,7 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww,
 			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 
 			err = 0;
-			if (unsafe_drop_pages(obj, shrink)) {
+			if (unsafe_drop_pages(obj, shrink, trylock_vm)) {
 				/* May arrive from get_pages on another bo */
 				if (!ww) {
 					if (!i915_gem_object_trylock(obj))
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index e1ec6edae1fb..e6e6efacab87 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -904,9 +904,11 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 
 	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
 
-	/* Serialize GTT updates with aperture access on BXT if VT-d is on. */
-	if (intel_ggtt_update_needs_vtd_wa(i915) ||
-	    IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
+	/*
+	 * Serialize GTT updates with aperture access on BXT if VT-d is on,
+	 * and always on CHV.
+	 */
+	if (intel_vm_no_concurrent_access_wa(i915)) {
 		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
 		ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
 	}
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 941f8af016d6..a994234f2bd4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -132,7 +132,22 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
 	 */
 	mutex_init(&vm->mutex);
 	lockdep_set_subclass(&vm->mutex, subclass);
-	i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
+
+	if (!intel_vm_no_concurrent_access_wa(vm->i915)) {
+		i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
+	} else {
+		/*
+		 * CHV + BXT VTD workaround use stop_machine(),
+		 * which is allowed to allocate memory. This means &vm->mutex
+		 * is the outer lock, and in theory we can allocate memory inside
+		 * it through stop_machine().
+		 *
+		 * Add the annotation for this, we use trylock in shrinker.
+		 */
+		mutex_acquire(&vm->mutex.dep_map, 0, 0, _THIS_IP_);
+		might_alloc(GFP_KERNEL);
+		mutex_release(&vm->mutex.dep_map, _THIS_IP_);
+	}
 	dma_resv_init(&vm->resv);
 
 	GEM_BUG_ON(!vm->total);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e20294e9227a..4ef4f45dfb49 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1712,9 +1712,15 @@ static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
 }
 
 static inline bool
-intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *dev_priv)
+intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *i915)
 {
-	return IS_BROXTON(dev_priv) && intel_vtd_active();
+	return IS_BROXTON(i915) && intel_vtd_active();
+}
+
+static inline bool
+intel_vm_no_concurrent_access_wa(struct drm_i915_private *i915)
+{
+	return IS_CHERRYVIEW(i915) || intel_ggtt_update_needs_vtd_wa(i915);
 }
 
 /* i915_drv.c */
@@ -1794,6 +1800,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
 #define I915_GEM_OBJECT_UNBIND_ACTIVE BIT(0)
 #define I915_GEM_OBJECT_UNBIND_BARRIER BIT(1)
 #define I915_GEM_OBJECT_UNBIND_TEST BIT(2)
+#define I915_GEM_OBJECT_UNBIND_VM_TRYLOCK BIT(3)
 
 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4639c47c038b..dcf91a12172f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -157,8 +157,19 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
 		if (vma) {
 			ret = -EBUSY;
 			if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
-			    !i915_vma_is_active(vma))
-				ret = i915_vma_unbind(vma);
+			    !i915_vma_is_active(vma)) {
+				if (i915_is_ggtt(vma->vm) &&
+				    flags & I915_GEM_OBJECT_UNBIND_VM_TRYLOCK) {
+					if (mutex_trylock(&vma->vm->mutex)) {
+						ret = __i915_vma_unbind(vma);
+						mutex_unlock(&vma->vm->mutex);
+					} else {
+						ret = -EBUSY;
+					}
+				} else {
+					ret = i915_vma_unbind(vma);
+				}
+			}
 
 			__i915_vma_put(vma);
 		}
-- 
2.31.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/2] drm/i915: Remove asynchronous vma binding
  2021-04-20 10:34 ` [Intel-gfx] " Maarten Lankhorst
  (?)
  (?)
@ 2021-04-20 12:14 ` Patchwork
  -1 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2021-04-20 12:14 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/2] drm/i915: Remove asynchronous vma binding
URL   : https://patchwork.freedesktop.org/series/89255/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
7d5c57627b7b drm/i915: Remove asynchronous vma binding
-:8: WARNING:REPEATED_WORD: Possible repeated word: 'has'
#8: 
but dma_fence_work has has signalling fence semantics.

total: 0 errors, 1 warnings, 0 checks, 362 lines checked
ca4212a94685 drm/i915: Use trylock in shrinker for ggtt on bsw vt-d and bxt, v2.
-:16: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#16: 
<4>[  462.585788] ffffffff826440b0 (cpu_hotplug_lock){++++}-{0:0}, at: stop_machine+0x12/0x30

total: 0 errors, 1 warnings, 0 checks, 119 lines checked


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Intel-gfx] ✗ Fi.CI.DOCS: warning for series starting with [1/2] drm/i915: Remove asynchronous vma binding
  2021-04-20 10:34 ` [Intel-gfx] " Maarten Lankhorst
                   ` (2 preceding siblings ...)
  (?)
@ 2021-04-20 12:19 ` Patchwork
  -1 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2021-04-20 12:19 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/2] drm/i915: Remove asynchronous vma binding
URL   : https://patchwork.freedesktop.org/series/89255/
State : warning

== Summary ==

$ make htmldocs 2>&1 > /dev/null | grep i915
./drivers/gpu/drm/i915/gem/i915_gem_shrinker.c:104: warning: Function parameter or member 'ww' not described in 'i915_gem_shrink'
./drivers/gpu/drm/i915/i915_cmd_parser.c:1420: warning: Excess function parameter 'trampoline' description in 'intel_engine_cmd_parser'
./drivers/gpu/drm/i915/i915_cmd_parser.c:1420: warning: Function parameter or member 'jump_whitelist' not described in 'intel_engine_cmd_parser'
./drivers/gpu/drm/i915/i915_cmd_parser.c:1420: warning: Function parameter or member 'shadow_map' not described in 'intel_engine_cmd_parser'
./drivers/gpu/drm/i915/i915_cmd_parser.c:1420: warning: Function parameter or member 'batch_map' not described in 'intel_engine_cmd_parser'
./drivers/gpu/drm/i915/i915_cmd_parser.c:1420: warning: Excess function parameter 'trampoline' description in 'intel_engine_cmd_parser'


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Intel-gfx] ✗ Fi.CI.BAT: failure for series starting with [1/2] drm/i915: Remove asynchronous vma binding
  2021-04-20 10:34 ` [Intel-gfx] " Maarten Lankhorst
                   ` (3 preceding siblings ...)
  (?)
@ 2021-04-20 12:43 ` Patchwork
  -1 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2021-04-20 12:43 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 4506 bytes --]

== Series Details ==

Series: series starting with [1/2] drm/i915: Remove asynchronous vma binding
URL   : https://patchwork.freedesktop.org/series/89255/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_9985 -> Patchwork_19954
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_19954 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_19954, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19954/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_19954:

### IGT changes ###

#### Possible regressions ####

  * igt@gem_ctx_create@basic-files:
    - fi-apl-guc:         [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9985/fi-apl-guc/igt@gem_ctx_create@basic-files.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19954/fi-apl-guc/igt@gem_ctx_create@basic-files.html

  
Known issues
------------

  Here are the changes found in Patchwork_19954 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@amdgpu/amd_basic@semaphore:
    - fi-bdw-5557u:       NOTRUN -> [SKIP][3] ([fdo#109271]) +23 similar issues
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19954/fi-bdw-5557u/igt@amdgpu/amd_basic@semaphore.html

  * igt@amdgpu/amd_cs_nop@sync-fork-compute0:
    - fi-snb-2600:        NOTRUN -> [SKIP][4] ([fdo#109271]) +17 similar issues
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19954/fi-snb-2600/igt@amdgpu/amd_cs_nop@sync-fork-compute0.html

  * igt@core_hotunplug@unbind-rebind:
    - fi-bdw-5557u:       NOTRUN -> [WARN][5] ([i915#2283])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19954/fi-bdw-5557u/igt@core_hotunplug@unbind-rebind.html

  * igt@i915_selftest@live@gt_heartbeat:
    - fi-tgl-y:           [PASS][6] -> [DMESG-FAIL][7] ([i915#541])
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9985/fi-tgl-y/igt@i915_selftest@live@gt_heartbeat.html
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19954/fi-tgl-y/igt@i915_selftest@live@gt_heartbeat.html

  
#### Possible fixes ####

  * igt@gem_exec_suspend@basic-s3:
    - fi-tgl-u2:          [FAIL][8] ([i915#1888]) -> [PASS][9]
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9985/fi-tgl-u2/igt@gem_exec_suspend@basic-s3.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19954/fi-tgl-u2/igt@gem_exec_suspend@basic-s3.html

  * igt@i915_selftest@live@hangcheck:
    - fi-snb-2600:        [INCOMPLETE][10] ([i915#2782]) -> [PASS][11]
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9985/fi-snb-2600/igt@i915_selftest@live@hangcheck.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19954/fi-snb-2600/igt@i915_selftest@live@hangcheck.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [i915#1849]: https://gitlab.freedesktop.org/drm/intel/issues/1849
  [i915#1888]: https://gitlab.freedesktop.org/drm/intel/issues/1888
  [i915#2283]: https://gitlab.freedesktop.org/drm/intel/issues/2283
  [i915#2782]: https://gitlab.freedesktop.org/drm/intel/issues/2782
  [i915#3180]: https://gitlab.freedesktop.org/drm/intel/issues/3180
  [i915#541]: https://gitlab.freedesktop.org/drm/intel/issues/541


Participating hosts (42 -> 40)
------------------------------

  Missing    (2): fi-bsw-cyan fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_9985 -> Patchwork_19954

  CI-20190529: 20190529
  CI_DRM_9985: cc92ce47fdeaf67b9a47c4a291b501d30ae5a79e @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_6071: d293ad78a035dcf681345ca15e0256e0c3b1e7bb @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_19954: ca4212a946857ba4148f2fa313f47d828fc410fc @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

ca4212a94685 drm/i915: Use trylock in shrinker for ggtt on bsw vt-d and bxt, v2.
7d5c57627b7b drm/i915: Remove asynchronous vma binding

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19954/index.html

[-- Attachment #1.2: Type: text/html, Size: 5239 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/i915: Remove asynchronous vma binding
  2021-04-20 10:34 ` [Intel-gfx] " Maarten Lankhorst
@ 2021-04-21 11:51   ` Daniel Vetter
  -1 siblings, 0 replies; 9+ messages in thread
From: Daniel Vetter @ 2021-04-21 11:51 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: Daniel Vetter, intel-gfx, dri-devel

On Tue, Apr 20, 2021 at 12:34:50PM +0200, Maarten Lankhorst wrote:
> Commit e3793468b466 ("drm/i915: Use the async worker to avoid reclaim
> tainting the ggtt->mutex") moves the vma binding to dma_fence_work,
> but dma_fence_work has has signalling fence semantics.
> 
> On braswell, we can call stop_machine inside fence_work, causing a splat
> because memory allocation inside stop_machine is allowed.
> 
> This patch does not fix that lockdep splat yet, but will allow the next
> patch to remove it.
> 
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>

Thomas pointed out that this removes pipelined pte writing for both ggtt
and ppgtt, and that's a bit much. So need to retract my ack here.
-Daniel

> ---
>  drivers/gpu/drm/i915/gem/i915_gem_stolen.c |   3 -
>  drivers/gpu/drm/i915/gt/gen6_ppgtt.c       |   1 -
>  drivers/gpu/drm/i915/gt/gen8_ppgtt.c       |   1 -
>  drivers/gpu/drm/i915/gt/intel_ggtt.c       |   4 -
>  drivers/gpu/drm/i915/gt/intel_gtt.h        |   2 -
>  drivers/gpu/drm/i915/i915_gem.c            |   6 -
>  drivers/gpu/drm/i915/i915_vma.c            | 174 +++------------------
>  drivers/gpu/drm/i915/i915_vma.h            |   5 +-
>  drivers/gpu/drm/i915/i915_vma_types.h      |   3 -
>  9 files changed, 21 insertions(+), 178 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> index b0597de206de..ec04df0a3b89 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> @@ -505,9 +505,6 @@ static void dbg_poison(struct i915_ggtt *ggtt,
>  	if (!drm_mm_node_allocated(&ggtt->error_capture))
>  		return;
>  
> -	if (ggtt->vm.bind_async_flags & I915_VMA_GLOBAL_BIND)
> -		return; /* beware stop_machine() inversion */
> -
>  	GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
>  
>  	mutex_lock(&ggtt->error_mutex);
> diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
> index e08dff376339..0c5a9a767849 100644
> --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
> +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
> @@ -436,7 +436,6 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
>  	ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
>  	ppgtt->base.vm.top = 1;
>  
> -	ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>  	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
>  	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
>  	ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> index 176c19633412..92f8a23e66cc 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> @@ -736,7 +736,6 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
>  			goto err_free_pd;
>  	}
>  
> -	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>  	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
>  	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
>  	ppgtt->vm.clear_range = gen8_ppgtt_clear;
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index 670c1271e7d5..e1ec6edae1fb 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -127,7 +127,6 @@ void i915_ggtt_suspend(struct i915_ggtt *ggtt)
>  
>  	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
>  		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> -		i915_vma_wait_for_bind(vma);
>  
>  		if (i915_vma_is_pinned(vma))
>  			continue;
> @@ -671,7 +670,6 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
>  	ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);
>  
>  	ggtt->alias = ppgtt;
> -	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
>  
>  	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
>  	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
> @@ -911,8 +909,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>  	    IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
>  		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
>  		ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
> -		ggtt->vm.bind_async_flags =
> -			I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
>  	}
>  
>  	ggtt->invalidate = gen8_ggtt_invalidate;
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
> index e67e34e17913..d9d2ca8b4b61 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> @@ -230,8 +230,6 @@ struct i915_address_space {
>  	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
>  	u64 reserved;		/* size addr space reserved */
>  
> -	unsigned int bind_async_flags;
> -
>  	/*
>  	 * Each active user context has its own address space (in full-ppgtt).
>  	 * Since the vm may be shared between multiple contexts, we count how
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index b23f58e94cfb..4639c47c038b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -946,12 +946,6 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
>  		mutex_unlock(&ggtt->vm.mutex);
>  	}
>  
> -	ret = i915_vma_wait_for_bind(vma);
> -	if (ret) {
> -		i915_vma_unpin(vma);
> -		return ERR_PTR(ret);
> -	}
> -
>  	return vma;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 07490db51cdc..e4b2590d41ce 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -289,79 +289,6 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
>  	return vma;
>  }
>  
> -struct i915_vma_work {
> -	struct dma_fence_work base;
> -	struct i915_address_space *vm;
> -	struct i915_vm_pt_stash stash;
> -	struct i915_vma *vma;
> -	struct drm_i915_gem_object *pinned;
> -	struct i915_sw_dma_fence_cb cb;
> -	enum i915_cache_level cache_level;
> -	unsigned int flags;
> -};
> -
> -static int __vma_bind(struct dma_fence_work *work)
> -{
> -	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
> -	struct i915_vma *vma = vw->vma;
> -
> -	vma->ops->bind_vma(vw->vm, &vw->stash,
> -			   vma, vw->cache_level, vw->flags);
> -	return 0;
> -}
> -
> -static void __vma_release(struct dma_fence_work *work)
> -{
> -	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
> -
> -	if (vw->pinned) {
> -		__i915_gem_object_unpin_pages(vw->pinned);
> -		i915_gem_object_put(vw->pinned);
> -	}
> -
> -	i915_vm_free_pt_stash(vw->vm, &vw->stash);
> -	i915_vm_put(vw->vm);
> -}
> -
> -static const struct dma_fence_work_ops bind_ops = {
> -	.name = "bind",
> -	.work = __vma_bind,
> -	.release = __vma_release,
> -};
> -
> -struct i915_vma_work *i915_vma_work(void)
> -{
> -	struct i915_vma_work *vw;
> -
> -	vw = kzalloc(sizeof(*vw), GFP_KERNEL);
> -	if (!vw)
> -		return NULL;
> -
> -	dma_fence_work_init(&vw->base, &bind_ops);
> -	vw->base.dma.error = -EAGAIN; /* disable the worker by default */
> -
> -	return vw;
> -}
> -
> -int i915_vma_wait_for_bind(struct i915_vma *vma)
> -{
> -	int err = 0;
> -
> -	if (rcu_access_pointer(vma->active.excl.fence)) {
> -		struct dma_fence *fence;
> -
> -		rcu_read_lock();
> -		fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
> -		rcu_read_unlock();
> -		if (fence) {
> -			err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT);
> -			dma_fence_put(fence);
> -		}
> -	}
> -
> -	return err;
> -}
> -
>  /**
>   * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
>   * @vma: VMA to map
> @@ -375,8 +302,7 @@ int i915_vma_wait_for_bind(struct i915_vma *vma)
>   */
>  int i915_vma_bind(struct i915_vma *vma,
>  		  enum i915_cache_level cache_level,
> -		  u32 flags,
> -		  struct i915_vma_work *work)
> +		  u32 flags, struct i915_vm_pt_stash *stash)
>  {
>  	u32 bind_flags;
>  	u32 vma_flags;
> @@ -405,39 +331,7 @@ int i915_vma_bind(struct i915_vma *vma,
>  	GEM_BUG_ON(!vma->pages);
>  
>  	trace_i915_vma_bind(vma, bind_flags);
> -	if (work && bind_flags & vma->vm->bind_async_flags) {
> -		struct dma_fence *prev;
> -
> -		work->vma = vma;
> -		work->cache_level = cache_level;
> -		work->flags = bind_flags;
> -
> -		/*
> -		 * Note we only want to chain up to the migration fence on
> -		 * the pages (not the object itself). As we don't track that,
> -		 * yet, we have to use the exclusive fence instead.
> -		 *
> -		 * Also note that we do not want to track the async vma as
> -		 * part of the obj->resv->excl_fence as it only affects
> -		 * execution and not content or object's backing store lifetime.
> -		 */
> -		prev = i915_active_set_exclusive(&vma->active, &work->base.dma);
> -		if (prev) {
> -			__i915_sw_fence_await_dma_fence(&work->base.chain,
> -							prev,
> -							&work->cb);
> -			dma_fence_put(prev);
> -		}
> -
> -		work->base.dma.error = 0; /* enable the queue_work() */
> -
> -		if (vma->obj) {
> -			__i915_gem_object_pin_pages(vma->obj);
> -			work->pinned = i915_gem_object_get(vma->obj);
> -		}
> -	} else {
> -		vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
> -	}
> +	vma->ops->bind_vma(vma->vm, stash, vma, cache_level, bind_flags);
>  
>  	atomic_or(bind_flags, &vma->flags);
>  	return 0;
> @@ -531,9 +425,6 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
>  	if (!drm_mm_node_allocated(&vma->node))
>  		return false;
>  
> -	if (test_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma)))
> -		return true;
> -
>  	if (vma->node.size < size)
>  		return true;
>  
> @@ -752,7 +643,7 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
>  		if (unlikely(flags & ~bound))
>  			return false;
>  
> -		if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR)))
> +		if (unlikely(bound & I915_VMA_OVERFLOW))
>  			return false;
>  
>  		if (!(bound & I915_VMA_PIN_MASK))
> @@ -770,7 +661,7 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
>  	 */
>  	mutex_lock(&vma->vm->mutex);
>  	do {
> -		if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) {
> +		if (unlikely(bound & I915_VMA_OVERFLOW)) {
>  			pinned = false;
>  			break;
>  		}
> @@ -857,10 +748,10 @@ static void vma_unbind_pages(struct i915_vma *vma)
>  int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>  		    u64 size, u64 alignment, u64 flags)
>  {
> -	struct i915_vma_work *work = NULL;
>  	intel_wakeref_t wakeref = 0;
>  	unsigned int bound;
>  	int err;
> +	struct i915_vm_pt_stash stash = {};
>  
>  #ifdef CONFIG_PROVE_LOCKING
>  	if (debug_locks && !WARN_ON(!ww) && vma->resv)
> @@ -883,33 +774,21 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>  	if (flags & PIN_GLOBAL)
>  		wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
>  
> -	if (flags & vma->vm->bind_async_flags) {
> -		/* lock VM */
> -		err = i915_vm_lock_objects(vma->vm, ww);
> +	if (vma->vm->allocate_va_range) {
> +		err = i915_vm_alloc_pt_stash(vma->vm,
> +					     &stash,
> +					     vma->size);
>  		if (err)
>  			goto err_rpm;
>  
> -		work = i915_vma_work();
> -		if (!work) {
> -			err = -ENOMEM;
> +		err = i915_vm_lock_objects(vma->vm, ww);
> +		if (err)
>  			goto err_rpm;
> -		}
>  
> -		work->vm = i915_vm_get(vma->vm);
> -
> -		/* Allocate enough page directories to used PTE */
> -		if (vma->vm->allocate_va_range) {
> -			err = i915_vm_alloc_pt_stash(vma->vm,
> -						     &work->stash,
> -						     vma->size);
> -			if (err)
> -				goto err_fence;
> -
> -			err = i915_vm_pin_pt_stash(vma->vm,
> -						   &work->stash);
> -			if (err)
> -				goto err_fence;
> -		}
> +		err = i915_vm_pin_pt_stash(vma->vm,
> +					   &stash);
> +		if (err)
> +			goto err_rpm;
>  	}
>  
>  	/*
> @@ -932,7 +811,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>  	err = mutex_lock_interruptible_nested(&vma->vm->mutex,
>  					      !(flags & PIN_GLOBAL));
>  	if (err)
> -		goto err_fence;
> +		goto err_rpm;
>  
>  	/* No more allocations allowed now we hold vm->mutex */
>  
> @@ -942,11 +821,6 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>  	}
>  
>  	bound = atomic_read(&vma->flags);
> -	if (unlikely(bound & I915_VMA_ERROR)) {
> -		err = -ENOMEM;
> -		goto err_unlock;
> -	}
> -
>  	if (unlikely(!((bound + 1) & I915_VMA_PIN_MASK))) {
>  		err = -EAGAIN; /* pins are meant to be fairly temporary */
>  		goto err_unlock;
> @@ -973,7 +847,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>  	GEM_BUG_ON(!vma->pages);
>  	err = i915_vma_bind(vma,
>  			    vma->obj ? vma->obj->cache_level : 0,
> -			    flags, work);
> +			    flags, &stash);
>  	if (err)
>  		goto err_remove;
>  
> @@ -996,10 +870,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>  	i915_active_release(&vma->active);
>  err_unlock:
>  	mutex_unlock(&vma->vm->mutex);
> -err_fence:
> -	if (work)
> -		dma_fence_work_commit_imm(&work->base);
>  err_rpm:
> +	i915_vm_free_pt_stash(vma->vm, &stash);
>  	if (wakeref)
>  		intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
>  	vma_put_pages(vma);
> @@ -1034,14 +906,8 @@ int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>  			err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL);
>  		else
>  			err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
> -		if (err != -ENOSPC) {
> -			if (!err) {
> -				err = i915_vma_wait_for_bind(vma);
> -				if (err)
> -					i915_vma_unpin(vma);
> -			}
> +		if (err != -ENOSPC)
>  			return err;
> -		}
>  
>  		/* Unlike i915_vma_pin, we don't take no for an answer! */
>  		flush_idle_contexts(vm->gt);
> @@ -1306,7 +1172,7 @@ void __i915_vma_evict(struct i915_vma *vma)
>  		trace_i915_vma_unbind(vma);
>  		vma->ops->unbind_vma(vma->vm, vma);
>  	}
> -	atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE),
> +	atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_GGTT_WRITE),
>  		   &vma->flags);
>  
>  	i915_vma_detach(vma);
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 8df784a026d2..d1d0fc76ef99 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -195,11 +195,10 @@ i915_vma_compare(struct i915_vma *vma,
>  	return memcmp(&vma->ggtt_view.partial, &view->partial, view->type);
>  }
>  
> -struct i915_vma_work *i915_vma_work(void);
>  int i915_vma_bind(struct i915_vma *vma,
>  		  enum i915_cache_level cache_level,
>  		  u32 flags,
> -		  struct i915_vma_work *work);
> +		  struct i915_vm_pt_stash *stash);
>  
>  bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color);
>  bool i915_vma_misplaced(const struct i915_vma *vma,
> @@ -418,8 +417,6 @@ struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma);
>  void i915_vma_make_shrinkable(struct i915_vma *vma);
>  void i915_vma_make_purgeable(struct i915_vma *vma);
>  
> -int i915_vma_wait_for_bind(struct i915_vma *vma);
> -
>  static inline int i915_vma_sync(struct i915_vma *vma)
>  {
>  	/* Wait for the asynchronous bindings and pending GPU reads */
> diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
> index 6b1bfa230b82..82868a755a96 100644
> --- a/drivers/gpu/drm/i915/i915_vma_types.h
> +++ b/drivers/gpu/drm/i915/i915_vma_types.h
> @@ -240,9 +240,6 @@ struct i915_vma {
>  
>  #define I915_VMA_ALLOC_BIT	12
>  
> -#define I915_VMA_ERROR_BIT	13
> -#define I915_VMA_ERROR		((int)BIT(I915_VMA_ERROR_BIT))
> -
>  #define I915_VMA_GGTT_BIT	14
>  #define I915_VMA_CAN_FENCE_BIT	15
>  #define I915_VMA_USERFAULT_BIT	16
> -- 
> 2.31.0
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Intel-gfx] [PATCH 1/2] drm/i915: Remove asynchronous vma binding
@ 2021-04-21 11:51   ` Daniel Vetter
  0 siblings, 0 replies; 9+ messages in thread
From: Daniel Vetter @ 2021-04-21 11:51 UTC (permalink / raw)
  To: Maarten Lankhorst; +Cc: Daniel Vetter, intel-gfx, dri-devel

On Tue, Apr 20, 2021 at 12:34:50PM +0200, Maarten Lankhorst wrote:
> Commit e3793468b466 ("drm/i915: Use the async worker to avoid reclaim
> tainting the ggtt->mutex") moves the vma binding to dma_fence_work,
> but dma_fence_work has has signalling fence semantics.
> 
> On braswell, we can call stop_machine inside fence_work, causing a splat
> because memory allocation inside stop_machine is allowed.
> 
> This patch does not fix that lockdep splat yet, but will allow the next
> patch to remove it.
> 
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>

Thomas pointed out that this removes pipelined pte writing for both ggtt
and ppgtt, and that's a bit much. So need to retract my ack here.
-Daniel

> ---
>  drivers/gpu/drm/i915/gem/i915_gem_stolen.c |   3 -
>  drivers/gpu/drm/i915/gt/gen6_ppgtt.c       |   1 -
>  drivers/gpu/drm/i915/gt/gen8_ppgtt.c       |   1 -
>  drivers/gpu/drm/i915/gt/intel_ggtt.c       |   4 -
>  drivers/gpu/drm/i915/gt/intel_gtt.h        |   2 -
>  drivers/gpu/drm/i915/i915_gem.c            |   6 -
>  drivers/gpu/drm/i915/i915_vma.c            | 174 +++------------------
>  drivers/gpu/drm/i915/i915_vma.h            |   5 +-
>  drivers/gpu/drm/i915/i915_vma_types.h      |   3 -
>  9 files changed, 21 insertions(+), 178 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> index b0597de206de..ec04df0a3b89 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> @@ -505,9 +505,6 @@ static void dbg_poison(struct i915_ggtt *ggtt,
>  	if (!drm_mm_node_allocated(&ggtt->error_capture))
>  		return;
>  
> -	if (ggtt->vm.bind_async_flags & I915_VMA_GLOBAL_BIND)
> -		return; /* beware stop_machine() inversion */
> -
>  	GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
>  
>  	mutex_lock(&ggtt->error_mutex);
> diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
> index e08dff376339..0c5a9a767849 100644
> --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
> +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
> @@ -436,7 +436,6 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
>  	ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
>  	ppgtt->base.vm.top = 1;
>  
> -	ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>  	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
>  	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
>  	ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> index 176c19633412..92f8a23e66cc 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> @@ -736,7 +736,6 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
>  			goto err_free_pd;
>  	}
>  
> -	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>  	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
>  	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
>  	ppgtt->vm.clear_range = gen8_ppgtt_clear;
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index 670c1271e7d5..e1ec6edae1fb 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -127,7 +127,6 @@ void i915_ggtt_suspend(struct i915_ggtt *ggtt)
>  
>  	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
>  		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> -		i915_vma_wait_for_bind(vma);
>  
>  		if (i915_vma_is_pinned(vma))
>  			continue;
> @@ -671,7 +670,6 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
>  	ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);
>  
>  	ggtt->alias = ppgtt;
> -	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
>  
>  	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
>  	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
> @@ -911,8 +909,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>  	    IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
>  		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
>  		ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
> -		ggtt->vm.bind_async_flags =
> -			I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
>  	}
>  
>  	ggtt->invalidate = gen8_ggtt_invalidate;
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
> index e67e34e17913..d9d2ca8b4b61 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> @@ -230,8 +230,6 @@ struct i915_address_space {
>  	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
>  	u64 reserved;		/* size addr space reserved */
>  
> -	unsigned int bind_async_flags;
> -
>  	/*
>  	 * Each active user context has its own address space (in full-ppgtt).
>  	 * Since the vm may be shared between multiple contexts, we count how
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index b23f58e94cfb..4639c47c038b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -946,12 +946,6 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
>  		mutex_unlock(&ggtt->vm.mutex);
>  	}
>  
> -	ret = i915_vma_wait_for_bind(vma);
> -	if (ret) {
> -		i915_vma_unpin(vma);
> -		return ERR_PTR(ret);
> -	}
> -
>  	return vma;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 07490db51cdc..e4b2590d41ce 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -289,79 +289,6 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
>  	return vma;
>  }
>  
> -struct i915_vma_work {
> -	struct dma_fence_work base;
> -	struct i915_address_space *vm;
> -	struct i915_vm_pt_stash stash;
> -	struct i915_vma *vma;
> -	struct drm_i915_gem_object *pinned;
> -	struct i915_sw_dma_fence_cb cb;
> -	enum i915_cache_level cache_level;
> -	unsigned int flags;
> -};
> -
> -static int __vma_bind(struct dma_fence_work *work)
> -{
> -	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
> -	struct i915_vma *vma = vw->vma;
> -
> -	vma->ops->bind_vma(vw->vm, &vw->stash,
> -			   vma, vw->cache_level, vw->flags);
> -	return 0;
> -}
> -
> -static void __vma_release(struct dma_fence_work *work)
> -{
> -	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
> -
> -	if (vw->pinned) {
> -		__i915_gem_object_unpin_pages(vw->pinned);
> -		i915_gem_object_put(vw->pinned);
> -	}
> -
> -	i915_vm_free_pt_stash(vw->vm, &vw->stash);
> -	i915_vm_put(vw->vm);
> -}
> -
> -static const struct dma_fence_work_ops bind_ops = {
> -	.name = "bind",
> -	.work = __vma_bind,
> -	.release = __vma_release,
> -};
> -
> -struct i915_vma_work *i915_vma_work(void)
> -{
> -	struct i915_vma_work *vw;
> -
> -	vw = kzalloc(sizeof(*vw), GFP_KERNEL);
> -	if (!vw)
> -		return NULL;
> -
> -	dma_fence_work_init(&vw->base, &bind_ops);
> -	vw->base.dma.error = -EAGAIN; /* disable the worker by default */
> -
> -	return vw;
> -}
> -
> -int i915_vma_wait_for_bind(struct i915_vma *vma)
> -{
> -	int err = 0;
> -
> -	if (rcu_access_pointer(vma->active.excl.fence)) {
> -		struct dma_fence *fence;
> -
> -		rcu_read_lock();
> -		fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
> -		rcu_read_unlock();
> -		if (fence) {
> -			err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT);
> -			dma_fence_put(fence);
> -		}
> -	}
> -
> -	return err;
> -}
> -
>  /**
>   * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
>   * @vma: VMA to map
> @@ -375,8 +302,7 @@ int i915_vma_wait_for_bind(struct i915_vma *vma)
>   */
>  int i915_vma_bind(struct i915_vma *vma,
>  		  enum i915_cache_level cache_level,
> -		  u32 flags,
> -		  struct i915_vma_work *work)
> +		  u32 flags, struct i915_vm_pt_stash *stash)
>  {
>  	u32 bind_flags;
>  	u32 vma_flags;
> @@ -405,39 +331,7 @@ int i915_vma_bind(struct i915_vma *vma,
>  	GEM_BUG_ON(!vma->pages);
>  
>  	trace_i915_vma_bind(vma, bind_flags);
> -	if (work && bind_flags & vma->vm->bind_async_flags) {
> -		struct dma_fence *prev;
> -
> -		work->vma = vma;
> -		work->cache_level = cache_level;
> -		work->flags = bind_flags;
> -
> -		/*
> -		 * Note we only want to chain up to the migration fence on
> -		 * the pages (not the object itself). As we don't track that,
> -		 * yet, we have to use the exclusive fence instead.
> -		 *
> -		 * Also note that we do not want to track the async vma as
> -		 * part of the obj->resv->excl_fence as it only affects
> -		 * execution and not content or object's backing store lifetime.
> -		 */
> -		prev = i915_active_set_exclusive(&vma->active, &work->base.dma);
> -		if (prev) {
> -			__i915_sw_fence_await_dma_fence(&work->base.chain,
> -							prev,
> -							&work->cb);
> -			dma_fence_put(prev);
> -		}
> -
> -		work->base.dma.error = 0; /* enable the queue_work() */
> -
> -		if (vma->obj) {
> -			__i915_gem_object_pin_pages(vma->obj);
> -			work->pinned = i915_gem_object_get(vma->obj);
> -		}
> -	} else {
> -		vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
> -	}
> +	vma->ops->bind_vma(vma->vm, stash, vma, cache_level, bind_flags);
>  
>  	atomic_or(bind_flags, &vma->flags);
>  	return 0;
> @@ -531,9 +425,6 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
>  	if (!drm_mm_node_allocated(&vma->node))
>  		return false;
>  
> -	if (test_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma)))
> -		return true;
> -
>  	if (vma->node.size < size)
>  		return true;
>  
> @@ -752,7 +643,7 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
>  		if (unlikely(flags & ~bound))
>  			return false;
>  
> -		if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR)))
> +		if (unlikely(bound & I915_VMA_OVERFLOW))
>  			return false;
>  
>  		if (!(bound & I915_VMA_PIN_MASK))
> @@ -770,7 +661,7 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
>  	 */
>  	mutex_lock(&vma->vm->mutex);
>  	do {
> -		if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) {
> +		if (unlikely(bound & I915_VMA_OVERFLOW)) {
>  			pinned = false;
>  			break;
>  		}
> @@ -857,10 +748,10 @@ static void vma_unbind_pages(struct i915_vma *vma)
>  int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>  		    u64 size, u64 alignment, u64 flags)
>  {
> -	struct i915_vma_work *work = NULL;
>  	intel_wakeref_t wakeref = 0;
>  	unsigned int bound;
>  	int err;
> +	struct i915_vm_pt_stash stash = {};
>  
>  #ifdef CONFIG_PROVE_LOCKING
>  	if (debug_locks && !WARN_ON(!ww) && vma->resv)
> @@ -883,33 +774,21 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>  	if (flags & PIN_GLOBAL)
>  		wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
>  
> -	if (flags & vma->vm->bind_async_flags) {
> -		/* lock VM */
> -		err = i915_vm_lock_objects(vma->vm, ww);
> +	if (vma->vm->allocate_va_range) {
> +		err = i915_vm_alloc_pt_stash(vma->vm,
> +					     &stash,
> +					     vma->size);
>  		if (err)
>  			goto err_rpm;
>  
> -		work = i915_vma_work();
> -		if (!work) {
> -			err = -ENOMEM;
> +		err = i915_vm_lock_objects(vma->vm, ww);
> +		if (err)
>  			goto err_rpm;
> -		}
>  
> -		work->vm = i915_vm_get(vma->vm);
> -
> -		/* Allocate enough page directories to used PTE */
> -		if (vma->vm->allocate_va_range) {
> -			err = i915_vm_alloc_pt_stash(vma->vm,
> -						     &work->stash,
> -						     vma->size);
> -			if (err)
> -				goto err_fence;
> -
> -			err = i915_vm_pin_pt_stash(vma->vm,
> -						   &work->stash);
> -			if (err)
> -				goto err_fence;
> -		}
> +		err = i915_vm_pin_pt_stash(vma->vm,
> +					   &stash);
> +		if (err)
> +			goto err_rpm;
>  	}
>  
>  	/*
> @@ -932,7 +811,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>  	err = mutex_lock_interruptible_nested(&vma->vm->mutex,
>  					      !(flags & PIN_GLOBAL));
>  	if (err)
> -		goto err_fence;
> +		goto err_rpm;
>  
>  	/* No more allocations allowed now we hold vm->mutex */
>  
> @@ -942,11 +821,6 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>  	}
>  
>  	bound = atomic_read(&vma->flags);
> -	if (unlikely(bound & I915_VMA_ERROR)) {
> -		err = -ENOMEM;
> -		goto err_unlock;
> -	}
> -
>  	if (unlikely(!((bound + 1) & I915_VMA_PIN_MASK))) {
>  		err = -EAGAIN; /* pins are meant to be fairly temporary */
>  		goto err_unlock;
> @@ -973,7 +847,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>  	GEM_BUG_ON(!vma->pages);
>  	err = i915_vma_bind(vma,
>  			    vma->obj ? vma->obj->cache_level : 0,
> -			    flags, work);
> +			    flags, &stash);
>  	if (err)
>  		goto err_remove;
>  
> @@ -996,10 +870,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>  	i915_active_release(&vma->active);
>  err_unlock:
>  	mutex_unlock(&vma->vm->mutex);
> -err_fence:
> -	if (work)
> -		dma_fence_work_commit_imm(&work->base);
>  err_rpm:
> +	i915_vm_free_pt_stash(vma->vm, &stash);
>  	if (wakeref)
>  		intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
>  	vma_put_pages(vma);
> @@ -1034,14 +906,8 @@ int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>  			err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL);
>  		else
>  			err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
> -		if (err != -ENOSPC) {
> -			if (!err) {
> -				err = i915_vma_wait_for_bind(vma);
> -				if (err)
> -					i915_vma_unpin(vma);
> -			}
> +		if (err != -ENOSPC)
>  			return err;
> -		}
>  
>  		/* Unlike i915_vma_pin, we don't take no for an answer! */
>  		flush_idle_contexts(vm->gt);
> @@ -1306,7 +1172,7 @@ void __i915_vma_evict(struct i915_vma *vma)
>  		trace_i915_vma_unbind(vma);
>  		vma->ops->unbind_vma(vma->vm, vma);
>  	}
> -	atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE),
> +	atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_GGTT_WRITE),
>  		   &vma->flags);
>  
>  	i915_vma_detach(vma);
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 8df784a026d2..d1d0fc76ef99 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -195,11 +195,10 @@ i915_vma_compare(struct i915_vma *vma,
>  	return memcmp(&vma->ggtt_view.partial, &view->partial, view->type);
>  }
>  
> -struct i915_vma_work *i915_vma_work(void);
>  int i915_vma_bind(struct i915_vma *vma,
>  		  enum i915_cache_level cache_level,
>  		  u32 flags,
> -		  struct i915_vma_work *work);
> +		  struct i915_vm_pt_stash *stash);
>  
>  bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color);
>  bool i915_vma_misplaced(const struct i915_vma *vma,
> @@ -418,8 +417,6 @@ struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma);
>  void i915_vma_make_shrinkable(struct i915_vma *vma);
>  void i915_vma_make_purgeable(struct i915_vma *vma);
>  
> -int i915_vma_wait_for_bind(struct i915_vma *vma);
> -
>  static inline int i915_vma_sync(struct i915_vma *vma)
>  {
>  	/* Wait for the asynchronous bindings and pending GPU reads */
> diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
> index 6b1bfa230b82..82868a755a96 100644
> --- a/drivers/gpu/drm/i915/i915_vma_types.h
> +++ b/drivers/gpu/drm/i915/i915_vma_types.h
> @@ -240,9 +240,6 @@ struct i915_vma {
>  
>  #define I915_VMA_ALLOC_BIT	12
>  
> -#define I915_VMA_ERROR_BIT	13
> -#define I915_VMA_ERROR		((int)BIT(I915_VMA_ERROR_BIT))
> -
>  #define I915_VMA_GGTT_BIT	14
>  #define I915_VMA_CAN_FENCE_BIT	15
>  #define I915_VMA_USERFAULT_BIT	16
> -- 
> 2.31.0
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2021-04-21 11:51 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-20 10:34 [PATCH 1/2] drm/i915: Remove asynchronous vma binding Maarten Lankhorst
2021-04-20 10:34 ` [Intel-gfx] " Maarten Lankhorst
2021-04-20 10:34 ` [PATCH 2/2] drm/i915: Use trylock in shrinker for ggtt on bsw vt-d and bxt, v2 Maarten Lankhorst
2021-04-20 10:34   ` [Intel-gfx] " Maarten Lankhorst
2021-04-20 12:14 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/2] drm/i915: Remove asynchronous vma binding Patchwork
2021-04-20 12:19 ` [Intel-gfx] ✗ Fi.CI.DOCS: " Patchwork
2021-04-20 12:43 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
2021-04-21 11:51 ` [PATCH 1/2] " Daniel Vetter
2021-04-21 11:51   ` [Intel-gfx] " Daniel Vetter

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.