* [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling
@ 2019-04-25  9:19 Chris Wilson
  2019-04-25  9:19 ` [PATCH 02/45] drm/i915/gvt: Pin the per-engine GVT shadow contexts Chris Wilson
                   ` (51 more replies)
  0 siblings, 52 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

Currently there is an underlying assumption that i915_request_unsubmit()
is synchronous with respect to the GPU -- that is, the request is no longer
in flight as we remove it. In the near future that may change, and this may
upset our signaling as we can then process an interrupt for a request while
it is no longer in flight.

CPU0					CPU1
intel_engine_breadcrumbs_irq
(queue request completion)
					i915_request_cancel_signaling
...					...
					i915_request_enable_signaling
dma_fence_signal

Hence in the time it took us to drop the lock to signal the request, a
preemption event may have occurred and re-queued the request. In the
process, that request would have seen I915_FENCE_FLAG_SIGNAL clear and
so reused the rq->signal_link that was in use on CPU0, leading to bad
pointer chasing in intel_engine_breadcrumbs_irq.

A related issue was that if someone started listening for a signal on a
completed but no longer in-flight request, we missed the opportunity to
immediately signal that request.

Furthermore, as intel_contexts may be immediately released during
request retirement, in order to be entirely sure that
intel_engine_breadcrumbs_irq may no longer dereference the intel_context
(ce->signals and ce->signal_link), we must wait for the irq spinlock.

In order to prevent the race, we use a bit in the fence.flags to signal
the transfer onto the signal list inside intel_engine_breadcrumbs_irq.
For simplicity, we use the DMA_FENCE_FLAG_SIGNALED_BIT as it then
quickly signals to any outside observer that the fence is indeed signaled.
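
As an illustration, the intended ordering inside the interrupt handler is
roughly the following (a simplified sketch of the hunk below, not the
literal code):

	/* under b->irq_lock, for each completed request on ce->signals */
	clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
	if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
		continue; /* already signaled elsewhere, e.g. on retirement */

	/* keep the request alive while we signal it outside the lock */
	i915_request_get(rq);
	list_add_tail(&rq->signal_link, &signal);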

Fixes: 52c0fdb25c7c ("drm/i915: Replace global breadcrumbs with per-context interrupt tracking")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/dma-buf/dma-fence.c                 |  1 +
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 58 +++++++++++++--------
 drivers/gpu/drm/i915/i915_request.c         |  1 +
 3 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 3aa8733f832a..9bf06042619a 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -29,6 +29,7 @@
 
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
+EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
 
 static DEFINE_SPINLOCK(dma_fence_stub_lock);
 static struct dma_fence dma_fence_stub;
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 3cbffd400b1b..4283224249d4 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -23,6 +23,7 @@
  */
 
 #include <linux/kthread.h>
+#include <trace/events/dma_fence.h>
 #include <uapi/linux/sched/types.h>
 
 #include "i915_drv.h"
@@ -83,6 +84,7 @@ static inline bool __request_completed(const struct i915_request *rq)
 void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	const ktime_t timestamp = ktime_get();
 	struct intel_context *ce, *cn;
 	struct list_head *pos, *next;
 	LIST_HEAD(signal);
@@ -104,6 +106,11 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
 
 			GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL,
 					     &rq->fence.flags));
+			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+
+			if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+					     &rq->fence.flags))
+				continue;
 
 			/*
 			 * Queue for execution after dropping the signaling
@@ -111,14 +118,6 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
 			 * more signalers to the same context or engine.
 			 */
 			i915_request_get(rq);
-
-			/*
-			 * We may race with direct invocation of
-			 * dma_fence_signal(), e.g. i915_request_retire(),
-			 * so we need to acquire our reference to the request
-			 * before we cancel the breadcrumb.
-			 */
-			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
 			list_add_tail(&rq->signal_link, &signal);
 		}
 
@@ -140,8 +139,21 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
 	list_for_each_safe(pos, next, &signal) {
 		struct i915_request *rq =
 			list_entry(pos, typeof(*rq), signal_link);
+		struct dma_fence_cb *cur, *tmp;
+
+		trace_dma_fence_signaled(&rq->fence);
+
+		rq->fence.timestamp = timestamp;
+		set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &rq->fence.flags);
+
+		spin_lock(&rq->lock);
+		list_for_each_entry_safe(cur, tmp, &rq->fence.cb_list, node) {
+			INIT_LIST_HEAD(&cur->node);
+			cur->func(&rq->fence, cur);
+		}
+		INIT_LIST_HEAD(&rq->fence.cb_list);
+		spin_unlock(&rq->lock);
 
-		dma_fence_signal(&rq->fence);
 		i915_request_put(rq);
 	}
 }
@@ -243,19 +255,17 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
 
 bool i915_request_enable_breadcrumb(struct i915_request *rq)
 {
-	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
-
-	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
-
-	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
-		return true;
+	lockdep_assert_held(&rq->lock);
+	lockdep_assert_irqs_disabled();
 
-	spin_lock(&b->irq_lock);
-	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags) &&
-	    !__request_completed(rq)) {
+	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
+		struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
 		struct intel_context *ce = rq->hw_context;
 		struct list_head *pos;
 
+		spin_lock(&b->irq_lock);
+		GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
+
 		__intel_breadcrumbs_arm_irq(b);
 
 		/*
@@ -284,8 +294,8 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
 			list_move_tail(&ce->signal_link, &b->signalers);
 
 		set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+		spin_unlock(&b->irq_lock);
 	}
-	spin_unlock(&b->irq_lock);
 
 	return !__request_completed(rq);
 }
@@ -294,9 +304,15 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
 {
 	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
 
-	if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
-		return;
+	lockdep_assert_held(&rq->lock);
+	lockdep_assert_irqs_disabled();
 
+	/*
+	 * We must wait for b->irq_lock so that we know the interrupt handler
+	 * has released its reference to the intel_context and has completed
+	 * the DMA_FENCE_FLAG_SIGNALED_BIT/I915_FENCE_FLAG_SIGNAL dance (if
+	 * required).
+	 */
 	spin_lock(&b->irq_lock);
 	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
 		struct intel_context *ce = rq->hw_context;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 5869c37a35e1..fa9037173f1d 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -417,6 +417,7 @@ void __i915_request_submit(struct i915_request *request)
 	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
 
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
+	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
 	    !i915_request_enable_breadcrumb(request))
 		intel_engine_queue_breadcrumbs(engine);
 
-- 
2.20.1


* [PATCH 02/45] drm/i915/gvt: Pin the per-engine GVT shadow contexts
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 03/45] drm/i915: Export intel_context_instance() Chris Wilson
                   ` (50 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

Our eventual goal is to rid request construction of struct_mutex, with
the short-term step of lifting the struct_mutex requirements into the
higher levels (i.e. the caller must ensure that the context is already
pinned into the GTT). In this patch, we pin GVT's shadow contexts upon
allocation and so keep them pinned into the GGTT for as long as the
virtual machine is alive, allowing us to use the simpler request
construction path safe in the knowledge that the hard work is already
done.
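
In rough outline (a simplified sketch of the scheduler.c changes below),
setup now pins one shadow intel_context per engine and teardown unpins
them again:

	/* intel_vgpu_setup_submission(): pin for the VM's lifetime */
	for_each_engine(engine, vgpu->gvt->dev_priv, i) {
		ce = intel_context_pin(ctx, engine);
		if (IS_ERR(ce)) {
			ret = PTR_ERR(ce);
			goto out_shadow_ctx;
		}
		s->shadow[i] = ce;
	}

	/* intel_vgpu_clean_submission(): drop the pins */
	for_each_engine(engine, vgpu->gvt->dev_priv, id)
		intel_context_unpin(s->shadow[id]);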

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/gvt.h          |   2 +-
 drivers/gpu/drm/i915/gvt/kvmgt.c        |   2 +-
 drivers/gpu/drm/i915/gvt/mmio_context.c |   3 +-
 drivers/gpu/drm/i915/gvt/scheduler.c    | 137 ++++++++++++------------
 4 files changed, 73 insertions(+), 71 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index f5a328b5290a..b54f2bdc13a4 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -149,9 +149,9 @@ struct intel_vgpu_submission_ops {
 struct intel_vgpu_submission {
 	struct intel_vgpu_execlist execlist[I915_NUM_ENGINES];
 	struct list_head workload_q_head[I915_NUM_ENGINES];
+	struct intel_context *shadow[I915_NUM_ENGINES];
 	struct kmem_cache *workloads;
 	atomic_t running_workload_num;
-	struct i915_gem_context *shadow_ctx;
 	union {
 		u64 i915_context_pml4;
 		u64 i915_context_pdps[GEN8_3LVL_PDPES];
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index a68addf95c23..144301b778df 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -1576,7 +1576,7 @@ hw_id_show(struct device *dev, struct device_attribute *attr,
 		struct intel_vgpu *vgpu = (struct intel_vgpu *)
 			mdev_get_drvdata(mdev);
 		return sprintf(buf, "%u\n",
-			       vgpu->submission.shadow_ctx->hw_id);
+			       vgpu->submission.shadow[0]->gem_context->hw_id);
 	}
 	return sprintf(buf, "\n");
 }
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index e7e14c842be4..b8823495022b 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -495,8 +495,7 @@ static void switch_mmio(struct intel_vgpu *pre,
 			 * itself.
 			 */
 			if (mmio->in_context &&
-			    !is_inhibit_context(intel_context_lookup(s->shadow_ctx,
-								     dev_priv->engine[ring_id])))
+			    !is_inhibit_context(s->shadow[ring_id]))
 				continue;
 
 			if (mmio->mask)
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 8998fa5ab198..40d9f549a0cd 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -36,6 +36,7 @@
 #include <linux/kthread.h>
 
 #include "i915_drv.h"
+#include "i915_gem_pm.h"
 #include "gvt.h"
 
 #define RING_CTX_OFF(x) \
@@ -277,18 +278,23 @@ static int shadow_context_status_change(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
-static void shadow_context_descriptor_update(struct intel_context *ce)
+static void
+shadow_context_descriptor_update(struct intel_context *ce,
+				 struct intel_vgpu_workload *workload)
 {
-	u64 desc = 0;
-
-	desc = ce->lrc_desc;
+	u64 desc = ce->lrc_desc;
 
-	/* Update bits 0-11 of the context descriptor which includes flags
+	/*
+	 * Update bits 0-11 of the context descriptor which includes flags
 	 * like GEN8_CTX_* cached in desc_template
 	 */
 	desc &= U64_MAX << 12;
 	desc |= ce->gem_context->desc_template & ((1ULL << 12) - 1);
 
+	desc &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT);
+	desc |= workload->ctx_desc.addressing_mode <<
+		GEN8_CTX_ADDRESSING_MODE_SHIFT;
+
 	ce->lrc_desc = desc;
 }
 
@@ -365,26 +371,22 @@ intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_vgpu_submission *s = &vgpu->submission;
-	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
-	struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id];
 	struct i915_request *rq;
-	int ret = 0;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
 	if (workload->req)
-		goto out;
+		return 0;
 
-	rq = i915_request_alloc(engine, shadow_ctx);
+	rq = i915_request_create(s->shadow[workload->ring_id]);
 	if (IS_ERR(rq)) {
 		gvt_vgpu_err("fail to allocate gem request\n");
-		ret = PTR_ERR(rq);
-		goto out;
+		return PTR_ERR(rq);
 	}
+
 	workload->req = i915_request_get(rq);
-out:
-	return ret;
+	return 0;
 }
 
 /**
@@ -399,10 +401,7 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_vgpu_submission *s = &vgpu->submission;
-	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
-	struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id];
-	struct intel_context *ce;
 	int ret;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
@@ -410,29 +409,13 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
 	if (workload->shadow)
 		return 0;
 
-	/* pin shadow context by gvt even the shadow context will be pinned
-	 * when i915 alloc request. That is because gvt will update the guest
-	 * context from shadow context when workload is completed, and at that
-	 * moment, i915 may already unpined the shadow context to make the
-	 * shadow_ctx pages invalid. So gvt need to pin itself. After update
-	 * the guest context, gvt can unpin the shadow_ctx safely.
-	 */
-	ce = intel_context_pin(shadow_ctx, engine);
-	if (IS_ERR(ce)) {
-		gvt_vgpu_err("fail to pin shadow context\n");
-		return PTR_ERR(ce);
-	}
-
-	shadow_ctx->desc_template &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT);
-	shadow_ctx->desc_template |= workload->ctx_desc.addressing_mode <<
-				    GEN8_CTX_ADDRESSING_MODE_SHIFT;
-
 	if (!test_and_set_bit(workload->ring_id, s->shadow_ctx_desc_updated))
-		shadow_context_descriptor_update(ce);
+		shadow_context_descriptor_update(s->shadow[workload->ring_id],
+						 workload);
 
 	ret = intel_gvt_scan_and_shadow_ringbuffer(workload);
 	if (ret)
-		goto err_unpin;
+		return ret;
 
 	if (workload->ring_id == RCS0 && workload->wa_ctx.indirect_ctx.size) {
 		ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
@@ -444,8 +427,6 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
 	return 0;
 err_shadow:
 	release_shadow_wa_ctx(&workload->wa_ctx);
-err_unpin:
-	intel_context_unpin(ce);
 	return ret;
 }
 
@@ -672,7 +653,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	struct intel_vgpu_submission *s = &vgpu->submission;
-	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
 	struct i915_request *rq;
 	int ring_id = workload->ring_id;
 	int ret;
@@ -683,7 +663,8 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 	mutex_lock(&vgpu->vgpu_lock);
 	mutex_lock(&dev_priv->drm.struct_mutex);
 
-	ret = set_context_ppgtt_from_shadow(workload, shadow_ctx);
+	ret = set_context_ppgtt_from_shadow(workload,
+					    s->shadow[ring_id]->gem_context);
 	if (ret < 0) {
 		gvt_vgpu_err("workload shadow ppgtt isn't ready\n");
 		goto err_req;
@@ -994,8 +975,6 @@ static int workload_thread(void *priv)
 				workload->ring_id, workload,
 				workload->vgpu->id);
 
-		intel_runtime_pm_get(gvt->dev_priv);
-
 		gvt_dbg_sched("ring id %d will dispatch workload %p\n",
 				workload->ring_id, workload);
 
@@ -1025,7 +1004,6 @@ static int workload_thread(void *priv)
 			intel_uncore_forcewake_put(&gvt->dev_priv->uncore,
 					FORCEWAKE_ALL);
 
-		intel_runtime_pm_put_unchecked(gvt->dev_priv);
 		if (ret && (vgpu_is_vm_unhealthy(ret)))
 			enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR);
 	}
@@ -1108,17 +1086,17 @@ int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt)
 }
 
 static void
-i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s)
+i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s,
+				struct i915_hw_ppgtt *ppgtt)
 {
-	struct i915_hw_ppgtt *i915_ppgtt = s->shadow_ctx->ppgtt;
 	int i;
 
-	if (i915_vm_is_4lvl(&i915_ppgtt->vm)) {
-		px_dma(&i915_ppgtt->pml4) = s->i915_context_pml4;
+	if (i915_vm_is_4lvl(&ppgtt->vm)) {
+		px_dma(&ppgtt->pml4) = s->i915_context_pml4;
 	} else {
 		for (i = 0; i < GEN8_3LVL_PDPES; i++)
-			px_dma(i915_ppgtt->pdp.page_directory[i]) =
-						s->i915_context_pdps[i];
+			px_dma(ppgtt->pdp.page_directory[i]) =
+				s->i915_context_pdps[i];
 	}
 }
 
@@ -1132,10 +1110,15 @@ i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s)
 void intel_vgpu_clean_submission(struct intel_vgpu *vgpu)
 {
 	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 
 	intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, 0);
-	i915_context_ppgtt_root_restore(s);
-	i915_gem_context_put(s->shadow_ctx);
+
+	i915_context_ppgtt_root_restore(s, s->shadow[0]->gem_context->ppgtt);
+	for_each_engine(engine, vgpu->gvt->dev_priv, id)
+		intel_context_unpin(s->shadow[id]);
+
 	kmem_cache_destroy(s->workloads);
 }
 
@@ -1161,17 +1144,17 @@ void intel_vgpu_reset_submission(struct intel_vgpu *vgpu,
 }
 
 static void
-i915_context_ppgtt_root_save(struct intel_vgpu_submission *s)
+i915_context_ppgtt_root_save(struct intel_vgpu_submission *s,
+			     struct i915_hw_ppgtt *ppgtt)
 {
-	struct i915_hw_ppgtt *i915_ppgtt = s->shadow_ctx->ppgtt;
 	int i;
 
-	if (i915_vm_is_4lvl(&i915_ppgtt->vm))
-		s->i915_context_pml4 = px_dma(&i915_ppgtt->pml4);
-	else {
+	if (i915_vm_is_4lvl(&ppgtt->vm)) {
+		s->i915_context_pml4 = px_dma(&ppgtt->pml4);
+	} else {
 		for (i = 0; i < GEN8_3LVL_PDPES; i++)
 			s->i915_context_pdps[i] =
-				px_dma(i915_ppgtt->pdp.page_directory[i]);
+				px_dma(ppgtt->pdp.page_directory[i]);
 	}
 }
 
@@ -1188,16 +1171,31 @@ i915_context_ppgtt_root_save(struct intel_vgpu_submission *s)
 int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 {
 	struct intel_vgpu_submission *s = &vgpu->submission;
-	enum intel_engine_id i;
 	struct intel_engine_cs *engine;
+	struct i915_gem_context *ctx;
+	enum intel_engine_id i;
 	int ret;
 
-	s->shadow_ctx = i915_gem_context_create_gvt(
-			&vgpu->gvt->dev_priv->drm);
-	if (IS_ERR(s->shadow_ctx))
-		return PTR_ERR(s->shadow_ctx);
+	ctx = i915_gem_context_create_gvt(&vgpu->gvt->dev_priv->drm);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	i915_context_ppgtt_root_save(s, ctx->ppgtt);
+
+	for_each_engine(engine, vgpu->gvt->dev_priv, i) {
+		struct intel_context *ce;
+
+		INIT_LIST_HEAD(&s->workload_q_head[i]);
+		s->shadow[i] = ERR_PTR(-EINVAL);
 
-	i915_context_ppgtt_root_save(s);
+		ce = intel_context_pin(ctx, engine);
+		if (IS_ERR(ce)) {
+			ret = PTR_ERR(ce);
+			goto out_shadow_ctx;
+		}
+
+		s->shadow[i] = ce;
+	}
 
 	bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES);
 
@@ -1213,16 +1211,21 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 		goto out_shadow_ctx;
 	}
 
-	for_each_engine(engine, vgpu->gvt->dev_priv, i)
-		INIT_LIST_HEAD(&s->workload_q_head[i]);
-
 	atomic_set(&s->running_workload_num, 0);
 	bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES);
 
+	i915_gem_context_put(ctx);
 	return 0;
 
 out_shadow_ctx:
-	i915_gem_context_put(s->shadow_ctx);
+	i915_context_ppgtt_root_restore(s, ctx->ppgtt);
+	for_each_engine(engine, vgpu->gvt->dev_priv, i) {
+		if (IS_ERR(s->shadow[i]))
+			break;
+
+		intel_context_unpin(s->shadow[i]);
+	}
+	i915_gem_context_put(ctx);
 	return ret;
 }
 
-- 
2.20.1


* [PATCH 03/45] drm/i915: Export intel_context_instance()
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
  2019-04-25  9:19 ` [PATCH 02/45] drm/i915/gvt: Pin the per-engine GVT shadow contexts Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 04/45] drm/i915/selftests: Use the real kernel context for sseu isolation tests Chris Wilson
                   ` (49 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

We want to pass an intel_context into intel_context_pin(), and that
requires us first to be able to look up the intel_context!
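
The new calling convention for users of a pinned context is therefore
(a sketch mirroring the pin_context() conversion below):

	ce = intel_context_instance(ctx, engine); /* lookup, takes a reference */
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	intel_context_put(ce); /* drop the lookup reference */
	if (err)
		return err;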

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c    | 37 +++++++++++-----------
 drivers/gpu/drm/i915/gt/intel_context.h    | 19 +++++++----
 drivers/gpu/drm/i915/gt/intel_engine_cs.c  |  8 ++++-
 drivers/gpu/drm/i915/gt/mock_engine.c      |  8 ++++-
 drivers/gpu/drm/i915/gvt/scheduler.c       |  7 +++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 +++++--
 drivers/gpu/drm/i915/i915_perf.c           | 21 ++++++++----
 drivers/gpu/drm/i915/i915_request.c        | 11 ++++++-
 8 files changed, 83 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 298e463ad082..8b386202b374 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -104,7 +104,7 @@ void __intel_context_remove(struct intel_context *ce)
 	spin_unlock(&ctx->hw_contexts_lock);
 }
 
-static struct intel_context *
+struct intel_context *
 intel_context_instance(struct i915_gem_context *ctx,
 		       struct intel_engine_cs *engine)
 {
@@ -112,7 +112,7 @@ intel_context_instance(struct i915_gem_context *ctx,
 
 	ce = intel_context_lookup(ctx, engine);
 	if (likely(ce))
-		return ce;
+		return intel_context_get(ce);
 
 	ce = intel_context_alloc();
 	if (!ce)
@@ -125,7 +125,7 @@ intel_context_instance(struct i915_gem_context *ctx,
 		intel_context_free(ce);
 
 	GEM_BUG_ON(intel_context_lookup(ctx, engine) != pos);
-	return pos;
+	return intel_context_get(pos);
 }
 
 struct intel_context *
@@ -139,30 +139,30 @@ intel_context_pin_lock(struct i915_gem_context *ctx,
 	if (IS_ERR(ce))
 		return ce;
 
-	if (mutex_lock_interruptible(&ce->pin_mutex))
+	if (mutex_lock_interruptible(&ce->pin_mutex)) {
+		intel_context_put(ce);
 		return ERR_PTR(-EINTR);
+	}
 
 	return ce;
 }
 
-struct intel_context *
-intel_context_pin(struct i915_gem_context *ctx,
-		  struct intel_engine_cs *engine)
+void intel_context_pin_unlock(struct intel_context *ce)
+	__releases(ce->pin_mutex)
 {
-	struct intel_context *ce;
-	int err;
-
-	ce = intel_context_instance(ctx, engine);
-	if (IS_ERR(ce))
-		return ce;
+	mutex_unlock(&ce->pin_mutex);
+	intel_context_put(ce);
+}
 
-	if (likely(atomic_inc_not_zero(&ce->pin_count)))
-		return ce;
+int __intel_context_do_pin(struct intel_context *ce)
+{
+	int err;
 
 	if (mutex_lock_interruptible(&ce->pin_mutex))
-		return ERR_PTR(-EINTR);
+		return -EINTR;
 
 	if (likely(!atomic_read(&ce->pin_count))) {
+		struct i915_gem_context *ctx = ce->gem_context;
 		intel_wakeref_t wakeref;
 
 		err = 0;
@@ -172,7 +172,6 @@ intel_context_pin(struct i915_gem_context *ctx,
 			goto err;
 
 		i915_gem_context_get(ctx);
-		GEM_BUG_ON(ce->gem_context != ctx);
 
 		mutex_lock(&ctx->mutex);
 		list_add(&ce->active_link, &ctx->active_engines);
@@ -186,11 +185,11 @@ intel_context_pin(struct i915_gem_context *ctx,
 	GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
 
 	mutex_unlock(&ce->pin_mutex);
-	return ce;
+	return 0;
 
 err:
 	mutex_unlock(&ce->pin_mutex);
-	return ERR_PTR(err);
+	return err;
 }
 
 void intel_context_unpin(struct intel_context *ce)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 60379eb37949..b9a574587eb3 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -49,11 +49,7 @@ intel_context_is_pinned(struct intel_context *ce)
 	return atomic_read(&ce->pin_count);
 }
 
-static inline void intel_context_pin_unlock(struct intel_context *ce)
-__releases(ce->pin_mutex)
-{
-	mutex_unlock(&ce->pin_mutex);
-}
+void intel_context_pin_unlock(struct intel_context *ce);
 
 struct intel_context *
 __intel_context_insert(struct i915_gem_context *ctx,
@@ -63,7 +59,18 @@ void
 __intel_context_remove(struct intel_context *ce);
 
 struct intel_context *
-intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine);
+intel_context_instance(struct i915_gem_context *ctx,
+		       struct intel_engine_cs *engine);
+
+int __intel_context_do_pin(struct intel_context *ce);
+
+static inline int intel_context_pin(struct intel_context *ce)
+{
+	if (likely(atomic_inc_not_zero(&ce->pin_count)))
+		return 0;
+
+	return __intel_context_do_pin(ce);
+}
 
 static inline void __intel_context_pin(struct intel_context *ce)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index cbebe812b317..5d26791f8d9e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -697,11 +697,17 @@ static int pin_context(struct i915_gem_context *ctx,
 		       struct intel_context **out)
 {
 	struct intel_context *ce;
+	int err;
 
-	ce = intel_context_pin(ctx, engine);
+	ce = intel_context_instance(ctx, engine);
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
+	err = intel_context_pin(ce);
+	intel_context_put(ce);
+	if (err)
+		return err;
+
 	*out = ce;
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index a97a0ab35703..21f413ff5b8e 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -239,6 +239,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 				    int id)
 {
 	struct mock_engine *engine;
+	int err;
 
 	GEM_BUG_ON(id >= I915_NUM_ENGINES);
 
@@ -278,10 +279,15 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&engine->hw_queue);
 
 	engine->base.kernel_context =
-		intel_context_pin(i915->kernel_context, &engine->base);
+		intel_context_instance(i915->kernel_context, &engine->base);
 	if (IS_ERR(engine->base.kernel_context))
 		goto err_breadcrumbs;
 
+	err = intel_context_pin(engine->base.kernel_context);
+	intel_context_put(engine->base.kernel_context);
+	if (err)
+		goto err_breadcrumbs;
+
 	return &engine->base;
 
 err_breadcrumbs:
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 40d9f549a0cd..606fc2713240 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -1188,12 +1188,17 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 		INIT_LIST_HEAD(&s->workload_q_head[i]);
 		s->shadow[i] = ERR_PTR(-EINVAL);
 
-		ce = intel_context_pin(ctx, engine);
+		ce = intel_context_instance(ctx, engine);
 		if (IS_ERR(ce)) {
 			ret = PTR_ERR(ce);
 			goto out_shadow_ctx;
 		}
 
+		ret = intel_context_pin(ce);
+		intel_context_put(ce);
+		if (ret)
+			goto out_shadow_ctx;
+
 		s->shadow[i] = ce;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 794af8edc6a2..166a33c0d3ed 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2100,14 +2100,19 @@ static int eb_pin_context(struct i915_execbuffer *eb,
 	if (err)
 		return err;
 
+	ce = intel_context_instance(eb->gem_context, engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
 	/*
 	 * Pinning the contexts may generate requests in order to acquire
 	 * GGTT space, so do this first before we reserve a seqno for
 	 * ourselves.
 	 */
-	ce = intel_context_pin(eb->gem_context, engine);
-	if (IS_ERR(ce))
-		return PTR_ERR(ce);
+	err = intel_context_pin(ce);
+	intel_context_put(ce);
+	if (err)
+		return err;
 
 	eb->engine = engine;
 	eb->context = ce;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 328a740e72cb..afaeabe5e531 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1205,11 +1205,17 @@ static struct intel_context *oa_pin_context(struct drm_i915_private *i915,
 {
 	struct intel_engine_cs *engine = i915->engine[RCS0];
 	struct intel_context *ce;
-	int ret;
+	int err;
 
-	ret = i915_mutex_lock_interruptible(&i915->drm);
-	if (ret)
-		return ERR_PTR(ret);
+	ce = intel_context_instance(ctx, engine);
+	if (IS_ERR(ce))
+		return ce;
+
+	err = i915_mutex_lock_interruptible(&i915->drm);
+	if (err) {
+		intel_context_put(ce);
+		return ERR_PTR(err);
+	}
 
 	/*
 	 * As the ID is the gtt offset of the context's vma we
@@ -1217,10 +1223,11 @@ static struct intel_context *oa_pin_context(struct drm_i915_private *i915,
 	 *
 	 * NB: implied RCS engine...
 	 */
-	ce = intel_context_pin(ctx, engine);
+	err = intel_context_pin(ce);
 	mutex_unlock(&i915->drm.struct_mutex);
-	if (IS_ERR(ce))
-		return ce;
+	intel_context_put(ce);
+	if (err)
+		return ERR_PTR(err);
 
 	i915->perf.oa.pinned_ctx = ce;
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index fa9037173f1d..f99e8d493d9c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -786,6 +786,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	struct drm_i915_private *i915 = engine->i915;
 	struct intel_context *ce;
 	struct i915_request *rq;
+	int err;
 
 	/*
 	 * Preempt contexts are reserved for exclusive use to inject a
@@ -799,13 +800,21 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 * GGTT space, so do this first before we reserve a seqno for
 	 * ourselves.
 	 */
-	ce = intel_context_pin(ctx, engine);
+	ce = intel_context_instance(ctx, engine);
 	if (IS_ERR(ce))
 		return ERR_CAST(ce);
 
+	err = intel_context_pin(ce);
+	if (err) {
+		rq = ERR_PTR(err);
+		goto err_put;
+	}
+
 	rq = i915_request_create(ce);
 	intel_context_unpin(ce);
 
+err_put:
+	intel_context_put(ce);
 	return rq;
 }
 
-- 
2.20.1


* [PATCH 04/45] drm/i915/selftests: Use the real kernel context for sseu isolation tests
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
  2019-04-25  9:19 ` [PATCH 02/45] drm/i915/gvt: Pin the per-engine GVT shadow contexts Chris Wilson
  2019-04-25  9:19 ` [PATCH 03/45] drm/i915: Export intel_context_instance() Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 05/45] drm/i915/selftests: Pass around intel_context for sseu Chris Wilson
                   ` (48 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

Simplify the setup slightly for the sseu selftests by using the actual
kernel_context.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 .../gpu/drm/i915/selftests/i915_gem_context.c   | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 71d896bbade2..807644ae6877 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -957,7 +957,6 @@ __sseu_finish(struct drm_i915_private *i915,
 	      const char *name,
 	      unsigned int flags,
 	      struct i915_gem_context *ctx,
-	      struct i915_gem_context *kctx,
 	      struct intel_engine_cs *engine,
 	      struct drm_i915_gem_object *obj,
 	      unsigned int expected,
@@ -979,7 +978,8 @@ __sseu_finish(struct drm_i915_private *i915,
 	if (ret)
 		goto out;
 
-	ret = __read_slice_count(i915, kctx, engine, obj, NULL, &rpcs);
+	ret = __read_slice_count(i915, i915->kernel_context, engine, obj,
+				 NULL, &rpcs);
 	ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");
 
 out:
@@ -1011,22 +1011,17 @@ __sseu_test(struct drm_i915_private *i915,
 	    struct intel_sseu sseu)
 {
 	struct igt_spinner *spin = NULL;
-	struct i915_gem_context *kctx;
 	int ret;
 
-	kctx = kernel_context(i915);
-	if (IS_ERR(kctx))
-		return PTR_ERR(kctx);
-
 	ret = __sseu_prepare(i915, name, flags, ctx, engine, &spin);
 	if (ret)
-		goto out_context;
+		return ret;
 
 	ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
 	if (ret)
 		goto out_spin;
 
-	ret = __sseu_finish(i915, name, flags, ctx, kctx, engine, obj,
+	ret = __sseu_finish(i915, name, flags, ctx, engine, obj,
 			    hweight32(sseu.slice_mask), spin);
 
 out_spin:
@@ -1035,10 +1030,6 @@ __sseu_test(struct drm_i915_private *i915,
 		igt_spinner_fini(spin);
 		kfree(spin);
 	}
-
-out_context:
-	kernel_context_close(kctx);
-
 	return ret;
 }
 
-- 
2.20.1


* [PATCH 05/45] drm/i915/selftests: Pass around intel_context for sseu
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (2 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 04/45] drm/i915/selftests: Use the real kernel context for sseu isolation tests Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 06/45] drm/i915: Pass intel_context to intel_context_pin_lock() Chris Wilson
                   ` (47 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

Combine the (i915_gem_context, intel_engine) pair into a single
parameter, the intel_context, for convenience and later simplification.
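
For example, __sseu_test() loses its separate ctx/engine parameters (a
sketch of the call-site change in the selftest below):

	/* before */
	ret = __sseu_test(i915, name, flags, ctx, engine, obj, sseu);

	/* after: ce bundles the (ctx, engine) pair */
	ret = __sseu_test(i915, name, flags, ce, obj, sseu);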

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 .../gpu/drm/i915/selftests/i915_gem_context.c | 74 +++++++++++--------
 1 file changed, 44 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 807644ae6877..8e2a94333559 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -755,8 +755,7 @@ static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
 
 static int
 emit_rpcs_query(struct drm_i915_gem_object *obj,
-		struct i915_gem_context *ctx,
-		struct intel_engine_cs *engine,
+		struct intel_context *ce,
 		struct i915_request **rq_out)
 {
 	struct i915_request *rq;
@@ -764,9 +763,9 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
 	struct i915_vma *vma;
 	int err;
 
-	GEM_BUG_ON(!intel_engine_can_store_dword(engine));
+	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
 
-	vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL);
+	vma = i915_vma_instance(obj, &ce->gem_context->ppgtt->vm, NULL);
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
@@ -784,13 +783,15 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
 		goto err_vma;
 	}
 
-	rq = i915_request_alloc(engine, ctx);
+	rq = i915_request_create(ce);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_batch;
 	}
 
-	err = engine->emit_bb_start(rq, batch->node.start, batch->node.size, 0);
+	err = rq->engine->emit_bb_start(rq,
+					batch->node.start, batch->node.size,
+					0);
 	if (err)
 		goto err_request;
 
@@ -834,8 +835,7 @@ static int
 __sseu_prepare(struct drm_i915_private *i915,
 	       const char *name,
 	       unsigned int flags,
-	       struct i915_gem_context *ctx,
-	       struct intel_engine_cs *engine,
+	       struct intel_context *ce,
 	       struct igt_spinner **spin)
 {
 	struct i915_request *rq;
@@ -853,7 +853,10 @@ __sseu_prepare(struct drm_i915_private *i915,
 	if (ret)
 		goto err_free;
 
-	rq = igt_spinner_create_request(*spin, ctx, engine, MI_NOOP);
+	rq = igt_spinner_create_request(*spin,
+					ce->gem_context,
+					ce->engine,
+					MI_NOOP);
 	if (IS_ERR(rq)) {
 		ret = PTR_ERR(rq);
 		goto err_fini;
@@ -880,8 +883,7 @@ __sseu_prepare(struct drm_i915_private *i915,
 
 static int
 __read_slice_count(struct drm_i915_private *i915,
-		   struct i915_gem_context *ctx,
-		   struct intel_engine_cs *engine,
+		   struct intel_context *ce,
 		   struct drm_i915_gem_object *obj,
 		   struct igt_spinner *spin,
 		   u32 *rpcs)
@@ -892,7 +894,7 @@ __read_slice_count(struct drm_i915_private *i915,
 	u32 *buf, val;
 	long ret;
 
-	ret = emit_rpcs_query(obj, ctx, engine, &rq);
+	ret = emit_rpcs_query(obj, ce, &rq);
 	if (ret)
 		return ret;
 
@@ -956,29 +958,28 @@ static int
 __sseu_finish(struct drm_i915_private *i915,
 	      const char *name,
 	      unsigned int flags,
-	      struct i915_gem_context *ctx,
-	      struct intel_engine_cs *engine,
+	      struct intel_context *ce,
 	      struct drm_i915_gem_object *obj,
 	      unsigned int expected,
 	      struct igt_spinner *spin)
 {
-	unsigned int slices = hweight32(engine->sseu.slice_mask);
+	unsigned int slices = hweight32(ce->engine->sseu.slice_mask);
 	u32 rpcs = 0;
 	int ret = 0;
 
 	if (flags & TEST_RESET) {
-		ret = i915_reset_engine(engine, "sseu");
+		ret = i915_reset_engine(ce->engine, "sseu");
 		if (ret)
 			goto out;
 	}
 
-	ret = __read_slice_count(i915, ctx, engine, obj,
+	ret = __read_slice_count(i915, ce, obj,
 				 flags & TEST_RESET ? NULL : spin, &rpcs);
 	ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
 	if (ret)
 		goto out;
 
-	ret = __read_slice_count(i915, i915->kernel_context, engine, obj,
+	ret = __read_slice_count(i915, ce->engine->kernel_context, obj,
 				 NULL, &rpcs);
 	ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");
 
@@ -993,7 +994,7 @@ __sseu_finish(struct drm_i915_private *i915,
 		if (ret)
 			return ret;
 
-		ret = __read_slice_count(i915, ctx, engine, obj, NULL, &rpcs);
+		ret = __read_slice_count(i915, ce, obj, NULL, &rpcs);
 		ret = __check_rpcs(name, rpcs, ret, expected,
 				   "Context", " after idle!");
 	}
@@ -1005,23 +1006,22 @@ static int
 __sseu_test(struct drm_i915_private *i915,
 	    const char *name,
 	    unsigned int flags,
-	    struct i915_gem_context *ctx,
-	    struct intel_engine_cs *engine,
+	    struct intel_context *ce,
 	    struct drm_i915_gem_object *obj,
 	    struct intel_sseu sseu)
 {
 	struct igt_spinner *spin = NULL;
 	int ret;
 
-	ret = __sseu_prepare(i915, name, flags, ctx, engine, &spin);
+	ret = __sseu_prepare(i915, name, flags, ce, &spin);
 	if (ret)
 		return ret;
 
-	ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
+	ret = __i915_gem_context_reconfigure_sseu(ce->gem_context, ce->engine, sseu);
 	if (ret)
 		goto out_spin;
 
-	ret = __sseu_finish(i915, name, flags, ctx, engine, obj,
+	ret = __sseu_finish(i915, name, flags, ce, obj,
 			    hweight32(sseu.slice_mask), spin);
 
 out_spin:
@@ -1042,6 +1042,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 	struct intel_sseu default_sseu = engine->sseu;
 	struct drm_i915_gem_object *obj;
 	struct i915_gem_context *ctx;
+	struct intel_context *ce;
 	struct intel_sseu pg_sseu;
 	intel_wakeref_t wakeref;
 	struct drm_file *file;
@@ -1093,23 +1094,33 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 
 	wakeref = intel_runtime_pm_get(i915);
 
+	ce = intel_context_instance(ctx, i915->engine[RCS0]);
+	if (IS_ERR(ce)) {
+		ret = PTR_ERR(ce);
+		goto out_rpm;
+	}
+
+	ret = intel_context_pin(ce);
+	if (ret)
+		goto out_context;
+
 	/* First set the default mask. */
-	ret = __sseu_test(i915, name, flags, ctx, engine, obj, default_sseu);
+	ret = __sseu_test(i915, name, flags, ce, obj, default_sseu);
 	if (ret)
 		goto out_fail;
 
 	/* Then set a power-gated configuration. */
-	ret = __sseu_test(i915, name, flags, ctx, engine, obj, pg_sseu);
+	ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu);
 	if (ret)
 		goto out_fail;
 
 	/* Back to defaults. */
-	ret = __sseu_test(i915, name, flags, ctx, engine, obj, default_sseu);
+	ret = __sseu_test(i915, name, flags, ce, obj, default_sseu);
 	if (ret)
 		goto out_fail;
 
 	/* One last power-gated configuration for the road. */
-	ret = __sseu_test(i915, name, flags, ctx, engine, obj, pg_sseu);
+	ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu);
 	if (ret)
 		goto out_fail;
 
@@ -1117,9 +1128,12 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		ret = -EIO;
 
-	i915_gem_object_put(obj);
-
+	intel_context_unpin(ce);
+out_context:
+	intel_context_put(ce);
+out_rpm:
 	intel_runtime_pm_put(i915, wakeref);
+	i915_gem_object_put(obj);
 
 out_unlock:
 	mutex_unlock(&i915->drm.struct_mutex);
-- 
2.20.1


* [PATCH 06/45] drm/i915: Pass intel_context to intel_context_pin_lock()
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (3 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 05/45] drm/i915/selftests: Pass around intel_context for sseu Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 07/45] drm/i915: Split engine setup/init into two phases Chris Wilson
                   ` (46 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

Move intel_context_instance() to the caller so that we can decouple
ourselves from one context instance per engine.

v2: Rename pin_lock() to lock_pinned(), hopefully that is clearer.
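
Callers now look up the intel_context themselves and then serialise with
the pin via the new helpers (a sketch based on the get_sseu() conversion
below):

	ce = lookup_user_engine(ctx, class, instance); /* intel_context_instance() */
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_lock_pinned(ce); /* serialises with set_sseu */
	if (err) {
		intel_context_put(ce);
		return err;
	}

	/* ... read the pinned state, e.g. ce->sseu ... */

	intel_context_unlock_pinned(ce);
	intel_context_put(ce);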

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c       | 26 ------
 drivers/gpu/drm/i915/gt/intel_context.h       | 34 +++++--
 drivers/gpu/drm/i915/i915_gem_context.c       | 92 +++++++++++--------
 .../gpu/drm/i915/selftests/i915_gem_context.c |  2 +-
 4 files changed, 82 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 8b386202b374..15ac99c5dd4a 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -128,32 +128,6 @@ intel_context_instance(struct i915_gem_context *ctx,
 	return intel_context_get(pos);
 }
 
-struct intel_context *
-intel_context_pin_lock(struct i915_gem_context *ctx,
-		       struct intel_engine_cs *engine)
-	__acquires(ce->pin_mutex)
-{
-	struct intel_context *ce;
-
-	ce = intel_context_instance(ctx, engine);
-	if (IS_ERR(ce))
-		return ce;
-
-	if (mutex_lock_interruptible(&ce->pin_mutex)) {
-		intel_context_put(ce);
-		return ERR_PTR(-EINTR);
-	}
-
-	return ce;
-}
-
-void intel_context_pin_unlock(struct intel_context *ce)
-	__releases(ce->pin_mutex)
-{
-	mutex_unlock(&ce->pin_mutex);
-	intel_context_put(ce);
-}
-
 int __intel_context_do_pin(struct intel_context *ce)
 {
 	int err;
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index b9a574587eb3..b746add6b71d 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -31,25 +31,45 @@ intel_context_lookup(struct i915_gem_context *ctx,
 		     struct intel_engine_cs *engine);
 
 /**
- * intel_context_pin_lock - Stablises the 'pinned' status of the HW context
- * @ctx - the parent GEM context
- * @engine - the target HW engine
+ * intel_context_lock_pinned - Stablises the 'pinned' status of the HW context
+ * @ce - the context
  *
  * Acquire a lock on the pinned status of the HW context, such that the context
  * can neither be bound to the GPU or unbound whilst the lock is held, i.e.
  * intel_context_is_pinned() remains stable.
  */
-struct intel_context *
-intel_context_pin_lock(struct i915_gem_context *ctx,
-		       struct intel_engine_cs *engine);
+static inline int intel_context_lock_pinned(struct intel_context *ce)
+	__acquires(ce->pin_mutex)
+{
+	return mutex_lock_interruptible(&ce->pin_mutex);
+}
 
+/**
+ * intel_context_is_pinned - Reports the 'pinned' status
+ * @ce - the context
+ *
+ * While in use by the GPU, the context, along with its ring and page
+ * tables is pinned into memory and the GTT.
+ *
+ * Returns: true if the context is currently pinned for use by the GPU.
+ */
 static inline bool
 intel_context_is_pinned(struct intel_context *ce)
 {
 	return atomic_read(&ce->pin_count);
 }
 
-void intel_context_pin_unlock(struct intel_context *ce);
+/**
+ * intel_context_unlock_pinned - Releases the earlier locking of 'pinned' status
+ * @ce - the context
+ *
+ * Releases the lock earlier acquired by intel_context_unlock_pinned().
+ */
+static inline void intel_context_unlock_pinned(struct intel_context *ce)
+	__releases(ce->pin_mutex)
+{
+	mutex_unlock(&ce->pin_mutex);
+}
 
 struct intel_context *
 __intel_context_insert(struct i915_gem_context *ctx,
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 05496ea7a123..d9db3fea151c 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -141,6 +141,18 @@ static void lut_close(struct i915_gem_context *ctx)
 	rcu_read_unlock();
 }
 
+static struct intel_context *
+lookup_user_engine(struct i915_gem_context *ctx, u16 class, u16 instance)
+{
+	struct intel_engine_cs *engine;
+
+	engine = intel_engine_lookup_user(ctx->i915, class, instance);
+	if (!engine)
+		return ERR_PTR(-EINVAL);
+
+	return intel_context_instance(ctx, engine);
+}
+
 static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
 {
 	unsigned int max;
@@ -1132,19 +1144,17 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 }
 
 static int
-__i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
-				    struct intel_engine_cs *engine,
-				    struct intel_sseu sseu)
+__intel_context_reconfigure_sseu(struct intel_context *ce,
+				 struct intel_sseu sseu)
 {
-	struct intel_context *ce;
-	int ret = 0;
+	int ret;
 
-	GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
-	GEM_BUG_ON(engine->id != RCS0);
+	GEM_BUG_ON(INTEL_GEN(ce->gem_context->i915) < 8);
+	GEM_BUG_ON(ce->engine->id != RCS0);
 
-	ce = intel_context_pin_lock(ctx, engine);
-	if (IS_ERR(ce))
-		return PTR_ERR(ce);
+	ret = intel_context_lock_pinned(ce);
+	if (ret)
+		return ret;
 
 	/* Nothing to do if unmodified. */
 	if (!memcmp(&ce->sseu, &sseu, sizeof(sseu)))
@@ -1155,24 +1165,23 @@ __i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
 		ce->sseu = sseu;
 
 unlock:
-	intel_context_pin_unlock(ce);
+	intel_context_unlock_pinned(ce);
 	return ret;
 }
 
 static int
-i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
-				  struct intel_engine_cs *engine,
-				  struct intel_sseu sseu)
+intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu)
 {
+	struct drm_i915_private *i915 = ce->gem_context->i915;
 	int ret;
 
-	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
 	if (ret)
 		return ret;
 
-	ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
+	ret = __intel_context_reconfigure_sseu(ce, sseu);
 
-	mutex_unlock(&ctx->i915->drm.struct_mutex);
+	mutex_unlock(&i915->drm.struct_mutex);
 
 	return ret;
 }
@@ -1280,7 +1289,7 @@ static int set_sseu(struct i915_gem_context *ctx,
 {
 	struct drm_i915_private *i915 = ctx->i915;
 	struct drm_i915_gem_context_param_sseu user_sseu;
-	struct intel_engine_cs *engine;
+	struct intel_context *ce;
 	struct intel_sseu sseu;
 	int ret;
 
@@ -1297,27 +1306,31 @@ static int set_sseu(struct i915_gem_context *ctx,
 	if (user_sseu.flags || user_sseu.rsvd)
 		return -EINVAL;
 
-	engine = intel_engine_lookup_user(i915,
-					  user_sseu.engine.engine_class,
-					  user_sseu.engine.engine_instance);
-	if (!engine)
-		return -EINVAL;
+	ce = lookup_user_engine(ctx,
+				user_sseu.engine.engine_class,
+				user_sseu.engine.engine_instance);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
 
 	/* Only render engine supports RPCS configuration. */
-	if (engine->class != RENDER_CLASS)
-		return -ENODEV;
+	if (ce->engine->class != RENDER_CLASS) {
+		ret = -ENODEV;
+		goto out_ce;
+	}
 
 	ret = user_to_context_sseu(i915, &user_sseu, &sseu);
 	if (ret)
-		return ret;
+		goto out_ce;
 
-	ret = i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
+	ret = intel_context_reconfigure_sseu(ce, sseu);
 	if (ret)
-		return ret;
+		goto out_ce;
 
 	args->size = sizeof(user_sseu);
 
-	return 0;
+out_ce:
+	intel_context_put(ce);
+	return ret;
 }
 
 static int ctx_setparam(struct drm_i915_file_private *fpriv,
@@ -1522,8 +1535,8 @@ static int get_sseu(struct i915_gem_context *ctx,
 		    struct drm_i915_gem_context_param *args)
 {
 	struct drm_i915_gem_context_param_sseu user_sseu;
-	struct intel_engine_cs *engine;
 	struct intel_context *ce;
+	int err;
 
 	if (args->size == 0)
 		goto out;
@@ -1537,22 +1550,25 @@ static int get_sseu(struct i915_gem_context *ctx,
 	if (user_sseu.flags || user_sseu.rsvd)
 		return -EINVAL;
 
-	engine = intel_engine_lookup_user(ctx->i915,
-					  user_sseu.engine.engine_class,
-					  user_sseu.engine.engine_instance);
-	if (!engine)
-		return -EINVAL;
-
-	ce = intel_context_pin_lock(ctx, engine); /* serialises with set_sseu */
+	ce = lookup_user_engine(ctx,
+				user_sseu.engine.engine_class,
+				user_sseu.engine.engine_instance);
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
+	err = intel_context_lock_pinned(ce); /* serialises with set_sseu */
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
 	user_sseu.slice_mask = ce->sseu.slice_mask;
 	user_sseu.subslice_mask = ce->sseu.subslice_mask;
 	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
 	user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
 
-	intel_context_pin_unlock(ce);
+	intel_context_unlock_pinned(ce);
+	intel_context_put(ce);
 
 	if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
 			 sizeof(user_sseu)))
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 8e2a94333559..214d1fd2f4dc 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -1017,7 +1017,7 @@ __sseu_test(struct drm_i915_private *i915,
 	if (ret)
 		return ret;
 
-	ret = __i915_gem_context_reconfigure_sseu(ce->gem_context, ce->engine, sseu);
+	ret = __intel_context_reconfigure_sseu(ce, sseu);
 	if (ret)
 		goto out_spin;
 
-- 
2.20.1


* [PATCH 07/45] drm/i915: Split engine setup/init into two phases
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (4 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 06/45] drm/i915: Pass intel_context to intel_context_pin_lock() Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 08/45] drm/i915: Switch back to an array of logical per-engine HW contexts Chris Wilson
                   ` (45 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we will require the engine vfuncs to be set up prior
to initialising the pinned kernel contexts, so split the vfunc setup
from the engine initialisation and call it earlier.
v2: s/setup_xcs/setup_common/ for intel_ring_submission_setup()
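
The intended driver load ordering becomes roughly (illustrative only; the
exact call site in i915_gem.c is not shown in full here):

	/* engine state and vfuncs, no hardware access required */
	err = intel_engines_setup(i915);
	if (err)
		return err;

	/* later: submission backend and hardware-dependent init */
	err = intel_engines_init(i915);
	if (err)
		return err;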

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine.h        |   8 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  99 ++++----
 drivers/gpu/drm/i915/gt/intel_lrc.c           |  74 ++----
 drivers/gpu/drm/i915/gt/intel_lrc.h           |   5 +-
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    | 232 +++++++++---------
 drivers/gpu/drm/i915/gt/intel_workarounds.c   |   3 +-
 drivers/gpu/drm/i915/gt/mock_engine.c         |  48 ++--
 drivers/gpu/drm/i915/gt/mock_engine.h         |   2 +
 drivers/gpu/drm/i915/i915_gem.c               |   6 +
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  12 +-
 10 files changed, 245 insertions(+), 244 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index a228dc1774d8..3e53f53bc52b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -362,14 +362,12 @@ __intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
 	return (head - tail - CACHELINE_BYTES) & (size - 1);
 }
 
-int intel_engine_setup_common(struct intel_engine_cs *engine);
+int intel_engines_setup(struct drm_i915_private *i915);
 int intel_engine_init_common(struct intel_engine_cs *engine);
 void intel_engine_cleanup_common(struct intel_engine_cs *engine);
 
-int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
-int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
-int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
-int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);
+int intel_ring_submission_setup(struct intel_engine_cs *engine);
+int intel_ring_submission_init(struct intel_engine_cs *engine);
 
 int intel_engine_stop_cs(struct intel_engine_cs *engine);
 void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 5d26791f8d9e..23da9eebe247 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -50,35 +50,24 @@
 
 struct engine_class_info {
 	const char *name;
-	int (*init_legacy)(struct intel_engine_cs *engine);
-	int (*init_execlists)(struct intel_engine_cs *engine);
-
 	u8 uabi_class;
 };
 
 static const struct engine_class_info intel_engine_classes[] = {
 	[RENDER_CLASS] = {
 		.name = "rcs",
-		.init_execlists = logical_render_ring_init,
-		.init_legacy = intel_init_render_ring_buffer,
 		.uabi_class = I915_ENGINE_CLASS_RENDER,
 	},
 	[COPY_ENGINE_CLASS] = {
 		.name = "bcs",
-		.init_execlists = logical_xcs_ring_init,
-		.init_legacy = intel_init_blt_ring_buffer,
 		.uabi_class = I915_ENGINE_CLASS_COPY,
 	},
 	[VIDEO_DECODE_CLASS] = {
 		.name = "vcs",
-		.init_execlists = logical_xcs_ring_init,
-		.init_legacy = intel_init_bsd_ring_buffer,
 		.uabi_class = I915_ENGINE_CLASS_VIDEO,
 	},
 	[VIDEO_ENHANCEMENT_CLASS] = {
 		.name = "vecs",
-		.init_execlists = logical_xcs_ring_init,
-		.init_legacy = intel_init_vebox_ring_buffer,
 		.uabi_class = I915_ENGINE_CLASS_VIDEO_ENHANCE,
 	},
 };
@@ -400,48 +389,39 @@ int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
 
 /**
  * intel_engines_init() - init the Engine Command Streamers
- * @dev_priv: i915 device private
+ * @i915: i915 device private
  *
  * Return: non-zero if the initialization failed.
  */
-int intel_engines_init(struct drm_i915_private *dev_priv)
+int intel_engines_init(struct drm_i915_private *i915)
 {
+	int (*init)(struct intel_engine_cs *engine);
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id, err_id;
 	int err;
 
-	for_each_engine(engine, dev_priv, id) {
-		const struct engine_class_info *class_info =
-			&intel_engine_classes[engine->class];
-		int (*init)(struct intel_engine_cs *engine);
-
-		if (HAS_EXECLISTS(dev_priv))
-			init = class_info->init_execlists;
-		else
-			init = class_info->init_legacy;
+	if (HAS_EXECLISTS(i915))
+		init = intel_execlists_submission_init;
+	else
+		init = intel_ring_submission_init;
 
-		err = -EINVAL;
+	for_each_engine(engine, i915, id) {
 		err_id = id;
 
-		if (GEM_DEBUG_WARN_ON(!init))
-			goto cleanup;
-
 		err = init(engine);
 		if (err)
 			goto cleanup;
-
-		GEM_BUG_ON(!engine->submit_request);
 	}
 
 	return 0;
 
 cleanup:
-	for_each_engine(engine, dev_priv, id) {
+	for_each_engine(engine, i915, id) {
 		if (id >= err_id) {
 			kfree(engine);
-			dev_priv->engine[id] = NULL;
+			i915->engine[id] = NULL;
 		} else {
-			dev_priv->gt.cleanup_engine(engine);
+			i915->gt.cleanup_engine(engine);
 		}
 	}
 	return err;
@@ -559,16 +539,7 @@ static int init_status_page(struct intel_engine_cs *engine)
 	return ret;
 }
 
-/**
- * intel_engines_setup_common - setup engine state not requiring hw access
- * @engine: Engine to setup.
- *
- * Initializes @engine@ structure members shared between legacy and execlists
- * submission modes which do not require hardware access.
- *
- * Typically done early in the submission mode specific engine setup stage.
- */
-int intel_engine_setup_common(struct intel_engine_cs *engine)
+static int intel_engine_setup_common(struct intel_engine_cs *engine)
 {
 	int err;
 
@@ -602,6 +573,52 @@ int intel_engine_setup_common(struct intel_engine_cs *engine)
 	return err;
 }
 
+/**
+ * intel_engines_setup - setup engine state not requiring hw access
+ * @i915: Device to setup.
+ *
+ * Initializes engine structure members shared between legacy and execlists
+ * submission modes which do not require hardware access.
+ *
+ * Typically done early in the submission mode specific engine setup stage.
+ */
+int intel_engines_setup(struct drm_i915_private *i915)
+{
+	int (*setup)(struct intel_engine_cs *engine);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err;
+
+	if (HAS_EXECLISTS(i915))
+		setup = intel_execlists_submission_setup;
+	else
+		setup = intel_ring_submission_setup;
+
+	for_each_engine(engine, i915, id) {
+		err = intel_engine_setup_common(engine);
+		if (err)
+			goto cleanup;
+
+		err = setup(engine);
+		if (err)
+			goto cleanup;
+
+		GEM_BUG_ON(!engine->cops);
+	}
+
+	return 0;
+
+cleanup:
+	for_each_engine(engine, i915, id) {
+		if (engine->cops)
+			i915->gt.cleanup_engine(engine);
+		else
+			kfree(engine);
+		i915->engine[id] = NULL;
+	}
+	return err;
+}
+
 void intel_engines_set_scheduler_caps(struct drm_i915_private *i915)
 {
 	static const struct {
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index d17c08e26935..01f58a152a9e 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1697,8 +1697,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
 	unsigned int i;
 	int ret;
 
-	if (GEM_DEBUG_WARN_ON(engine->id != RCS0))
-		return -EINVAL;
+	if (engine->class != RENDER_CLASS)
+		return 0;
 
 	switch (INTEL_GEN(engine->i915)) {
 	case 11:
@@ -2444,15 +2444,8 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
 	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
 }
 
-static int
-logical_ring_setup(struct intel_engine_cs *engine)
+int intel_execlists_submission_setup(struct intel_engine_cs *engine)
 {
-	int err;
-
-	err = intel_engine_setup_common(engine);
-	if (err)
-		return err;
-
 	/* Intentionally left blank. */
 	engine->buffer = NULL;
 
@@ -2462,10 +2455,16 @@ logical_ring_setup(struct intel_engine_cs *engine)
 	logical_ring_default_vfuncs(engine);
 	logical_ring_default_irqs(engine);
 
+	if (engine->class == RENDER_CLASS) {
+		engine->init_context = gen8_init_rcs_context;
+		engine->emit_flush = gen8_emit_flush_render;
+		engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
+	}
+
 	return 0;
 }
 
-static int logical_ring_init(struct intel_engine_cs *engine)
+int intel_execlists_submission_init(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *i915 = engine->i915;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -2477,6 +2476,15 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 		return ret;
 
 	intel_engine_init_workarounds(engine);
+	intel_engine_init_whitelist(engine);
+
+	if (intel_init_workaround_bb(engine))
+		/*
+		 * We continue even if we fail to initialize WA batch
+		 * because we only expect rare glitches but nothing
+		 * critical to prevent us from using GPU
+		 */
+		DRM_ERROR("WA batch buffer initialization failed\n");
 
 	if (HAS_LOGICAL_RING_ELSQ(i915)) {
 		execlists->submit_reg = i915->uncore.regs +
@@ -2509,50 +2517,6 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 	return 0;
 }
 
-int logical_render_ring_init(struct intel_engine_cs *engine)
-{
-	int ret;
-
-	ret = logical_ring_setup(engine);
-	if (ret)
-		return ret;
-
-	/* Override some for render ring. */
-	engine->init_context = gen8_init_rcs_context;
-	engine->emit_flush = gen8_emit_flush_render;
-	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
-
-	ret = logical_ring_init(engine);
-	if (ret)
-		return ret;
-
-	ret = intel_init_workaround_bb(engine);
-	if (ret) {
-		/*
-		 * We continue even if we fail to initialize WA batch
-		 * because we only expect rare glitches but nothing
-		 * critical to prevent us from using GPU
-		 */
-		DRM_ERROR("WA batch buffer initialization failed: %d\n",
-			  ret);
-	}
-
-	intel_engine_init_whitelist(engine);
-
-	return 0;
-}
-
-int logical_xcs_ring_init(struct intel_engine_cs *engine)
-{
-	int err;
-
-	err = logical_ring_setup(engine);
-	if (err)
-		return err;
-
-	return logical_ring_init(engine);
-}
-
 static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
 {
 	u32 indirect_ctx_offset;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 1a33ec74af8c..a0dc907a7249 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -66,8 +66,9 @@ enum {
 
 /* Logical Rings */
 void intel_logical_ring_cleanup(struct intel_engine_cs *engine);
-int logical_render_ring_init(struct intel_engine_cs *engine);
-int logical_xcs_ring_init(struct intel_engine_cs *engine);
+
+int intel_execlists_submission_setup(struct intel_engine_cs *engine);
+int intel_execlists_submission_init(struct intel_engine_cs *engine);
 
 /* Logical Ring Contexts */
 
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index b791da2711e0..5bcd3034a101 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1520,54 +1520,6 @@ static const struct intel_context_ops ring_context_ops = {
 	.destroy = ring_context_destroy,
 };
 
-static int intel_init_ring_buffer(struct intel_engine_cs *engine)
-{
-	struct i915_timeline *timeline;
-	struct intel_ring *ring;
-	int err;
-
-	err = intel_engine_setup_common(engine);
-	if (err)
-		return err;
-
-	timeline = i915_timeline_create(engine->i915, engine->status_page.vma);
-	if (IS_ERR(timeline)) {
-		err = PTR_ERR(timeline);
-		goto err;
-	}
-	GEM_BUG_ON(timeline->has_initial_breadcrumb);
-
-	ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE);
-	i915_timeline_put(timeline);
-	if (IS_ERR(ring)) {
-		err = PTR_ERR(ring);
-		goto err;
-	}
-
-	err = intel_ring_pin(ring);
-	if (err)
-		goto err_ring;
-
-	GEM_BUG_ON(engine->buffer);
-	engine->buffer = ring;
-
-	err = intel_engine_init_common(engine);
-	if (err)
-		goto err_unpin;
-
-	GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma);
-
-	return 0;
-
-err_unpin:
-	intel_ring_unpin(ring);
-err_ring:
-	intel_ring_put(ring);
-err:
-	intel_engine_cleanup_common(engine);
-	return err;
-}
-
 void intel_engine_cleanup(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
@@ -2163,24 +2115,6 @@ static int gen6_ring_flush(struct i915_request *rq, u32 mode)
 	return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
 }
 
-static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
-				struct intel_engine_cs *engine)
-{
-	if (INTEL_GEN(dev_priv) >= 6) {
-		engine->irq_enable = gen6_irq_enable;
-		engine->irq_disable = gen6_irq_disable;
-	} else if (INTEL_GEN(dev_priv) >= 5) {
-		engine->irq_enable = gen5_irq_enable;
-		engine->irq_disable = gen5_irq_disable;
-	} else if (INTEL_GEN(dev_priv) >= 3) {
-		engine->irq_enable = i9xx_irq_enable;
-		engine->irq_disable = i9xx_irq_disable;
-	} else {
-		engine->irq_enable = i8xx_irq_enable;
-		engine->irq_disable = i8xx_irq_disable;
-	}
-}
-
 static void i9xx_set_default_submission(struct intel_engine_cs *engine)
 {
 	engine->submit_request = i9xx_submit_request;
@@ -2196,13 +2130,33 @@ static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
 	engine->submit_request = gen6_bsd_submit_request;
 }
 
-static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
-				      struct intel_engine_cs *engine)
+static void setup_irq(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	if (INTEL_GEN(i915) >= 6) {
+		engine->irq_enable = gen6_irq_enable;
+		engine->irq_disable = gen6_irq_disable;
+	} else if (INTEL_GEN(i915) >= 5) {
+		engine->irq_enable = gen5_irq_enable;
+		engine->irq_disable = gen5_irq_disable;
+	} else if (INTEL_GEN(i915) >= 3) {
+		engine->irq_enable = i9xx_irq_enable;
+		engine->irq_disable = i9xx_irq_disable;
+	} else {
+		engine->irq_enable = i8xx_irq_enable;
+		engine->irq_disable = i8xx_irq_disable;
+	}
+}
+
+static void setup_common(struct intel_engine_cs *engine)
 {
+	struct drm_i915_private *i915 = engine->i915;
+
 	/* gen8+ are only supported with execlists */
-	GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8);
+	GEM_BUG_ON(INTEL_GEN(i915) >= 8);
 
-	intel_ring_init_irq(dev_priv, engine);
+	setup_irq(engine);
 
 	engine->resume = xcs_resume;
 	engine->reset.prepare = reset_prepare;
@@ -2218,117 +2172,96 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 	 * engine->emit_init_breadcrumb().
 	 */
 	engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb;
-	if (IS_GEN(dev_priv, 5))
+	if (IS_GEN(i915, 5))
 		engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
 
 	engine->set_default_submission = i9xx_set_default_submission;
 
-	if (INTEL_GEN(dev_priv) >= 6)
+	if (INTEL_GEN(i915) >= 6)
 		engine->emit_bb_start = gen6_emit_bb_start;
-	else if (INTEL_GEN(dev_priv) >= 4)
+	else if (INTEL_GEN(i915) >= 4)
 		engine->emit_bb_start = i965_emit_bb_start;
-	else if (IS_I830(dev_priv) || IS_I845G(dev_priv))
+	else if (IS_I830(i915) || IS_I845G(i915))
 		engine->emit_bb_start = i830_emit_bb_start;
 	else
 		engine->emit_bb_start = i915_emit_bb_start;
 }
 
-int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
+static void setup_rcs(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	intel_ring_default_vfuncs(dev_priv, engine);
+	struct drm_i915_private *i915 = engine->i915;
 
-	if (HAS_L3_DPF(dev_priv))
+	if (HAS_L3_DPF(i915))
 		engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
 
 	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
 
-	if (INTEL_GEN(dev_priv) >= 7) {
+	if (INTEL_GEN(i915) >= 7) {
 		engine->init_context = intel_rcs_ctx_init;
 		engine->emit_flush = gen7_render_ring_flush;
 		engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb;
-	} else if (IS_GEN(dev_priv, 6)) {
+	} else if (IS_GEN(i915, 6)) {
 		engine->init_context = intel_rcs_ctx_init;
 		engine->emit_flush = gen6_render_ring_flush;
 		engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb;
-	} else if (IS_GEN(dev_priv, 5)) {
+	} else if (IS_GEN(i915, 5)) {
 		engine->emit_flush = gen4_render_ring_flush;
 	} else {
-		if (INTEL_GEN(dev_priv) < 4)
+		if (INTEL_GEN(i915) < 4)
 			engine->emit_flush = gen2_render_ring_flush;
 		else
 			engine->emit_flush = gen4_render_ring_flush;
 		engine->irq_enable_mask = I915_USER_INTERRUPT;
 	}
 
-	if (IS_HASWELL(dev_priv))
+	if (IS_HASWELL(i915))
 		engine->emit_bb_start = hsw_emit_bb_start;
 
 	engine->resume = rcs_resume;
-
-	ret = intel_init_ring_buffer(engine);
-	if (ret)
-		return ret;
-
-	return 0;
 }
 
-int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
+static void setup_vcs(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	intel_ring_default_vfuncs(dev_priv, engine);
+	struct drm_i915_private *i915 = engine->i915;
 
-	if (INTEL_GEN(dev_priv) >= 6) {
+	if (INTEL_GEN(i915) >= 6) {
 		/* gen6 bsd needs a special wa for tail updates */
-		if (IS_GEN(dev_priv, 6))
+		if (IS_GEN(i915, 6))
 			engine->set_default_submission = gen6_bsd_set_default_submission;
 		engine->emit_flush = gen6_bsd_ring_flush;
 		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
 
-		if (IS_GEN(dev_priv, 6))
+		if (IS_GEN(i915, 6))
 			engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
 		else
 			engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
 	} else {
 		engine->emit_flush = bsd_ring_flush;
-		if (IS_GEN(dev_priv, 5))
+		if (IS_GEN(i915, 5))
 			engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
 		else
 			engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
 	}
-
-	return intel_init_ring_buffer(engine);
 }
 
-int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
+static void setup_bcs(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
-
-	intel_ring_default_vfuncs(dev_priv, engine);
+	struct drm_i915_private *i915 = engine->i915;
 
 	engine->emit_flush = gen6_ring_flush;
 	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
 
-	if (IS_GEN(dev_priv, 6))
+	if (IS_GEN(i915, 6))
 		engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
 	else
 		engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
-
-	return intel_init_ring_buffer(engine);
 }
 
-int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
+static void setup_vecs(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	GEM_BUG_ON(INTEL_GEN(dev_priv) < 7);
+	struct drm_i915_private *i915 = engine->i915;
 
-	intel_ring_default_vfuncs(dev_priv, engine);
+	GEM_BUG_ON(INTEL_GEN(i915) < 7);
 
 	engine->emit_flush = gen6_ring_flush;
 	engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
@@ -2336,6 +2269,73 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
 	engine->irq_disable = hsw_vebox_irq_disable;
 
 	engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
+}
+
+int intel_ring_submission_setup(struct intel_engine_cs *engine)
+{
+	setup_common(engine);
+
+	switch (engine->class) {
+	case RENDER_CLASS:
+		setup_rcs(engine);
+		break;
+	case VIDEO_DECODE_CLASS:
+		setup_vcs(engine);
+		break;
+	case COPY_ENGINE_CLASS:
+		setup_bcs(engine);
+		break;
+	case VIDEO_ENHANCEMENT_CLASS:
+		setup_vecs(engine);
+		break;
+	default:
+		MISSING_CASE(engine->class);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+int intel_ring_submission_init(struct intel_engine_cs *engine)
+{
+	struct i915_timeline *timeline;
+	struct intel_ring *ring;
+	int err;
 
-	return intel_init_ring_buffer(engine);
+	timeline = i915_timeline_create(engine->i915, engine->status_page.vma);
+	if (IS_ERR(timeline)) {
+		err = PTR_ERR(timeline);
+		goto err;
+	}
+	GEM_BUG_ON(timeline->has_initial_breadcrumb);
+
+	ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE);
+	i915_timeline_put(timeline);
+	if (IS_ERR(ring)) {
+		err = PTR_ERR(ring);
+		goto err;
+	}
+
+	err = intel_ring_pin(ring);
+	if (err)
+		goto err_ring;
+
+	GEM_BUG_ON(engine->buffer);
+	engine->buffer = ring;
+
+	err = intel_engine_init_common(engine);
+	if (err)
+		goto err_unpin;
+
+	GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma);
+
+	return 0;
+
+err_unpin:
+	intel_ring_unpin(ring);
+err_ring:
+	intel_ring_put(ring);
+err:
+	intel_engine_cleanup_common(engine);
+	return err;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 364696221fd7..5751446a4b0b 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1077,7 +1077,8 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine)
 	struct drm_i915_private *i915 = engine->i915;
 	struct i915_wa_list *w = &engine->whitelist;
 
-	GEM_BUG_ON(engine->id != RCS0);
+	if (engine->class != RENDER_CLASS)
+		return;
 
 	wa_init_start(w, "whitelist");
 
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 21f413ff5b8e..85cdbfe1d989 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -239,7 +239,6 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 				    int id)
 {
 	struct mock_engine *engine;
-	int err;
 
 	GEM_BUG_ON(id >= I915_NUM_ENGINES);
 
@@ -265,37 +264,44 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	engine->base.reset.finish = mock_reset_finish;
 	engine->base.cancel_requests = mock_cancel_requests;
 
-	if (i915_timeline_init(i915, &engine->base.timeline, NULL))
-		goto err_free;
-	i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE);
-
-	intel_engine_init_breadcrumbs(&engine->base);
-	intel_engine_init_execlists(&engine->base);
-	intel_engine_init__pm(&engine->base);
-
 	/* fake hw queue */
 	spin_lock_init(&engine->hw_lock);
 	timer_setup(&engine->hw_delay, hw_delay_complete, 0);
 	INIT_LIST_HEAD(&engine->hw_queue);
 
-	engine->base.kernel_context =
-		intel_context_instance(i915->kernel_context, &engine->base);
-	if (IS_ERR(engine->base.kernel_context))
+	return &engine->base;
+}
+
+int mock_engine_init(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	int err;
+
+	intel_engine_init_breadcrumbs(engine);
+	intel_engine_init_execlists(engine);
+	intel_engine_init__pm(engine);
+
+	if (i915_timeline_init(i915, &engine->timeline, NULL))
 		goto err_breadcrumbs;
+	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
+
+	engine->kernel_context =
+		intel_context_instance(i915->kernel_context, engine);
+	if (IS_ERR(engine->kernel_context))
+		goto err_timeline;
 
-	err = intel_context_pin(engine->base.kernel_context);
-	intel_context_put(engine->base.kernel_context);
+	err = intel_context_pin(engine->kernel_context);
+	intel_context_put(engine->kernel_context);
 	if (err)
-		goto err_breadcrumbs;
+		goto err_timeline;
 
-	return &engine->base;
+	return 0;
 
+err_timeline:
+	i915_timeline_fini(&engine->timeline);
 err_breadcrumbs:
-	intel_engine_fini_breadcrumbs(&engine->base);
-	i915_timeline_fini(&engine->base.timeline);
-err_free:
-	kfree(engine);
-	return NULL;
+	intel_engine_fini_breadcrumbs(engine);
+	return -ENOMEM;
 }
 
 void mock_engine_flush(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.h b/drivers/gpu/drm/i915/gt/mock_engine.h
index 44b35a85e9d1..3f9b698c49d2 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.h
+++ b/drivers/gpu/drm/i915/gt/mock_engine.h
@@ -42,6 +42,8 @@ struct mock_engine {
 struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 				    const char *name,
 				    int id);
+int mock_engine_init(struct intel_engine_cs *engine);
+
 void mock_engine_flush(struct intel_engine_cs *engine);
 void mock_engine_reset(struct intel_engine_cs *engine);
 void mock_engine_free(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 54f27cabae2a..08c66e76d712 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4517,6 +4517,12 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 		goto err_ggtt;
 	}
 
+	ret = intel_engines_setup(dev_priv);
+	if (ret) {
+		GEM_BUG_ON(ret == -EIO);
+		goto err_unlock;
+	}
+
 	ret = i915_gem_contexts_init(dev_priv);
 	if (ret) {
 		GEM_BUG_ON(ret == -EIO);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index c072424c6b7c..e4033d0576c4 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -209,12 +209,16 @@ struct drm_i915_private *mock_gem_device(void)
 	mock_init_ggtt(i915, &i915->ggtt);
 
 	mkwrite_device_info(i915)->engine_mask = BIT(0);
-	i915->kernel_context = mock_context(i915, NULL);
-	if (!i915->kernel_context)
-		goto err_unlock;
 
 	i915->engine[RCS0] = mock_engine(i915, "mock", RCS0);
 	if (!i915->engine[RCS0])
+		goto err_unlock;
+
+	i915->kernel_context = mock_context(i915, NULL);
+	if (!i915->kernel_context)
+		goto err_engine;
+
+	if (mock_engine_init(i915->engine[RCS0]))
 		goto err_context;
 
 	mutex_unlock(&i915->drm.struct_mutex);
@@ -225,6 +229,8 @@ struct drm_i915_private *mock_gem_device(void)
 
 err_context:
 	i915_gem_contexts_fini(i915);
+err_engine:
+	mock_engine_free(i915->engine[RCS0]);
 err_unlock:
 	mutex_unlock(&i915->drm.struct_mutex);
 	i915_timelines_fini(i915);
-- 
2.20.1


* [PATCH 08/45] drm/i915: Switch back to an array of logical per-engine HW contexts
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (5 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 07/45] drm/i915: Split engine setup/init into two phases Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 09/45] drm/i915: Remove intel_context.active_link Chris Wilson
                   ` (44 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

We switched to a tree of per-engine HW contexts to accommodate the
introduction of virtual engines. However, we plan to also support
multiple instances of the same engine within the GEM context, defeating
our use of the engine as a key for looking up the HW context. Just
allocate a logical per-engine instance and always use an index into
ctx->engines[]. Later on, this ctx->engines[] may be replaced by a
user-specified map.
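
As an illustration only (a hypothetical caller, not part of this patch),
a minimal sketch of how the new ctx->engines[] array and its iterator
are meant to be used, mirroring the context_barrier_task() change below:

static int submit_nop_on_all_engines(struct i915_gem_context *ctx)
{
	struct i915_gem_engines_iter it;
	struct intel_context *ce;
	int err = 0;

	/* The engines[] array is stable while ctx->engines_mutex is held. */
	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		struct i915_request *rq;

		/* Pins the context, allocates a request, then unpins. */
		rq = intel_context_create_request(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);
	}
	i915_gem_context_unlock_engines(ctx);

	return err;
}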

v2: Add for_each_gem_engine() helper to iterate within the engines lock
v3: intel_context_create_request() helper
v4: s/unsigned long/unsigned int/ 4 billion engines is quite enough.
v5: Push iterator locking to caller

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c       | 112 ++++--------------
 drivers/gpu/drm/i915/gt/intel_context.h       |  27 +----
 drivers/gpu/drm/i915/gt/intel_context_types.h |   2 -
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   2 +-
 drivers/gpu/drm/i915/gt/mock_engine.c         |   3 +-
 drivers/gpu/drm/i915/gvt/scheduler.c          |   2 +-
 drivers/gpu/drm/i915/i915_gem.c               |  24 ++--
 drivers/gpu/drm/i915/i915_gem_context.c       |  99 ++++++++++++++--
 drivers/gpu/drm/i915/i915_gem_context.h       |  58 +++++++++
 drivers/gpu/drm/i915/i915_gem_context_types.h |  40 ++++++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    |  70 +++++------
 drivers/gpu/drm/i915/i915_perf.c              |  80 +++++++------
 drivers/gpu/drm/i915/i915_request.c           |  15 +--
 drivers/gpu/drm/i915/intel_guc_submission.c   |  22 ++--
 .../gpu/drm/i915/selftests/i915_gem_context.c |   2 +-
 drivers/gpu/drm/i915/selftests/mock_context.c |  14 ++-
 16 files changed, 331 insertions(+), 241 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 15ac99c5dd4a..5e506e648454 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -17,7 +17,7 @@ static struct i915_global_context {
 	struct kmem_cache *slab_ce;
 } global;
 
-struct intel_context *intel_context_alloc(void)
+static struct intel_context *intel_context_alloc(void)
 {
 	return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL);
 }
@@ -28,104 +28,17 @@ void intel_context_free(struct intel_context *ce)
 }
 
 struct intel_context *
-intel_context_lookup(struct i915_gem_context *ctx,
+intel_context_create(struct i915_gem_context *ctx,
 		     struct intel_engine_cs *engine)
 {
-	struct intel_context *ce = NULL;
-	struct rb_node *p;
-
-	spin_lock(&ctx->hw_contexts_lock);
-	p = ctx->hw_contexts.rb_node;
-	while (p) {
-		struct intel_context *this =
-			rb_entry(p, struct intel_context, node);
-
-		if (this->engine == engine) {
-			GEM_BUG_ON(this->gem_context != ctx);
-			ce = this;
-			break;
-		}
-
-		if (this->engine < engine)
-			p = p->rb_right;
-		else
-			p = p->rb_left;
-	}
-	spin_unlock(&ctx->hw_contexts_lock);
-
-	return ce;
-}
-
-struct intel_context *
-__intel_context_insert(struct i915_gem_context *ctx,
-		       struct intel_engine_cs *engine,
-		       struct intel_context *ce)
-{
-	struct rb_node **p, *parent;
-	int err = 0;
-
-	spin_lock(&ctx->hw_contexts_lock);
-
-	parent = NULL;
-	p = &ctx->hw_contexts.rb_node;
-	while (*p) {
-		struct intel_context *this;
-
-		parent = *p;
-		this = rb_entry(parent, struct intel_context, node);
-
-		if (this->engine == engine) {
-			err = -EEXIST;
-			ce = this;
-			break;
-		}
-
-		if (this->engine < engine)
-			p = &parent->rb_right;
-		else
-			p = &parent->rb_left;
-	}
-	if (!err) {
-		rb_link_node(&ce->node, parent, p);
-		rb_insert_color(&ce->node, &ctx->hw_contexts);
-	}
-
-	spin_unlock(&ctx->hw_contexts_lock);
-
-	return ce;
-}
-
-void __intel_context_remove(struct intel_context *ce)
-{
-	struct i915_gem_context *ctx = ce->gem_context;
-
-	spin_lock(&ctx->hw_contexts_lock);
-	rb_erase(&ce->node, &ctx->hw_contexts);
-	spin_unlock(&ctx->hw_contexts_lock);
-}
-
-struct intel_context *
-intel_context_instance(struct i915_gem_context *ctx,
-		       struct intel_engine_cs *engine)
-{
-	struct intel_context *ce, *pos;
-
-	ce = intel_context_lookup(ctx, engine);
-	if (likely(ce))
-		return intel_context_get(ce);
+	struct intel_context *ce;
 
 	ce = intel_context_alloc();
 	if (!ce)
 		return ERR_PTR(-ENOMEM);
 
 	intel_context_init(ce, ctx, engine);
-
-	pos = __intel_context_insert(ctx, engine, ce);
-	if (unlikely(pos != ce)) /* Beaten! Use their HW context instead */
-		intel_context_free(ce);
-
-	GEM_BUG_ON(intel_context_lookup(ctx, engine) != pos);
-	return intel_context_get(pos);
+	return ce;
 }
 
 int __intel_context_do_pin(struct intel_context *ce)
@@ -204,6 +117,8 @@ intel_context_init(struct intel_context *ce,
 		   struct i915_gem_context *ctx,
 		   struct intel_engine_cs *engine)
 {
+	GEM_BUG_ON(!engine->cops);
+
 	kref_init(&ce->ref);
 
 	ce->gem_context = ctx;
@@ -254,3 +169,18 @@ void intel_context_exit_engine(struct intel_context *ce)
 {
 	intel_engine_pm_put(ce->engine);
 }
+
+struct i915_request *intel_context_create_request(struct intel_context *ce)
+{
+	struct i915_request *rq;
+	int err;
+
+	err = intel_context_pin(ce);
+	if (unlikely(err))
+		return ERR_PTR(err);
+
+	rq = i915_request_create(ce);
+	intel_context_unpin(ce);
+
+	return rq;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index b746add6b71d..63392c88cd98 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -12,24 +12,16 @@
 #include "intel_context_types.h"
 #include "intel_engine_types.h"
 
-struct intel_context *intel_context_alloc(void);
-void intel_context_free(struct intel_context *ce);
-
 void intel_context_init(struct intel_context *ce,
 			struct i915_gem_context *ctx,
 			struct intel_engine_cs *engine);
 
-/**
- * intel_context_lookup - Find the matching HW context for this (ctx, engine)
- * @ctx - the parent GEM context
- * @engine - the target HW engine
- *
- * May return NULL if the HW context hasn't been instantiated (i.e. unused).
- */
 struct intel_context *
-intel_context_lookup(struct i915_gem_context *ctx,
+intel_context_create(struct i915_gem_context *ctx,
 		     struct intel_engine_cs *engine);
 
+void intel_context_free(struct intel_context *ce);
+
 /**
  * intel_context_lock_pinned - Stablises the 'pinned' status of the HW context
  * @ce - the context
@@ -71,17 +63,6 @@ static inline void intel_context_unlock_pinned(struct intel_context *ce)
 	mutex_unlock(&ce->pin_mutex);
 }
 
-struct intel_context *
-__intel_context_insert(struct i915_gem_context *ctx,
-		       struct intel_engine_cs *engine,
-		       struct intel_context *ce);
-void
-__intel_context_remove(struct intel_context *ce);
-
-struct intel_context *
-intel_context_instance(struct i915_gem_context *ctx,
-		       struct intel_engine_cs *engine);
-
 int __intel_context_do_pin(struct intel_context *ce);
 
 static inline int intel_context_pin(struct intel_context *ce)
@@ -144,4 +125,6 @@ static inline void intel_context_timeline_unlock(struct intel_context *ce)
 	mutex_unlock(&ce->ring->timeline->mutex);
 }
 
+struct i915_request *intel_context_create_request(struct intel_context *ce);
+
 #endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index f02d27734e3b..3579c2708321 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -10,7 +10,6 @@
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
-#include <linux/rbtree.h>
 #include <linux/types.h>
 
 #include "i915_active_types.h"
@@ -61,7 +60,6 @@ struct intel_context {
 	struct i915_active_request active_tracker;
 
 	const struct intel_context_ops *ops;
-	struct rb_node node;
 
 	/** sseu: Control eu/slice partitioning */
 	struct intel_sseu sseu;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 23da9eebe247..4e30f3720eb7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -716,7 +716,7 @@ static int pin_context(struct i915_gem_context *ctx,
 	struct intel_context *ce;
 	int err;
 
-	ce = intel_context_instance(ctx, engine);
+	ce = i915_gem_context_get_engine(ctx, engine->id);
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 85cdbfe1d989..2941916b37bf 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -23,6 +23,7 @@
  */
 
 #include "i915_drv.h"
+#include "i915_gem_context.h"
 #include "intel_context.h"
 #include "intel_engine_pm.h"
 
@@ -286,7 +287,7 @@ int mock_engine_init(struct intel_engine_cs *engine)
 	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
 
 	engine->kernel_context =
-		intel_context_instance(i915->kernel_context, engine);
+		i915_gem_context_get_engine(i915->kernel_context, engine->id);
 	if (IS_ERR(engine->kernel_context))
 		goto err_timeline;
 
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 606fc2713240..8b6574e1b495 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -1188,7 +1188,7 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 		INIT_LIST_HEAD(&s->workload_q_head[i]);
 		s->shadow[i] = ERR_PTR(-EINVAL);
 
-		ce = intel_context_instance(ctx, engine);
+		ce = i915_gem_context_get_engine(ctx, i);
 		if (IS_ERR(ce)) {
 			ret = PTR_ERR(ce);
 			goto out_shadow_ctx;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 08c66e76d712..4c1793b1012e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4289,8 +4289,9 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv)
 
 static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 {
-	struct i915_gem_context *ctx;
 	struct intel_engine_cs *engine;
+	struct i915_gem_context *ctx;
+	struct i915_gem_engines *e;
 	enum intel_engine_id id;
 	int err = 0;
 
@@ -4307,18 +4308,21 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
+	e = i915_gem_context_lock_engines(ctx);
+
 	for_each_engine(engine, i915, id) {
+		struct intel_context *ce = e->engines[id];
 		struct i915_request *rq;
 
-		rq = i915_request_alloc(engine, ctx);
+		rq = intel_context_create_request(ce);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
-			goto out_ctx;
+			goto err_active;
 		}
 
 		err = 0;
-		if (engine->init_context)
-			err = engine->init_context(rq);
+		if (rq->engine->init_context)
+			err = rq->engine->init_context(rq);
 
 		i915_request_add(rq);
 		if (err)
@@ -4332,15 +4336,10 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 	}
 
 	for_each_engine(engine, i915, id) {
-		struct intel_context *ce;
-		struct i915_vma *state;
+		struct intel_context *ce = e->engines[id];
+		struct i915_vma *state = ce->state;
 		void *vaddr;
 
-		ce = intel_context_lookup(ctx, engine);
-		if (!ce)
-			continue;
-
-		state = ce->state;
 		if (!state)
 			continue;
 
@@ -4396,6 +4395,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 	}
 
 out_ctx:
+	i915_gem_context_unlock_engines(ctx);
 	i915_gem_context_set_closed(ctx);
 	i915_gem_context_put(ctx);
 	return err;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index d9db3fea151c..e83b790de6d2 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -150,7 +150,7 @@ lookup_user_engine(struct i915_gem_context *ctx, u16 class, u16 instance)
 	if (!engine)
 		return ERR_PTR(-EINVAL);
 
-	return intel_context_instance(ctx, engine);
+	return i915_gem_context_get_engine(ctx, engine->id);
 }
 
 static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
@@ -242,10 +242,51 @@ static void release_hw_id(struct i915_gem_context *ctx)
 	mutex_unlock(&i915->contexts.mutex);
 }
 
-static void i915_gem_context_free(struct i915_gem_context *ctx)
+static void __free_engines(struct i915_gem_engines *e, unsigned int count)
+{
+	while (count--) {
+		if (!e->engines[count])
+			continue;
+
+		intel_context_put(e->engines[count]);
+	}
+	kfree(e);
+}
+
+static void free_engines(struct i915_gem_engines *e)
+{
+	__free_engines(e, e->num_engines);
+}
+
+static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
 {
-	struct intel_context *it, *n;
+	struct intel_engine_cs *engine;
+	struct i915_gem_engines *e;
+	enum intel_engine_id id;
+
+	e = kzalloc(struct_size(e, engines, I915_NUM_ENGINES), GFP_KERNEL);
+	if (!e)
+		return ERR_PTR(-ENOMEM);
+
+	e->i915 = ctx->i915;
+	for_each_engine(engine, ctx->i915, id) {
+		struct intel_context *ce;
+
+		ce = intel_context_create(ctx, engine);
+		if (IS_ERR(ce)) {
+			__free_engines(e, id);
+			return ERR_CAST(ce);
+		}
+
+		e->engines[id] = ce;
+	}
+	e->num_engines = id;
+
+	return e;
+}
 
+static void i915_gem_context_free(struct i915_gem_context *ctx)
+{
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 	GEM_BUG_ON(!list_empty(&ctx->active_engines));
@@ -253,8 +294,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	release_hw_id(ctx);
 	i915_ppgtt_put(ctx->ppgtt);
 
-	rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
-		intel_context_put(it);
+	free_engines(rcu_access_pointer(ctx->engines));
+	mutex_destroy(&ctx->engines_mutex);
 
 	if (ctx->timeline)
 		i915_timeline_put(ctx->timeline);
@@ -363,6 +404,8 @@ static struct i915_gem_context *
 __create_context(struct drm_i915_private *dev_priv)
 {
 	struct i915_gem_context *ctx;
+	struct i915_gem_engines *e;
+	int err;
 	int i;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -376,8 +419,13 @@ __create_context(struct drm_i915_private *dev_priv)
 	INIT_LIST_HEAD(&ctx->active_engines);
 	mutex_init(&ctx->mutex);
 
-	ctx->hw_contexts = RB_ROOT;
-	spin_lock_init(&ctx->hw_contexts_lock);
+	mutex_init(&ctx->engines_mutex);
+	e = default_engines(ctx);
+	if (IS_ERR(e)) {
+		err = PTR_ERR(e);
+		goto err_free;
+	}
+	RCU_INIT_POINTER(ctx->engines, e);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
 	INIT_LIST_HEAD(&ctx->handles_list);
@@ -399,6 +447,10 @@ __create_context(struct drm_i915_private *dev_priv)
 		ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
 
 	return ctx;
+
+err_free:
+	kfree(ctx);
+	return ERR_PTR(err);
 }
 
 static struct i915_hw_ppgtt *
@@ -857,7 +909,8 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 {
 	struct drm_i915_private *i915 = ctx->i915;
 	struct context_barrier_task *cb;
-	struct intel_context *ce, *next;
+	struct i915_gem_engines_iter it;
+	struct intel_context *ce;
 	int err = 0;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
@@ -870,20 +923,22 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 	i915_active_init(i915, &cb->base, cb_retire);
 	i915_active_acquire(&cb->base);
 
-	rbtree_postorder_for_each_entry_safe(ce, next, &ctx->hw_contexts, node) {
-		struct intel_engine_cs *engine = ce->engine;
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
 		struct i915_request *rq;
 
-		if (!(engine->mask & engines))
+		if (!(ce->engine->mask & engines))
+			continue;
+
+		if (!intel_context_is_pinned(ce))
 			continue;
 
 		if (I915_SELFTEST_ONLY(context_barrier_inject_fault &
-				       engine->mask)) {
+				       ce->engine->mask)) {
 			err = -ENXIO;
 			break;
 		}
 
-		rq = i915_request_alloc(engine, ctx);
+		rq = intel_context_create_request(ce);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			break;
@@ -899,6 +954,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 		if (err)
 			break;
 	}
+	i915_gem_context_unlock_engines(ctx);
 
 	cb->task = err ? NULL : task; /* caller needs to unwind instead */
 	cb->data = data;
@@ -1729,6 +1785,23 @@ int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
 	return err;
 }
 
+/* GEM context-engines iterator: for_each_gem_engine() */
+struct intel_context *
+i915_gem_engines_iter_next(struct i915_gem_engines_iter *it)
+{
+	const struct i915_gem_engines *e = it->engines;
+	struct intel_context *ctx;
+
+	do {
+		if (it->idx >= e->num_engines)
+			return NULL;
+
+		ctx = e->engines[it->idx++];
+	} while (!ctx);
+
+	return ctx;
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_context.c"
 #include "selftests/i915_gem_context.c"
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 5a8e080499fb..272e183ebc0c 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -176,6 +176,64 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx)
 	kref_put(&ctx->ref, i915_gem_context_release);
 }
 
+static inline struct i915_gem_engines *
+i915_gem_context_engines(struct i915_gem_context *ctx)
+{
+	return rcu_dereference_protected(ctx->engines,
+					 lockdep_is_held(&ctx->engines_mutex));
+}
+
+static inline struct i915_gem_engines *
+i915_gem_context_lock_engines(struct i915_gem_context *ctx)
+	__acquires(&ctx->engines_mutex)
+{
+	mutex_lock(&ctx->engines_mutex);
+	return i915_gem_context_engines(ctx);
+}
+
+static inline void
+i915_gem_context_unlock_engines(struct i915_gem_context *ctx)
+	__releases(&ctx->engines_mutex)
+{
+	mutex_unlock(&ctx->engines_mutex);
+}
+
+static inline struct intel_context *
+i915_gem_context_lookup_engine(struct i915_gem_context *ctx, unsigned int idx)
+{
+	return i915_gem_context_engines(ctx)->engines[idx];
+}
+
+static inline struct intel_context *
+i915_gem_context_get_engine(struct i915_gem_context *ctx, unsigned int idx)
+{
+	struct intel_context *ce = ERR_PTR(-EINVAL);
+
+	rcu_read_lock(); {
+		struct i915_gem_engines *e = rcu_dereference(ctx->engines);
+		if (likely(idx < e->num_engines && e->engines[idx]))
+			ce = intel_context_get(e->engines[idx]);
+	} rcu_read_unlock();
+
+	return ce;
+}
+
+static inline void
+i915_gem_engines_iter_init(struct i915_gem_engines_iter *it,
+			   struct i915_gem_engines *engines)
+{
+	GEM_BUG_ON(!engines);
+	it->engines = engines;
+	it->idx = 0;
+}
+
+struct intel_context *
+i915_gem_engines_iter_next(struct i915_gem_engines_iter *it);
+
+#define for_each_gem_engine(ce, engines, it) \
+	for (i915_gem_engines_iter_init(&(it), (engines)); \
+	     ((ce) = i915_gem_engines_iter_next(&(it)));)
+
 struct i915_lut_handle *i915_lut_handle_alloc(void);
 void i915_lut_handle_free(struct i915_lut_handle *lut);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
index d282a6ab3b9f..5f84618cf7db 100644
--- a/drivers/gpu/drm/i915/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
@@ -29,6 +29,18 @@ struct i915_hw_ppgtt;
 struct i915_timeline;
 struct intel_ring;
 
+struct i915_gem_engines {
+	struct rcu_work rcu;
+	struct drm_i915_private *i915;
+	unsigned int num_engines;
+	struct intel_context *engines[];
+};
+
+struct i915_gem_engines_iter {
+	unsigned int idx;
+	const struct i915_gem_engines *engines;
+};
+
 /**
  * struct i915_gem_context - client state
  *
@@ -42,6 +54,30 @@ struct i915_gem_context {
 	/** file_priv: owning file descriptor */
 	struct drm_i915_file_private *file_priv;
 
+	/**
+	 * @engines: User defined engines for this context
+	 *
+	 * Various uAPIs offer the ability to look up an
+	 * index from this array to select an engine to operate on.
+	 *
+	 * Multiple logically distinct instances of the same engine
+	 * may be defined in the array, as well as composite virtual
+	 * engines.
+	 *
+	 * Execbuf uses the I915_EXEC_RING_MASK as an index into this
+	 * array to select which HW context + engine to execute on. For
+	 * the default array, the user_ring_map[] is used to translate
+	 * the legacy uABI onto the appropriate index (e.g. both
+	 * I915_EXEC_DEFAULT and I915_EXEC_RENDER select the same
+	 * context, and I915_EXEC_BSD is weird). For a user-defined
+	 * array, execbuf uses I915_EXEC_RING_MASK as a plain index.
+	 *
+	 * User defined by I915_CONTEXT_PARAM_ENGINE (when the
+	 * CONTEXT_USER_ENGINES flag is set).
+	 */
+	struct i915_gem_engines __rcu *engines;
+	struct mutex engines_mutex; /* guards writes to engines */
+
 	struct i915_timeline *timeline;
 
 	/**
@@ -134,10 +170,6 @@ struct i915_gem_context {
 
 	struct i915_sched_attr sched;
 
-	/** hw_contexts: per-engine logical HW state */
-	struct rb_root hw_contexts;
-	spinlock_t hw_contexts_lock;
-
 	/** ring_size: size for allocating the per-engine ring buffer */
 	u32 ring_size;
 	/** desc_template: invariant fields for the HW context descriptor */
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 166a33c0d3ed..679f7c1561ba 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2076,9 +2076,7 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
 	return file_priv->bsd_engine;
 }
 
-#define I915_USER_RINGS (4)
-
-static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
+static const enum intel_engine_id user_ring_map[] = {
 	[I915_EXEC_DEFAULT]	= RCS0,
 	[I915_EXEC_RENDER]	= RCS0,
 	[I915_EXEC_BLT]		= BCS0,
@@ -2086,10 +2084,8 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
 	[I915_EXEC_VEBOX]	= VECS0
 };
 
-static int eb_pin_context(struct i915_execbuffer *eb,
-			  struct intel_engine_cs *engine)
+static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
 {
-	struct intel_context *ce;
 	int err;
 
 	/*
@@ -2100,21 +2096,16 @@ static int eb_pin_context(struct i915_execbuffer *eb,
 	if (err)
 		return err;
 
-	ce = intel_context_instance(eb->gem_context, engine);
-	if (IS_ERR(ce))
-		return PTR_ERR(ce);
-
 	/*
 	 * Pinning the contexts may generate requests in order to acquire
 	 * GGTT space, so do this first before we reserve a seqno for
 	 * ourselves.
 	 */
 	err = intel_context_pin(ce);
-	intel_context_put(ce);
 	if (err)
 		return err;
 
-	eb->engine = engine;
+	eb->engine = ce->engine;
 	eb->context = ce;
 	return 0;
 }
@@ -2124,25 +2115,19 @@ static void eb_unpin_context(struct i915_execbuffer *eb)
 	intel_context_unpin(eb->context);
 }
 
-static int
-eb_select_engine(struct i915_execbuffer *eb,
-		 struct drm_file *file,
-		 struct drm_i915_gem_execbuffer2 *args)
+static unsigned int
+eb_select_legacy_ring(struct i915_execbuffer *eb,
+		      struct drm_file *file,
+		      struct drm_i915_gem_execbuffer2 *args)
 {
 	struct drm_i915_private *i915 = eb->i915;
 	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
-	struct intel_engine_cs *engine;
-
-	if (user_ring_id > I915_USER_RINGS) {
-		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
-		return -EINVAL;
-	}
 
-	if ((user_ring_id != I915_EXEC_BSD) &&
-	    ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
+	if (user_ring_id != I915_EXEC_BSD &&
+	    (args->flags & I915_EXEC_BSD_MASK)) {
 		DRM_DEBUG("execbuf with non bsd ring but with invalid "
 			  "bsd dispatch flags: %d\n", (int)(args->flags));
-		return -EINVAL;
+		return -1;
 	}
 
 	if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(i915, VCS1)) {
@@ -2157,20 +2142,39 @@ eb_select_engine(struct i915_execbuffer *eb,
 		} else {
 			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
 				  bsd_idx);
-			return -EINVAL;
+			return -1;
 		}
 
-		engine = i915->engine[_VCS(bsd_idx)];
-	} else {
-		engine = i915->engine[user_ring_map[user_ring_id]];
+		return _VCS(bsd_idx);
 	}
 
-	if (!engine) {
-		DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
-		return -EINVAL;
+	if (user_ring_id >= ARRAY_SIZE(user_ring_map)) {
+		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
+		return -1;
 	}
 
-	return eb_pin_context(eb, engine);
+	return user_ring_map[user_ring_id];
+}
+
+static int
+eb_select_engine(struct i915_execbuffer *eb,
+		 struct drm_file *file,
+		 struct drm_i915_gem_execbuffer2 *args)
+{
+	struct intel_context *ce;
+	unsigned int idx;
+	int err;
+
+	idx = eb_select_legacy_ring(eb, file, args);
+
+	ce = i915_gem_context_get_engine(eb->gem_context, idx);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = eb_pin_context(eb, ce);
+	intel_context_put(ce);
+
+	return err;
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index afaeabe5e531..c4995d5a16d2 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1203,35 +1203,35 @@ static int i915_oa_read(struct i915_perf_stream *stream,
 static struct intel_context *oa_pin_context(struct drm_i915_private *i915,
 					    struct i915_gem_context *ctx)
 {
-	struct intel_engine_cs *engine = i915->engine[RCS0];
+	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
 	int err;
 
-	ce = intel_context_instance(ctx, engine);
-	if (IS_ERR(ce))
-		return ce;
-
 	err = i915_mutex_lock_interruptible(&i915->drm);
-	if (err) {
-		intel_context_put(ce);
+	if (err)
 		return ERR_PTR(err);
+
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+		if (ce->engine->class != RENDER_CLASS)
+			continue;
+
+		/*
+		 * As the ID is the gtt offset of the context's vma we
+		 * pin the vma to ensure the ID remains fixed.
+		 */
+		err = intel_context_pin(ce);
+		if (err == 0) {
+			i915->perf.oa.pinned_ctx = ce;
+			break;
+		}
 	}
+	i915_gem_context_unlock_engines(ctx);
 
-	/*
-	 * As the ID is the gtt offset of the context's vma we
-	 * pin the vma to ensure the ID remains fixed.
-	 *
-	 * NB: implied RCS engine...
-	 */
-	err = intel_context_pin(ce);
 	mutex_unlock(&i915->drm.struct_mutex);
-	intel_context_put(ce);
 	if (err)
 		return ERR_PTR(err);
 
-	i915->perf.oa.pinned_ctx = ce;
-
-	return ce;
+	return i915->perf.oa.pinned_ctx;
 }
 
 /**
@@ -1717,7 +1717,6 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
 static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 				       const struct i915_oa_config *oa_config)
 {
-	struct intel_engine_cs *engine = dev_priv->engine[RCS0];
 	unsigned int map_type = i915_coherent_map_type(dev_priv);
 	struct i915_gem_context *ctx;
 	struct i915_request *rq;
@@ -1746,30 +1745,43 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 
 	/* Update all contexts now that we've stalled the submission. */
 	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
-		struct intel_context *ce = intel_context_lookup(ctx, engine);
-		u32 *regs;
-
-		/* OA settings will be set upon first use */
-		if (!ce || !ce->state)
-			continue;
-
-		regs = i915_gem_object_pin_map(ce->state->obj, map_type);
-		if (IS_ERR(regs))
-			return PTR_ERR(regs);
+		struct i915_gem_engines_iter it;
+		struct intel_context *ce;
+
+		for_each_gem_engine(ce,
+				    i915_gem_context_lock_engines(ctx),
+				    it) {
+			u32 *regs;
+
+			if (ce->engine->class != RENDER_CLASS)
+				continue;
+
+			/* OA settings will be set upon first use */
+			if (!ce->state)
+				continue;
+
+			regs = i915_gem_object_pin_map(ce->state->obj,
+						       map_type);
+			if (IS_ERR(regs)) {
+				i915_gem_context_unlock_engines(ctx);
+				return PTR_ERR(regs);
+			}
 
-		ce->state->obj->mm.dirty = true;
-		regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
+			ce->state->obj->mm.dirty = true;
+			regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
 
-		gen8_update_reg_state_unlocked(ce, regs, oa_config);
+			gen8_update_reg_state_unlocked(ce, regs, oa_config);
 
-		i915_gem_object_unpin_map(ce->state->obj);
+			i915_gem_object_unpin_map(ce->state->obj);
+		}
+		i915_gem_context_unlock_engines(ctx);
 	}
 
 	/*
 	 * Apply the configuration by doing one context restore of the edited
 	 * context image.
 	 */
-	rq = i915_request_create(engine->kernel_context);
+	rq = i915_request_create(dev_priv->engine[RCS0]->kernel_context);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index f99e8d493d9c..8b0d72a79861 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -786,7 +786,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	struct drm_i915_private *i915 = engine->i915;
 	struct intel_context *ce;
 	struct i915_request *rq;
-	int err;
 
 	/*
 	 * Preempt contexts are reserved for exclusive use to inject a
@@ -800,21 +799,13 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 * GGTT space, so do this first before we reserve a seqno for
 	 * ourselves.
 	 */
-	ce = intel_context_instance(ctx, engine);
+	ce = i915_gem_context_get_engine(ctx, engine->id);
 	if (IS_ERR(ce))
 		return ERR_CAST(ce);
 
-	err = intel_context_pin(ce);
-	if (err) {
-		rq = ERR_PTR(err);
-		goto err_put;
-	}
-
-	rq = i915_request_create(ce);
-	intel_context_unpin(ce);
-
-err_put:
+	rq = intel_context_create_request(ce);
 	intel_context_put(ce);
+
 	return rq;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 1b6d6403ee92..4c814344809c 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -364,11 +364,10 @@ static void guc_stage_desc_pool_destroy(struct intel_guc *guc)
 static void guc_stage_desc_init(struct intel_guc_client *client)
 {
 	struct intel_guc *guc = client->guc;
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-	struct intel_engine_cs *engine;
 	struct i915_gem_context *ctx = client->owner;
+	struct i915_gem_engines_iter it;
 	struct guc_stage_desc *desc;
-	unsigned int tmp;
+	struct intel_context *ce;
 	u32 gfx_addr;
 
 	desc = __get_stage_desc(client);
@@ -382,10 +381,11 @@ static void guc_stage_desc_init(struct intel_guc_client *client)
 	desc->priority = client->priority;
 	desc->db_id = client->doorbell_id;
 
-	for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
-		struct intel_context *ce = intel_context_lookup(ctx, engine);
-		u32 guc_engine_id = engine->guc_id;
-		struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id];
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+		struct guc_execlist_context *lrc;
+
+		if (!(ce->engine->mask & client->engines))
+			continue;
 
 		/* TODO: We have a design issue to be solved here. Only when we
 		 * receive the first batch, we know which engine is used by the
@@ -394,7 +394,7 @@ static void guc_stage_desc_init(struct intel_guc_client *client)
 		 * for now who owns a GuC client. But for future owner of GuC
 		 * client, need to make sure lrc is pinned prior to enter here.
 		 */
-		if (!ce || !ce->state)
+		if (!ce->state)
 			break;	/* XXX: continue? */
 
 		/*
@@ -404,6 +404,7 @@ static void guc_stage_desc_init(struct intel_guc_client *client)
 		 * Instead, the GuC uses the LRCA of the user mode context (see
 		 * guc_add_request below).
 		 */
+		lrc = &desc->lrc[ce->engine->guc_id];
 		lrc->context_desc = lower_32_bits(ce->lrc_desc);
 
 		/* The state page is after PPHWSP */
@@ -414,15 +415,16 @@ static void guc_stage_desc_init(struct intel_guc_client *client)
 		 * here. In proxy submission, it wants the stage id
 		 */
 		lrc->context_id = (client->stage_id << GUC_ELC_CTXID_OFFSET) |
-				(guc_engine_id << GUC_ELC_ENGINE_OFFSET);
+				(ce->engine->guc_id << GUC_ELC_ENGINE_OFFSET);
 
 		lrc->ring_begin = intel_guc_ggtt_offset(guc, ce->ring->vma);
 		lrc->ring_end = lrc->ring_begin + ce->ring->size - 1;
 		lrc->ring_next_free_location = lrc->ring_begin;
 		lrc->ring_current_tail_pointer_value = 0;
 
-		desc->engines_used |= (1 << guc_engine_id);
+		desc->engines_used |= BIT(ce->engine->guc_id);
 	}
+	i915_gem_context_unlock_engines(ctx);
 
 	DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n",
 			 client->engines, desc->engines_used);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 214d1fd2f4dc..7fd224a4ca4c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -1094,7 +1094,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 
 	wakeref = intel_runtime_pm_get(i915);
 
-	ce = intel_context_instance(ctx, i915->engine[RCS0]);
+	ce = i915_gem_context_get_engine(ctx, RCS0);
 	if (IS_ERR(ce)) {
 		ret = PTR_ERR(ce);
 		goto out_rpm;
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index 0426093bf1d9..71c750693585 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -30,6 +30,7 @@ mock_context(struct drm_i915_private *i915,
 	     const char *name)
 {
 	struct i915_gem_context *ctx;
+	struct i915_gem_engines *e;
 	int ret;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -40,8 +41,11 @@ mock_context(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&ctx->link);
 	ctx->i915 = i915;
 
-	ctx->hw_contexts = RB_ROOT;
-	spin_lock_init(&ctx->hw_contexts_lock);
+	mutex_init(&ctx->engines_mutex);
+	e = default_engines(ctx);
+	if (IS_ERR(e))
+		goto err_free;
+	RCU_INIT_POINTER(ctx->engines, e);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
 	INIT_LIST_HEAD(&ctx->handles_list);
@@ -51,7 +55,7 @@ mock_context(struct drm_i915_private *i915,
 
 	ret = i915_gem_context_pin_hw_id(ctx);
 	if (ret < 0)
-		goto err_handles;
+		goto err_engines;
 
 	if (name) {
 		struct i915_hw_ppgtt *ppgtt;
@@ -69,7 +73,9 @@ mock_context(struct drm_i915_private *i915,
 
 	return ctx;
 
-err_handles:
+err_engines:
+	free_engines(rcu_access_pointer(ctx->engines));
+err_free:
 	kfree(ctx);
 	return NULL;
 
-- 
2.20.1


* [PATCH 09/45] drm/i915: Remove intel_context.active_link
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (6 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 08/45] drm/i915: Switch back to an array of logical per-engine HW contexts Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 10/45] drm/i915: Move i915_request_alloc into selftests/ Chris Wilson
                   ` (43 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

We no longer need to track the active intel_contexts within each engine,
allowing us to drop a tricky mutex_lock from inside unpin (which may
occur inside fs_reclaim).
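
For illustration only (hypothetical helpers, not part of this patch),
the kind of inversion that taking ctx->mutex inside unpin risked: the
same mutex held around an allocation elsewhere can recurse, via direct
reclaim, back into unpin wanting that mutex again.

static void hypothetical_pin_side(struct i915_gem_context *ctx)
{
	void *tmp;

	mutex_lock(&ctx->mutex);
	/* Any GFP_KERNEL allocation here may enter direct reclaim... */
	tmp = kmalloc(64, GFP_KERNEL);
	kfree(tmp);
	mutex_unlock(&ctx->mutex);
}

static void hypothetical_unpin_side(struct intel_context *ce)
{
	/* ...and reclaim may reach unpin, which used to want the same lock. */
	mutex_lock(&ce->gem_context->mutex);
	list_del(&ce->active_link); /* the bookkeeping this patch removes */
	mutex_unlock(&ce->gem_context->mutex);
}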

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c           | 11 +----------
 drivers/gpu/drm/i915/gt/intel_context_types.h     |  1 -
 drivers/gpu/drm/i915/i915_debugfs.c               | 11 +++++++++--
 drivers/gpu/drm/i915/i915_gem_context.c           |  2 --
 drivers/gpu/drm/i915/i915_gem_context_types.h     |  1 -
 drivers/gpu/drm/i915/selftests/i915_gem_context.c |  1 -
 drivers/gpu/drm/i915/selftests/mock_context.c     |  1 -
 7 files changed, 10 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 5e506e648454..1f1761fc6597 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -49,7 +49,6 @@ int __intel_context_do_pin(struct intel_context *ce)
 		return -EINTR;
 
 	if (likely(!atomic_read(&ce->pin_count))) {
-		struct i915_gem_context *ctx = ce->gem_context;
 		intel_wakeref_t wakeref;
 
 		err = 0;
@@ -58,11 +57,7 @@ int __intel_context_do_pin(struct intel_context *ce)
 		if (err)
 			goto err;
 
-		i915_gem_context_get(ctx);
-
-		mutex_lock(&ctx->mutex);
-		list_add(&ce->active_link, &ctx->active_engines);
-		mutex_unlock(&ctx->mutex);
+		i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */
 
 		intel_context_get(ce);
 		smp_mb__before_atomic(); /* flush pin before it is visible */
@@ -91,10 +86,6 @@ void intel_context_unpin(struct intel_context *ce)
 	if (likely(atomic_dec_and_test(&ce->pin_count))) {
 		ce->ops->unpin(ce);
 
-		mutex_lock(&ce->gem_context->mutex);
-		list_del(&ce->active_link);
-		mutex_unlock(&ce->gem_context->mutex);
-
 		i915_gem_context_put(ce->gem_context);
 		intel_context_put(ce);
 	}
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 3579c2708321..d5a7dbd0daee 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -38,7 +38,6 @@ struct intel_context {
 	struct intel_engine_cs *engine;
 	struct intel_engine_cs *active;
 
-	struct list_head active_link;
 	struct list_head signal_link;
 	struct list_head signals;
 
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 00d3ff746eb1..466becbb99c6 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -34,6 +34,7 @@
 
 #include "gt/intel_reset.h"
 
+#include "i915_gem_context.h"
 #include "intel_dp.h"
 #include "intel_drv.h"
 #include "intel_fbc.h"
@@ -396,14 +397,17 @@ static void print_context_stats(struct seq_file *m,
 	struct i915_gem_context *ctx;
 
 	list_for_each_entry(ctx, &i915->contexts.list, link) {
+		struct i915_gem_engines_iter it;
 		struct intel_context *ce;
 
-		list_for_each_entry(ce, &ctx->active_engines, active_link) {
+		for_each_gem_engine(ce,
+				    i915_gem_context_lock_engines(ctx), it) {
 			if (ce->state)
 				per_file_stats(0, ce->state->obj, &kstats);
 			if (ce->ring)
 				per_file_stats(0, ce->ring->vma->obj, &kstats);
 		}
+		i915_gem_context_unlock_engines(ctx);
 
 		if (!IS_ERR_OR_NULL(ctx->file_priv)) {
 			struct file_stats stats = { .vm = &ctx->ppgtt->vm, };
@@ -1893,6 +1897,7 @@ static int i915_context_status(struct seq_file *m, void *unused)
 		return ret;
 
 	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
+		struct i915_gem_engines_iter it;
 		struct intel_context *ce;
 
 		seq_puts(m, "HW context ");
@@ -1917,7 +1922,8 @@ static int i915_context_status(struct seq_file *m, void *unused)
 		seq_putc(m, ctx->remap_slice ? 'R' : 'r');
 		seq_putc(m, '\n');
 
-		list_for_each_entry(ce, &ctx->active_engines, active_link) {
+		for_each_gem_engine(ce,
+				    i915_gem_context_lock_engines(ctx), it) {
 			seq_printf(m, "%s: ", ce->engine->name);
 			if (ce->state)
 				describe_obj(m, ce->state->obj);
@@ -1925,6 +1931,7 @@ static int i915_context_status(struct seq_file *m, void *unused)
 				describe_ctx_ring(m, ce->ring);
 			seq_putc(m, '\n');
 		}
+		i915_gem_context_unlock_engines(ctx);
 
 		seq_putc(m, '\n');
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index e83b790de6d2..0d9d435ae398 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -289,7 +289,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 {
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
-	GEM_BUG_ON(!list_empty(&ctx->active_engines));
 
 	release_hw_id(ctx);
 	i915_ppgtt_put(ctx->ppgtt);
@@ -416,7 +415,6 @@ __create_context(struct drm_i915_private *dev_priv)
 	list_add_tail(&ctx->link, &dev_priv->contexts.list);
 	ctx->i915 = dev_priv;
 	ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
-	INIT_LIST_HEAD(&ctx->active_engines);
 	mutex_init(&ctx->mutex);
 
 	mutex_init(&ctx->engines_mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
index 5f84618cf7db..d5cb4f121aad 100644
--- a/drivers/gpu/drm/i915/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
@@ -165,7 +165,6 @@ struct i915_gem_context {
 	atomic_t hw_id_pin_count;
 	struct list_head hw_id_link;
 
-	struct list_head active_engines;
 	struct mutex mutex;
 
 	struct i915_sched_attr sched;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 7fd224a4ca4c..deedd1898fe5 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -1675,7 +1675,6 @@ static int mock_context_barrier(void *arg)
 		goto out;
 	}
 	i915_request_add(rq);
-	GEM_BUG_ON(list_empty(&ctx->active_engines));
 
 	counter = 0;
 	context_barrier_inject_fault = BIT(RCS0);
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index 71c750693585..10e67c931ed1 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -50,7 +50,6 @@ mock_context(struct drm_i915_private *i915,
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
 	INIT_LIST_HEAD(&ctx->handles_list);
 	INIT_LIST_HEAD(&ctx->hw_id_link);
-	INIT_LIST_HEAD(&ctx->active_engines);
 	mutex_init(&ctx->mutex);
 
 	ret = i915_gem_context_pin_hw_id(ctx);
-- 
2.20.1


* [PATCH 10/45] drm/i915: Move i915_request_alloc into selftests/
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (7 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 09/45] drm/i915: Remove intel_context.active_link Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 11/45] drm/i915/execlists: Flush the tasklet on parking Chris Wilson
                   ` (42 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

Having transitioned GEM over to using intel_context as its primary
means of tracking the GEM context and engine combination, and to using
i915_request_create(), we can move the older i915_request_alloc()
helper into selftests/ where its remaining users are confined.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
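
As a quick illustration (a sketch, not taken from the patch), the two
call patterns after this change look like:

	/* selftests: keep the (ctx, engine) convenience wrapper */
	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq))
		return PTR_ERR(rq);
	i915_request_add(rq);

	/* driver proper: operate on the intel_context directly */
	ce = i915_gem_context_get_engine(ctx, engine->id);
	if (IS_ERR(ce))
		return PTR_ERR(ce);
	rq = i915_request_create(ce);
	intel_context_put(ce);

Only the way the intel_context is looked up differs; the request
construction itself is shared.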
---
 drivers/gpu/drm/i915/Makefile                 |  1 +
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |  9 +++--
 drivers/gpu/drm/i915/gt/selftest_lrc.c        | 13 ++++---
 .../gpu/drm/i915/gt/selftest_workarounds.c    | 15 +++-----
 drivers/gpu/drm/i915/i915_request.c           | 38 -------------------
 drivers/gpu/drm/i915/i915_request.h           |  3 --
 drivers/gpu/drm/i915/selftests/huge_pages.c   |  3 +-
 drivers/gpu/drm/i915/selftests/i915_gem.c     |  5 ++-
 .../gpu/drm/i915/selftests/i915_gem_context.c | 13 ++++---
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  3 +-
 drivers/gpu/drm/i915/selftests/i915_request.c |  4 +-
 .../gpu/drm/i915/selftests/igt_gem_utils.c    | 34 +++++++++++++++++
 .../gpu/drm/i915/selftests/igt_gem_utils.h    | 17 +++++++++
 drivers/gpu/drm/i915/selftests/igt_spinner.c  |  3 +-
 drivers/gpu/drm/i915/selftests/mock_request.c |  3 +-
 15 files changed, 89 insertions(+), 75 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/selftests/igt_gem_utils.c
 create mode 100644 drivers/gpu/drm/i915/selftests/igt_gem_utils.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index dd8d923aa1c6..58643373495c 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -193,6 +193,7 @@ i915-$(CONFIG_DRM_I915_SELFTEST) += \
 	selftests/i915_random.o \
 	selftests/i915_selftest.o \
 	selftests/igt_flush_test.o \
+	selftests/igt_gem_utils.o \
 	selftests/igt_live_test.o \
 	selftests/igt_reset.o \
 	selftests/igt_spinner.o
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 9dece55a091c..dab3d30c9c73 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -29,6 +29,7 @@
 #include "i915_selftest.h"
 #include "selftests/i915_random.h"
 #include "selftests/igt_flush_test.h"
+#include "selftests/igt_gem_utils.h"
 #include "selftests/igt_reset.h"
 #include "selftests/igt_wedge_me.h"
 
@@ -175,7 +176,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
 	if (err)
 		goto unpin_vma;
 
-	rq = i915_request_alloc(engine, h->ctx);
+	rq = igt_request_alloc(h->ctx, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto unpin_hws;
@@ -455,7 +456,7 @@ static int igt_reset_nop(void *arg)
 			for (i = 0; i < 16; i++) {
 				struct i915_request *rq;
 
-				rq = i915_request_alloc(engine, ctx);
+				rq = igt_request_alloc(ctx, engine);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
 					break;
@@ -554,7 +555,7 @@ static int igt_reset_nop_engine(void *arg)
 			for (i = 0; i < 16; i++) {
 				struct i915_request *rq;
 
-				rq = i915_request_alloc(engine, ctx);
+				rq = igt_request_alloc(ctx, engine);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
 					break;
@@ -800,7 +801,7 @@ static int active_engine(void *data)
 		struct i915_request *new;
 
 		mutex_lock(&engine->i915->drm.struct_mutex);
-		new = i915_request_alloc(engine, ctx[idx]);
+		new = igt_request_alloc(ctx[idx], engine);
 		if (IS_ERR(new)) {
 			mutex_unlock(&engine->i915->drm.struct_mutex);
 			err = PTR_ERR(new);
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index cd0551f97c2f..84538f69185b 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -10,6 +10,7 @@
 #include "i915_selftest.h"
 #include "selftests/i915_random.h"
 #include "selftests/igt_flush_test.h"
+#include "selftests/igt_gem_utils.h"
 #include "selftests/igt_live_test.h"
 #include "selftests/igt_spinner.h"
 #include "selftests/mock_context.h"
@@ -148,7 +149,7 @@ static int live_busywait_preempt(void *arg)
 		 * fails, we hang instead.
 		 */
 
-		lo = i915_request_alloc(engine, ctx_lo);
+		lo = igt_request_alloc(ctx_lo, engine);
 		if (IS_ERR(lo)) {
 			err = PTR_ERR(lo);
 			goto err_vma;
@@ -192,7 +193,7 @@ static int live_busywait_preempt(void *arg)
 			goto err_vma;
 		}
 
-		hi = i915_request_alloc(engine, ctx_hi);
+		hi = igt_request_alloc(ctx_hi, engine);
 		if (IS_ERR(hi)) {
 			err = PTR_ERR(hi);
 			goto err_vma;
@@ -857,13 +858,13 @@ static int live_chain_preempt(void *arg)
 			i915_request_add(rq);
 
 			for (i = 0; i < count; i++) {
-				rq = i915_request_alloc(engine, lo.ctx);
+				rq = igt_request_alloc(lo.ctx, engine);
 				if (IS_ERR(rq))
 					goto err_wedged;
 				i915_request_add(rq);
 			}
 
-			rq = i915_request_alloc(engine, hi.ctx);
+			rq = igt_request_alloc(hi.ctx, engine);
 			if (IS_ERR(rq))
 				goto err_wedged;
 			i915_request_add(rq);
@@ -882,7 +883,7 @@ static int live_chain_preempt(void *arg)
 			}
 			igt_spinner_end(&lo.spin);
 
-			rq = i915_request_alloc(engine, lo.ctx);
+			rq = igt_request_alloc(lo.ctx, engine);
 			if (IS_ERR(rq))
 				goto err_wedged;
 			i915_request_add(rq);
@@ -1087,7 +1088,7 @@ static int smoke_submit(struct preempt_smoke *smoke,
 
 	ctx->sched.priority = prio;
 
-	rq = i915_request_alloc(smoke->engine, ctx);
+	rq = igt_request_alloc(ctx, smoke->engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto unpin;
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 6b9e9b6d82f7..9f7680b9984b 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -8,6 +8,7 @@
 #include "intel_reset.h"
 
 #include "selftests/igt_flush_test.h"
+#include "selftests/igt_gem_utils.h"
 #include "selftests/igt_reset.h"
 #include "selftests/igt_spinner.h"
 #include "selftests/igt_wedge_me.h"
@@ -102,7 +103,7 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 	if (err)
 		goto err_obj;
 
-	rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_pin;
@@ -511,7 +512,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
 		i915_gem_object_unpin_map(batch->obj);
 		i915_gem_chipset_flush(ctx->i915);
 
-		rq = i915_request_alloc(engine, ctx);
+		rq = igt_request_alloc(ctx, engine);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			goto out_batch;
@@ -701,14 +702,11 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx,
 				      struct intel_engine_cs *engine,
 				      struct i915_vma *results)
 {
-	intel_wakeref_t wakeref;
 	struct i915_request *rq;
 	int i, err = 0;
 	u32 srm, *cs;
 
-	rq = ERR_PTR(-ENODEV);
-	with_intel_runtime_pm(engine->i915, wakeref)
-		rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
@@ -744,7 +742,6 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx,
 static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
 				       struct intel_engine_cs *engine)
 {
-	intel_wakeref_t wakeref;
 	struct i915_request *rq;
 	struct i915_vma *batch;
 	int i, err = 0;
@@ -770,9 +767,7 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
 	i915_gem_object_flush_map(batch->obj);
 	i915_gem_chipset_flush(ctx->i915);
 
-	rq = ERR_PTR(-ENODEV);
-	with_intel_runtime_pm(engine->i915, wakeref)
-		rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_unpin;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 8b0d72a79861..a5a2ccc23958 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -771,44 +771,6 @@ i915_request_create(struct intel_context *ce)
 	return rq;
 }
 
-/**
- * i915_request_alloc - allocate a request structure
- *
- * @engine: engine that we wish to issue the request on.
- * @ctx: context that the request will be associated with.
- *
- * Returns a pointer to the allocated request if successful,
- * or an error code if not.
- */
-struct i915_request *
-i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
-{
-	struct drm_i915_private *i915 = engine->i915;
-	struct intel_context *ce;
-	struct i915_request *rq;
-
-	/*
-	 * Preempt contexts are reserved for exclusive use to inject a
-	 * preemption context switch. They are never to be used for any trivial
-	 * request!
-	 */
-	GEM_BUG_ON(ctx == i915->preempt_context);
-
-	/*
-	 * Pinning the contexts may generate requests in order to acquire
-	 * GGTT space, so do this first before we reserve a seqno for
-	 * ourselves.
-	 */
-	ce = i915_gem_context_get_engine(ctx, engine->id);
-	if (IS_ERR(ce))
-		return ERR_CAST(ce);
-
-	rq = intel_context_create_request(ce);
-	intel_context_put(ce);
-
-	return rq;
-}
-
 static int
 emit_semaphore_wait(struct i915_request *to,
 		    struct i915_request *from,
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 1eee7416af31..8025a89b5999 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -246,9 +246,6 @@ i915_request_create(struct intel_context *ce);
 
 struct i915_request *__i915_request_commit(struct i915_request *request);
 
-struct i915_request * __must_check
-i915_request_alloc(struct intel_engine_cs *engine,
-		   struct i915_gem_context *ctx);
 void i915_request_retire_upto(struct i915_request *rq);
 
 static inline struct i915_request *
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index 90721b54e7ae..1e1f83326a96 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -26,6 +26,7 @@
 
 #include <linux/prime_numbers.h>
 
+#include "igt_gem_utils.h"
 #include "mock_drm.h"
 #include "i915_random.h"
 
@@ -980,7 +981,7 @@ static int gpu_write(struct i915_vma *vma,
 	if (IS_ERR(batch))
 		return PTR_ERR(batch);
 
-	rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_batch;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index 0342de369d3e..c6a9bff85311 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -8,8 +8,9 @@
 
 #include "../i915_selftest.h"
 
-#include "mock_context.h"
+#include "igt_gem_utils.h"
 #include "igt_flush_test.h"
+#include "mock_context.h"
 
 static int switch_to_context(struct drm_i915_private *i915,
 			     struct i915_gem_context *ctx)
@@ -20,7 +21,7 @@ static int switch_to_context(struct drm_i915_private *i915,
 	for_each_engine(engine, i915, id) {
 		struct i915_request *rq;
 
-		rq = i915_request_alloc(engine, ctx);
+		rq = igt_request_alloc(ctx, engine);
 		if (IS_ERR(rq))
 			return PTR_ERR(rq);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index deedd1898fe5..b62f005e4d50 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -29,6 +29,7 @@
 
 #include "i915_random.h"
 #include "igt_flush_test.h"
+#include "igt_gem_utils.h"
 #include "igt_live_test.h"
 #include "igt_reset.h"
 #include "igt_spinner.h"
@@ -91,7 +92,7 @@ static int live_nop_switch(void *arg)
 
 		times[0] = ktime_get_raw();
 		for (n = 0; n < nctx; n++) {
-			rq = i915_request_alloc(engine, ctx[n]);
+			rq = igt_request_alloc(ctx[n], engine);
 			if (IS_ERR(rq)) {
 				err = PTR_ERR(rq);
 				goto out_unlock;
@@ -121,7 +122,7 @@ static int live_nop_switch(void *arg)
 			times[1] = ktime_get_raw();
 
 			for (n = 0; n < prime; n++) {
-				rq = i915_request_alloc(engine, ctx[n % nctx]);
+				rq = igt_request_alloc(ctx[n % nctx], engine);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
 					goto out_unlock;
@@ -301,7 +302,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 		goto err_vma;
 	}
 
-	rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_batch;
@@ -1350,7 +1351,7 @@ static int write_to_scratch(struct i915_gem_context *ctx,
 	if (err)
 		goto err_unpin;
 
-	rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_unpin;
@@ -1445,7 +1446,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
 	if (err)
 		goto err_unpin;
 
-	rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_unpin;
@@ -1669,7 +1670,7 @@ static int mock_context_barrier(void *arg)
 		goto out;
 	}
 
-	rq = i915_request_alloc(i915->engine[RCS0], ctx);
+	rq = igt_request_alloc(ctx, i915->engine[RCS0]);
 	if (IS_ERR(rq)) {
 		pr_err("Request allocation failed!\n");
 		goto out;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 89766688e420..4fc6e5445dd1 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -24,6 +24,7 @@
 
 #include "../i915_selftest.h"
 
+#include "igt_gem_utils.h"
 #include "lib_sw_fence.h"
 #include "mock_context.h"
 #include "mock_drm.h"
@@ -460,7 +461,7 @@ static int igt_evict_contexts(void *arg)
 
 			/* We will need some GGTT space for the rq's context */
 			igt_evict_ctl.fail_if_busy = true;
-			rq = i915_request_alloc(engine, ctx);
+			rq = igt_request_alloc(ctx, engine);
 			igt_evict_ctl.fail_if_busy = false;
 
 			if (IS_ERR(rq)) {
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 098d7b3aa131..b60591531e4a 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -267,7 +267,7 @@ static struct i915_request *
 __live_request_alloc(struct i915_gem_context *ctx,
 		     struct intel_engine_cs *engine)
 {
-	return i915_request_alloc(engine, ctx);
+	return igt_request_alloc(ctx, engine);
 }
 
 static int __igt_breadcrumbs_smoketest(void *arg)
@@ -1074,7 +1074,7 @@ max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 	if (HAS_EXECLISTS(ctx->i915))
 		return INT_MAX;
 
-	rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq)) {
 		ret = PTR_ERR(rq);
 	} else {
diff --git a/drivers/gpu/drm/i915/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/selftests/igt_gem_utils.c
new file mode 100644
index 000000000000..16891b1a3e50
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/igt_gem_utils.c
@@ -0,0 +1,34 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "igt_gem_utils.h"
+
+#include "gt/intel_context.h"
+
+#include "../i915_gem_context.h"
+#include "../i915_gem_pm.h"
+#include "../i915_request.h"
+
+struct i915_request *
+igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
+{
+	struct intel_context *ce;
+	struct i915_request *rq;
+
+	/*
+	 * Pinning the contexts may generate requests in order to acquire
+	 * GGTT space, so do this first before we reserve a seqno for
+	 * ourselves.
+	 */
+	ce = i915_gem_context_get_engine(ctx, engine->id);
+	if (IS_ERR(ce))
+		return ERR_CAST(ce);
+
+	rq = intel_context_create_request(ce);
+	intel_context_put(ce);
+
+	return rq;
+}
diff --git a/drivers/gpu/drm/i915/selftests/igt_gem_utils.h b/drivers/gpu/drm/i915/selftests/igt_gem_utils.h
new file mode 100644
index 000000000000..0f17251cf75d
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/igt_gem_utils.h
@@ -0,0 +1,17 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#ifndef __IGT_GEM_UTILS_H__
+#define __IGT_GEM_UTILS_H__
+
+struct i915_request;
+struct i915_gem_context;
+struct intel_engine_cs;
+
+struct i915_request *
+igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine);
+
+#endif /* __IGT_GEM_UTILS_H__ */
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c
index 16890dfe74c0..ece8a8a0d3b0 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.c
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c
@@ -4,6 +4,7 @@
  * Copyright © 2018 Intel Corporation
  */
 
+#include "igt_gem_utils.h"
 #include "igt_spinner.h"
 
 int igt_spinner_init(struct igt_spinner *spin, struct drm_i915_private *i915)
@@ -114,7 +115,7 @@ igt_spinner_create_request(struct igt_spinner *spin,
 	if (err)
 		goto unpin_vma;
 
-	rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto unpin_hws;
diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c
index f739ba63057f..b99f7576153c 100644
--- a/drivers/gpu/drm/i915/selftests/mock_request.c
+++ b/drivers/gpu/drm/i915/selftests/mock_request.c
@@ -24,6 +24,7 @@
 
 #include "gt/mock_engine.h"
 
+#include "igt_gem_utils.h"
 #include "mock_request.h"
 
 struct i915_request *
@@ -34,7 +35,7 @@ mock_request(struct intel_engine_cs *engine,
 	struct i915_request *request;
 
 	/* NB the i915->requests slab cache is enlarged to fit mock_request */
-	request = i915_request_alloc(engine, context);
+	request = igt_request_alloc(context, engine);
 	if (IS_ERR(request))
 		return NULL;
 
-- 
2.20.1


* [PATCH 11/45] drm/i915/execlists: Flush the tasklet on parking
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (8 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 10/45] drm/i915: Move i915_request_alloc into selftests/ Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 12/45] drm/i915: Move the engine->destroy() vfunc onto the engine Chris Wilson
                   ` (41 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

Tidy up the cleanup sequence by always ensuring that the tasklet is
flushed on parking (before we clean up). Parking provides a convenient
point at which to ensure that the backend is truly idle.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
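
For reference, a rough sketch of the assumed caller (simplified, not
part of this patch) showing where the new hook fires:

	static void intel_engines_park(struct drm_i915_private *i915)
	{
		struct intel_engine_cs *engine;
		enum intel_engine_id id;

		for_each_engine(engine, i915, id) {
			if (engine->park)
				engine->park(engine); /* execlists_park() kills the tasklet here */
		}
	}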
---
 drivers/gpu/drm/i915/gt/intel_lrc.c         | 25 +++++++--------------
 drivers/gpu/drm/i915/intel_guc_submission.c |  1 +
 2 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 01f58a152a9e..ea7794d368d1 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2331,27 +2331,18 @@ static int gen8_init_rcs_context(struct i915_request *rq)
 	return i915_gem_render_state_emit(rq);
 }
 
+static void execlists_park(struct intel_engine_cs *engine)
+{
+	tasklet_kill(&engine->execlists.tasklet);
+}
+
 /**
  * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
  * @engine: Engine Command Streamer.
  */
 void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv;
-
-	/*
-	 * Tasklet cannot be active at this point due intel_mark_active/idle
-	 * so this is just for documentation.
-	 */
-	if (WARN_ON(test_bit(TASKLET_STATE_SCHED,
-			     &engine->execlists.tasklet.state)))
-		tasklet_kill(&engine->execlists.tasklet);
-
-	dev_priv = engine->i915;
-
-	if (engine->buffer) {
-		WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
-	}
+	struct drm_i915_private *i915 = engine->i915;
 
 	if (engine->cleanup)
 		engine->cleanup(engine);
@@ -2361,7 +2352,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
 	lrc_destroy_wa_ctx(engine);
 
 	engine->i915 = NULL;
-	dev_priv->engine[engine->id] = NULL;
+	i915->engine[engine->id] = NULL;
 	kfree(engine);
 }
 
@@ -2376,7 +2367,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->reset.reset = execlists_reset;
 	engine->reset.finish = execlists_reset_finish;
 
-	engine->park = NULL;
+	engine->park = execlists_park;
 	engine->unpark = NULL;
 
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 4c814344809c..ed94001028f2 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -1363,6 +1363,7 @@ static void guc_interrupts_release(struct drm_i915_private *dev_priv)
 
 static void guc_submission_park(struct intel_engine_cs *engine)
 {
+	tasklet_kill(&engine->execlists.tasklet);
 	intel_engine_unpin_breadcrumbs_irq(engine);
 	engine->flags &= ~I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
 }
-- 
2.20.1


* [PATCH 12/45] drm/i915: Move the engine->destroy() vfunc onto the engine
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (9 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 11/45] drm/i915/execlists: Flush the tasklet on parking Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25 11:01   ` Tvrtko Ursulin
  2019-04-25  9:19 ` [PATCH 13/45] drm/i915: Convert inconsistent static engine tables into an init error Chris Wilson
                   ` (40 subsequent siblings)
  51 siblings, 1 reply; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

Make the engine responsible for cleaning itself up!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
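
A condensed sketch of the resulting pattern (illustrative only; the
real hunks follow):

	/* default installed at setup, so early failures can still be freed */
	engine->destroy = (typeof(engine->destroy))kfree;

	/* overridden by the backend once the engine is fully constructed */
	engine->destroy = execlists_destroy;	/* or ring_destroy */

	/* one loop then tears everything down, whatever state it reached */
	for_each_engine(engine, i915, id) {
		engine->destroy(engine);
		i915->engine[id] = NULL;
	}

This is what lets the error paths in intel_engines_init_mmio(),
intel_engines_init() and intel_engines_setup() all collapse into
intel_engines_cleanup().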
---
 drivers/gpu/drm/i915/gt/intel_engine.h       |  4 ++
 drivers/gpu/drm/i915/gt/intel_engine_cs.c    | 63 ++++++++++----------
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  2 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 30 ++++------
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c   | 35 +++++------
 drivers/gpu/drm/i915/i915_drv.h              |  6 --
 drivers/gpu/drm/i915/i915_gem.c              | 19 +-----
 7 files changed, 66 insertions(+), 93 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 3e53f53bc52b..f5b0f27cecb6 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -362,7 +362,11 @@ __intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
 	return (head - tail - CACHELINE_BYTES) & (size - 1);
 }
 
+int intel_engines_init_mmio(struct drm_i915_private *i915);
 int intel_engines_setup(struct drm_i915_private *i915);
+int intel_engines_init(struct drm_i915_private *i915);
+void intel_engines_cleanup(struct drm_i915_private *i915);
+
 int intel_engine_init_common(struct intel_engine_cs *engine);
 void intel_engine_cleanup_common(struct intel_engine_cs *engine);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 4e30f3720eb7..65cdd0c4accc 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -303,6 +303,12 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 	engine->class = info->class;
 	engine->instance = info->instance;
 
+	/*
+	 * To be overridden by the backend on setup, however to facilitate
+	 * cleanup on error during setup we always provide the destroy vfunc.
+	 */
+	engine->destroy = (typeof(engine->destroy))kfree;
+
 	engine->uabi_class = intel_engine_classes[info->class].uabi_class;
 
 	engine->context_size = __intel_engine_context_size(dev_priv,
@@ -327,18 +333,31 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 	return 0;
 }
 
+/**
+ * intel_engines_cleanup() - free the resources allocated for Command Streamers
+ * @dev_priv: i915 device private
+ */
+void intel_engines_cleanup(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, i915, id) {
+		engine->destroy(engine);
+		i915->engine[id] = NULL;
+	}
+}
+
 /**
  * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
  * @dev_priv: i915 device private
  *
  * Return: non-zero if the initialization failed.
  */
-int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
+int intel_engines_init_mmio(struct drm_i915_private *i915)
 {
-	struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
-	const unsigned int engine_mask = INTEL_INFO(dev_priv)->engine_mask;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
+	struct intel_device_info *device_info = mkwrite_device_info(i915);
+	const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask;
 	unsigned int mask = 0;
 	unsigned int i;
 	int err;
@@ -351,10 +370,10 @@ int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
 		return -ENODEV;
 
 	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
-		if (!HAS_ENGINE(dev_priv, i))
+		if (!HAS_ENGINE(i915, i))
 			continue;
 
-		err = intel_engine_setup(dev_priv, i);
+		err = intel_engine_setup(i915, i);
 		if (err)
 			goto cleanup;
 
@@ -370,20 +389,19 @@ int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
 		device_info->engine_mask = mask;
 
 	/* We always presume we have at least RCS available for later probing */
-	if (WARN_ON(!HAS_ENGINE(dev_priv, RCS0))) {
+	if (WARN_ON(!HAS_ENGINE(i915, RCS0))) {
 		err = -ENODEV;
 		goto cleanup;
 	}
 
-	RUNTIME_INFO(dev_priv)->num_engines = hweight32(mask);
+	RUNTIME_INFO(i915)->num_engines = hweight32(mask);
 
-	i915_check_and_clear_faults(dev_priv);
+	i915_check_and_clear_faults(i915);
 
 	return 0;
 
 cleanup:
-	for_each_engine(engine, dev_priv, id)
-		kfree(engine);
+	intel_engines_cleanup(i915);
 	return err;
 }
 
@@ -397,7 +415,7 @@ int intel_engines_init(struct drm_i915_private *i915)
 {
 	int (*init)(struct intel_engine_cs *engine);
 	struct intel_engine_cs *engine;
-	enum intel_engine_id id, err_id;
+	enum intel_engine_id id;
 	int err;
 
 	if (HAS_EXECLISTS(i915))
@@ -406,8 +424,6 @@ int intel_engines_init(struct drm_i915_private *i915)
 		init = intel_ring_submission_init;
 
 	for_each_engine(engine, i915, id) {
-		err_id = id;
-
 		err = init(engine);
 		if (err)
 			goto cleanup;
@@ -416,14 +432,7 @@ int intel_engines_init(struct drm_i915_private *i915)
 	return 0;
 
 cleanup:
-	for_each_engine(engine, i915, id) {
-		if (id >= err_id) {
-			kfree(engine);
-			i915->engine[id] = NULL;
-		} else {
-			i915->gt.cleanup_engine(engine);
-		}
-	}
+	intel_engines_cleanup(i915);
 	return err;
 }
 
@@ -609,13 +618,7 @@ int intel_engines_setup(struct drm_i915_private *i915)
 	return 0;
 
 cleanup:
-	for_each_engine(engine, i915, id) {
-		if (engine->cops)
-			i915->gt.cleanup_engine(engine);
-		else
-			kfree(engine);
-		i915->engine[id] = NULL;
-	}
+	intel_engines_cleanup(i915);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index d972c339309c..9d64e33f8427 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -420,7 +420,7 @@ struct intel_engine_cs {
 	 */
 	void		(*cancel_requests)(struct intel_engine_cs *engine);
 
-	void		(*cleanup)(struct intel_engine_cs *engine);
+	void		(*destroy)(struct intel_engine_cs *engine);
 
 	struct intel_engine_execlists execlists;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index ea7794d368d1..2e74c792104e 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2336,26 +2336,6 @@ static void execlists_park(struct intel_engine_cs *engine)
 	tasklet_kill(&engine->execlists.tasklet);
 }
 
-/**
- * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
- * @engine: Engine Command Streamer.
- */
-void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *i915 = engine->i915;
-
-	if (engine->cleanup)
-		engine->cleanup(engine);
-
-	intel_engine_cleanup_common(engine);
-
-	lrc_destroy_wa_ctx(engine);
-
-	engine->i915 = NULL;
-	i915->engine[engine->id] = NULL;
-	kfree(engine);
-}
-
 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 {
 	engine->submit_request = execlists_submit_request;
@@ -2378,10 +2358,20 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 }
 
+static void execlists_destroy(struct intel_engine_cs *engine)
+{
+	intel_engine_cleanup_common(engine);
+
+	lrc_destroy_wa_ctx(engine);
+	kfree(engine);
+}
+
 static void
 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 {
 	/* Default vfuncs which can be overriden by each engine. */
+
+	engine->destroy = execlists_destroy;
 	engine->resume = execlists_resume;
 
 	engine->reset.prepare = execlists_reset_prepare;
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 5bcd3034a101..7a4804c47466 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1520,25 +1520,6 @@ static const struct intel_context_ops ring_context_ops = {
 	.destroy = ring_context_destroy,
 };
 
-void intel_engine_cleanup(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	WARN_ON(INTEL_GEN(dev_priv) > 2 &&
-		(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
-
-	intel_ring_unpin(engine->buffer);
-	intel_ring_put(engine->buffer);
-
-	if (engine->cleanup)
-		engine->cleanup(engine);
-
-	intel_engine_cleanup_common(engine);
-
-	dev_priv->engine[engine->id] = NULL;
-	kfree(engine);
-}
-
 static int load_pd_dir(struct i915_request *rq,
 		       const struct i915_hw_ppgtt *ppgtt)
 {
@@ -2130,6 +2111,20 @@ static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
 	engine->submit_request = gen6_bsd_submit_request;
 }
 
+static void ring_destroy(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+
+	WARN_ON(INTEL_GEN(dev_priv) > 2 &&
+		(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
+
+	intel_ring_unpin(engine->buffer);
+	intel_ring_put(engine->buffer);
+
+	intel_engine_cleanup_common(engine);
+	kfree(engine);
+}
+
 static void setup_irq(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *i915 = engine->i915;
@@ -2158,6 +2153,8 @@ static void setup_common(struct intel_engine_cs *engine)
 
 	setup_irq(engine);
 
+	engine->destroy = ring_destroy;
+
 	engine->resume = xcs_resume;
 	engine->reset.prepare = reset_prepare;
 	engine->reset.reset = reset_ring;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5c77bf5b735b..5ca4df9a7428 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1995,8 +1995,6 @@ struct drm_i915_private {
 
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
 	struct {
-		void (*cleanup_engine)(struct intel_engine_cs *engine);
-
 		struct i915_gt_timelines {
 			struct mutex mutex; /* protects list, tainted by GPU */
 			struct list_head active_list;
@@ -2722,9 +2720,6 @@ extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
 extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
 int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
 
-int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
-int intel_engines_init(struct drm_i915_private *dev_priv);
-
 u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
 
 /* intel_hotplug.c */
@@ -3132,7 +3127,6 @@ int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
 int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv);
 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv);
 void i915_gem_fini(struct drm_i915_private *dev_priv);
-void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv);
 int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
 			   unsigned int flags, long timeout);
 void i915_gem_suspend(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4c1793b1012e..2fcec1bfb038 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4476,11 +4476,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 
 	dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
 
-	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv))
-		dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
-	else
-		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
-
 	i915_timelines_init(dev_priv);
 
 	ret = i915_gem_init_userptr(dev_priv);
@@ -4601,7 +4596,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 err_pm:
 	if (ret != -EIO) {
 		intel_cleanup_gt_powersave(dev_priv);
-		i915_gem_cleanup_engines(dev_priv);
+		intel_engines_cleanup(dev_priv);
 	}
 err_context:
 	if (ret != -EIO)
@@ -4661,7 +4656,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
 	mutex_lock(&dev_priv->drm.struct_mutex);
 	intel_uc_fini_hw(dev_priv);
 	intel_uc_fini(dev_priv);
-	i915_gem_cleanup_engines(dev_priv);
+	intel_engines_cleanup(dev_priv);
 	i915_gem_contexts_fini(dev_priv);
 	i915_gem_fini_scratch(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
@@ -4684,16 +4679,6 @@ void i915_gem_init_mmio(struct drm_i915_private *i915)
 	i915_gem_sanitize(i915);
 }
 
-void
-i915_gem_cleanup_engines(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	for_each_engine(engine, dev_priv, id)
-		dev_priv->gt.cleanup_engine(engine);
-}
-
 void
 i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
 {
-- 
2.20.1


* [PATCH 13/45] drm/i915: Convert inconsistent static engine tables into an init error
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (10 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 12/45] drm/i915: Move the engine->destroy() vfunc onto the engine Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 14/45] drm/i915: Make engine_mask & num_engines static Chris Wilson
                   ` (39 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

Remove the modification of the "constant" device info by promoting an
inconsistency between the static intel_engines[] table and the
device_info into an initialisation error. Now, if we add a new engine
to the device_info, we must first add that engine's information to the
intel_engines[] table.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
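
A worked example of the new check (sizes assumed for illustration):
after the setup loop, i == ARRAY_SIZE(intel_engines), so

	if (GEM_WARN_ON(INTEL_INFO(i915)->engine_mask >> i)) {
		err = -ENODEV;
		goto cleanup;
	}

fires whenever device_info names an engine at a bit position the static
intel_engines[] table does not cover. For example, if the table has 8
entries, an engine_mask of 0x1ff (bit 8 set) gives 0x1ff >> 8 == 1 and
the probe now fails with -ENODEV instead of silently shrinking the mask.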
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 28 ++++++++---------------
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 65cdd0c4accc..862cf1040f88 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -356,15 +356,14 @@ void intel_engines_cleanup(struct drm_i915_private *i915)
  */
 int intel_engines_init_mmio(struct drm_i915_private *i915)
 {
-	struct intel_device_info *device_info = mkwrite_device_info(i915);
-	const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask;
-	unsigned int mask = 0;
 	unsigned int i;
 	int err;
 
-	WARN_ON(engine_mask == 0);
-	WARN_ON(engine_mask &
-		GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES));
+	/* We always presume we have at least RCS available for later probing */
+	if (GEM_WARN_ON(!HAS_ENGINE(i915, RCS0))) {
+		err = -ENODEV;
+		goto cleanup;
+	}
 
 	if (i915_inject_load_failure())
 		return -ENODEV;
@@ -376,25 +375,16 @@ int intel_engines_init_mmio(struct drm_i915_private *i915)
 		err = intel_engine_setup(i915, i);
 		if (err)
 			goto cleanup;
-
-		mask |= BIT(i);
 	}
 
-	/*
-	 * Catch failures to update intel_engines table when the new engines
-	 * are added to the driver by a warning and disabling the forgotten
-	 * engines.
-	 */
-	if (WARN_ON(mask != engine_mask))
-		device_info->engine_mask = mask;
-
-	/* We always presume we have at least RCS available for later probing */
-	if (WARN_ON(!HAS_ENGINE(i915, RCS0))) {
+	/* Catch failures to update intel_engines table for new engines. */
+	if (GEM_WARN_ON(INTEL_INFO(i915)->engine_mask >> i)) {
 		err = -ENODEV;
 		goto cleanup;
 	}
 
-	RUNTIME_INFO(i915)->num_engines = hweight32(mask);
+	RUNTIME_INFO(i915)->num_engines =
+		hweight32(INTEL_INFO(i915)->engine_mask);
 
 	i915_check_and_clear_faults(i915);
 
-- 
2.20.1


* [PATCH 14/45] drm/i915: Make engine_mask & num_engines static
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (11 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 13/45] drm/i915: Convert inconsistent static engine tables into an init error Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:30   ` Jani Nikula
  2019-04-25 10:20   ` Tvrtko Ursulin
  2019-04-25  9:19 ` [PATCH 15/45] drm/i915: Restore control over ppgtt for context creation ABI Chris Wilson
                   ` (38 subsequent siblings)
  51 siblings, 2 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx; +Cc: Jani Nikula

Having removed the urge to modify the engine_mask at runtime, we can
promote num_engines from a runtime calculation to a static value and
push it into the device_info tables.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
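
For illustration (engine bit positions assumed), the new helper keeps
both fields in lockstep at compile time:

	#define ENGINES(x) .engine_mask = (x), .num_engines = hweight8(x)

	/* e.g. the GEN6 entry */
	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0))
	/* => .engine_mask with three bits set, .num_engines = 3 */

so num_engines is now a property of the static device_info table rather
than something recomputed during probe.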
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  3 --
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |  2 +-
 drivers/gpu/drm/i915/gt/selftest_lrc.c        |  4 +-
 drivers/gpu/drm/i915/i915_pci.c               | 45 +++++++++----------
 drivers/gpu/drm/i915/intel_device_info.h      |  3 +-
 .../gpu/drm/i915/selftests/i915_gem_context.c |  4 +-
 drivers/gpu/drm/i915/selftests/i915_request.c |  2 +-
 7 files changed, 27 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 862cf1040f88..b2cd6c6785be 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -383,9 +383,6 @@ int intel_engines_init_mmio(struct drm_i915_private *i915)
 		goto cleanup;
 	}
 
-	RUNTIME_INFO(i915)->num_engines =
-		hweight32(INTEL_INFO(i915)->engine_mask);
-
 	i915_check_and_clear_faults(i915);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 7a4804c47466..2edeedb748ed 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1568,7 +1568,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 	struct intel_engine_cs *engine = rq->engine;
 	enum intel_engine_id id;
 	const int num_engines =
-		IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
+		IS_HSW_GT1(i915) ? INTEL_INFO(i915)->num_engines - 1 : 0;
 	bool force_restore = false;
 	int len;
 	u32 *cs;
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 84538f69185b..6f223f7facde 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1185,7 +1185,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
 
 	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
 		count, flags,
-		RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext);
+		INTEL_INFO(smoke->i915)->num_engines, smoke->ncontext);
 	return 0;
 }
 
@@ -1213,7 +1213,7 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
 
 	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
 		count, flags,
-		RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext);
+		INTEL_INFO(smoke->i915)->num_engines, smoke->ncontext);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index ffa2ee70a03d..431a4a2c20e1 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -35,6 +35,7 @@
 
 #define PLATFORM(x) .platform = (x)
 #define GEN(x) .gen = (x), .gen_mask = BIT((x) - 1)
+#define ENGINES(x) .engine_mask = (x), .num_engines = hweight8(x)
 
 #define I845_PIPE_OFFSETS \
 	.pipe_offsets = { \
@@ -145,6 +146,7 @@
 
 #define I830_FEATURES \
 	GEN(2), \
+	ENGINES(BIT(RCS0)), \
 	.is_mobile = 1, \
 	.num_pipes = 2, \
 	.display.has_overlay = 1, \
@@ -154,7 +156,6 @@
 	.gpu_reset_clobbers_display = true, \
 	.hws_needs_physical = 1, \
 	.unfenced_needs_alignment = 1, \
-	.engine_mask = BIT(RCS0), \
 	.has_snoop = true, \
 	.has_coherent_ggtt = false, \
 	I9XX_PIPE_OFFSETS, \
@@ -164,6 +165,7 @@
 
 #define I845_FEATURES \
 	GEN(2), \
+	ENGINES(BIT(RCS0)), \
 	.num_pipes = 1, \
 	.display.has_overlay = 1, \
 	.display.overlay_needs_physical = 1, \
@@ -171,7 +173,6 @@
 	.gpu_reset_clobbers_display = true, \
 	.hws_needs_physical = 1, \
 	.unfenced_needs_alignment = 1, \
-	.engine_mask = BIT(RCS0), \
 	.has_snoop = true, \
 	.has_coherent_ggtt = false, \
 	I845_PIPE_OFFSETS, \
@@ -202,10 +203,10 @@ static const struct intel_device_info intel_i865g_info = {
 
 #define GEN3_FEATURES \
 	GEN(3), \
+	ENGINES(BIT(RCS0)), \
 	.num_pipes = 2, \
 	.display.has_gmch = 1, \
 	.gpu_reset_clobbers_display = true, \
-	.engine_mask = BIT(RCS0), \
 	.has_snoop = true, \
 	.has_coherent_ggtt = true, \
 	I9XX_PIPE_OFFSETS, \
@@ -286,11 +287,11 @@ static const struct intel_device_info intel_pineview_m_info = {
 
 #define GEN4_FEATURES \
 	GEN(4), \
+	ENGINES(BIT(RCS0)), \
 	.num_pipes = 2, \
 	.display.has_hotplug = 1, \
 	.display.has_gmch = 1, \
 	.gpu_reset_clobbers_display = true, \
-	.engine_mask = BIT(RCS0), \
 	.has_snoop = true, \
 	.has_coherent_ggtt = true, \
 	I9XX_PIPE_OFFSETS, \
@@ -320,25 +321,25 @@ static const struct intel_device_info intel_i965gm_info = {
 static const struct intel_device_info intel_g45_info = {
 	GEN4_FEATURES,
 	PLATFORM(INTEL_G45),
-	.engine_mask = BIT(RCS0) | BIT(VCS0),
+	ENGINES(BIT(RCS0) | BIT(VCS0)),
 	.gpu_reset_clobbers_display = false,
 };
 
 static const struct intel_device_info intel_gm45_info = {
 	GEN4_FEATURES,
 	PLATFORM(INTEL_GM45),
+	ENGINES(BIT(RCS0) | BIT(VCS0)),
 	.is_mobile = 1,
 	.display.has_fbc = 1,
 	.display.supports_tv = 1,
-	.engine_mask = BIT(RCS0) | BIT(VCS0),
 	.gpu_reset_clobbers_display = false,
 };
 
 #define GEN5_FEATURES \
 	GEN(5), \
+	ENGINES(BIT(RCS0) | BIT(VCS0)), \
 	.num_pipes = 2, \
 	.display.has_hotplug = 1, \
-	.engine_mask = BIT(RCS0) | BIT(VCS0), \
 	.has_snoop = true, \
 	.has_coherent_ggtt = true, \
 	/* ilk does support rc6, but we do not implement [power] contexts */ \
@@ -362,10 +363,10 @@ static const struct intel_device_info intel_ironlake_m_info = {
 
 #define GEN6_FEATURES \
 	GEN(6), \
+	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0)), \
 	.num_pipes = 2, \
 	.display.has_hotplug = 1, \
 	.display.has_fbc = 1, \
-	.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \
 	.has_coherent_ggtt = true, \
 	.has_llc = 1, \
 	.has_rc6 = 1, \
@@ -410,10 +411,10 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = {
 
 #define GEN7_FEATURES  \
 	GEN(7), \
+	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0)), \
 	.num_pipes = 3, \
 	.display.has_hotplug = 1, \
 	.display.has_fbc = 1, \
-	.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \
 	.has_coherent_ggtt = true, \
 	.has_llc = 1, \
 	.has_rc6 = 1, \
@@ -468,6 +469,7 @@ static const struct intel_device_info intel_ivybridge_q_info = {
 static const struct intel_device_info intel_valleyview_info = {
 	PLATFORM(INTEL_VALLEYVIEW),
 	GEN(7),
+	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0)),
 	.is_lp = 1,
 	.num_pipes = 2,
 	.has_runtime_pm = 1,
@@ -479,7 +481,6 @@ static const struct intel_device_info intel_valleyview_info = {
 	.ppgtt_size = 31,
 	.has_snoop = true,
 	.has_coherent_ggtt = false,
-	.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0),
 	.display_mmio_offset = VLV_DISPLAY_BASE,
 	I9XX_PIPE_OFFSETS,
 	I9XX_CURSOR_OFFSETS,
@@ -489,7 +490,7 @@ static const struct intel_device_info intel_valleyview_info = {
 
 #define G75_FEATURES  \
 	GEN7_FEATURES, \
-	.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), \
+	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0)), \
 	.display.has_ddi = 1, \
 	.has_fpga_dbg = 1, \
 	.display.has_psr = 1, \
@@ -553,18 +554,17 @@ static const struct intel_device_info intel_broadwell_rsvd_info = {
 
 static const struct intel_device_info intel_broadwell_gt3_info = {
 	BDW_PLATFORM,
+	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1)),
 	.gt = 3,
-	.engine_mask =
-		BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1),
 };
 
 static const struct intel_device_info intel_cherryview_info = {
 	PLATFORM(INTEL_CHERRYVIEW),
 	GEN(8),
+	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0)),
 	.num_pipes = 3,
 	.display.has_hotplug = 1,
 	.is_lp = 1,
-	.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0),
 	.has_64bit_reloc = 1,
 	.has_runtime_pm = 1,
 	.has_rc6 = 1,
@@ -616,9 +616,7 @@ static const struct intel_device_info intel_skylake_gt2_info = {
 
 #define SKL_GT3_PLUS_PLATFORM \
 	SKL_PLATFORM, \
-	.engine_mask = \
-		BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1)
-
+	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1))
 
 static const struct intel_device_info intel_skylake_gt3_info = {
 	SKL_GT3_PLUS_PLATFORM,
@@ -632,9 +630,9 @@ static const struct intel_device_info intel_skylake_gt4_info = {
 
 #define GEN9_LP_FEATURES \
 	GEN(9), \
+	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0)), \
 	.is_lp = 1, \
 	.display.has_hotplug = 1, \
-	.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), \
 	.num_pipes = 3, \
 	.has_64bit_reloc = 1, \
 	.display.has_ddi = 1, \
@@ -689,9 +687,8 @@ static const struct intel_device_info intel_kabylake_gt2_info = {
 
 static const struct intel_device_info intel_kabylake_gt3_info = {
 	KBL_PLATFORM,
+	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1)),
 	.gt = 3,
-	.engine_mask =
-		BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1),
 };
 
 #define CFL_PLATFORM \
@@ -710,9 +707,8 @@ static const struct intel_device_info intel_coffeelake_gt2_info = {
 
 static const struct intel_device_info intel_coffeelake_gt3_info = {
 	CFL_PLATFORM,
+	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1)),
 	.gt = 3,
-	.engine_mask =
-		BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1),
 };
 
 #define GEN10_FEATURES \
@@ -754,15 +750,14 @@ static const struct intel_device_info intel_cannonlake_info = {
 static const struct intel_device_info intel_icelake_11_info = {
 	GEN11_FEATURES,
 	PLATFORM(INTEL_ICELAKE),
-	.engine_mask =
-		BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2),
+	ENGINES(BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2)),
 };
 
 static const struct intel_device_info intel_elkhartlake_info = {
 	GEN11_FEATURES,
 	PLATFORM(INTEL_ELKHARTLAKE),
+	ENGINES(BIT(RCS0) | BIT(BCS0) | BIT(VCS0)),
 	.is_alpha_support = 1,
-	.engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VCS0),
 	.ppgtt_size = 36,
 };
 
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 5a2e17d6146b..0deda1efd37d 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -149,6 +149,7 @@ struct intel_device_info {
 	u8 gen;
 	u8 gt; /* GT number, 0 if undefined */
 	intel_engine_mask_t engine_mask; /* Engines supported by the HW */
+	u8 num_engines;
 
 	enum intel_platform platform;
 
@@ -202,8 +203,6 @@ struct intel_runtime_info {
 	u8 num_sprites[I915_MAX_PIPES];
 	u8 num_scalers[I915_MAX_PIPES];
 
-	u8 num_engines;
-
 	/* Slice/subslice/EU info */
 	struct sseu_dev_info sseu;
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index b62f005e4d50..43d014472ac6 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -1257,7 +1257,7 @@ static int igt_ctx_readonly(void *arg)
 		}
 	}
 	pr_info("Submitted %lu dwords (across %u engines)\n",
-		ndwords, RUNTIME_INFO(i915)->num_engines);
+		ndwords, INTEL_INFO(i915)->num_engines);
 
 	dw = 0;
 	idx = 0;
@@ -1586,7 +1586,7 @@ static int igt_vm_isolation(void *arg)
 		count += this;
 	}
 	pr_info("Checked %lu scratch offsets across %d engines\n",
-		count, RUNTIME_INFO(i915)->num_engines);
+		count, INTEL_INFO(i915)->num_engines);
 
 out_rpm:
 	intel_runtime_pm_put(i915, wakeref);
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index b60591531e4a..8fe9a43c99d9 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -1211,7 +1211,7 @@ static int live_breadcrumbs_smoketest(void *arg)
 		num_fences += atomic_long_read(&t[id].num_fences);
 	}
 	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
-		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);
+		num_waits, num_fences, INTEL_INFO(i915)->num_engines, ncpus);
 
 	mutex_lock(&i915->drm.struct_mutex);
 	ret = igt_live_test_end(&live) ?: ret;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 74+ messages in thread

* [PATCH 15/45] drm/i915: Restore control over ppgtt for context creation ABI
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (12 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 14/45] drm/i915: Make engine_mask & num_engines static Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 16/45] drm/i915: Allow a context to define its set of engines Chris Wilson
                   ` (37 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

Having hidden the partially exposed new ABI from the PR, put it back again
for completion of context recovery. A significant part of context
recovery is the ability to reuse as much of the old context as is
feasible (to avoid expensive reconstruction). The biggest chunk kept
hidden at the moment is fine-control over the ctx->ppgtt (the GPU page
tables and associated translation tables and kernel maps), so make
control over the ctx->ppgtt explicit.

This allows userspace to create and share virtual memory address spaces
(within the limits of a single fd) between contexts it owns, along with
the ability to query a context for its vm state.
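
A rough userspace sketch of the intended flow (not part of this patch,
and assuming the vm_id member of struct drm_i915_gem_vm_control plus
libdrm's drmIoctl(); error handling omitted): fetch the VM id from one
context, install it in another on the same fd, then drop the temporary
handle.

	#include <stdint.h>
	#include <drm/i915_drm.h>
	#include <xf86drm.h>

	static void share_vm(int fd, uint32_t src_ctx, uint32_t dst_ctx)
	{
		struct drm_i915_gem_context_param p = {
			.ctx_id = src_ctx,
			.param = I915_CONTEXT_PARAM_VM,
		};

		/* Export the ppGTT backing src_ctx as a VM id (takes a ref). */
		drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p);

		/* Point dst_ctx at the same ppGTT. */
		p.ctx_id = dst_ctx;
		drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);

		/* Drop our handle; each context keeps its own reference. */
		struct drm_i915_gem_vm_control vm = { .vm_id = p.value };
		drmIoctl(fd, DRM_IOCTL_I915_GEM_VM_DESTROY, &vm);
	}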

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c         |  2 ++
 drivers/gpu/drm/i915/i915_gem_context.c |  5 -----
 include/uapi/drm/i915_drm.h             | 15 +++++++++++++++
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 824409ffd03f..81866bd1b6e4 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -3150,6 +3150,8 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_VM_CREATE, i915_gem_vm_create_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_VM_DESTROY, i915_gem_vm_destroy_ioctl, DRM_RENDER_ALLOW),
 };
 
 static struct drm_driver driver = {
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 0d9d435ae398..217d4fe0349d 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -98,7 +98,6 @@
 #include "i915_user_extensions.h"
 
 #define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1 << 1)
-#define I915_CONTEXT_PARAM_VM 0x9
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
 
@@ -969,8 +968,6 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv,
 	struct i915_hw_ppgtt *ppgtt;
 	int ret;
 
-	return -EINVAL; /* nothing to see here; please move along */
-
 	if (!ctx->ppgtt)
 		return -ENODEV;
 
@@ -1069,8 +1066,6 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
 	struct i915_hw_ppgtt *ppgtt, *old;
 	int err;
 
-	return -EINVAL; /* nothing to see here; please move along */
-
 	if (args->size)
 		return -EINVAL;
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 3a73f5316766..d6ad4a15b2b9 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -355,6 +355,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_PERF_ADD_CONFIG	0x37
 #define DRM_I915_PERF_REMOVE_CONFIG	0x38
 #define DRM_I915_QUERY			0x39
+#define DRM_I915_GEM_VM_CREATE		0x3a
+#define DRM_I915_GEM_VM_DESTROY		0x3b
 /* Must be kept compact -- no holes */
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
@@ -415,6 +417,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_PERF_ADD_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
 #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
 #define DRM_IOCTL_I915_QUERY			DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query)
+#define DRM_IOCTL_I915_GEM_VM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control)
+#define DRM_IOCTL_I915_GEM_VM_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -1507,6 +1511,17 @@ struct drm_i915_gem_context_param {
  * On creation, all new contexts are marked as recoverable.
  */
 #define I915_CONTEXT_PARAM_RECOVERABLE	0x8
+
+	/*
+	 * The id of the associated virtual memory address space (ppGTT) of
+	 * this context. Can be retrieved and passed to another context
+	 * (on the same fd) for both to use the same ppGTT and so share
+	 * address layouts, and avoid reloading the page tables on context
+	 * switches between themselves.
+	 *
+	 * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
+	 */
+#define I915_CONTEXT_PARAM_VM		0x9
 /* Must be kept compact -- no holes and well documented */
 
 	__u64 value;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 74+ messages in thread

* [PATCH 16/45] drm/i915: Allow a context to define its set of engines
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (13 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 15/45] drm/i915: Restore control over ppgtt for context creation ABI Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 17/45] drm/i915: Re-expose SINGLE_TIMELINE flags for context creation Chris Wilson
                   ` (36 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

Over the last few years, we have debated how to extend the user API to
support an increase in the number of engines, which may be sparse and
even heterogeneous within a class (not all video decoders are created
equal). We settled on using (class, instance) tuples to identify a
specific engine, with an API for the user to construct a map of engines
to capabilities. Into this picture, we then add the challenge of virtual
engines: one user engine that maps behind the scenes to any number of
physical engines. To keep it general, we want the user to have full
control over that mapping. To that end, we allow the user to constrain a
context to define the set of engines that it can access, with the order
fully controlled by the user via (class, instance). With such precise
control in context setup, we can continue to use the existing execbuf
uABI of specifying a single index; only now it doesn't automagically map
onto the engines, it uses the user-defined engine map from the context.
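
For illustration, a userspace sketch (not part of this patch) using the
I915_DEFINE_CONTEXT_PARAM_ENGINES helper added below; engine classes
are taken from enum drm_i915_gem_engine_class and error handling is
omitted. It restricts a context to one render and one video engine,
after which execbuf indices 0 and 1 select exactly those two.

	#include <stdint.h>
	#include <drm/i915_drm.h>
	#include <xf86drm.h>

	static void set_two_engines(int fd, uint32_t ctx_id)
	{
		I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = {
			.engines = {
				{ .engine_class = I915_ENGINE_CLASS_RENDER,
				  .engine_instance = 0 },
				{ .engine_class = I915_ENGINE_CLASS_VIDEO,
				  .engine_instance = 0 },
			},
		};
		struct drm_i915_gem_context_param p = {
			.ctx_id = ctx_id,
			.param = I915_CONTEXT_PARAM_ENGINES,
			.size = sizeof(engines),
			.value = (uintptr_t)&engines,
		};

		/* execbuf index 0 is now rcs0, index 1 is vcs0, nothing else */
		drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);

		/* passing size == 0 later reverts to the legacy ring map */
	}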

v2: Fixup freeing of local on success of get_engines()
v3: Allow empty engines[]
v4: s/nengine/num_engines/
v5: Replace 64 limit on num_engines with a note that execbuf is
currently limited to only using the first 64 engines.
v6: Actually use the engines_mutex to guard the ctx->engines.

Testcase: igt/gem_ctx_engines
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c       | 219 +++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_context.h       |  18 ++
 drivers/gpu/drm/i915/i915_gem_context_types.h |   1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    |   5 +-
 drivers/gpu/drm/i915/i915_utils.h             |  36 +++
 include/uapi/drm/i915_drm.h                   |  31 +++
 6 files changed, 303 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 217d4fe0349d..b4b7a2eee1c9 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -90,7 +90,6 @@
 #include <drm/i915_drm.h>
 
 #include "gt/intel_lrc_reg.h"
-#include "gt/intel_workarounds.h"
 
 #include "i915_drv.h"
 #include "i915_globals.h"
@@ -143,13 +142,17 @@ static void lut_close(struct i915_gem_context *ctx)
 static struct intel_context *
 lookup_user_engine(struct i915_gem_context *ctx, u16 class, u16 instance)
 {
-	struct intel_engine_cs *engine;
+	if (!i915_gem_context_user_engines(ctx)) {
+		struct intel_engine_cs *engine;
 
-	engine = intel_engine_lookup_user(ctx->i915, class, instance);
-	if (!engine)
-		return ERR_PTR(-EINVAL);
+		engine = intel_engine_lookup_user(ctx->i915, class, instance);
+		if (!engine)
+			return ERR_PTR(-EINVAL);
+
+		instance = engine->id;
+	}
 
-	return i915_gem_context_get_engine(ctx, engine->id);
+	return i915_gem_context_get_engine(ctx, instance);
 }
 
 static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
@@ -257,6 +260,17 @@ static void free_engines(struct i915_gem_engines *e)
 	__free_engines(e, e->num_engines);
 }
 
+static void free_engines_rcu(struct work_struct *wrk)
+{
+	struct i915_gem_engines *e =
+		container_of(wrk, struct i915_gem_engines, rcu.work);
+	struct drm_i915_private *i915 = e->i915;
+
+	mutex_lock(&i915->drm.struct_mutex);
+	free_engines(e);
+	mutex_unlock(&i915->drm.struct_mutex);
+}
+
 static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
 {
 	struct intel_engine_cs *engine;
@@ -1382,6 +1396,191 @@ static int set_sseu(struct i915_gem_context *ctx,
 	return ret;
 }
 
+struct set_engines {
+	struct i915_gem_context *ctx;
+	struct i915_gem_engines *engines;
+};
+
+static const i915_user_extension_fn set_engines__extensions[] = {
+};
+
+static int
+set_engines(struct i915_gem_context *ctx,
+	    const struct drm_i915_gem_context_param *args)
+{
+	struct i915_context_param_engines __user *user =
+		u64_to_user_ptr(args->value);
+	struct set_engines set = { .ctx = ctx };
+	unsigned int num_engines, n;
+	u64 extensions;
+	int err;
+
+	if (!args->size) { /* switch back to legacy user_ring_map */
+		if (!i915_gem_context_user_engines(ctx))
+			return 0;
+
+		set.engines = default_engines(ctx);
+		if (IS_ERR(set.engines))
+			return PTR_ERR(set.engines);
+
+		goto replace;
+	}
+
+	BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->engines)));
+	if (args->size < sizeof(*user) ||
+	    !IS_ALIGNED(args->size, sizeof(*user->engines))) {
+		DRM_DEBUG("Invalid size for engine array: %d\n",
+			  args->size);
+		return -EINVAL;
+	}
+
+	/*
+	 * Note that I915_EXEC_RING_MASK limits execbuf to only using the
+	 * first 64 engines defined here.
+	 */
+	num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines);
+
+	set.engines = kmalloc(struct_size(set.engines, engines, num_engines),
+			      GFP_KERNEL);
+	if (!set.engines)
+		return -ENOMEM;
+
+	set.engines->i915 = ctx->i915;
+	for (n = 0; n < num_engines; n++) {
+		struct i915_engine_class_instance ci;
+		struct intel_engine_cs *engine;
+
+		if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) {
+			__free_engines(set.engines, n);
+			return -EFAULT;
+		}
+
+		if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID &&
+		    ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE) {
+			set.engines->engines[n] = NULL;
+			continue;
+		}
+
+		engine = intel_engine_lookup_user(ctx->i915,
+						  ci.engine_class,
+						  ci.engine_instance);
+		if (!engine) {
+			DRM_DEBUG("Invalid engine[%d]: { class:%d, instance:%d }\n",
+				  n, ci.engine_class, ci.engine_instance);
+			__free_engines(set.engines, n);
+			return -ENOENT;
+		}
+
+		set.engines->engines[n] = intel_context_create(ctx, engine);
+		if (!set.engines->engines[n]) {
+			__free_engines(set.engines, n);
+			return -ENOMEM;
+		}
+	}
+	set.engines->num_engines = num_engines;
+
+	err = -EFAULT;
+	if (!get_user(extensions, &user->extensions))
+		err = i915_user_extensions(u64_to_user_ptr(extensions),
+					   set_engines__extensions,
+					   ARRAY_SIZE(set_engines__extensions),
+					   &set);
+	if (err) {
+		free_engines(set.engines);
+		return err;
+	}
+
+replace:
+	mutex_lock(&ctx->engines_mutex);
+	if (args->size)
+		i915_gem_context_set_user_engines(ctx);
+	else
+		i915_gem_context_clear_user_engines(ctx);
+	rcu_swap_protected(ctx->engines, set.engines, 1);
+	mutex_unlock(&ctx->engines_mutex);
+
+	INIT_RCU_WORK(&set.engines->rcu, free_engines_rcu);
+	queue_rcu_work(system_wq, &set.engines->rcu);
+
+	return 0;
+}
+
+static int
+get_engines(struct i915_gem_context *ctx,
+	    struct drm_i915_gem_context_param *args)
+{
+	struct i915_context_param_engines __user *user;
+	struct i915_gem_engines *e;
+	size_t n, count, size;
+	int err = 0;
+
+	err = mutex_lock_interruptible(&ctx->engines_mutex);
+	if (err)
+		return err;
+
+	if (!i915_gem_context_user_engines(ctx)) {
+		args->size = 0;
+		goto unlock;
+	}
+
+	e = i915_gem_context_engines(ctx);
+	count = e->num_engines;
+
+	/* Be paranoid in case we have an impedance mismatch */
+	if (!check_struct_size(user, engines, count, &size)) {
+		err = -EINVAL;
+		goto unlock;
+	}
+	if (overflows_type(size, args->size)) {
+		err = -EINVAL;
+		goto unlock;
+	}
+
+	if (!args->size) {
+		args->size = size;
+		goto unlock;
+	}
+
+	if (args->size < size) {
+		err = -EINVAL;
+		goto unlock;
+	}
+
+	user = u64_to_user_ptr(args->value);
+	if (!access_ok(user, size)) {
+		err = -EFAULT;
+		goto unlock;
+	}
+
+	if (put_user(0, &user->extensions)) {
+		err = -EFAULT;
+		goto unlock;
+	}
+
+	for (n = 0; n < count; n++) {
+		struct i915_engine_class_instance ci = {
+			.engine_class = I915_ENGINE_CLASS_INVALID,
+			.engine_instance = I915_ENGINE_CLASS_INVALID_NONE,
+		};
+
+		if (e->engines[n]) {
+			ci.engine_class = e->engines[n]->engine->uabi_class;
+			ci.engine_instance = e->engines[n]->engine->instance;
+		}
+
+		if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) {
+			err = -EFAULT;
+			goto unlock;
+		}
+	}
+
+	args->size = size;
+
+unlock:
+	mutex_unlock(&ctx->engines_mutex);
+	return err;
+}
+
 static int ctx_setparam(struct drm_i915_file_private *fpriv,
 			struct i915_gem_context *ctx,
 			struct drm_i915_gem_context_param *args)
@@ -1455,6 +1654,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
 		ret = set_ppgtt(fpriv, ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_ENGINES:
+		ret = set_engines(ctx, args);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
@@ -1685,6 +1888,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 		ret = get_ppgtt(file_priv, ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_ENGINES:
+		ret = get_engines(ctx, args);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 272e183ebc0c..9ad4a6362438 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -112,6 +112,24 @@ static inline void i915_gem_context_set_force_single_submission(struct i915_gem_
 	__set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags);
 }
 
+static inline bool
+i915_gem_context_user_engines(const struct i915_gem_context *ctx)
+{
+	return test_bit(CONTEXT_USER_ENGINES, &ctx->flags);
+}
+
+static inline void
+i915_gem_context_set_user_engines(struct i915_gem_context *ctx)
+{
+	set_bit(CONTEXT_USER_ENGINES, &ctx->flags);
+}
+
+static inline void
+i915_gem_context_clear_user_engines(struct i915_gem_context *ctx)
+{
+	clear_bit(CONTEXT_USER_ENGINES, &ctx->flags);
+}
+
 int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx);
 static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
index d5cb4f121aad..fb965ded2508 100644
--- a/drivers/gpu/drm/i915/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
@@ -146,6 +146,7 @@ struct i915_gem_context {
 #define CONTEXT_BANNED			0
 #define CONTEXT_CLOSED			1
 #define CONTEXT_FORCE_SINGLE_SUBMISSION	2
+#define CONTEXT_USER_ENGINES		3
 
 	/**
 	 * @hw_id: - unique identifier for the context
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 679f7c1561ba..d6c5220addd0 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2165,7 +2165,10 @@ eb_select_engine(struct i915_execbuffer *eb,
 	unsigned int idx;
 	int err;
 
-	idx = eb_select_legacy_ring(eb, file, args);
+	if (i915_gem_context_user_engines(eb->gem_context))
+		idx = args->flags & I915_EXEC_RING_MASK;
+	else
+		idx = eb_select_legacy_ring(eb, file, args);
 
 	ce = i915_gem_context_get_engine(eb->gem_context, idx);
 	if (IS_ERR(ce))
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 2dbe8933b50a..1436fe2fb5f8 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -25,6 +25,9 @@
 #ifndef __I915_UTILS_H
 #define __I915_UTILS_H
 
+#include <linux/kernel.h>
+#include <linux/overflow.h>
+
 #undef WARN_ON
 /* Many gcc seem to no see through this and fall over :( */
 #if 0
@@ -73,6 +76,39 @@
 #define overflows_type(x, T) \
 	(sizeof(x) > sizeof(T) && (x) >> BITS_PER_TYPE(T))
 
+static inline bool
+__check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
+{
+	size_t sz;
+
+	if (check_mul_overflow(count, arr, &sz))
+		return false;
+
+	if (check_add_overflow(sz, base, &sz))
+		return false;
+
+	*size = sz;
+	return true;
+}
+
+/**
+ * check_struct_size() - Calculate size of structure with trailing array.
+ * @p: Pointer to the structure.
+ * @member: Name of the array member.
+ * @n: Number of elements in the array.
+ * @sz: Total size of structure and array
+ *
+ * Calculates size of memory needed for structure @p followed by an
+ * array of @n @member elements, like struct_size() but reports
+ * whether it overflowed, and the resultant size in @sz
+ *
+ * Return: false if the calculation overflowed.
+ */
+#define check_struct_size(p, member, n, sz) \
+	likely(__check_struct_size(sizeof(*(p)), \
+				   sizeof(*(p)->member) + __must_be_array((p)->member), \
+				   n, sz))
+
 #define ptr_mask_bits(ptr, n) ({					\
 	unsigned long __v = (unsigned long)(ptr);			\
 	(typeof(ptr))(__v & -BIT(n));					\
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index d6ad4a15b2b9..8e1bb22926e4 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -136,6 +136,7 @@ enum drm_i915_gem_engine_class {
 struct i915_engine_class_instance {
 	__u16 engine_class; /* see enum drm_i915_gem_engine_class */
 	__u16 engine_instance;
+#define I915_ENGINE_CLASS_INVALID_NONE -1
 };
 
 /**
@@ -1522,6 +1523,26 @@ struct drm_i915_gem_context_param {
 	 * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
 	 */
 #define I915_CONTEXT_PARAM_VM		0x9
+
+/*
+ * I915_CONTEXT_PARAM_ENGINES:
+ *
+ * Bind this context to operate on this subset of available engines. Henceforth,
+ * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
+ * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
+ * and upwards. Slots 0...N are filled in using the specified (class, instance).
+ * Use
+ *	engine_class: I915_ENGINE_CLASS_INVALID,
+ *	engine_instance: I915_ENGINE_CLASS_INVALID_NONE
+ * to specify a gap in the array that can be filled in later, e.g. by a
+ * virtual engine used for load balancing.
+ *
+ * Setting the number of engines bound to the context to 0, by passing a zero
+ * sized argument, will revert back to default settings.
+ *
+ * See struct i915_context_param_engines.
+ */
+#define I915_CONTEXT_PARAM_ENGINES	0xa
 /* Must be kept compact -- no holes and well documented */
 
 	__u64 value;
@@ -1585,6 +1606,16 @@ struct drm_i915_gem_context_param_sseu {
 	__u32 rsvd;
 };
 
+struct i915_context_param_engines {
+	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
+	struct i915_engine_class_instance engines[0];
+} __attribute__((packed));
+
+#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \
+	__u64 extensions; \
+	struct i915_engine_class_instance engines[N__]; \
+} __attribute__((packed)) name__
+
 struct drm_i915_gem_context_create_ext_setparam {
 #define I915_CONTEXT_CREATE_EXT_SETPARAM 0
 	struct i915_user_extension base;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 74+ messages in thread

* [PATCH 17/45] drm/i915: Re-expose SINGLE_TIMELINE flags for context creation
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (14 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 16/45] drm/i915: Allow a context to define its set of engines Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 18/45] drm/i915: Allow userspace to clone contexts on creation Chris Wilson
                   ` (35 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

The SINGLE_TIMELINE flag can be used to create a context such that all
engine instances within that context share a common timeline. This can
be useful for mixing operations between real and virtual engines, or
when using a composite context for a single client API context.
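
A userspace sketch (not part of this patch, assuming the CREATE_EXT
ioctl alias for struct drm_i915_gem_context_create_ext; error handling
omitted):

	#include <stdint.h>
	#include <drm/i915_drm.h>
	#include <xf86drm.h>

	static uint32_t create_single_timeline_ctx(int fd)
	{
		struct drm_i915_gem_context_create_ext arg = {
			.flags = I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
		};

		/* Requests on any engine of this context execute in order. */
		drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &arg);
		return arg.ctx_id;
	}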

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_context.c | 4 ----
 include/uapi/drm/i915_drm.h             | 3 ++-
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index b4b7a2eee1c9..d6bea51050c0 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -96,8 +96,6 @@
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
 
-#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1 << 1)
-
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
 
 static struct i915_global_gem_context {
@@ -493,8 +491,6 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
-	BUILD_BUG_ON(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &
-		     ~I915_CONTEXT_CREATE_FLAGS_UNKNOWN);
 	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &&
 	    !HAS_EXECLISTS(dev_priv))
 		return ERR_PTR(-EINVAL);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 8e1bb22926e4..7aef672ab3c7 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1469,8 +1469,9 @@ struct drm_i915_gem_context_create_ext {
 	__u32 ctx_id; /* output: id of new context*/
 	__u32 flags;
 #define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS	(1u << 0)
+#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE	(1u << 1)
 #define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \
-	(-(I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS << 1))
+	(-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1))
 	__u64 extensions;
 };
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 74+ messages in thread

* [PATCH 18/45] drm/i915: Allow userspace to clone contexts on creation
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (15 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 17/45] drm/i915: Re-expose SINGLE_TIMELINE flags for context creation Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 19/45] drm/i915: Load balancing across a virtual engine Chris Wilson
                   ` (34 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

A use case arose out of handling context recovery in mesa, whereby they
wish to recreate a context with fresh logical state but preserving all
other details of the original. Currently, they create a new context and
iterate over which bits they want to copy across, but it would be much
more convenient if they were able to just pass in a target context to
clone during creation. This essentially extends the setparam during
creation to pull the details from a target context instead of the
user-supplied parameters.

The ideal here is that we don't expose control over anything more than
can be obtained via CONTEXT_PARAM. That is, userspace retains explicit
control over all features, and this API is just a convenience.

For example, you could replace

	struct context_param p = { .param = CONTEXT_PARAM_VM };

	param.ctx_id = old_id;
	gem_context_get_param(&p.param);

	new_id = gem_context_create();

	param.ctx_id = new_id;
	gem_context_set_param(&p.param);

	gem_vm_destroy(param.value); /* drop the ref to VM_ID handle */

with

	struct create_ext_param p = {
	  { .name = CONTEXT_CREATE_CLONE },
	  .clone_id = old_id,
	  .flags = CLONE_FLAGS_VM
	}
	new_id = gem_context_create_ext(&p);

and not have to worry about stray namespace pollution etc.
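
Spelled out against the structs added below, the second snippet becomes
something like the following sketch (the extension chaining through
i915_user_extension is assumed from the existing CREATE_EXT flow; error
handling omitted):

	#include <stdint.h>
	#include <drm/i915_drm.h>
	#include <xf86drm.h>

	static uint32_t recreate_context(int fd, uint32_t old_id)
	{
		struct drm_i915_gem_context_create_ext_clone clone = {
			.base = { .name = I915_CONTEXT_CREATE_EXT_CLONE },
			.clone_id = old_id,
			.flags = I915_CONTEXT_CLONE_ENGINES |
				 I915_CONTEXT_CLONE_SCHEDATTR |
				 I915_CONTEXT_CLONE_VM,
		};
		struct drm_i915_gem_context_create_ext arg = {
			.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
			.extensions = (uintptr_t)&clone,
		};

		/* Fresh logical state; engines, VM and priority carried over. */
		drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &arg);
		return arg.ctx_id;
	}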

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c | 206 ++++++++++++++++++++++++
 include/uapi/drm/i915_drm.h             |  15 ++
 2 files changed, 221 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index d6bea51050c0..ba7582d955d1 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1682,8 +1682,214 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data)
 	return ctx_setparam(arg->fpriv, arg->ctx, &local.param);
 }
 
+static int clone_engines(struct i915_gem_context *dst,
+			 struct i915_gem_context *src)
+{
+	struct i915_gem_engines *e = i915_gem_context_lock_engines(src);
+	struct i915_gem_engines *clone;
+	bool user_engines;
+	unsigned long n;
+
+	clone = kmalloc(struct_size(e, engines, e->num_engines), GFP_KERNEL);
+	if (!clone)
+		goto err_unlock;
+
+	clone->i915 = dst->i915;
+	for (n = 0; n < e->num_engines; n++) {
+		if (!e->engines[n]) {
+			clone->engines[n] = NULL;
+			continue;
+		}
+
+		clone->engines[n] =
+			intel_context_create(dst, e->engines[n]->engine);
+		if (!clone->engines[n]) {
+			__free_engines(clone, n);
+			goto err_unlock;
+		}
+	}
+	clone->num_engines = n;
+
+	user_engines = i915_gem_context_user_engines(src);
+	i915_gem_context_unlock_engines(src);
+
+	free_engines(dst->engines);
+	RCU_INIT_POINTER(dst->engines, clone);
+	if (user_engines)
+		i915_gem_context_set_user_engines(dst);
+	else
+		i915_gem_context_clear_user_engines(dst);
+	return 0;
+
+err_unlock:
+	i915_gem_context_unlock_engines(src);
+	return -ENOMEM;
+}
+
+static int clone_flags(struct i915_gem_context *dst,
+		       struct i915_gem_context *src)
+{
+	dst->user_flags = src->user_flags;
+	return 0;
+}
+
+static int clone_schedattr(struct i915_gem_context *dst,
+			   struct i915_gem_context *src)
+{
+	dst->sched = src->sched;
+	return 0;
+}
+
+static int clone_sseu(struct i915_gem_context *dst,
+		      struct i915_gem_context *src)
+{
+	struct i915_gem_engines *e = i915_gem_context_lock_engines(src);
+	struct i915_gem_engines *clone;
+	unsigned long n;
+	int err;
+
+	clone = dst->engines; /* no locking required; sole access */
+	if (e->num_engines != clone->num_engines) {
+		err = -EINVAL;
+		goto unlock;
+	}
+
+	for (n = 0; n < e->num_engines; n++) {
+		struct intel_context *ce = e->engines[n];
+
+		if (clone->engines[n]->engine->class != ce->engine->class) {
+			/* Must have compatible engine maps! */
+			err = -EINVAL;
+			goto unlock;
+		}
+
+		/* serialises with set_sseu */
+		err = intel_context_lock_pinned(ce);
+		if (err)
+			goto unlock;
+
+		clone->engines[n]->sseu = ce->sseu;
+		intel_context_unlock_pinned(ce);
+	}
+
+	err = 0;
+unlock:
+	i915_gem_context_unlock_engines(src);
+	return err;
+}
+
+static int clone_timeline(struct i915_gem_context *dst,
+			  struct i915_gem_context *src)
+{
+	if (src->timeline) {
+		GEM_BUG_ON(src->timeline == dst->timeline);
+
+		if (dst->timeline)
+			i915_timeline_put(dst->timeline);
+		dst->timeline = i915_timeline_get(src->timeline);
+	}
+
+	return 0;
+}
+
+static int clone_vm(struct i915_gem_context *dst,
+		    struct i915_gem_context *src)
+{
+	struct i915_hw_ppgtt *ppgtt;
+
+	rcu_read_lock();
+	do {
+		ppgtt = READ_ONCE(src->ppgtt);
+		if (!ppgtt)
+			break;
+
+		if (!kref_get_unless_zero(&ppgtt->ref))
+			continue;
+
+		/*
+		 * This ppgtt may have be reallocated between
+		 * the read and the kref, and reassigned to a third
+		 * context. In order to avoid inadvertent sharing
+		 * of this ppgtt with that third context (and not
+		 * src), we have to confirm that we have the same
+		 * ppgtt after passing through the strong memory
+		 * barrier implied by a successful
+		 * kref_get_unless_zero().
+		 *
+		 * Once we have acquired the current ppgtt of src,
+		 * we no longer care if it is released from src, as
+		 * it cannot be reallocated elsewhere.
+		 */
+
+		if (ppgtt == READ_ONCE(src->ppgtt))
+			break;
+
+		i915_ppgtt_put(ppgtt);
+	} while (1);
+	rcu_read_unlock();
+
+	if (ppgtt) {
+		__assign_ppgtt(dst, ppgtt);
+		i915_ppgtt_put(ppgtt);
+	}
+
+	return 0;
+}
+
+static int create_clone(struct i915_user_extension __user *ext, void *data)
+{
+	static int (* const fn[])(struct i915_gem_context *dst,
+				  struct i915_gem_context *src) = {
+#define MAP(x, y) [ilog2(I915_CONTEXT_CLONE_##x)] = y
+		MAP(ENGINES, clone_engines),
+		MAP(FLAGS, clone_flags),
+		MAP(SCHEDATTR, clone_schedattr),
+		MAP(SSEU, clone_sseu),
+		MAP(TIMELINE, clone_timeline),
+		MAP(VM, clone_vm),
+#undef MAP
+	};
+	struct drm_i915_gem_context_create_ext_clone local;
+	const struct create_ext *arg = data;
+	struct i915_gem_context *dst = arg->ctx;
+	struct i915_gem_context *src;
+	int err, bit;
+
+	if (copy_from_user(&local, ext, sizeof(local)))
+		return -EFAULT;
+
+	BUILD_BUG_ON(GENMASK(BITS_PER_TYPE(local.flags) - 1, ARRAY_SIZE(fn)) !=
+		     I915_CONTEXT_CLONE_UNKNOWN);
+
+	if (local.flags & I915_CONTEXT_CLONE_UNKNOWN)
+		return -EINVAL;
+
+	if (local.rsvd)
+		return -EINVAL;
+
+	rcu_read_lock();
+	src = __i915_gem_context_lookup_rcu(arg->fpriv, local.clone_id);
+	rcu_read_unlock();
+	if (!src)
+		return -ENOENT;
+
+	GEM_BUG_ON(src == dst);
+
+	for (bit = 0; bit < ARRAY_SIZE(fn); bit++) {
+		if (!(local.flags & BIT(bit)))
+			continue;
+
+		err = fn[bit](dst, src);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static const i915_user_extension_fn create_extensions[] = {
 	[I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam,
+	[I915_CONTEXT_CREATE_EXT_CLONE] = create_clone,
 };
 
 static bool client_is_banned(struct drm_i915_file_private *file_priv)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7aef672ab3c7..7694113362d4 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1623,6 +1623,21 @@ struct drm_i915_gem_context_create_ext_setparam {
 	struct drm_i915_gem_context_param param;
 };
 
+struct drm_i915_gem_context_create_ext_clone {
+#define I915_CONTEXT_CREATE_EXT_CLONE 1
+	struct i915_user_extension base;
+	__u32 clone_id;
+	__u32 flags;
+#define I915_CONTEXT_CLONE_ENGINES	(1u << 0)
+#define I915_CONTEXT_CLONE_FLAGS	(1u << 1)
+#define I915_CONTEXT_CLONE_SCHEDATTR	(1u << 2)
+#define I915_CONTEXT_CLONE_SSEU		(1u << 3)
+#define I915_CONTEXT_CLONE_TIMELINE	(1u << 4)
+#define I915_CONTEXT_CLONE_VM		(1u << 5)
+#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
+	__u64 rsvd;
+};
+
 struct drm_i915_gem_context_destroy {
 	__u32 ctx_id;
 	__u32 pad;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 74+ messages in thread

* [PATCH 19/45] drm/i915: Load balancing across a virtual engine
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (16 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 18/45] drm/i915: Allow userspace to clone contexts on creation Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25 12:14   ` Tvrtko Ursulin
  2019-04-29 13:43   ` Tvrtko Ursulin
  2019-04-25  9:19 ` [PATCH 20/45] drm/i915: Apply an execution_mask to the virtual_engine Chris Wilson
                   ` (33 subsequent siblings)
  51 siblings, 2 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

Having allowed the user to define the set of engines that they want to
use, we go one step further and allow them to bind those engines into a
single virtual instance. Submitting a batch to the virtual engine will
then forward it to any one of the set in a manner that best distributes
the load. The virtual engine has a single timeline across all engines
(it operates as a single queue), so it is not able to concurrently run
batches across multiple engines by itself; that is left up to the user,
who may submit multiple concurrent batches to multiple queues. Multiple
users will be load balanced across the system.
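
To make the uABI shape concrete, a userspace sketch of binding two
video engines behind one virtual slot (not part of this patch; the
layout of the load-balance extension, engine_index/num_siblings/
engines[], is assumed from the i915_drm.h additions in this series,
and error handling is omitted):

	#include <stdint.h>
	#include <drm/i915_drm.h>
	#include <xf86drm.h>

	static void setup_virtual_vcs(int fd, uint32_t ctx_id)
	{
		I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balance, 2) = {
			.base = { .name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE },
			.engine_index = 0, /* fills the placeholder slot */
			.num_siblings = 2,
			.engines = {
				{ I915_ENGINE_CLASS_VIDEO, 0 },
				{ I915_ENGINE_CLASS_VIDEO, 1 },
			},
		};
		I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1) = {
			.extensions = (uintptr_t)&balance,
			.engines = {
				/* slot 0: a gap backed by the virtual engine */
				{ I915_ENGINE_CLASS_INVALID,
				  I915_ENGINE_CLASS_INVALID_NONE },
			},
		};
		struct drm_i915_gem_context_param p = {
			.ctx_id = ctx_id,
			.param = I915_CONTEXT_PARAM_ENGINES,
			.size = sizeof(engines),
			.value = (uintptr_t)&engines,
		};

		drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);
	}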

The mechanism used for load balancing in this patch is a late greedy
balancer. When a request is ready for execution, it is added to each
engine's queue, and when an engine is ready for its next request it
claims it from the virtual engine. The first engine to do so wins, i.e.
the request is executed at the earliest opportunity (idle moment) in the
system.

As not all HW is created equal, the user is still able to skip the
virtual engine and execute the batch on a specific engine, all within the
same queue. It will then be executed in order on the correct engine,
with execution on other virtual engines being moved away due to the load
detection.

A couple of areas for potential improvement left!

- The virtual engine always takes priority over equal-priority tasks.
Mostly broken up by applying FQ_CODEL rules for prioritising new clients,
and hopefully the virtual and real engines are not then congested (i.e.
all work is via virtual engines, or all work is to the real engine).

- We require the breadcrumb irq around every virtual engine request. For
normal engines, we eliminate the need for the slow round trip via
interrupt by using the submit fence and queueing in order. For virtual
engines, we have to allow any job to transfer to a new ring, and cannot
coalesce the submissions, so require the completion fence instead,
forcing the persistent use of interrupts.

- We only drip feed single requests through each virtual engine and onto
the physical engines, even if there was enough work to fill all ELSP,
leaving small stalls with an idle CS event at the end of every request.
Could we be greedy and fill both slots? Being lazy is virtuous for load
distribution on less-than-full workloads though.

Other areas of improvement are more general, such as reducing lock
contention, reducing dispatch overhead, looking at direct submission
rather than bouncing around tasklets etc.

sseu: Lift the restriction to allow sseu to be reconfigured on virtual
engines composed of RENDER_CLASS (rcs).

v2: macroize check_user_mbz()
v3: Cancel virtual engines on wedging
v4: Commence commenting
v5: Replace 64b sibling_mask with a list of class:instance

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c  |   6 +-
 drivers/gpu/drm/i915/gt/intel_engine_types.h |   8 +
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 656 ++++++++++++++++++-
 drivers/gpu/drm/i915/gt/intel_lrc.h          |   9 +
 drivers/gpu/drm/i915/gt/selftest_lrc.c       | 180 +++++
 drivers/gpu/drm/i915/i915_gem.h              |   5 +
 drivers/gpu/drm/i915/i915_gem_context.c      | 118 +++-
 drivers/gpu/drm/i915/i915_scheduler.c        |  18 +-
 drivers/gpu/drm/i915/i915_timeline_types.h   |   1 +
 include/uapi/drm/i915_drm.h                  |  39 ++
 10 files changed, 1012 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 4283224249d4..35eef02b3f09 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -290,8 +290,12 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
 				break;
 		}
 		list_add(&rq->signal_link, pos);
-		if (pos == &ce->signals) /* catch transitions from empty list */
+		if (pos == &ce->signals) { /* catch transitions from empty */
 			list_move_tail(&ce->signal_link, &b->signalers);
+		} else if (ce->engine != rq->engine) { /* virtualised */
+			list_move_tail(&ce->signal_link, &b->signalers);
+			intel_engine_queue_breadcrumbs(rq->engine);
+		}
 
 		set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
 		spin_unlock(&b->irq_lock);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 9d64e33f8427..b07d3685e2cb 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -227,6 +227,7 @@ struct intel_engine_execlists {
 	 * @queue: queue of requests, in priority lists
 	 */
 	struct rb_root_cached queue;
+	struct rb_root_cached virtual;
 
 	/**
 	 * @csb_write: control register for Context Switch buffer
@@ -445,6 +446,7 @@ struct intel_engine_cs {
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
 #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
 #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
+#define I915_ENGINE_IS_VIRTUAL       BIT(5)
 	unsigned int flags;
 
 	/*
@@ -534,6 +536,12 @@ intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
 }
 
+static inline bool
+intel_engine_is_virtual(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_IS_VIRTUAL;
+}
+
 #define instdone_slice_mask(dev_priv__) \
 	(IS_GEN(dev_priv__, 7) ? \
 	 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 2e74c792104e..e8faf3417f9c 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -136,6 +136,7 @@
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
 #include "i915_vgpu.h"
+#include "intel_engine_pm.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
 #include "intel_reset.h"
@@ -165,6 +166,41 @@
 
 #define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT | I915_PRIORITY_NOSEMAPHORE)
 
+struct virtual_engine {
+	struct intel_engine_cs base;
+	struct intel_context context;
+
+	/*
+	 * We allow only a single request through the virtual engine at a time
+	 * (each request in the timeline waits for the completion fence of
+	 * the previous before being submitted). By restricting ourselves to
+	 * only submitting a single request, each request is placed on to a
+	 * physical to maximise load spreading (by virtue of the late greedy
+	 * scheduling -- each real engine takes the next available request
+	 * upon idling).
+	 */
+	struct i915_request *request;
+
+	/*
+	 * We keep a rbtree of available virtual engines inside each physical
+	 * engine, sorted by priority. Here we preallocate the nodes we need
+	 * for the virtual engine, indexed by physical_engine->id.
+	 */
+	struct ve_node {
+		struct rb_node rb;
+		int prio;
+	} nodes[I915_NUM_ENGINES];
+
+	/* And finally, which physical engines this virtual engine maps onto. */
+	unsigned int num_siblings;
+	struct intel_engine_cs *siblings[0];
+};
+
+static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
+{
+	return container_of(engine, struct virtual_engine, base);
+}
+
 static int execlists_context_deferred_alloc(struct intel_context *ce,
 					    struct intel_engine_cs *engine);
 static void execlists_init_reg_state(u32 *reg_state,
@@ -228,7 +264,8 @@ static int queue_prio(const struct intel_engine_execlists *execlists)
 }
 
 static inline bool need_preempt(const struct intel_engine_cs *engine,
-				const struct i915_request *rq)
+				const struct i915_request *rq,
+				struct rb_node *rb)
 {
 	int last_prio;
 
@@ -263,6 +300,25 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 	    rq_prio(list_next_entry(rq, link)) > last_prio)
 		return true;
 
+	if (rb) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		bool preempt = false;
+
+		if (engine == ve->siblings[0]) { /* only preempt one sibling */
+			struct i915_request *next;
+
+			rcu_read_lock();
+			next = READ_ONCE(ve->request);
+			if (next)
+				preempt = rq_prio(next) > last_prio;
+			rcu_read_unlock();
+		}
+
+		if (preempt)
+			return preempt;
+	}
+
 	/*
 	 * If the inflight context did not trigger the preemption, then maybe
 	 * it was the set of queued requests? Pick the highest priority in
@@ -381,6 +437,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	list_for_each_entry_safe_reverse(rq, rn,
 					 &engine->timeline.requests,
 					 link) {
+		struct intel_engine_cs *owner;
+
 		if (i915_request_completed(rq))
 			break;
 
@@ -389,16 +447,32 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 
 		GEM_BUG_ON(rq->hw_context->active);
 
-		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
-		if (rq_prio(rq) != prio) {
-			prio = rq_prio(rq);
-			pl = i915_sched_lookup_priolist(engine, prio);
-		}
-		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
+		/*
+		 * Push the request back into the queue for later resubmission.
+		 * If this request is not native to this physical engine (i.e.
+		 * it came from a virtual source), push it back onto the virtual
+		 * engine so that it can be moved across onto another physical
+		 * engine as load dictates.
+		 */
+		owner = rq->hw_context->engine;
+		if (likely(owner == engine)) {
+			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
+			if (rq_prio(rq) != prio) {
+				prio = rq_prio(rq);
+				pl = i915_sched_lookup_priolist(engine, prio);
+			}
+			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 
-		list_add(&rq->sched.link, pl);
+			list_add(&rq->sched.link, pl);
+			active = rq;
+		} else {
+			if (__i915_request_has_started(rq))
+				rq->sched.attr.priority |= ACTIVE_PRIORITY;
 
-		active = rq;
+			rq->engine = owner;
+			owner->submit_request(rq);
+			active = NULL;
+		}
 	}
 
 	/*
@@ -418,7 +492,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	 * in the priority queue, but they will not gain immediate access to
 	 * the GPU.
 	 */
-	if (~prio & ACTIVE_PRIORITY && __i915_request_has_started(active)) {
+	if (~prio & ACTIVE_PRIORITY &&
+	    active && __i915_request_has_started(active)) {
 		prio |= ACTIVE_PRIORITY;
 		active->sched.attr.priority = prio;
 		list_move_tail(&active->sched.link,
@@ -658,6 +733,72 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
 						  execlists));
 }
 
+static void virtual_update_register_offsets(u32 *regs,
+					    struct intel_engine_cs *engine)
+{
+	u32 base = engine->mmio_base;
+
+	regs[CTX_CONTEXT_CONTROL] =
+		i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base));
+	regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
+	regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base));
+	regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base));
+	regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base));
+
+	regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
+	regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
+	regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
+	regs[CTX_SECOND_BB_HEAD_U] =
+		i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
+	regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
+	regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
+
+	regs[CTX_CTX_TIMESTAMP] =
+		i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
+	regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3));
+	regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3));
+	regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2));
+	regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 2));
+	regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 1));
+	regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 1));
+	regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
+	regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
+
+	if (engine->class == RENDER_CLASS) {
+		regs[CTX_RCS_INDIRECT_CTX] =
+			i915_mmio_reg_offset(RING_INDIRECT_CTX(base));
+		regs[CTX_RCS_INDIRECT_CTX_OFFSET] =
+			i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base));
+		regs[CTX_BB_PER_CTX_PTR] =
+			i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));
+
+		regs[CTX_R_PWR_CLK_STATE] =
+			i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
+	}
+}
+
+static bool virtual_matches(const struct virtual_engine *ve,
+			    const struct i915_request *rq,
+			    const struct intel_engine_cs *engine)
+{
+	const struct intel_engine_cs *active;
+
+	/*
+	 * We track when the HW has completed saving the context image
+	 * (i.e. when we have seen the final CS event switching out of
+	 * the context) and must not overwrite the context image before
+	 * then. This restricts us to only using the active engine
+	 * while the previous virtualized request is inflight (so
+	 * we reuse the register offsets). This is a very small
+	 * hysteresis on the greedy selection algorithm.
+	 */
+	active = READ_ONCE(ve->context.active);
+	if (active && active != engine)
+		return false;
+
+	return true;
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -690,6 +831,26 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * and context switches) submission.
 	 */
 
+	for (rb = rb_first_cached(&execlists->virtual); rb; ) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq = READ_ONCE(ve->request);
+
+		if (!rq) { /* lazily cleanup after another engine handled rq */
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+			rb = rb_first_cached(&execlists->virtual);
+			continue;
+		}
+
+		if (!virtual_matches(ve, rq, engine)) {
+			rb = rb_next(rb);
+			continue;
+		}
+
+		break;
+	}
+
 	if (last) {
 		/*
 		 * Don't resubmit or switch until all outstanding
@@ -711,7 +872,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
 			return;
 
-		if (need_preempt(engine, last)) {
+		if (need_preempt(engine, last, rb)) {
 			inject_preempt_context(engine);
 			return;
 		}
@@ -751,6 +912,89 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		last->tail = last->wa_tail;
 	}
 
+	while (rb) { /* XXX virtual is always taking precedence */
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq;
+
+		spin_lock(&ve->base.timeline.lock);
+
+		rq = ve->request;
+		if (unlikely(!rq)) { /* lost the race to a sibling */
+			spin_unlock(&ve->base.timeline.lock);
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+			rb = rb_first_cached(&execlists->virtual);
+			continue;
+		}
+
+		GEM_BUG_ON(rq != ve->request);
+		GEM_BUG_ON(rq->engine != &ve->base);
+		GEM_BUG_ON(rq->hw_context != &ve->context);
+
+		if (rq_prio(rq) >= queue_prio(execlists)) {
+			if (!virtual_matches(ve, rq, engine)) {
+				spin_unlock(&ve->base.timeline.lock);
+				rb = rb_next(rb);
+				continue;
+			}
+
+			if (last && !can_merge_rq(last, rq)) {
+				spin_unlock(&ve->base.timeline.lock);
+				return; /* leave this rq for another engine */
+			}
+
+			GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
+				  engine->name,
+				  rq->fence.context,
+				  rq->fence.seqno,
+				  i915_request_completed(rq) ? "!" :
+				  i915_request_started(rq) ? "*" :
+				  "",
+				  yesno(engine != ve->siblings[0]));
+
+			ve->request = NULL;
+			ve->base.execlists.queue_priority_hint = INT_MIN;
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+
+			rq->engine = engine;
+
+			if (engine != ve->siblings[0]) {
+				u32 *regs = ve->context.lrc_reg_state;
+				unsigned int n;
+
+				GEM_BUG_ON(READ_ONCE(ve->context.active));
+				virtual_update_register_offsets(regs, engine);
+
+				/*
+				 * Move the bound engine to the top of the list
+				 * for future execution. We then kick this
+				 * tasklet first before checking others, so that
+				 * we preferentially reuse this set of bound
+				 * registers.
+				 */
+				for (n = 1; n < ve->num_siblings; n++) {
+					if (ve->siblings[n] == engine) {
+						swap(ve->siblings[n],
+						     ve->siblings[0]);
+						break;
+					}
+				}
+
+				GEM_BUG_ON(ve->siblings[0] != engine);
+			}
+
+			__i915_request_submit(rq);
+			trace_i915_request_in(rq, port_index(port, execlists));
+			submit = true;
+			last = rq;
+		}
+
+		spin_unlock(&ve->base.timeline.lock);
+		break;
+	}
+
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
@@ -1874,6 +2118,25 @@ static void reset_csb_pointers(struct intel_engine_execlists *execlists)
 			       &execlists->csb_status[reset_value]);
 }
 
+static struct i915_request *active_request(struct i915_request *rq)
+{
+	const struct list_head * const list = &rq->engine->timeline.requests;
+	const struct intel_context * const context = rq->hw_context;
+	struct i915_request *active = NULL;
+
+	list_for_each_entry_from_reverse(rq, list, link) {
+		if (i915_request_completed(rq))
+			break;
+
+		if (rq->hw_context != context)
+			break;
+
+		active = rq;
+	}
+
+	return active;
+}
+
 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1894,7 +2157,8 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	if (!port_isset(execlists->port))
 		goto out_clear;
 
-	ce = port_request(execlists->port)->hw_context;
+	rq = port_request(execlists->port);
+	ce = rq->hw_context;
 
 	/*
 	 * Catch up with any missed context-switch interrupts.
@@ -1907,16 +2171,10 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	 */
 	execlists_cancel_port_requests(execlists);
 
-	/* Push back any incomplete requests for replay after the reset. */
-	rq = __unwind_incomplete_requests(engine);
+	rq = active_request(rq);
 	if (!rq)
 		goto out_replay;
 
-	if (rq->hw_context != ce) { /* caught just before a CS event */
-		rq = NULL;
-		goto out_replay;
-	}
-
 	/*
 	 * If this request hasn't started yet, e.g. it is waiting on a
 	 * semaphore, we need to avoid skipping the request or else we
@@ -1963,13 +2221,16 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	}
 	execlists_init_reg_state(regs, ce, engine, ce->ring);
 
-	/* Rerun the request; its payload has been neutered (if guilty). */
 out_replay:
+	/* Rerun the request; its payload has been neutered (if guilty). */
 	ce->ring->head =
 		rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail;
 	intel_ring_update_space(ce->ring);
 	__execlists_update_reg_state(ce, engine);
 
+	/* Push back any incomplete requests for replay after the reset. */
+	__unwind_incomplete_requests(engine);
+
 out_clear:
 	execlists_clear_all_active(execlists);
 }
@@ -2043,6 +2304,26 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 		i915_priolist_free(p);
 	}
 
+	/* Cancel all attached virtual engines */
+	while ((rb = rb_first_cached(&execlists->virtual))) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+
+		rb_erase_cached(rb, &execlists->virtual);
+		RB_CLEAR_NODE(rb);
+
+		spin_lock(&ve->base.timeline.lock);
+		if (ve->request) {
+			ve->request->engine = engine;
+			__i915_request_submit(ve->request);
+			dma_fence_set_error(&ve->request->fence, -EIO);
+			i915_request_mark_complete(ve->request);
+			ve->base.execlists.queue_priority_hint = INT_MIN;
+			ve->request = NULL;
+		}
+		spin_unlock(&ve->base.timeline.lock);
+	}
+
 	/* Remaining _unready_ requests will be nop'ed when submitted */
 
 	execlists->queue_priority_hint = INT_MIN;
@@ -2760,6 +3041,318 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
 	return ret;
 }
 
+static void virtual_context_destroy(struct kref *kref)
+{
+	struct virtual_engine *ve =
+		container_of(kref, typeof(*ve), context.ref);
+	unsigned int n;
+
+	GEM_BUG_ON(ve->request);
+	GEM_BUG_ON(ve->context.active);
+
+	for (n = 0; n < ve->num_siblings; n++) {
+		struct intel_engine_cs *sibling = ve->siblings[n];
+		struct rb_node *node = &ve->nodes[sibling->id].rb;
+
+		if (RB_EMPTY_NODE(node))
+			continue;
+
+		spin_lock_irq(&sibling->timeline.lock);
+
+		if (!RB_EMPTY_NODE(node))
+			rb_erase_cached(node, &sibling->execlists.virtual);
+
+		spin_unlock_irq(&sibling->timeline.lock);
+	}
+	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
+
+	if (ve->context.state)
+		__execlists_context_fini(&ve->context);
+
+	i915_timeline_fini(&ve->base.timeline);
+	kfree(ve);
+}
+
+static void virtual_engine_initial_hint(struct virtual_engine *ve)
+{
+	int swp;
+
+	/*
+	 * Pick a random sibling on starting to help spread the load around.
+	 *
+	 * New contexts are typically created with exactly the same order
+	 * of siblings, and often started in batches. Due to the way we iterate
+	 * the array of sibling when submitting requests, sibling[0] is
+	 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
+	 * randomised across the system, we also help spread the load by the
+	 * first engine we inspect being different each time.
+	 *
+	 * NB This does not force us to execute on this engine, it will just
+	 * typically be the first we inspect for submission.
+	 */
+	swp = prandom_u32_max(ve->num_siblings);
+	if (!swp)
+		return;
+
+	swap(ve->siblings[swp], ve->siblings[0]);
+	virtual_update_register_offsets(ve->context.lrc_reg_state,
+					ve->siblings[0]);
+}
+
+static int virtual_context_pin(struct intel_context *ce)
+{
+	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+	int err;
+
+	/* Note: we must use a real engine class for setting up reg state */
+	err = __execlists_context_pin(ce, ve->siblings[0]);
+	if (err)
+		return err;
+
+	virtual_engine_initial_hint(ve);
+	return 0;
+}
+
+static void virtual_context_enter(struct intel_context *ce)
+{
+	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+	unsigned int n;
+
+	for (n = 0; n < ve->num_siblings; n++)
+		intel_engine_pm_get(ve->siblings[n]);
+}
+
+static void virtual_context_exit(struct intel_context *ce)
+{
+	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+	unsigned int n;
+
+	for (n = 0; n < ve->num_siblings; n++)
+		intel_engine_pm_put(ve->siblings[n]);
+}
+
+static const struct intel_context_ops virtual_context_ops = {
+	.pin = virtual_context_pin,
+	.unpin = execlists_context_unpin,
+
+	.enter = virtual_context_enter,
+	.exit = virtual_context_exit,
+
+	.destroy = virtual_context_destroy,
+};
+
+static void virtual_submission_tasklet(unsigned long data)
+{
+	struct virtual_engine * const ve = (struct virtual_engine *)data;
+	const int prio = ve->base.execlists.queue_priority_hint;
+	unsigned int n;
+
+	local_irq_disable();
+	for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
+		struct intel_engine_cs *sibling = ve->siblings[n];
+		struct ve_node * const node = &ve->nodes[sibling->id];
+		struct rb_node **parent, *rb;
+		bool first;
+
+		spin_lock(&sibling->timeline.lock);
+
+		if (!RB_EMPTY_NODE(&node->rb)) {
+			/*
+			 * Cheat and avoid rebalancing the tree if we can
+			 * reuse this node in situ.
+			 */
+			first = rb_first_cached(&sibling->execlists.virtual) ==
+				&node->rb;
+			if (prio == node->prio || (prio > node->prio && first))
+				goto submit_engine;
+
+			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
+		}
+
+		rb = NULL;
+		first = true;
+		parent = &sibling->execlists.virtual.rb_root.rb_node;
+		while (*parent) {
+			struct ve_node *other;
+
+			rb = *parent;
+			other = rb_entry(rb, typeof(*other), rb);
+			if (prio > other->prio) {
+				parent = &rb->rb_left;
+			} else {
+				parent = &rb->rb_right;
+				first = false;
+			}
+		}
+
+		rb_link_node(&node->rb, rb, parent);
+		rb_insert_color_cached(&node->rb,
+				       &sibling->execlists.virtual,
+				       first);
+
+submit_engine:
+		GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
+		node->prio = prio;
+		if (first && prio > sibling->execlists.queue_priority_hint) {
+			sibling->execlists.queue_priority_hint = prio;
+			tasklet_hi_schedule(&sibling->execlists.tasklet);
+		}
+
+		spin_unlock(&sibling->timeline.lock);
+	}
+	local_irq_enable();
+}
+
+static void virtual_submit_request(struct i915_request *rq)
+{
+	struct virtual_engine *ve = to_virtual_engine(rq->engine);
+
+	GEM_TRACE("%s: rq=%llx:%lld\n",
+		  ve->base.name,
+		  rq->fence.context,
+		  rq->fence.seqno);
+
+	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
+
+	GEM_BUG_ON(ve->request);
+	ve->base.execlists.queue_priority_hint = rq_prio(rq);
+	WRITE_ONCE(ve->request, rq);
+
+	tasklet_schedule(&ve->base.execlists.tasklet);
+}
+
+struct intel_context *
+intel_execlists_create_virtual(struct i915_gem_context *ctx,
+			       struct intel_engine_cs **siblings,
+			       unsigned int count)
+{
+	struct virtual_engine *ve;
+	unsigned int n;
+	int err;
+
+	if (count == 0)
+		return ERR_PTR(-EINVAL);
+
+	if (count == 1)
+		return intel_context_create(ctx, siblings[0]);
+
+	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
+	if (!ve)
+		return ERR_PTR(-ENOMEM);
+
+	ve->base.i915 = ctx->i915;
+	ve->base.id = -1;
+	ve->base.class = OTHER_CLASS;
+	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
+	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
+	ve->base.flags = I915_ENGINE_IS_VIRTUAL;
+
+	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
+
+	err = i915_timeline_init(ctx->i915, &ve->base.timeline, NULL);
+	if (err)
+		goto err_put;
+	i915_timeline_set_subclass(&ve->base.timeline, TIMELINE_VIRTUAL);
+
+	intel_engine_init_execlists(&ve->base);
+
+	ve->base.cops = &virtual_context_ops;
+	ve->base.request_alloc = execlists_request_alloc;
+
+	ve->base.schedule = i915_schedule;
+	ve->base.submit_request = virtual_submit_request;
+
+	ve->base.execlists.queue_priority_hint = INT_MIN;
+	tasklet_init(&ve->base.execlists.tasklet,
+		     virtual_submission_tasklet,
+		     (unsigned long)ve);
+
+	intel_context_init(&ve->context, ctx, &ve->base);
+
+	for (n = 0; n < count; n++) {
+		struct intel_engine_cs *sibling = siblings[n];
+
+		GEM_BUG_ON(!is_power_of_2(sibling->mask));
+		if (sibling->mask & ve->base.mask) {
+			DRM_DEBUG("duplicate %s entry in load balancer\n",
+				  sibling->name);
+			err = -EINVAL;
+			goto err_put;
+		}
+
+		/*
+		 * The virtual engine implementation is tightly coupled to
+	 * the execlists backend -- we push requests directly
+		 * into a tree inside each physical engine. We could support
+		 * layering if we handle cloning of the requests and
+		 * submitting a copy into each backend.
+		 */
+		if (sibling->execlists.tasklet.func !=
+		    execlists_submission_tasklet) {
+			err = -ENODEV;
+			goto err_put;
+		}
+
+		GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
+		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
+
+		ve->siblings[ve->num_siblings++] = sibling;
+		ve->base.mask |= sibling->mask;
+
+		/*
+		 * All physical engines must be compatible for their emission
+		 * functions (as we build the instructions during request
+		 * construction and do not alter them before submission
+		 * on the physical engine). We use the engine class as a guide
+		 * here, although that could be refined.
+		 */
+		if (ve->base.class != OTHER_CLASS) {
+			if (ve->base.class != sibling->class) {
+				DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
+					  sibling->class, ve->base.class);
+				err = -EINVAL;
+				goto err_put;
+			}
+			continue;
+		}
+
+		ve->base.class = sibling->class;
+		ve->base.uabi_class = sibling->uabi_class;
+		snprintf(ve->base.name, sizeof(ve->base.name),
+			 "v%dx%d", ve->base.class, count);
+		ve->base.context_size = sibling->context_size;
+
+		ve->base.emit_bb_start = sibling->emit_bb_start;
+		ve->base.emit_flush = sibling->emit_flush;
+		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
+		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
+		ve->base.emit_fini_breadcrumb_dw =
+			sibling->emit_fini_breadcrumb_dw;
+	}
+
+	return &ve->context;
+
+err_put:
+	intel_context_put(&ve->context);
+	return ERR_PTR(err);
+}
+
+struct intel_context *
+intel_execlists_clone_virtual(struct i915_gem_context *ctx,
+			      struct intel_engine_cs *src)
+{
+	struct virtual_engine *se = to_virtual_engine(src);
+	struct intel_context *dst;
+
+	dst = intel_execlists_create_virtual(ctx,
+					     se->siblings,
+					     se->num_siblings);
+	if (IS_ERR(dst))
+		return dst;
+
+	return dst;
+}
+
 void intel_execlists_show_requests(struct intel_engine_cs *engine,
 				   struct drm_printer *m,
 				   void (*show_request)(struct drm_printer *m,
@@ -2817,6 +3410,29 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 		show_request(m, last, "\t\tQ ");
 	}
 
+	last = NULL;
+	count = 0;
+	for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq = READ_ONCE(ve->request);
+
+		if (rq) {
+			if (count++ < max - 1)
+				show_request(m, rq, "\t\tV ");
+			else
+				last = rq;
+		}
+	}
+	if (last) {
+		if (count > max) {
+			drm_printf(m,
+				   "\t\t...skipping %d virtual requests...\n",
+				   count - max);
+		}
+		show_request(m, last, "\t\tV ");
+	}
+
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index a0dc907a7249..5530606052e5 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -114,4 +114,13 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 							const char *prefix),
 				   unsigned int max);
 
+struct intel_context *
+intel_execlists_create_virtual(struct i915_gem_context *ctx,
+			       struct intel_engine_cs **siblings,
+			       unsigned int count);
+
+struct intel_context *
+intel_execlists_clone_virtual(struct i915_gem_context *ctx,
+			      struct intel_engine_cs *src);
+
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 6f223f7facde..558399e5c7bb 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1301,6 +1301,185 @@ static int live_preempt_smoke(void *arg)
 	return err;
 }
 
+static int nop_virtual_engine(struct drm_i915_private *i915,
+			      struct intel_engine_cs **siblings,
+			      unsigned int nsibling,
+			      unsigned int nctx,
+			      unsigned int flags)
+#define CHAIN BIT(0)
+{
+	IGT_TIMEOUT(end_time);
+	struct i915_request *request[16];
+	struct i915_gem_context *ctx[16];
+	struct intel_context *ve[16];
+	unsigned long n, prime, nc;
+	struct igt_live_test t;
+	ktime_t times[2] = {};
+	int err;
+
+	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx));
+
+	for (n = 0; n < nctx; n++) {
+		ctx[n] = kernel_context(i915);
+		if (!ctx[n]) {
+			err = -ENOMEM;
+			nctx = n;
+			goto out;
+		}
+
+		ve[n] = intel_execlists_create_virtual(ctx[n],
+						       siblings, nsibling);
+		if (IS_ERR(ve[n])) {
+			kernel_context_close(ctx[n]);
+			err = PTR_ERR(ve[n]);
+			nctx = n;
+			goto out;
+		}
+
+		err = intel_context_pin(ve[n]);
+		if (err) {
+			intel_context_put(ve[n]);
+			kernel_context_close(ctx[n]);
+			nctx = n;
+			goto out;
+		}
+	}
+
+	err = igt_live_test_begin(&t, i915, __func__, ve[0]->engine->name);
+	if (err)
+		goto out;
+
+	for_each_prime_number_from(prime, 1, 8192) {
+		times[1] = ktime_get_raw();
+
+		if (flags & CHAIN) {
+			for (nc = 0; nc < nctx; nc++) {
+				for (n = 0; n < prime; n++) {
+					request[nc] =
+						i915_request_create(ve[nc]);
+					if (IS_ERR(request[nc])) {
+						err = PTR_ERR(request[nc]);
+						goto out;
+					}
+
+					i915_request_add(request[nc]);
+				}
+			}
+		} else {
+			for (n = 0; n < prime; n++) {
+				for (nc = 0; nc < nctx; nc++) {
+					request[nc] =
+						i915_request_create(ve[nc]);
+					if (IS_ERR(request[nc])) {
+						err = PTR_ERR(request[nc]);
+						goto out;
+					}
+
+					i915_request_add(request[nc]);
+				}
+			}
+		}
+
+		for (nc = 0; nc < nctx; nc++) {
+			if (i915_request_wait(request[nc],
+					      I915_WAIT_LOCKED,
+					      HZ / 10) < 0) {
+				pr_err("%s(%s): wait for %llx:%lld timed out\n",
+				       __func__, ve[0]->engine->name,
+				       request[nc]->fence.context,
+				       request[nc]->fence.seqno);
+
+				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
+					  __func__, ve[0]->engine->name,
+					  request[nc]->fence.context,
+					  request[nc]->fence.seqno);
+				GEM_TRACE_DUMP();
+				i915_gem_set_wedged(i915);
+				break;
+			}
+		}
+
+		times[1] = ktime_sub(ktime_get_raw(), times[1]);
+		if (prime == 1)
+			times[0] = times[1];
+
+		if (__igt_timeout(end_time, NULL))
+			break;
+	}
+
+	err = igt_live_test_end(&t);
+	if (err)
+		goto out;
+
+	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
+		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
+		prime, div64_u64(ktime_to_ns(times[1]), prime));
+
+out:
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	for (nc = 0; nc < nctx; nc++) {
+		intel_context_unpin(ve[nc]);
+		intel_context_put(ve[nc]);
+		kernel_context_close(ctx[nc]);
+	}
+	return err;
+}
+
+static int live_virtual_engine(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	unsigned int class, inst;
+	int err = -ENODEV;
+
+	if (USES_GUC_SUBMISSION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	for_each_engine(engine, i915, id) {
+		err = nop_virtual_engine(i915, &engine, 1, 1, 0);
+		if (err) {
+			pr_err("Failed to wrap engine %s: err=%d\n",
+			       engine->name, err);
+			goto out_unlock;
+		}
+	}
+
+	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+		int nsibling, n;
+
+		nsibling = 0;
+		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+			if (!i915->engine_class[class][inst])
+				break;
+
+			siblings[nsibling++] = i915->engine_class[class][inst];
+		}
+		if (nsibling < 2)
+			continue;
+
+		for (n = 1; n <= nsibling + 1; n++) {
+			err = nop_virtual_engine(i915, siblings, nsibling,
+						 n, 0);
+			if (err)
+				goto out_unlock;
+		}
+
+		err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN);
+		if (err)
+			goto out_unlock;
+	}
+
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
@@ -1313,6 +1492,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_chain_preempt),
 		SUBTEST(live_preempt_hang),
 		SUBTEST(live_preempt_smoke),
+		SUBTEST(live_virtual_engine),
 	};
 
 	if (!HAS_EXECLISTS(i915))
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 67f8a4a807a0..fe82d3571072 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -91,4 +91,9 @@ static inline bool __tasklet_enable(struct tasklet_struct *t)
 	return atomic_dec_and_test(&t->count);
 }
 
+static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
+{
+	return test_bit(TASKLET_STATE_SCHED, &t->state);
+}
+
 #endif /* __I915_GEM_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index ba7582d955d1..57b09f624bb4 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -86,6 +86,7 @@
  */
 
 #include <linux/log2.h>
+#include <linux/nospec.h>
 
 #include <drm/i915_drm.h>
 
@@ -1209,7 +1210,6 @@ __intel_context_reconfigure_sseu(struct intel_context *ce,
 	int ret;
 
 	GEM_BUG_ON(INTEL_GEN(ce->gem_context->i915) < 8);
-	GEM_BUG_ON(ce->engine->id != RCS0);
 
 	ret = intel_context_lock_pinned(ce);
 	if (ret)
@@ -1397,7 +1397,102 @@ struct set_engines {
 	struct i915_gem_engines *engines;
 };
 
+static int
+set_engines__load_balance(struct i915_user_extension __user *base, void *data)
+{
+	struct i915_context_engines_load_balance __user *ext =
+		container_of_user(base, typeof(*ext), base);
+	const struct set_engines *set = data;
+	struct intel_engine_cs *stack[16];
+	struct intel_engine_cs **siblings;
+	struct intel_context *ce;
+	u16 num_siblings, idx;
+	unsigned int n;
+	int err;
+
+	if (!HAS_EXECLISTS(set->ctx->i915))
+		return -ENODEV;
+
+	if (USES_GUC_SUBMISSION(set->ctx->i915))
+		return -ENODEV; /* not implemented yet */
+
+	if (get_user(idx, &ext->engine_index))
+		return -EFAULT;
+
+	if (idx >= set->engines->num_engines) {
+		DRM_DEBUG("Invalid placement value, %d >= %d\n",
+			  idx, set->engines->num_engines);
+		return -EINVAL;
+	}
+
+	idx = array_index_nospec(idx, set->engines->num_engines);
+	if (set->engines->engines[idx]) {
+		DRM_DEBUG("Invalid placement[%d], already occupied\n", idx);
+		return -EEXIST;
+	}
+
+	if (get_user(num_siblings, &ext->num_siblings))
+		return -EFAULT;
+
+	err = check_user_mbz(&ext->flags);
+	if (err)
+		return err;
+
+	for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
+		err = check_user_mbz(&ext->mbz64[n]);
+		if (err)
+			return err;
+	}
+
+	siblings = stack;
+	if (num_siblings > ARRAY_SIZE(stack)) {
+		siblings = kmalloc_array(num_siblings,
+					 sizeof(*siblings),
+					 GFP_KERNEL);
+		if (!siblings)
+			return -ENOMEM;
+	}
+
+	for (n = 0; n < num_siblings; n++) {
+		struct i915_engine_class_instance ci;
+
+		if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) {
+			err = -EFAULT;
+			goto out_siblings;
+		}
+
+		siblings[n] = intel_engine_lookup_user(set->ctx->i915,
+						       ci.engine_class,
+						       ci.engine_instance);
+		if (!siblings[n]) {
+			DRM_DEBUG("Invalid sibling[%d]: { class:%d, inst:%d }\n",
+				  n, ci.engine_class, ci.engine_instance);
+			err = -EINVAL;
+			goto out_siblings;
+		}
+	}
+
+	ce = intel_execlists_create_virtual(set->ctx, siblings, n);
+	if (IS_ERR(ce)) {
+		err = PTR_ERR(ce);
+		goto out_siblings;
+	}
+
+	if (cmpxchg(&set->engines->engines[idx], NULL, ce)) {
+		intel_context_put(ce);
+		err = -EEXIST;
+		goto out_siblings;
+	}
+
+out_siblings:
+	if (siblings != stack)
+		kfree(siblings);
+
+	return err;
+}
+
 static const i915_user_extension_fn set_engines__extensions[] = {
+	[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
 };
 
 static int
@@ -1696,14 +1791,29 @@ static int clone_engines(struct i915_gem_context *dst,
 
 	clone->i915 = dst->i915;
 	for (n = 0; n < e->num_engines; n++) {
+		struct intel_engine_cs *engine;
+
 		if (!e->engines[n]) {
 			clone->engines[n] = NULL;
 			continue;
 		}
+		engine = e->engines[n]->engine;
 
-		clone->engines[n] =
-			intel_context_create(dst, e->engines[n]->engine);
-		if (!clone->engines[n]) {
+		/*
+		 * Virtual engines are singletons; they can only exist
+		 * inside a single context, because they embed their
+		 * HW context... As each virtual context implies a single
+		 * timeline (each engine can only dequeue a single request
+		 * at any time), it would be surprising for two contexts
+		 * to use the same engine. So let's create a copy of
+		 * the virtual engine instead.
+		 */
+		if (intel_engine_is_virtual(engine))
+			clone->engines[n] =
+				intel_execlists_clone_virtual(dst, engine);
+		else
+			clone->engines[n] = intel_context_create(dst, engine);
+		if (IS_ERR_OR_NULL(clone->engines[n])) {
 			__free_engines(clone, n);
 			goto err_unlock;
 		}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 39bc4f54e272..b58d9c23a876 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -248,17 +248,26 @@ sched_lock_engine(const struct i915_sched_node *node,
 		  struct intel_engine_cs *locked,
 		  struct sched_cache *cache)
 {
-	struct intel_engine_cs *engine = node_to_request(node)->engine;
+	const struct i915_request *rq = node_to_request(node);
+	struct intel_engine_cs *engine;
 
 	GEM_BUG_ON(!locked);
 
-	if (engine != locked) {
+	/*
+	 * Virtual engines complicate acquiring the engine timeline lock,
+	 * as their rq->engine pointer is not stable until under that
+	 * engine lock. The simple ploy we use is to take the lock then
+	 * check that the rq still belongs to the newly locked engine.
+	 */
+	while (locked != (engine = READ_ONCE(rq->engine))) {
 		spin_unlock(&locked->timeline.lock);
 		memset(cache, 0, sizeof(*cache));
 		spin_lock(&engine->timeline.lock);
+		locked = engine;
 	}
 
-	return engine;
+	GEM_BUG_ON(locked != engine);
+	return locked;
 }
 
 static bool inflight(const struct i915_request *rq,
@@ -371,8 +380,11 @@ static void __i915_schedule(struct i915_request *rq,
 		if (prio <= node->attr.priority || node_signaled(node))
 			continue;
 
+		GEM_BUG_ON(node_to_request(node)->engine != engine);
+
 		node->attr.priority = prio;
 		if (!list_empty(&node->link)) {
+			GEM_BUG_ON(intel_engine_is_virtual(engine));
 			if (!cache.priolist)
 				cache.priolist =
 					i915_sched_lookup_priolist(engine,
diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h
index 5256a0b5c5f7..1688705f4a2b 100644
--- a/drivers/gpu/drm/i915/i915_timeline_types.h
+++ b/drivers/gpu/drm/i915/i915_timeline_types.h
@@ -26,6 +26,7 @@ struct i915_timeline {
 	spinlock_t lock;
 #define TIMELINE_CLIENT 0 /* default subclass */
 #define TIMELINE_ENGINE 1
+#define TIMELINE_VIRTUAL 2
 	struct mutex mutex; /* protects the flow of requests */
 
 	unsigned int pin_count;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7694113362d4..ff2ababc0984 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -137,6 +137,7 @@ struct i915_engine_class_instance {
 	__u16 engine_class; /* see enum drm_i915_gem_engine_class */
 	__u16 engine_instance;
 #define I915_ENGINE_CLASS_INVALID_NONE -1
+#define I915_ENGINE_CLASS_INVALID_VIRTUAL -2
 };
 
 /**
@@ -1607,8 +1608,46 @@ struct drm_i915_gem_context_param_sseu {
 	__u32 rsvd;
 };
 
+/*
+ * i915_context_engines_load_balance:
+ *
+ * Enable load balancing across this set of engines.
+ *
+ * Into the I915_EXEC_DEFAULT slot [0], a virtual engine is created that,
+ * when used, will proxy the execbuffer request onto one of the set of
+ * engines in such a way as to distribute the load evenly across the set.
+ *
+ * The set of engines must be compatible (e.g. the same HW class) as they
+ * will share the same logical GPU context and ring.
+ *
+ * To intermix rendering with the virtual engine and direct rendering onto
+ * the backing engines (bypassing the load balancing proxy), the context must
+ * be defined to use a single timeline for all engines.
+ */
+struct i915_context_engines_load_balance {
+	struct i915_user_extension base;
+
+	__u16 engine_index;
+	__u16 num_siblings;
+	__u32 flags; /* all undefined flags must be zero */
+
+	__u64 mbz64[1]; /* reserved for future use; must be zero */
+
+	struct i915_engine_class_instance engines[0];
+} __attribute__((packed));
+
+#define I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(name__, N__) struct { \
+	struct i915_user_extension base; \
+	__u16 engine_index; \
+	__u16 num_siblings; \
+	__u32 flags; \
+	__u64 mbz64[1]; \
+	struct i915_engine_class_instance engines[N__]; \
+} __attribute__((packed)) name__
+
 struct i915_context_param_engines {
 	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
+#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
 	struct i915_engine_class_instance engines[0];
 } __attribute__((packed));
 
-- 
2.20.1


* [PATCH 20/45] drm/i915: Apply an execution_mask to the virtual_engine
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (17 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 19/45] drm/i915: Load balancing across a virtual engine Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-29 14:12   ` Tvrtko Ursulin
  2019-04-25  9:19 ` [PATCH 21/45] drm/i915: Extend execution fence to support a callback Chris Wilson
                   ` (32 subsequent siblings)
  51 siblings, 1 reply; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

Allow the user to direct which physical engines of the virtual engine
they wish to execute on, as sometimes it is necessary to override the
load balancing algorithm.
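
For illustration only, here is a minimal kernel-side sketch of restricting a
request built on the virtual engine to a single sibling (mirroring what the
selftest below does); 've' and 'sibling' are placeholder variables:

	struct i915_request *rq;

	rq = i915_request_create(ve);	/* ve: pinned virtual context */
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* Defaults to ALL_ENGINES; narrow it to one physical engine */
	rq->execution_mask = sibling->mask;
	i915_request_add(rq);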

v2: Only kick the virtual engines on context-out if required

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c    |  67 +++++++++++++
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 131 +++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_request.c    |   1 +
 drivers/gpu/drm/i915/i915_request.h    |   3 +
 4 files changed, 202 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index e8faf3417f9c..8acf9d1c6f2f 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -549,6 +549,15 @@ execlists_context_schedule_in(struct i915_request *rq)
 	rq->hw_context->active = rq->engine;
 }
 
+static void kick_siblings(struct i915_request *rq)
+{
+	struct virtual_engine *ve = to_virtual_engine(rq->hw_context->engine);
+	struct i915_request *next = READ_ONCE(ve->request);
+
+	if (next && next->execution_mask & ~rq->execution_mask)
+		tasklet_schedule(&ve->base.execlists.tasklet);
+}
+
 static inline void
 execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
 {
@@ -556,6 +565,18 @@ execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
 	intel_engine_context_out(rq->engine);
 	execlists_context_status_change(rq, status);
 	trace_i915_request_out(rq);
+
+	/*
+	 * If this is part of a virtual engine, its next request may have
+	 * been blocked waiting for access to the active context. We have
+	 * to kick all the siblings again in case we need to switch (e.g.
+	 * the next request is not runnable on this engine). Hopefully,
+	 * we will already have submitted the next request before the
+	 * tasklet runs and do not need to rebuild each virtual tree
+	 * and kick everyone again.
+	 */
+	if (rq->engine != rq->hw_context->engine)
+		kick_siblings(rq);
 }
 
 static u64 execlists_update_context(struct i915_request *rq)
@@ -783,6 +804,9 @@ static bool virtual_matches(const struct virtual_engine *ve,
 {
 	const struct intel_engine_cs *active;
 
+	if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
+		return false;
+
 	/*
 	 * We track when the HW has completed saving the context image
 	 * (i.e. when we have seen the final CS event switching out of
@@ -3141,12 +3165,44 @@ static const struct intel_context_ops virtual_context_ops = {
 	.destroy = virtual_context_destroy,
 };
 
+static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
+{
+	struct i915_request *rq;
+	intel_engine_mask_t mask;
+
+	rq = READ_ONCE(ve->request);
+	if (!rq)
+		return 0;
+
+	/* The rq is ready for submission; rq->execution_mask is now stable. */
+	mask = rq->execution_mask;
+	if (unlikely(!mask)) {
+		/* Invalid selection, submit to a random engine in error */
+		i915_request_skip(rq, -ENODEV);
+		mask = ve->siblings[0]->mask;
+	}
+
+	GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
+		  ve->base.name,
+		  rq->fence.context, rq->fence.seqno,
+		  mask, ve->base.execlists.queue_priority_hint);
+
+	return mask;
+}
+
 static void virtual_submission_tasklet(unsigned long data)
 {
 	struct virtual_engine * const ve = (struct virtual_engine *)data;
 	const int prio = ve->base.execlists.queue_priority_hint;
+	intel_engine_mask_t mask;
 	unsigned int n;
 
+	rcu_read_lock();
+	mask = virtual_submission_mask(ve);
+	rcu_read_unlock();
+	if (unlikely(!mask))
+		return;
+
 	local_irq_disable();
 	for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
 		struct intel_engine_cs *sibling = ve->siblings[n];
@@ -3154,6 +3210,17 @@ static void virtual_submission_tasklet(unsigned long data)
 		struct rb_node **parent, *rb;
 		bool first;
 
+		if (unlikely(!(mask & sibling->mask))) {
+			if (!RB_EMPTY_NODE(&node->rb)) {
+				spin_lock(&sibling->timeline.lock);
+				rb_erase_cached(&node->rb,
+						&sibling->execlists.virtual);
+				RB_CLEAR_NODE(&node->rb);
+				spin_unlock(&sibling->timeline.lock);
+			}
+			continue;
+		}
+
 		spin_lock(&sibling->timeline.lock);
 
 		if (!RB_EMPTY_NODE(&node->rb)) {
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 558399e5c7bb..73308749936c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1480,6 +1480,136 @@ static int live_virtual_engine(void *arg)
 	return err;
 }
 
+static int mask_virtual_engine(struct drm_i915_private *i915,
+			       struct intel_engine_cs **siblings,
+			       unsigned int nsibling)
+{
+	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
+	struct i915_gem_context *ctx;
+	struct intel_context *ve;
+	struct igt_live_test t;
+	unsigned int n;
+	int err;
+
+	/*
+	 * Check that by setting the execution mask on a request, we can
+	 * restrict it to our desired engine within the virtual engine.
+	 */
+
+	ctx = kernel_context(i915);
+	if (!ctx)
+		return -ENOMEM;
+
+	ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
+	if (IS_ERR(ve)) {
+		err = PTR_ERR(ve);
+		goto out_close;
+	}
+
+	err = intel_context_pin(ve);
+	if (err)
+		goto out_put;
+
+	err = igt_live_test_begin(&t, i915, __func__, ve->engine->name);
+	if (err)
+		goto out_unpin;
+
+	for (n = 0; n < nsibling; n++) {
+		request[n] = i915_request_create(ve);
+		if (IS_ERR(request[n])) {
+			err = PTR_ERR(request[n]);
+			nsibling = n;
+			goto out;
+		}
+
+		/* Reverse order as it's more likely to be unnatural */
+		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
+
+		i915_request_get(request[n]);
+		i915_request_add(request[n]);
+	}
+
+	for (n = 0; n < nsibling; n++) {
+		if (i915_request_wait(request[n], I915_WAIT_LOCKED, HZ / 10) < 0) {
+			pr_err("%s(%s): wait for %llx:%lld timed out\n",
+			       __func__, ve->engine->name,
+			       request[n]->fence.context,
+			       request[n]->fence.seqno);
+
+			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
+				  __func__, ve->engine->name,
+				  request[n]->fence.context,
+				  request[n]->fence.seqno);
+			GEM_TRACE_DUMP();
+			i915_gem_set_wedged(i915);
+			err = -EIO;
+			goto out;
+		}
+
+		if (request[n]->engine != siblings[nsibling - n - 1]) {
+			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
+			       request[n]->engine->name,
+			       siblings[nsibling - n - 1]->name);
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+	err = igt_live_test_end(&t);
+	if (err)
+		goto out;
+
+out:
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	for (n = 0; n < nsibling; n++)
+		i915_request_put(request[n]);
+
+out_unpin:
+	intel_context_unpin(ve);
+out_put:
+	intel_context_put(ve);
+out_close:
+	kernel_context_close(ctx);
+	return err;
+}
+
+static int live_virtual_mask(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	unsigned int class, inst;
+	int err = 0;
+
+	if (USES_GUC_SUBMISSION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+		unsigned int nsibling;
+
+		nsibling = 0;
+		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+			if (!i915->engine_class[class][inst])
+				break;
+
+			siblings[nsibling++] = i915->engine_class[class][inst];
+		}
+		if (nsibling < 2)
+			continue;
+
+		err = mask_virtual_engine(i915, siblings, nsibling);
+		if (err)
+			goto out_unlock;
+	}
+
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
@@ -1493,6 +1623,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_preempt_hang),
 		SUBTEST(live_preempt_smoke),
 		SUBTEST(live_virtual_engine),
+		SUBTEST(live_virtual_mask),
 	};
 
 	if (!HAS_EXECLISTS(i915))
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index a5a2ccc23958..06c2b89d0b0a 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -688,6 +688,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	rq->batch = NULL;
 	rq->capture_list = NULL;
 	rq->waitboost = false;
+	rq->execution_mask = ALL_ENGINES;
 
 	INIT_LIST_HEAD(&rq->active_list);
 	INIT_LIST_HEAD(&rq->execute_cb);
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 8025a89b5999..d7f9b2194568 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -28,6 +28,8 @@
 #include <linux/dma-fence.h>
 #include <linux/lockdep.h>
 
+#include "gt/intel_engine_types.h"
+
 #include "i915_gem.h"
 #include "i915_scheduler.h"
 #include "i915_selftest.h"
@@ -156,6 +158,7 @@ struct i915_request {
 	 */
 	struct i915_sched_node sched;
 	struct i915_dependency dep;
+	intel_engine_mask_t execution_mask;
 
 	/*
 	 * A convenience pointer to the current breadcrumb value stored in
-- 
2.20.1


* [PATCH 21/45] drm/i915: Extend execution fence to support a callback
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (18 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 20/45] drm/i915: Apply an execution_mask to the virtual_engine Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 22/45] drm/i915/execlists: Virtual engine bonding Chris Wilson
                   ` (31 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we will want to configure the slave request
depending on which physical engine the master request is executed on.
For this, we introduce a callback from the execute fence to convey this
information.
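
As a rough sketch of the intended use (the hook name here is hypothetical,
and the callback may run from irq_work context, so it must not sleep):

	static void my_bond_hook(struct i915_request *rq,
				 struct dma_fence *signal)
	{
		/*
		 * 'signal' has just been submitted to HW; inspect
		 * to_request(signal)->engine to configure rq accordingly.
		 */
	}

	/* Defer rq until 'master' reaches the HW, then invoke the hook */
	err = i915_request_await_execution(rq, &master->fence, my_bond_hook);
	if (err < 0)
		return err;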

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_request.c | 84 +++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_request.h |  4 ++
 2 files changed, 83 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 06c2b89d0b0a..55357b4a5fff 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -38,6 +38,8 @@ struct execute_cb {
 	struct list_head link;
 	struct irq_work work;
 	struct i915_sw_fence *fence;
+	void (*hook)(struct i915_request *rq, struct dma_fence *signal);
+	struct i915_request *signal;
 };
 
 static struct i915_global_request {
@@ -329,6 +331,17 @@ static void irq_execute_cb(struct irq_work *wrk)
 	kmem_cache_free(global.slab_execute_cbs, cb);
 }
 
+static void irq_execute_cb_hook(struct irq_work *wrk)
+{
+	struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
+
+	cb->hook(container_of(cb->fence, struct i915_request, submit),
+		 &cb->signal->fence);
+	i915_request_put(cb->signal);
+
+	irq_execute_cb(wrk);
+}
+
 static void __notify_execute_cb(struct i915_request *rq)
 {
 	struct execute_cb *cb;
@@ -355,14 +368,19 @@ static void __notify_execute_cb(struct i915_request *rq)
 }
 
 static int
-i915_request_await_execution(struct i915_request *rq,
-			     struct i915_request *signal,
-			     gfp_t gfp)
+__i915_request_await_execution(struct i915_request *rq,
+			       struct i915_request *signal,
+			       void (*hook)(struct i915_request *rq,
+					    struct dma_fence *signal),
+			       gfp_t gfp)
 {
 	struct execute_cb *cb;
 
-	if (i915_request_is_active(signal))
+	if (i915_request_is_active(signal)) {
+		if (hook)
+			hook(rq, &signal->fence);
 		return 0;
+	}
 
 	cb = kmem_cache_alloc(global.slab_execute_cbs, gfp);
 	if (!cb)
@@ -372,8 +390,18 @@ i915_request_await_execution(struct i915_request *rq,
 	i915_sw_fence_await(cb->fence);
 	init_irq_work(&cb->work, irq_execute_cb);
 
+	if (hook) {
+		cb->hook = hook;
+		cb->signal = i915_request_get(signal);
+		cb->work.func = irq_execute_cb_hook;
+	}
+
 	spin_lock_irq(&signal->lock);
 	if (i915_request_is_active(signal)) {
+		if (hook) {
+			hook(rq, &signal->fence);
+			i915_request_put(signal);
+		}
 		i915_sw_fence_complete(cb->fence);
 		kmem_cache_free(global.slab_execute_cbs, cb);
 	} else {
@@ -802,7 +830,7 @@ emit_semaphore_wait(struct i915_request *to,
 		return err;
 
 	/* Only submit our spinner after the signaler is running! */
-	err = i915_request_await_execution(to, from, gfp);
+	err = __i915_request_await_execution(to, from, NULL, gfp);
 	if (err)
 		return err;
 
@@ -923,6 +951,52 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
 	return 0;
 }
 
+int
+i915_request_await_execution(struct i915_request *rq,
+			     struct dma_fence *fence,
+			     void (*hook)(struct i915_request *rq,
+					  struct dma_fence *signal))
+{
+	struct dma_fence **child = &fence;
+	unsigned int nchild = 1;
+	int ret;
+
+	if (dma_fence_is_array(fence)) {
+		struct dma_fence_array *array = to_dma_fence_array(fence);
+
+		/* XXX Error for signal-on-any fence arrays */
+
+		child = array->fences;
+		nchild = array->num_fences;
+		GEM_BUG_ON(!nchild);
+	}
+
+	do {
+		fence = *child++;
+		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+			continue;
+
+		/*
+		 * We don't squash repeated fence dependencies here as we
+		 * want to run our callback in all cases.
+		 */
+
+		if (dma_fence_is_i915(fence))
+			ret = __i915_request_await_execution(rq,
+							     to_request(fence),
+							     hook,
+							     I915_FENCE_GFP);
+		else
+			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
+							    I915_FENCE_TIMEOUT,
+							    GFP_KERNEL);
+		if (ret < 0)
+			return ret;
+	} while (--nchild);
+
+	return 0;
+}
+
 /**
  * i915_request_await_object - set this request to (async) wait upon a bo
  * @to: request we are wishing to use
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index d7f9b2194568..c9f7d07991c8 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -283,6 +283,10 @@ int i915_request_await_object(struct i915_request *to,
 			      bool write);
 int i915_request_await_dma_fence(struct i915_request *rq,
 				 struct dma_fence *fence);
+int i915_request_await_execution(struct i915_request *rq,
+				 struct dma_fence *fence,
+				 void (*hook)(struct i915_request *rq,
+					      struct dma_fence *signal));
 
 void i915_request_add(struct i915_request *rq);
 
-- 
2.20.1


* [PATCH 22/45] drm/i915/execlists: Virtual engine bonding
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (19 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 21/45] drm/i915: Extend execution fence to support a callback Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-29 15:58   ` Tvrtko Ursulin
  2019-04-25  9:19 ` [PATCH 23/45] drm/i915: Allow specification of parallel execbuf Chris Wilson
                   ` (30 subsequent siblings)
  51 siblings, 1 reply; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

Some users require that when a master batch is executed on one particular
engine, a companion batch is run simultaneously on a specific slave
engine. For this purpose, we introduce virtual engine bonding, allowing
maps of master:slaves to be constructed to constrain which physical
engines a virtual engine may select given a fence on a master engine.
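
For illustration, a rough userspace sketch of constructing such a bond. This
assumes the engines setparam flow and the i915_user_extension chaining
fields (name/next_extension) introduced earlier in the series, plus the
convention of marking the balanced slot with an invalid class/instance, none
of which are part of this patch; the class and instance values are examples:

	/* Slot 0 of the engine map becomes a virtual engine over vcs0/vcs1 */
	I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balance, 2) = {
		.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE,
		.engine_index = 0,
		.num_siblings = 2,
		.engines = {
			{ I915_ENGINE_CLASS_VIDEO, 0 },
			{ I915_ENGINE_CLASS_VIDEO, 1 },
		},
	};

	/* When bonded to a master submission on rcs0, only allow vcs1 */
	I915_DEFINE_CONTEXT_ENGINES_BOND(bond, 1) = {
		.base.name = I915_CONTEXT_ENGINES_EXT_BOND,
		.base.next_extension = (uintptr_t)&balance,
		.master = { I915_ENGINE_CLASS_RENDER, 0 },
		.virtual_index = 0,
		.num_bonds = 1,
		.engines = { { I915_ENGINE_CLASS_VIDEO, 1 } },
	};

	/* mirrors struct i915_context_param_engines with one engine slot */
	struct {
		__u64 extensions;
		struct i915_engine_class_instance engines[1];
	} __attribute__((packed)) engines = {
		.extensions = (uintptr_t)&bond,
		/* placeholder; filled in by the load-balance extension */
		.engines = { { I915_ENGINE_CLASS_INVALID,
			       I915_ENGINE_CLASS_INVALID_NONE } },
	};

	struct drm_i915_gem_context_param p = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_ENGINES,
		.size = sizeof(engines),
		.value = (uintptr_t)&engines,
	};
	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);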

For the moment, we continue to ignore the issue of preemption deferring
the master request for later. Ideally, we would like to then also remove
the slave and run something else rather than have it stall the pipeline.
With load balancing, we should be able to move workload around it, but
there is a similar stall on the master pipeline while it may wait for
the slave to be executed. At the cost of more latency for the bonded
request, it may be interesting to launch both on their engines in
lockstep. (Bubbles abound.)

Opens: Also what about bonding an engine as its own master? It doesn't
break anything internally, so allow the silliness.

v2: Emancipate the bonds
v3: Couple in delayed scheduling for the selftests
v4: Handle invalid mutually exclusive bonding
v5: Mention what the uapi does
v6: s/nbond/num_bonds/

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   7 +
 drivers/gpu/drm/i915/gt/intel_lrc.c           |  98 +++++++++
 drivers/gpu/drm/i915/gt/intel_lrc.h           |   4 +
 drivers/gpu/drm/i915/gt/selftest_lrc.c        | 191 ++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_context.c       |  84 ++++++++
 drivers/gpu/drm/i915/selftests/lib_sw_fence.c |   3 +
 include/uapi/drm/i915_drm.h                   |  44 ++++
 7 files changed, 431 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index b07d3685e2cb..ca8d95a5708d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -405,6 +405,13 @@ struct intel_engine_cs {
 	 */
 	void		(*submit_request)(struct i915_request *rq);
 
+	/*
+	 * Called on signaling of a SUBMIT_FENCE, passing along the signaling
+	 * request down to the bonded pairs.
+	 */
+	void            (*bond_execute)(struct i915_request *rq,
+					struct dma_fence *signal);
+
 	/*
 	 * Call when the priority on a request has changed and it and its
 	 * dependencies may need rescheduling. Note the request itself may
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 8acf9d1c6f2f..d793d976402d 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -191,6 +191,18 @@ struct virtual_engine {
 		int prio;
 	} nodes[I915_NUM_ENGINES];
 
+	/*
+	 * Keep track of bonded pairs -- restrictions upon our selection
+	 * of physical engines any particular request may be submitted to.
+	 * If we receive a submit-fence from a master engine, we will only
+	 * use one of sibling_mask physical engines.
+	 */
+	struct ve_bond {
+		const struct intel_engine_cs *master;
+		intel_engine_mask_t sibling_mask;
+	} *bonds;
+	unsigned int num_bonds;
+
 	/* And finally, which physical engines this virtual engine maps onto. */
 	unsigned int num_siblings;
 	struct intel_engine_cs *siblings[0];
@@ -982,6 +994,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			rb_erase_cached(rb, &execlists->virtual);
 			RB_CLEAR_NODE(rb);
 
+			GEM_BUG_ON(!(rq->execution_mask & engine->mask));
 			rq->engine = engine;
 
 			if (engine != ve->siblings[0]) {
@@ -3093,6 +3106,8 @@ static void virtual_context_destroy(struct kref *kref)
 	if (ve->context.state)
 		__execlists_context_fini(&ve->context);
 
+	kfree(ve->bonds);
+
 	i915_timeline_fini(&ve->base.timeline);
 	kfree(ve);
 }
@@ -3288,6 +3303,38 @@ static void virtual_submit_request(struct i915_request *rq)
 	tasklet_schedule(&ve->base.execlists.tasklet);
 }
 
+static struct ve_bond *
+virtual_find_bond(struct virtual_engine *ve,
+		  const struct intel_engine_cs *master)
+{
+	int i;
+
+	for (i = 0; i < ve->num_bonds; i++) {
+		if (ve->bonds[i].master == master)
+			return &ve->bonds[i];
+	}
+
+	return NULL;
+}
+
+static void
+virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
+{
+	struct virtual_engine *ve = to_virtual_engine(rq->engine);
+	struct ve_bond *bond;
+
+	bond = virtual_find_bond(ve, to_request(signal)->engine);
+	if (bond) {
+		intel_engine_mask_t old, new, cmp;
+
+		cmp = READ_ONCE(rq->execution_mask);
+		do {
+			old = cmp;
+			new = cmp & bond->sibling_mask;
+		} while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old);
+	}
+}
+
 struct intel_context *
 intel_execlists_create_virtual(struct i915_gem_context *ctx,
 			       struct intel_engine_cs **siblings,
@@ -3328,6 +3375,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
 
 	ve->base.schedule = i915_schedule;
 	ve->base.submit_request = virtual_submit_request;
+	ve->base.bond_execute = virtual_bond_execute;
 
 	ve->base.execlists.queue_priority_hint = INT_MIN;
 	tasklet_init(&ve->base.execlists.tasklet,
@@ -3417,9 +3465,59 @@ intel_execlists_clone_virtual(struct i915_gem_context *ctx,
 	if (IS_ERR(dst))
 		return dst;
 
+	if (se->num_bonds) {
+		struct virtual_engine *de = to_virtual_engine(dst->engine);
+
+		de->bonds = kmemdup(se->bonds,
+				    sizeof(*se->bonds) * se->num_bonds,
+				    GFP_KERNEL);
+		if (!de->bonds) {
+			intel_context_put(dst);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		de->num_bonds = se->num_bonds;
+	}
+
 	return dst;
 }
 
+int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
+				     const struct intel_engine_cs *master,
+				     const struct intel_engine_cs *sibling)
+{
+	struct virtual_engine *ve = to_virtual_engine(engine);
+	struct ve_bond *bond;
+	int n;
+
+	/* Sanity check the sibling is part of the virtual engine */
+	for (n = 0; n < ve->num_siblings; n++)
+		if (sibling == ve->siblings[n])
+			break;
+	if (n == ve->num_siblings)
+		return -EINVAL;
+
+	bond = virtual_find_bond(ve, master);
+	if (bond) {
+		bond->sibling_mask |= sibling->mask;
+		return 0;
+	}
+
+	bond = krealloc(ve->bonds,
+			sizeof(*bond) * (ve->num_bonds + 1),
+			GFP_KERNEL);
+	if (!bond)
+		return -ENOMEM;
+
+	bond[ve->num_bonds].master = master;
+	bond[ve->num_bonds].sibling_mask = sibling->mask;
+
+	ve->bonds = bond;
+	ve->num_bonds++;
+
+	return 0;
+}
+
 void intel_execlists_show_requests(struct intel_engine_cs *engine,
 				   struct drm_printer *m,
 				   void (*show_request)(struct drm_printer *m,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 5530606052e5..e029aee87adf 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -123,4 +123,8 @@ struct intel_context *
 intel_execlists_clone_virtual(struct i915_gem_context *ctx,
 			      struct intel_engine_cs *src);
 
+int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
+				     const struct intel_engine_cs *master,
+				     const struct intel_engine_cs *sibling);
+
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 73308749936c..68d10038f3c9 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -13,6 +13,7 @@
 #include "selftests/igt_gem_utils.h"
 #include "selftests/igt_live_test.h"
 #include "selftests/igt_spinner.h"
+#include "selftests/lib_sw_fence.h"
 #include "selftests/mock_context.h"
 
 static int live_sanitycheck(void *arg)
@@ -1610,6 +1611,195 @@ static int live_virtual_mask(void *arg)
 	return err;
 }
 
+static int bond_virtual_engine(struct drm_i915_private *i915,
+			       unsigned int class,
+			       struct intel_engine_cs **siblings,
+			       unsigned int nsibling,
+			       unsigned int flags)
+#define BOND_SCHEDULE BIT(0)
+{
+	struct intel_engine_cs *master;
+	struct i915_gem_context *ctx;
+	struct i915_request *rq[16];
+	enum intel_engine_id id;
+	unsigned long n;
+	int err;
+
+	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
+
+	ctx = kernel_context(i915);
+	if (!ctx)
+		return -ENOMEM;
+
+	err = 0;
+	rq[0] = ERR_PTR(-ENOMEM);
+	for_each_engine(master, i915, id) {
+		struct i915_sw_fence fence = {};
+
+		if (master->class == class)
+			continue;
+
+		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
+
+		rq[0] = igt_request_alloc(ctx, master);
+		if (IS_ERR(rq[0])) {
+			err = PTR_ERR(rq[0]);
+			goto out;
+		}
+		i915_request_get(rq[0]);
+
+		if (flags & BOND_SCHEDULE) {
+			onstack_fence_init(&fence);
+			err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
+							       &fence,
+							       GFP_KERNEL);
+		}
+		i915_request_add(rq[0]);
+		if (err < 0)
+			goto out;
+
+		for (n = 0; n < nsibling; n++) {
+			struct intel_context *ve;
+
+			ve = intel_execlists_create_virtual(ctx,
+							    siblings,
+							    nsibling);
+			if (IS_ERR(ve)) {
+				err = PTR_ERR(ve);
+				onstack_fence_fini(&fence);
+				goto out;
+			}
+
+			err = intel_virtual_engine_attach_bond(ve->engine,
+							       master,
+							       siblings[n]);
+			if (err) {
+				intel_context_put(ve);
+				onstack_fence_fini(&fence);
+				goto out;
+			}
+
+			err = intel_context_pin(ve);
+			intel_context_put(ve);
+			if (err) {
+				onstack_fence_fini(&fence);
+				goto out;
+			}
+
+			rq[n + 1] = i915_request_create(ve);
+			intel_context_unpin(ve);
+			if (IS_ERR(rq[n + 1])) {
+				err = PTR_ERR(rq[n + 1]);
+				onstack_fence_fini(&fence);
+				goto out;
+			}
+			i915_request_get(rq[n + 1]);
+
+			err = i915_request_await_execution(rq[n + 1],
+							   &rq[0]->fence,
+							   ve->engine->bond_execute);
+			i915_request_add(rq[n + 1]);
+			if (err < 0) {
+				onstack_fence_fini(&fence);
+				goto out;
+			}
+		}
+		onstack_fence_fini(&fence);
+
+		if (i915_request_wait(rq[0],
+				      I915_WAIT_LOCKED,
+				      HZ / 10) < 0) {
+			pr_err("Master request did not execute (on %s)!\n",
+			       rq[0]->engine->name);
+			err = -EIO;
+			goto out;
+		}
+
+		for (n = 0; n < nsibling; n++) {
+			if (i915_request_wait(rq[n + 1],
+					      I915_WAIT_LOCKED,
+					      MAX_SCHEDULE_TIMEOUT) < 0) {
+				err = -EIO;
+				goto out;
+			}
+
+			if (rq[n + 1]->engine != siblings[n]) {
+				pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
+				       siblings[n]->name,
+				       rq[n + 1]->engine->name,
+				       rq[0]->engine->name);
+				err = -EINVAL;
+				goto out;
+			}
+		}
+
+		for (n = 0; !IS_ERR(rq[n]); n++)
+			i915_request_put(rq[n]);
+		rq[0] = ERR_PTR(-ENOMEM);
+	}
+
+out:
+	for (n = 0; !IS_ERR(rq[n]); n++)
+		i915_request_put(rq[n]);
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	kernel_context_close(ctx);
+	return err;
+}
+
+static int live_virtual_bond(void *arg)
+{
+	static const struct phase {
+		const char *name;
+		unsigned int flags;
+	} phases[] = {
+		{ "", 0 },
+		{ "schedule", BOND_SCHEDULE },
+		{ },
+	};
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	unsigned int class, inst;
+	int err = 0;
+
+	if (USES_GUC_SUBMISSION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+		const struct phase *p;
+		int nsibling;
+
+		nsibling = 0;
+		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+			if (!i915->engine_class[class][inst])
+				break;
+
+			GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
+			siblings[nsibling++] = i915->engine_class[class][inst];
+		}
+		if (nsibling < 2)
+			continue;
+
+		for (p = phases; p->name; p++) {
+			err = bond_virtual_engine(i915,
+						  class, siblings, nsibling,
+						  p->flags);
+			if (err) {
+				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
+				       __func__, p->name, class, nsibling, err);
+				goto out_unlock;
+			}
+		}
+	}
+
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
@@ -1624,6 +1814,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_preempt_smoke),
 		SUBTEST(live_virtual_engine),
 		SUBTEST(live_virtual_mask),
+		SUBTEST(live_virtual_bond),
 	};
 
 	if (!HAS_EXECLISTS(i915))
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 57b09f624bb4..cbba8f4bd786 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1491,8 +1491,92 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data)
 	return err;
 }
 
+static int
+set_engines__bond(struct i915_user_extension __user *base, void *data)
+{
+	struct i915_context_engines_bond __user *ext =
+		container_of_user(base, typeof(*ext), base);
+	const struct set_engines *set = data;
+	struct i915_engine_class_instance ci;
+	struct intel_engine_cs *virtual;
+	struct intel_engine_cs *master;
+	u16 idx, num_bonds;
+	int err, n;
+
+	if (get_user(idx, &ext->virtual_index))
+		return -EFAULT;
+
+	if (idx >= set->engines->num_engines) {
+		DRM_DEBUG("Invalid index for virtual engine: %d >= %d\n",
+			  idx, set->engines->num_engines);
+		return -EINVAL;
+	}
+
+	idx = array_index_nospec(idx, set->engines->num_engines);
+	if (!set->engines->engines[idx]) {
+		DRM_DEBUG("Invalid engine at %d\n", idx);
+		return -EINVAL;
+	}
+	virtual = set->engines->engines[idx]->engine;
+
+	err = check_user_mbz(&ext->flags);
+	if (err)
+		return err;
+
+	for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
+		err = check_user_mbz(&ext->mbz64[n]);
+		if (err)
+			return err;
+	}
+
+	if (copy_from_user(&ci, &ext->master, sizeof(ci)))
+		return -EFAULT;
+
+	master = intel_engine_lookup_user(set->ctx->i915,
+					  ci.engine_class, ci.engine_instance);
+	if (!master) {
+		DRM_DEBUG("Unrecognised master engine: { class:%u, instance:%u }\n",
+			  ci.engine_class, ci.engine_instance);
+		return -EINVAL;
+	}
+
+	if (get_user(num_bonds, &ext->num_bonds))
+		return -EFAULT;
+
+	for (n = 0; n < num_bonds; n++) {
+		struct intel_engine_cs *bond;
+
+		if (copy_from_user(&ci, &ext->engines[n], sizeof(ci)))
+			return -EFAULT;
+
+		bond = intel_engine_lookup_user(set->ctx->i915,
+						ci.engine_class,
+						ci.engine_instance);
+		if (!bond) {
+			DRM_DEBUG("Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n",
+				  n, ci.engine_class, ci.engine_instance);
+			return -EINVAL;
+		}
+
+		/*
+		 * A non-virtual engine has no siblings to choose between, and
+		 * a submit fence will always be directed to the one engine.
+		 */
+		if (intel_engine_is_virtual(virtual)) {
+			err = intel_virtual_engine_attach_bond(virtual,
+							       master,
+							       bond);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
 static const i915_user_extension_fn set_engines__extensions[] = {
 	[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
+	[I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond,
 };
 
 static int
diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
index 2bfa72c1654b..b976c12817c5 100644
--- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
+++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
@@ -45,6 +45,9 @@ void __onstack_fence_init(struct i915_sw_fence *fence,
 
 void onstack_fence_fini(struct i915_sw_fence *fence)
 {
+	if (!fence->flags)
+		return;
+
 	i915_sw_fence_commit(fence);
 	i915_sw_fence_fini(fence);
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index ff2ababc0984..c0054074b4c3 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1543,6 +1543,10 @@ struct drm_i915_gem_context_param {
  * sized argument, will revert back to default settings.
  *
  * See struct i915_context_param_engines.
+ *
+ * Extensions:
+ *   i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
+ *   i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
  */
 #define I915_CONTEXT_PARAM_ENGINES	0xa
 /* Must be kept compact -- no holes and well documented */
@@ -1645,9 +1649,49 @@ struct i915_context_engines_load_balance {
 	struct i915_engine_class_instance engines[N__]; \
 } __attribute__((packed)) name__
 
+/*
+ * i915_context_engines_bond:
+ *
+ * Constructed bonded pairs for execution within a virtual engine.
+ *
+ * All engines are equal, but some are more equal than others. Given
+ * the distribution of resources in the HW, it may be preferable to run
+ * a request on a given subset of engines in parallel to a request on a
+ * specific engine. We enable this selection of engines within a virtual
+ * engine by specifying bonding pairs: for any given master engine we will
+ * only execute on one of the corresponding siblings within the virtual engine.
+ *
+ * To execute a request in parallel on the master engine and a sibling requires
+ * coordination with a I915_EXEC_FENCE_SUBMIT.
+ */
+struct i915_context_engines_bond {
+	struct i915_user_extension base;
+
+	struct i915_engine_class_instance master;
+
+	__u16 virtual_index; /* index of virtual engine in ctx->engines[] */
+	__u16 num_bonds;
+
+	__u64 flags; /* all undefined flags must be zero */
+	__u64 mbz64[4]; /* reserved for future use; must be zero */
+
+	struct i915_engine_class_instance engines[0];
+} __attribute__((packed));
+
+#define I915_DEFINE_CONTEXT_ENGINES_BOND(name__, N__) struct { \
+	struct i915_user_extension base; \
+	struct i915_engine_class_instance master; \
+	__u16 virtual_index; \
+	__u16 num_bonds; \
+	__u64 flags; \
+	__u64 mbz64[4]; \
+	struct i915_engine_class_instance engines[N__]; \
+} __attribute__((packed)) name__
+
 struct i915_context_param_engines {
 	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
 #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
+#define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */
 	struct i915_engine_class_instance engines[0];
 } __attribute__((packed));
 
-- 
2.20.1


* [PATCH 23/45] drm/i915: Allow specification of parallel execbuf
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (20 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 22/45] drm/i915/execlists: Virtual engine bonding Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 24/45] drm/i915: Split GEM object type definition to its own header Chris Wilson
                   ` (29 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

There is a desire to split a task onto two engines and have them run at
the same time, e.g. scanline interleaving to spread the workload evenly.
Through the use of the out-fence from the first execbuf, we can
coordinate the secondary execbuf to become ready only at the same time
as the first, so that with everything idle the second execbuf is
executed in parallel with the first. The key difference between the new
EXEC_FENCE_SUBMIT and the existing EXEC_FENCE_IN is that the in-fence
waits for the completion of the first request (so that all of its
rendering results are visible to the second execbuf, the more common
userspace fence requirement), whereas the submit-fence only waits for
the first request to be submitted to the hardware.

Since we only have a single input fence slot, userspace cannot mix an
in-fence and a submit-fence. It has to use one or the other! This is not
such a harsh requirement, since by virtue of the submit-fence, the
secondary execbuf inherits all of the dependencies from the first
request, and for the application the dependencies should be common
between the primary and secondary execbuf.
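
The intended userspace flow then looks roughly like the following sketch
(buffer and batch setup plus error handling elided; assumes
<drm/i915_drm.h> and libdrm's drmIoctl()):

	struct drm_i915_gem_execbuffer2 eb1 = { 0 }; /* + buffers, batch, ... */
	struct drm_i915_gem_execbuffer2 eb2 = { 0 }; /* + buffers, batch, ... */
	int submit_fd;

	/* First submission exports a fence fd in the upper half of rsvd2. */
	eb1.flags |= I915_EXEC_FENCE_OUT;
	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, &eb1);
	submit_fd = (int)(eb1.rsvd2 >> 32);

	/*
	 * Second submission waits (asynchronously) only for the first to be
	 * submitted to HW, not for it to complete.
	 */
	eb2.flags |= I915_EXEC_FENCE_SUBMIT;
	eb2.rsvd2 = (__u32)submit_fd;
	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, &eb2);

Support for the new flag can be probed beforehand via
I915_PARAM_HAS_EXEC_SUBMIT_FENCE.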

Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Testcase: igt/gem_exec_fence/parallel
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c            |  1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 25 +++++++++++++++++++++-
 include/uapi/drm/i915_drm.h                | 17 ++++++++++++++-
 3 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 81866bd1b6e4..10d7016e8f06 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -435,6 +435,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_CAPTURE:
 	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
 	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
+	case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
 		/* For the time being all of these are always true;
 		 * if some supported hardware does not have one of these
 		 * features this value needs to be provided from
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d6c5220addd0..7ce25b54c57b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2318,6 +2318,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 {
 	struct i915_execbuffer eb;
 	struct dma_fence *in_fence = NULL;
+	struct dma_fence *exec_fence = NULL;
 	struct sync_file *out_fence = NULL;
 	int out_fence_fd = -1;
 	int err;
@@ -2360,11 +2361,24 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 			return -EINVAL;
 	}
 
+	if (args->flags & I915_EXEC_FENCE_SUBMIT) {
+		if (in_fence) {
+			err = -EINVAL;
+			goto err_in_fence;
+		}
+
+		exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
+		if (!exec_fence) {
+			err = -EINVAL;
+			goto err_in_fence;
+		}
+	}
+
 	if (args->flags & I915_EXEC_FENCE_OUT) {
 		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
 		if (out_fence_fd < 0) {
 			err = out_fence_fd;
-			goto err_in_fence;
+			goto err_exec_fence;
 		}
 	}
 
@@ -2494,6 +2508,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 			goto err_request;
 	}
 
+	if (exec_fence) {
+		err = i915_request_await_execution(eb.request, exec_fence,
+						   eb.engine->bond_execute);
+		if (err < 0)
+			goto err_request;
+	}
+
 	if (fences) {
 		err = await_fence_array(&eb, fences);
 		if (err)
@@ -2555,6 +2576,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_out_fence:
 	if (out_fence_fd != -1)
 		put_unused_fd(out_fence_fd);
+err_exec_fence:
+	dma_fence_put(exec_fence);
 err_in_fence:
 	dma_fence_put(in_fence);
 	return err;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index c0054074b4c3..ddf50bb7399a 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -604,6 +604,12 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_MMAP_GTT_COHERENT	52
 
+/*
+ * Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel
+ * execution through use of explicit fence support.
+ * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
+ */
+#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1126,7 +1132,16 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_FENCE_ARRAY   (1<<19)
 
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
+/*
+ * Setting I915_EXEC_FENCE_SUBMIT implies that lower_32_bits(rsvd2) represent
+ * a sync_file fd to wait upon (in a nonblocking manner) prior to executing
+ * the batch.
+ *
+ * Returns -EINVAL if the sync_file fd cannot be found.
+ */
+#define I915_EXEC_FENCE_SUBMIT		(1 << 20)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
2.20.1

* [PATCH 24/45] drm/i915: Split GEM object type definition to its own header
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (21 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 23/45] drm/i915: Allow specification of parallel execbuf Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-26 12:12   ` Jani Nikula
  2019-04-25  9:19 ` [PATCH 25/45] drm/i915: Pull GEM ioctls interface to its own file Chris Wilson
                   ` (28 subsequent siblings)
  51 siblings, 1 reply; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

For convenience in avoiding inline spaghetti, keep the type definition
as a separate header.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gem/Makefile             |   1 +
 drivers/gpu/drm/i915/gem/Makefile.header-test |  16 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 285 +++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   1 +
 drivers/gpu/drm/i915/i915_drv.h               |   3 +-
 drivers/gpu/drm/i915/i915_gem_batch_pool.h    |   3 +-
 drivers/gpu/drm/i915/i915_gem_gtt.h           |   1 +
 drivers/gpu/drm/i915/i915_gem_object.h        | 295 +-----------------
 9 files changed, 312 insertions(+), 294 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/Makefile
 create mode 100644 drivers/gpu/drm/i915/gem/Makefile.header-test
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_object_types.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 58643373495c..def781c9ea69 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -85,6 +85,7 @@ gt-$(CONFIG_DRM_I915_SELFTEST) += \
 i915-y += $(gt-y)
 
 # GEM (Graphics Execution Management) code
+obj-y += gem/
 i915-y += \
 	  i915_active.o \
 	  i915_cmd_parser.o \
diff --git a/drivers/gpu/drm/i915/gem/Makefile b/drivers/gpu/drm/i915/gem/Makefile
new file mode 100644
index 000000000000..07e7b8b840ea
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/Makefile
@@ -0,0 +1 @@
+include $(src)/Makefile.header-test # Extra header tests
diff --git a/drivers/gpu/drm/i915/gem/Makefile.header-test b/drivers/gpu/drm/i915/gem/Makefile.header-test
new file mode 100644
index 000000000000..61e06cbb4b32
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/Makefile.header-test
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: MIT
+# Copyright © 2019 Intel Corporation
+
+# Test the headers are compilable as standalone units
+header_test := $(notdir $(wildcard $(src)/*.h))
+
+quiet_cmd_header_test = HDRTEST $@
+      cmd_header_test = echo "\#include \"$(<F)\"" > $@
+
+header_test_%.c: %.h
+	$(call cmd,header_test)
+
+extra-$(CONFIG_DRM_I915_WERROR) += \
+	$(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h)))
+
+clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h)))
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
new file mode 100644
index 000000000000..e4b50944f553
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -0,0 +1,285 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __I915_GEM_OBJECT_TYPES_H__
+#define __I915_GEM_OBJECT_TYPES_H__
+
+#include <linux/reservation.h>
+
+#include <drm/drm_gem.h>
+
+#include "../i915_active.h"
+#include "../i915_selftest.h"
+
+struct drm_i915_gem_object;
+
+/*
+ * struct i915_lut_handle tracks the fast lookups from handle to vma used
+ * for execbuf. Although we use a radixtree for that mapping, in order to
+ * remove them as the object or context is closed, we need a secondary list
+ * and a translation entry (i915_lut_handle).
+ */
+struct i915_lut_handle {
+	struct list_head obj_link;
+	struct list_head ctx_link;
+	struct i915_gem_context *ctx;
+	u32 handle;
+};
+
+struct drm_i915_gem_object_ops {
+	unsigned int flags;
+#define I915_GEM_OBJECT_HAS_STRUCT_PAGE	BIT(0)
+#define I915_GEM_OBJECT_IS_SHRINKABLE	BIT(1)
+#define I915_GEM_OBJECT_IS_PROXY	BIT(2)
+#define I915_GEM_OBJECT_ASYNC_CANCEL	BIT(3)
+
+	/* Interface between the GEM object and its backing storage.
+	 * get_pages() is called once prior to the use of the associated set
+	 * of pages before binding them into the GTT, and put_pages() is
+	 * called after we no longer need them. As we expect there to be
+	 * associated cost with migrating pages between the backing storage
+	 * and making them available for the GPU (e.g. clflush), we may hold
+	 * onto the pages after they are no longer referenced by the GPU
+	 * in case they may be used again shortly (for example migrating the
+	 * pages to a different memory domain within the GTT). put_pages()
+	 * will therefore most likely be called when the object itself is
+	 * being released or under memory pressure (where we attempt to
+	 * reap pages for the shrinker).
+	 */
+	int (*get_pages)(struct drm_i915_gem_object *obj);
+	void (*put_pages)(struct drm_i915_gem_object *obj,
+			  struct sg_table *pages);
+
+	int (*pwrite)(struct drm_i915_gem_object *obj,
+		      const struct drm_i915_gem_pwrite *arg);
+
+	int (*dmabuf_export)(struct drm_i915_gem_object *obj);
+	void (*release)(struct drm_i915_gem_object *obj);
+};
+
+struct drm_i915_gem_object {
+	struct drm_gem_object base;
+
+	const struct drm_i915_gem_object_ops *ops;
+
+	struct {
+		/**
+		 * @vma.lock: protect the list/tree of vmas
+		 */
+		spinlock_t lock;
+
+		/**
+		 * @vma.list: List of VMAs backed by this object
+		 *
+		 * The VMA on this list are ordered by type, all GGTT vma are
+		 * placed at the head and all ppGTT vma are placed at the tail.
+		 * The different types of GGTT vma are unordered between
+		 * themselves, use the @vma.tree (which has a defined order
+		 * between all VMA) to quickly find an exact match.
+		 */
+		struct list_head list;
+
+		/**
+		 * @vma.tree: Ordered tree of VMAs backed by this object
+		 *
+		 * All VMA created for this object are placed in the @vma.tree
+		 * for fast retrieval via a binary search in
+		 * i915_vma_instance(). They are also added to @vma.list for
+		 * easy iteration.
+		 */
+		struct rb_root tree;
+	} vma;
+
+	/**
+	 * @lut_list: List of vma lookup entries in use for this object.
+	 *
+	 * If this object is closed, we need to remove all of its VMA from
+	 * the fast lookup index in associated contexts; @lut_list provides
+	 * this translation from object to context->handles_vma.
+	 */
+	struct list_head lut_list;
+
+	/** Stolen memory for this object, instead of being backed by shmem. */
+	struct drm_mm_node *stolen;
+	union {
+		struct rcu_head rcu;
+		struct llist_node freed;
+	};
+
+	/**
+	 * Whether the object is currently in the GGTT mmap.
+	 */
+	unsigned int userfault_count;
+	struct list_head userfault_link;
+
+	struct list_head batch_pool_link;
+	I915_SELFTEST_DECLARE(struct list_head st_link);
+
+	unsigned long flags;
+
+	/**
+	 * Have we taken a reference for the object for incomplete GPU
+	 * activity?
+	 */
+#define I915_BO_ACTIVE_REF 0
+
+	/*
+	 * Is the object to be mapped as read-only to the GPU
+	 * Only honoured if hardware has relevant pte bit
+	 */
+	unsigned int cache_level:3;
+	unsigned int cache_coherent:2;
+#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
+#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
+	unsigned int cache_dirty:1;
+
+	/**
+	 * @read_domains: Read memory domains.
+	 *
+	 * These monitor which caches contain read/write data related to the
+	 * object. When transitioning from one set of domains to another,
+	 * the driver is called to ensure that caches are suitably flushed and
+	 * invalidated.
+	 */
+	u16 read_domains;
+
+	/**
+	 * @write_domain: Corresponding unique write memory domain.
+	 */
+	u16 write_domain;
+
+	atomic_t frontbuffer_bits;
+	unsigned int frontbuffer_ggtt_origin; /* write once */
+	struct i915_active_request frontbuffer_write;
+
+	/** Current tiling stride for the object, if it's tiled. */
+	unsigned int tiling_and_stride;
+#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
+#define TILING_MASK (FENCE_MINIMUM_STRIDE - 1)
+#define STRIDE_MASK (~TILING_MASK)
+
+	/** Count of VMA actually bound by this object */
+	unsigned int bind_count;
+	unsigned int active_count;
+	/** Count of how many global VMA are currently pinned for use by HW */
+	unsigned int pin_global;
+
+	struct {
+		struct mutex lock; /* protects the pages and their use */
+		atomic_t pages_pin_count;
+
+		struct sg_table *pages;
+		void *mapping;
+
+		/* TODO: whack some of this into the error state */
+		struct i915_page_sizes {
+			/**
+			 * The sg mask of the pages sg_table, i.e. the mask of
+			 * the lengths for each sg entry.
+			 */
+			unsigned int phys;
+
+			/**
+			 * The gtt page sizes we are allowed to use given the
+			 * sg mask and the supported page sizes. This will
+			 * express the smallest unit we can use for the whole
+			 * object, as well as the larger sizes we may be able
+			 * to use opportunistically.
+			 */
+			unsigned int sg;
+
+			/**
+			 * The actual gtt page size usage. Since we can have
+			 * multiple vma associated with this object we need to
+			 * prevent any trampling of state, hence a copy of this
+			 * struct also lives in each vma, therefore the gtt
+			 * value here should only be read/write through the vma.
+			 */
+			unsigned int gtt;
+		} page_sizes;
+
+		I915_SELFTEST_DECLARE(unsigned int page_mask);
+
+		struct i915_gem_object_page_iter {
+			struct scatterlist *sg_pos;
+			unsigned int sg_idx; /* in pages, but 32bit eek! */
+
+			struct radix_tree_root radix;
+			struct mutex lock; /* protects this cache */
+		} get_page;
+
+		/**
+		 * Element within i915->mm.unbound_list or i915->mm.bound_list,
+		 * locked by i915->mm.obj_lock.
+		 */
+		struct list_head link;
+
+		/**
+		 * Advice: are the backing pages purgeable?
+		 */
+		unsigned int madv:2;
+
+		/**
+		 * This is set if the object has been written to since the
+		 * pages were last acquired.
+		 */
+		bool dirty:1;
+
+		/**
+		 * This is set if the object has been pinned due to unknown
+		 * swizzling.
+		 */
+		bool quirked:1;
+	} mm;
+
+	/** Breadcrumb of last rendering to the buffer.
+	 * There can only be one writer, but we allow for multiple readers.
+	 * If there is a writer that necessarily implies that all other
+	 * read requests are complete - but we may only be lazily clearing
+	 * the read requests. A read request is naturally the most recent
+	 * request on a ring, so we may have two different write and read
+	 * requests on one ring where the write request is older than the
+	 * read request. This allows for the CPU to read from an active
+	 * buffer by only waiting for the write to complete.
+	 */
+	struct reservation_object *resv;
+
+	/** References from framebuffers, locks out tiling changes. */
+	unsigned int framebuffer_references;
+
+	/** Record of address bit 17 of each page at last unbind. */
+	unsigned long *bit_17;
+
+	union {
+		struct i915_gem_userptr {
+			uintptr_t ptr;
+
+			struct i915_mm_struct *mm;
+			struct i915_mmu_object *mmu_object;
+			struct work_struct *work;
+		} userptr;
+
+		unsigned long scratch;
+
+		void *gvt_info;
+	};
+
+	/** for phys allocated objects */
+	struct drm_dma_handle *phys_handle;
+
+	struct reservation_object __builtin_resv;
+};
+
+static inline struct drm_i915_gem_object *
+to_intel_bo(struct drm_gem_object *gem)
+{
+	/* Assert that to_intel_bo(NULL) == NULL */
+	BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));
+
+	return container_of(gem, struct drm_i915_gem_object, base);
+}
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index ca8d95a5708d..ae73c6596d08 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -29,6 +29,7 @@
 #define I915_CMD_HASH_ORDER 9
 
 struct dma_fence;
+struct drm_i915_gem_object;
 struct drm_i915_reg_table;
 struct i915_gem_context;
 struct i915_request;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5ca4df9a7428..0cf87495e11b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -80,7 +80,6 @@
 #include "i915_gem.h"
 #include "i915_gem_context.h"
 #include "i915_gem_fence_reg.h"
-#include "i915_gem_object.h"
 #include "i915_gem_gtt.h"
 #include "i915_gpu_error.h"
 #include "i915_request.h"
@@ -135,6 +134,8 @@ bool i915_error_injected(void);
 	__i915_printk(i915, i915_error_injected() ? KERN_DEBUG : KERN_ERR, \
 		      fmt, ##__VA_ARGS__)
 
+struct drm_i915_gem_object;
+
 enum hpd_pin {
 	HPD_NONE = 0,
 	HPD_TV = HPD_NONE,     /* TV is known to be unreliable */
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
index 56947daaaf65..feeeeeaa54d8 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
@@ -9,6 +9,7 @@
 
 #include <linux/types.h>
 
+struct drm_i915_gem_object;
 struct intel_engine_cs;
 
 struct i915_gem_batch_pool {
@@ -19,7 +20,7 @@ struct i915_gem_batch_pool {
 void i915_gem_batch_pool_init(struct i915_gem_batch_pool *pool,
 			      struct intel_engine_cs *engine);
 void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
-struct drm_i915_gem_object*
+struct drm_i915_gem_object *
 i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
 
 #endif /* I915_GEM_BATCH_POOL_H */
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 2fafa04c45ec..593079f30fbe 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -61,6 +61,7 @@
 
 struct drm_i915_file_private;
 struct drm_i915_fence_reg;
+struct drm_i915_gem_object;
 struct i915_vma;
 
 typedef u32 gen6_pte_t;
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index ca93a40c0c87..3666b0c5f6ca 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -1,308 +1,19 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
 #ifndef __I915_GEM_OBJECT_H__
 #define __I915_GEM_OBJECT_H__
 
-#include <linux/reservation.h>
-
-#include <drm/drm_vma_manager.h>
 #include <drm/drm_gem.h>
 #include <drm/drm_file.h>
 #include <drm/drm_device.h>
 
 #include <drm/i915_drm.h>
 
-#include "i915_request.h"
-#include "i915_selftest.h"
-
-struct drm_i915_gem_object;
-
-/*
- * struct i915_lut_handle tracks the fast lookups from handle to vma used
- * for execbuf. Although we use a radixtree for that mapping, in order to
- * remove them as the object or context is closed, we need a secondary list
- * and a translation entry (i915_lut_handle).
- */
-struct i915_lut_handle {
-	struct list_head obj_link;
-	struct list_head ctx_link;
-	struct i915_gem_context *ctx;
-	u32 handle;
-};
-
-struct drm_i915_gem_object_ops {
-	unsigned int flags;
-#define I915_GEM_OBJECT_HAS_STRUCT_PAGE	BIT(0)
-#define I915_GEM_OBJECT_IS_SHRINKABLE	BIT(1)
-#define I915_GEM_OBJECT_IS_PROXY	BIT(2)
-#define I915_GEM_OBJECT_ASYNC_CANCEL	BIT(3)
-
-	/* Interface between the GEM object and its backing storage.
-	 * get_pages() is called once prior to the use of the associated set
-	 * of pages before to binding them into the GTT, and put_pages() is
-	 * called after we no longer need them. As we expect there to be
-	 * associated cost with migrating pages between the backing storage
-	 * and making them available for the GPU (e.g. clflush), we may hold
-	 * onto the pages after they are no longer referenced by the GPU
-	 * in case they may be used again shortly (for example migrating the
-	 * pages to a different memory domain within the GTT). put_pages()
-	 * will therefore most likely be called when the object itself is
-	 * being released or under memory pressure (where we attempt to
-	 * reap pages for the shrinker).
-	 */
-	int (*get_pages)(struct drm_i915_gem_object *);
-	void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
-
-	int (*pwrite)(struct drm_i915_gem_object *,
-		      const struct drm_i915_gem_pwrite *);
-
-	int (*dmabuf_export)(struct drm_i915_gem_object *);
-	void (*release)(struct drm_i915_gem_object *);
-};
-
-struct drm_i915_gem_object {
-	struct drm_gem_object base;
-
-	const struct drm_i915_gem_object_ops *ops;
-
-	struct {
-		/**
-		 * @vma.lock: protect the list/tree of vmas
-		 */
-		spinlock_t lock;
-
-		/**
-		 * @vma.list: List of VMAs backed by this object
-		 *
-		 * The VMA on this list are ordered by type, all GGTT vma are
-		 * placed at the head and all ppGTT vma are placed at the tail.
-		 * The different types of GGTT vma are unordered between
-		 * themselves, use the @vma.tree (which has a defined order
-		 * between all VMA) to quickly find an exact match.
-		 */
-		struct list_head list;
-
-		/**
-		 * @vma.tree: Ordered tree of VMAs backed by this object
-		 *
-		 * All VMA created for this object are placed in the @vma.tree
-		 * for fast retrieval via a binary search in
-		 * i915_vma_instance(). They are also added to @vma.list for
-		 * easy iteration.
-		 */
-		struct rb_root tree;
-	} vma;
-
-	/**
-	 * @lut_list: List of vma lookup entries in use for this object.
-	 *
-	 * If this object is closed, we need to remove all of its VMA from
-	 * the fast lookup index in associated contexts; @lut_list provides
-	 * this translation from object to context->handles_vma.
-	 */
-	struct list_head lut_list;
-
-	/** Stolen memory for this object, instead of being backed by shmem. */
-	struct drm_mm_node *stolen;
-	union {
-		struct rcu_head rcu;
-		struct llist_node freed;
-	};
-
-	/**
-	 * Whether the object is currently in the GGTT mmap.
-	 */
-	unsigned int userfault_count;
-	struct list_head userfault_link;
-
-	struct list_head batch_pool_link;
-	I915_SELFTEST_DECLARE(struct list_head st_link);
-
-	unsigned long flags;
-
-	/**
-	 * Have we taken a reference for the object for incomplete GPU
-	 * activity?
-	 */
-#define I915_BO_ACTIVE_REF 0
-
-	/*
-	 * Is the object to be mapped as read-only to the GPU
-	 * Only honoured if hardware has relevant pte bit
-	 */
-	unsigned int cache_level:3;
-	unsigned int cache_coherent:2;
-#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
-#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
-	unsigned int cache_dirty:1;
-
-	/**
-	 * @read_domains: Read memory domains.
-	 *
-	 * These monitor which caches contain read/write data related to the
-	 * object. When transitioning from one set of domains to another,
-	 * the driver is called to ensure that caches are suitably flushed and
-	 * invalidated.
-	 */
-	u16 read_domains;
-
-	/**
-	 * @write_domain: Corresponding unique write memory domain.
-	 */
-	u16 write_domain;
-
-	atomic_t frontbuffer_bits;
-	unsigned int frontbuffer_ggtt_origin; /* write once */
-	struct i915_active_request frontbuffer_write;
-
-	/** Current tiling stride for the object, if it's tiled. */
-	unsigned int tiling_and_stride;
-#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
-#define TILING_MASK (FENCE_MINIMUM_STRIDE-1)
-#define STRIDE_MASK (~TILING_MASK)
-
-	/** Count of VMA actually bound by this object */
-	unsigned int bind_count;
-	unsigned int active_count;
-	/** Count of how many global VMA are currently pinned for use by HW */
-	unsigned int pin_global;
-
-	struct {
-		struct mutex lock; /* protects the pages and their use */
-		atomic_t pages_pin_count;
-
-		struct sg_table *pages;
-		void *mapping;
-
-		/* TODO: whack some of this into the error state */
-		struct i915_page_sizes {
-			/**
-			 * The sg mask of the pages sg_table. i.e the mask of
-			 * of the lengths for each sg entry.
-			 */
-			unsigned int phys;
-
-			/**
-			 * The gtt page sizes we are allowed to use given the
-			 * sg mask and the supported page sizes. This will
-			 * express the smallest unit we can use for the whole
-			 * object, as well as the larger sizes we may be able
-			 * to use opportunistically.
-			 */
-			unsigned int sg;
-
-			/**
-			 * The actual gtt page size usage. Since we can have
-			 * multiple vma associated with this object we need to
-			 * prevent any trampling of state, hence a copy of this
-			 * struct also lives in each vma, therefore the gtt
-			 * value here should only be read/write through the vma.
-			 */
-			unsigned int gtt;
-		} page_sizes;
-
-		I915_SELFTEST_DECLARE(unsigned int page_mask);
-
-		struct i915_gem_object_page_iter {
-			struct scatterlist *sg_pos;
-			unsigned int sg_idx; /* in pages, but 32bit eek! */
-
-			struct radix_tree_root radix;
-			struct mutex lock; /* protects this cache */
-		} get_page;
-
-		/**
-		 * Element within i915->mm.unbound_list or i915->mm.bound_list,
-		 * locked by i915->mm.obj_lock.
-		 */
-		struct list_head link;
-
-		/**
-		 * Advice: are the backing pages purgeable?
-		 */
-		unsigned int madv:2;
-
-		/**
-		 * This is set if the object has been written to since the
-		 * pages were last acquired.
-		 */
-		bool dirty:1;
-
-		/**
-		 * This is set if the object has been pinned due to unknown
-		 * swizzling.
-		 */
-		bool quirked:1;
-	} mm;
-
-	/** Breadcrumb of last rendering to the buffer.
-	 * There can only be one writer, but we allow for multiple readers.
-	 * If there is a writer that necessarily implies that all other
-	 * read requests are complete - but we may only be lazily clearing
-	 * the read requests. A read request is naturally the most recent
-	 * request on a ring, so we may have two different write and read
-	 * requests on one ring where the write request is older than the
-	 * read request. This allows for the CPU to read from an active
-	 * buffer by only waiting for the write to complete.
-	 */
-	struct reservation_object *resv;
-
-	/** References from framebuffers, locks out tiling changes. */
-	unsigned int framebuffer_references;
-
-	/** Record of address bit 17 of each page at last unbind. */
-	unsigned long *bit_17;
-
-	union {
-		struct i915_gem_userptr {
-			uintptr_t ptr;
-
-			struct i915_mm_struct *mm;
-			struct i915_mmu_object *mmu_object;
-			struct work_struct *work;
-		} userptr;
-
-		unsigned long scratch;
-
-		void *gvt_info;
-	};
-
-	/** for phys allocated objects */
-	struct drm_dma_handle *phys_handle;
-
-	struct reservation_object __builtin_resv;
-};
-
-static inline struct drm_i915_gem_object *
-to_intel_bo(struct drm_gem_object *gem)
-{
-	/* Assert that to_intel_bo(NULL) == NULL */
-	BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));
-
-	return container_of(gem, struct drm_i915_gem_object, base);
-}
+#include "gem/i915_gem_object_types.h"
 
 struct drm_i915_gem_object *i915_gem_object_alloc(void);
 void i915_gem_object_free(struct drm_i915_gem_object *obj);
-- 
2.20.1

* [PATCH 25/45] drm/i915: Pull GEM ioctls interface to its own file
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (22 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 24/45] drm/i915: Split GEM object type definition to its own header Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 26/45] drm/i915: Move object->pages API to i915_gem_object.[ch] Chris Wilson
                   ` (27 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

Declutter i915_drv/gem.h by moving the ioctl API into its own header.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_ioctls.h | 52 ++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.c            |  1 +
 drivers/gpu/drm/i915/i915_drv.h            | 38 ----------------
 drivers/gpu/drm/i915/i915_gem.c            |  1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  1 +
 drivers/gpu/drm/i915/i915_gem_tiling.c     |  3 ++
 drivers/gpu/drm/i915/i915_gem_userptr.c    | 12 +++--
 7 files changed, 66 insertions(+), 42 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_ioctls.h

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
new file mode 100644
index 000000000000..ddc7f2a52b3e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
@@ -0,0 +1,52 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef I915_GEM_IOCTLS_H
+#define I915_GEM_IOCTLS_H
+
+struct drm_device;
+struct drm_file;
+
+int i915_gem_busy_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+int i915_gem_create_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file);
+int i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+int i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file);
+int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file);
+int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file);
+int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+int i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
+int i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file);
+int i915_gem_pread_ioctl(struct drm_device *dev, void *data,
+			 struct drm_file *file);
+int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file);
+int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file);
+int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file);
+int i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
+int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 10d7016e8f06..cd8d0bd38367 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -47,6 +47,7 @@
 #include <drm/drm_probe_helper.h>
 #include <drm/i915_drm.h>
 
+#include "gem/i915_gem_ioctls.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_reset.h"
 #include "gt/intel_workarounds.h"
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0cf87495e11b..d8ad9571b087 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2824,46 +2824,8 @@ ibx_disable_display_interrupt(struct drm_i915_private *dev_priv, u32 bits)
 }
 
 /* i915_gem.c */
-int i915_gem_create_ioctl(struct drm_device *dev, void *data,
-			  struct drm_file *file_priv);
-int i915_gem_pread_ioctl(struct drm_device *dev, void *data,
-			 struct drm_file *file_priv);
-int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
-			  struct drm_file *file_priv);
-int i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv);
-int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv);
-int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv);
-int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
-			     struct drm_file *file_priv);
-int i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv);
-int i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
-			       struct drm_file *file_priv);
-int i915_gem_busy_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv);
-int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
-			       struct drm_file *file);
-int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
-			       struct drm_file *file);
-int i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
-			    struct drm_file *file_priv);
-int i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
-			   struct drm_file *file_priv);
-int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv);
-int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv);
 int i915_gem_init_userptr(struct drm_i915_private *dev_priv);
 void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv);
-int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
-			   struct drm_file *file);
-int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
-				struct drm_file *file_priv);
-int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv);
 void i915_gem_sanitize(struct drm_i915_private *i915);
 int i915_gem_init_early(struct drm_i915_private *dev_priv);
 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2fcec1bfb038..78ca69dcfaa6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -39,6 +39,7 @@
 #include <linux/dma-buf.h>
 #include <linux/mman.h>
 
+#include "gem/i915_gem_ioctls.h"
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_mocs.h"
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 7ce25b54c57b..27391300507e 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -34,6 +34,7 @@
 #include <drm/drm_syncobj.h>
 #include <drm/i915_drm.h>
 
+#include "gem/i915_gem_ioctls.h"
 #include "gt/intel_gt_pm.h"
 
 #include "i915_drv.h"
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index a9b5329dae3b..7d5cc9ccf6dd 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -28,6 +28,9 @@
 #include <linux/string.h>
 #include <linux/bitops.h>
 #include <drm/i915_drm.h>
+
+#include "gem/i915_gem_ioctls.h"
+
 #include "i915_drv.h"
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 215bf3fef10c..b9c753f413da 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -22,16 +22,20 @@
  *
  */
 
-#include <drm/i915_drm.h>
-#include "i915_drv.h"
-#include "i915_trace.h"
-#include "intel_drv.h"
 #include <linux/mmu_context.h>
 #include <linux/mmu_notifier.h>
 #include <linux/mempolicy.h>
 #include <linux/swap.h>
 #include <linux/sched/mm.h>
 
+#include <drm/i915_drm.h>
+
+#include "gem/i915_gem_ioctls.h"
+
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+
 struct i915_mm_struct {
 	struct mm_struct *mm;
 	struct drm_i915_private *i915;
-- 
2.20.1

* [PATCH 26/45] drm/i915: Move object->pages API to i915_gem_object.[ch]
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (23 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 25/45] drm/i915: Pull GEM ioctls interface to its own file Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 27/45] drm/i915: Move shmem object setup to its own file Chris Wilson
                   ` (26 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

Currently the code for manipulating the pages of an object still
resides in i915_gem.c; move it to i915_gem_object.c.
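
A sketch of the usage pattern this API serves (error handling trimmed;
obj is any suitably sized drm_i915_gem_object):

	void *vaddr;

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	/* ... CPU access through the contiguous kernel mapping ... */

	i915_gem_object_flush_map(obj);
	i915_gem_object_unpin_map(obj);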

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   4 +-
 .../gpu/drm/i915/{ => gem}/i915_gem_object.c  |   5 +-
 .../gpu/drm/i915/{ => gem}/i915_gem_object.h  | 127 +++++++++++++++-
 drivers/gpu/drm/i915/i915_drv.h               | 137 +-----------------
 drivers/gpu/drm/i915/i915_globals.c           |   2 +-
 drivers/gpu/drm/i915/i915_vma.h               |   2 +-
 drivers/gpu/drm/i915/intel_frontbuffer.h      |   2 +-
 7 files changed, 141 insertions(+), 138 deletions(-)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_object.c (97%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_object.h (59%)

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index def781c9ea69..deac659ddef0 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -86,7 +86,10 @@ i915-y += $(gt-y)
 
 # GEM (Graphics Execution Management) code
 obj-y += gem/
+gem-y += \
+	gem/i915_gem_object.o
 i915-y += \
+	  $(gem-y) \
 	  i915_active.o \
 	  i915_cmd_parser.o \
 	  i915_gem_batch_pool.o \
@@ -99,7 +102,6 @@ i915-y += \
 	  i915_gem_gtt.o \
 	  i915_gem_internal.o \
 	  i915_gem.o \
-	  i915_gem_object.o \
 	  i915_gem_pm.o \
 	  i915_gem_render_state.o \
 	  i915_gem_shrinker.o \
diff --git a/drivers/gpu/drm/i915/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
similarity index 97%
rename from drivers/gpu/drm/i915/i915_gem_object.c
rename to drivers/gpu/drm/i915/gem/i915_gem_object.c
index ac6a5ab84586..8179252bb39b 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -22,9 +22,10 @@
  *
  */
 
-#include "i915_drv.h"
 #include "i915_gem_object.h"
-#include "i915_globals.h"
+
+#include "../i915_drv.h"
+#include "../i915_globals.h"
 
 static struct i915_global_object {
 	struct i915_global base;
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
similarity index 59%
rename from drivers/gpu/drm/i915/i915_gem_object.h
rename to drivers/gpu/drm/i915/gem/i915_gem_object.h
index 3666b0c5f6ca..061b20c3da5b 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -13,7 +13,7 @@
 
 #include <drm/i915_drm.h>
 
-#include "gem/i915_gem_object_types.h"
+#include "i915_gem_object_types.h"
 
 struct drm_i915_gem_object *i915_gem_object_alloc(void);
 void i915_gem_object_free(struct drm_i915_gem_object *obj);
@@ -192,6 +192,131 @@ i915_gem_object_get_tile_row_size(const struct drm_i915_gem_object *obj)
 int i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
 			       unsigned int tiling, unsigned int stride);
 
+struct scatterlist *
+i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+		       unsigned int n, unsigned int *offset);
+
+struct page *
+i915_gem_object_get_page(struct drm_i915_gem_object *obj,
+			 unsigned int n);
+
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+			       unsigned int n);
+
+dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
+				unsigned long n);
+
+void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+				 struct sg_table *pages,
+				 unsigned int sg_page_sizes);
+int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
+
+static inline int __must_check
+i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
+{
+	might_lock(&obj->mm.lock);
+
+	if (atomic_inc_not_zero(&obj->mm.pages_pin_count))
+		return 0;
+
+	return __i915_gem_object_get_pages(obj);
+}
+
+static inline bool
+i915_gem_object_has_pages(struct drm_i915_gem_object *obj)
+{
+	return !IS_ERR_OR_NULL(READ_ONCE(obj->mm.pages));
+}
+
+static inline void
+__i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
+{
+	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+
+	atomic_inc(&obj->mm.pages_pin_count);
+}
+
+static inline bool
+i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj)
+{
+	return atomic_read(&obj->mm.pages_pin_count);
+}
+
+static inline void
+__i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
+{
+	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+	atomic_dec(&obj->mm.pages_pin_count);
+}
+
+static inline void
+i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
+{
+	__i915_gem_object_unpin_pages(obj);
+}
+
+enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
+	I915_MM_NORMAL = 0,
+	I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
+};
+
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
+				enum i915_mm_subclass subclass);
+void __i915_gem_object_truncate(struct drm_i915_gem_object *obj);
+
+enum i915_map_type {
+	I915_MAP_WB = 0,
+	I915_MAP_WC,
+#define I915_MAP_OVERRIDE BIT(31)
+	I915_MAP_FORCE_WB = I915_MAP_WB | I915_MAP_OVERRIDE,
+	I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE,
+};
+
+/**
+ * i915_gem_object_pin_map - return a contiguous mapping of the entire object
+ * @obj: the object to map into kernel address space
+ * @type: the type of mapping, used to select pgprot_t
+ *
+ * Calls i915_gem_object_pin_pages() to prevent reaping of the object's
+ * pages and then returns a contiguous mapping of the backing storage into
+ * the kernel address space. Based on the @type of mapping, the PTE will be
+ * set to either WriteBack or WriteCombine (via pgprot_t).
+ *
+ * The caller is responsible for calling i915_gem_object_unpin_map() when the
+ * mapping is no longer required.
+ *
+ * Returns the pointer through which to access the mapped object, or an
+ * ERR_PTR() on error.
+ */
+void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
+					   enum i915_map_type type);
+
+void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
+				 unsigned long offset,
+				 unsigned long size);
+static inline void i915_gem_object_flush_map(struct drm_i915_gem_object *obj)
+{
+	__i915_gem_object_flush_map(obj, 0, obj->base.size);
+}
+
+/**
+ * i915_gem_object_unpin_map - releases an earlier mapping
+ * @obj: the object to unmap
+ *
+ * After pinning the object and mapping its pages, once you are finished
+ * with your access, call i915_gem_object_unpin_map() to release the pin
+ * upon the mapping. Once the pin count reaches zero, that mapping may be
+ * removed.
+ */
+static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
+{
+	i915_gem_object_unpin_pages(obj);
+}
+
 static inline struct intel_engine_cs *
 i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d8ad9571b087..86ca50ea9b5d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2897,137 +2897,6 @@ static inline int __sg_page_count(const struct scatterlist *sg)
 	return sg->length >> PAGE_SHIFT;
 }
 
-struct scatterlist *
-i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
-		       unsigned int n, unsigned int *offset);
-
-struct page *
-i915_gem_object_get_page(struct drm_i915_gem_object *obj,
-			 unsigned int n);
-
-struct page *
-i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
-			       unsigned int n);
-
-dma_addr_t
-i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
-				unsigned long n);
-
-void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
-				 struct sg_table *pages,
-				 unsigned int sg_page_sizes);
-int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
-
-static inline int __must_check
-i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
-{
-	might_lock(&obj->mm.lock);
-
-	if (atomic_inc_not_zero(&obj->mm.pages_pin_count))
-		return 0;
-
-	return __i915_gem_object_get_pages(obj);
-}
-
-static inline bool
-i915_gem_object_has_pages(struct drm_i915_gem_object *obj)
-{
-	return !IS_ERR_OR_NULL(READ_ONCE(obj->mm.pages));
-}
-
-static inline void
-__i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
-{
-	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
-
-	atomic_inc(&obj->mm.pages_pin_count);
-}
-
-static inline bool
-i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj)
-{
-	return atomic_read(&obj->mm.pages_pin_count);
-}
-
-static inline void
-__i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
-{
-	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
-	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
-
-	atomic_dec(&obj->mm.pages_pin_count);
-}
-
-static inline void
-i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
-{
-	__i915_gem_object_unpin_pages(obj);
-}
-
-enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
-	I915_MM_NORMAL = 0,
-	I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
-};
-
-int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
-				enum i915_mm_subclass subclass);
-void __i915_gem_object_truncate(struct drm_i915_gem_object *obj);
-
-enum i915_map_type {
-	I915_MAP_WB = 0,
-	I915_MAP_WC,
-#define I915_MAP_OVERRIDE BIT(31)
-	I915_MAP_FORCE_WB = I915_MAP_WB | I915_MAP_OVERRIDE,
-	I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE,
-};
-
-static inline enum i915_map_type
-i915_coherent_map_type(struct drm_i915_private *i915)
-{
-	return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
-}
-
-/**
- * i915_gem_object_pin_map - return a contiguous mapping of the entire object
- * @obj: the object to map into kernel address space
- * @type: the type of mapping, used to select pgprot_t
- *
- * Calls i915_gem_object_pin_pages() to prevent reaping of the object's
- * pages and then returns a contiguous mapping of the backing storage into
- * the kernel address space. Based on the @type of mapping, the PTE will be
- * set to either WriteBack or WriteCombine (via pgprot_t).
- *
- * The caller is responsible for calling i915_gem_object_unpin_map() when the
- * mapping is no longer required.
- *
- * Returns the pointer through which to access the mapped object, or an
- * ERR_PTR() on error.
- */
-void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
-					   enum i915_map_type type);
-
-void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
-				 unsigned long offset,
-				 unsigned long size);
-static inline void i915_gem_object_flush_map(struct drm_i915_gem_object *obj)
-{
-	__i915_gem_object_flush_map(obj, 0, obj->base.size);
-}
-
-/**
- * i915_gem_object_unpin_map - releases an earlier mapping
- * @obj: the object to unmap
- *
- * After pinning the object and mapping its pages, once you are finished
- * with your access, call i915_gem_object_unpin_map() to release the pin
- * upon the mapping. Once the pin count reaches zero, that mapping may be
- * removed.
- */
-static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
-{
-	i915_gem_object_unpin_pages(obj);
-}
-
 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 				    unsigned int *needs_clflush);
 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
@@ -3642,4 +3511,10 @@ static inline u32 i915_scratch_offset(const struct drm_i915_private *i915)
 	return i915_ggtt_offset(i915->gt.scratch);
 }
 
+static inline enum i915_map_type
+i915_coherent_map_type(struct drm_i915_private *i915)
+{
+	return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c
index 81e5c2ce336b..db52a58eadcc 100644
--- a/drivers/gpu/drm/i915/i915_globals.c
+++ b/drivers/gpu/drm/i915/i915_globals.c
@@ -9,7 +9,7 @@
 
 #include "i915_active.h"
 #include "i915_gem_context.h"
-#include "i915_gem_object.h"
+#include "gem/i915_gem_object.h"
 #include "i915_globals.h"
 #include "i915_request.h"
 #include "i915_scheduler.h"
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 6eab70953a57..7950fa96ee86 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -32,7 +32,7 @@
 
 #include "i915_gem_gtt.h"
 #include "i915_gem_fence_reg.h"
-#include "i915_gem_object.h"
+#include "gem/i915_gem_object.h"
 
 #include "i915_active.h"
 #include "i915_request.h"
diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.h b/drivers/gpu/drm/i915/intel_frontbuffer.h
index d5894666f658..5727320c8084 100644
--- a/drivers/gpu/drm/i915/intel_frontbuffer.h
+++ b/drivers/gpu/drm/i915/intel_frontbuffer.h
@@ -24,7 +24,7 @@
 #ifndef __INTEL_FRONTBUFFER_H__
 #define __INTEL_FRONTBUFFER_H__
 
-#include "i915_gem_object.h"
+#include "gem/i915_gem_object.h"
 
 struct drm_i915_private;
 struct drm_i915_gem_object;
-- 
2.20.1

* [PATCH 27/45] drm/i915: Move shmem object setup to its own file
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (24 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 26/45] drm/i915: Move object->pages API to i915_gem_object.[ch] Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 28/45] drm/i915: Move phys objects " Chris Wilson
                   ` (25 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

Split the plain old shmem object into its own file to start decluttering
i915_gem.c

v2: Lose the confusing, hysterical-raisins suffix of _gtt.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   3 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c    | 298 +++++++
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |  41 +-
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c     | 512 +++++++++++
 drivers/gpu/drm/i915/gt/intel_lrc.c           |   4 +-
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |   2 +-
 drivers/gpu/drm/i915/gvt/cmd_parser.c         |  21 +-
 drivers/gpu/drm/i915/i915_drv.h               |  10 -
 drivers/gpu/drm/i915/i915_gem.c               | 826 +-----------------
 drivers/gpu/drm/i915/i915_perf.c              |   2 +-
 drivers/gpu/drm/i915/intel_fbdev.c            |   2 +-
 drivers/gpu/drm/i915/intel_guc.c              |   2 +-
 drivers/gpu/drm/i915/intel_uc_fw.c            |   3 +-
 drivers/gpu/drm/i915/selftests/huge_pages.c   |   6 +-
 .../gpu/drm/i915/selftests/i915_gem_dmabuf.c  |   8 +-
 .../gpu/drm/i915/selftests/i915_gem_object.c  |   4 +-
 16 files changed, 883 insertions(+), 861 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_shmem.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index deac659ddef0..447143fadb05 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -87,7 +87,8 @@ i915-y += $(gt-y)
 # GEM (Graphics Execution Management) code
 obj-y += gem/
 gem-y += \
-	gem/i915_gem_object.o
+	gem/i915_gem_object.o \
+	gem/i915_gem_shmem.o
 i915-y += \
 	  $(gem-y) \
 	  i915_active.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 8179252bb39b..86e7e88817af 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -26,6 +26,7 @@
 
 #include "../i915_drv.h"
 #include "../i915_globals.h"
+#include "../intel_frontbuffer.h"
 
 static struct i915_global_object {
 	struct i915_global base;
@@ -42,6 +43,64 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj)
 	return kmem_cache_free(global.slab_objects, obj);
 }
 
+/* some bookkeeping */
+static void i915_gem_info_add_obj(struct drm_i915_private *i915,
+				  u64 size)
+{
+	spin_lock(&i915->mm.object_stat_lock);
+	i915->mm.object_count++;
+	i915->mm.object_memory += size;
+	spin_unlock(&i915->mm.object_stat_lock);
+}
+
+static void i915_gem_info_remove_obj(struct drm_i915_private *i915,
+				     u64 size)
+{
+	spin_lock(&i915->mm.object_stat_lock);
+	i915->mm.object_count--;
+	i915->mm.object_memory -= size;
+	spin_unlock(&i915->mm.object_stat_lock);
+}
+
+static void
+frontbuffer_retire(struct i915_active_request *active,
+		   struct i915_request *request)
+{
+	struct drm_i915_gem_object *obj =
+		container_of(active, typeof(*obj), frontbuffer_write);
+
+	intel_fb_obj_flush(obj, ORIGIN_CS);
+}
+
+void i915_gem_object_init(struct drm_i915_gem_object *obj,
+			  const struct drm_i915_gem_object_ops *ops)
+{
+	mutex_init(&obj->mm.lock);
+
+	spin_lock_init(&obj->vma.lock);
+	INIT_LIST_HEAD(&obj->vma.list);
+
+	INIT_LIST_HEAD(&obj->lut_list);
+	INIT_LIST_HEAD(&obj->batch_pool_link);
+
+	init_rcu_head(&obj->rcu);
+
+	obj->ops = ops;
+
+	reservation_object_init(&obj->__builtin_resv);
+	obj->resv = &obj->__builtin_resv;
+
+	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
+	i915_active_request_init(&obj->frontbuffer_write,
+				 NULL, frontbuffer_retire);
+
+	obj->mm.madv = I915_MADV_WILLNEED;
+	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
+	mutex_init(&obj->mm.get_page.lock);
+
+	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
+}
+
 /**
  * Mark up the object's coherency levels for a given cache_level
  * @obj: #drm_i915_gem_object
@@ -64,6 +123,245 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 		!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
 }
 
+void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
+{
+	struct drm_i915_private *i915 = to_i915(gem->dev);
+	struct drm_i915_gem_object *obj = to_intel_bo(gem);
+	struct drm_i915_file_private *fpriv = file->driver_priv;
+	struct i915_lut_handle *lut, *ln;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
+		struct i915_gem_context *ctx = lut->ctx;
+		struct i915_vma *vma;
+
+		GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
+		if (ctx->file_priv != fpriv)
+			continue;
+
+		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
+		GEM_BUG_ON(vma->obj != obj);
+
+		/* We allow the process to have multiple handles to the same
+		 * vma, in the same fd namespace, by virtue of flink/open.
+		 */
+		GEM_BUG_ON(!vma->open_count);
+		if (!--vma->open_count && !i915_vma_is_ggtt(vma))
+			i915_vma_close(vma);
+
+		list_del(&lut->obj_link);
+		list_del(&lut->ctx_link);
+
+		i915_lut_handle_free(lut);
+		__i915_gem_object_release_unless_active(obj);
+	}
+
+	mutex_unlock(&i915->drm.struct_mutex);
+}
+
+static bool discard_backing_storage(struct drm_i915_gem_object *obj)
+{
+	/* If we are the last user of the backing storage (be it shmemfs
+	 * pages or stolen etc), we know that the pages are going to be
+	 * immediately released. In this case, we can then skip copying
+	 * back the contents from the GPU.
+	 */
+
+	if (obj->mm.madv != I915_MADV_WILLNEED)
+		return false;
+
+	if (!obj->base.filp)
+		return true;
+
+	/* At first glance, this looks racy, but then again so would be
+	 * userspace racing mmap against close. However, the first external
+	 * reference to the filp can only be obtained through the
+	 * i915_gem_mmap_ioctl() which safeguards us against the user
+	 * acquiring such a reference whilst we are in the middle of
+	 * freeing the object.
+	 */
+	return atomic_long_read(&obj->base.filp->f_count) == 1;
+}
+
+static void __i915_gem_free_objects(struct drm_i915_private *i915,
+				    struct llist_node *freed)
+{
+	struct drm_i915_gem_object *obj, *on;
+	intel_wakeref_t wakeref;
+
+	wakeref = intel_runtime_pm_get(i915);
+	llist_for_each_entry_safe(obj, on, freed, freed) {
+		struct i915_vma *vma, *vn;
+
+		trace_i915_gem_object_destroy(obj);
+
+		mutex_lock(&i915->drm.struct_mutex);
+
+		GEM_BUG_ON(i915_gem_object_is_active(obj));
+		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
+			GEM_BUG_ON(i915_vma_is_active(vma));
+			vma->flags &= ~I915_VMA_PIN_MASK;
+			i915_vma_destroy(vma);
+		}
+		GEM_BUG_ON(!list_empty(&obj->vma.list));
+		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
+
+		/* This serializes freeing with the shrinker. Since the free
+		 * is delayed, first by RCU then by the workqueue, we want the
+		 * shrinker to be able to free pages of unreferenced objects,
+		 * or else we may oom whilst there are plenty of deferred
+		 * freed objects.
+		 */
+		if (i915_gem_object_has_pages(obj)) {
+			spin_lock(&i915->mm.obj_lock);
+			list_del_init(&obj->mm.link);
+			spin_unlock(&i915->mm.obj_lock);
+		}
+
+		mutex_unlock(&i915->drm.struct_mutex);
+
+		GEM_BUG_ON(obj->bind_count);
+		GEM_BUG_ON(obj->userfault_count);
+		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
+		GEM_BUG_ON(!list_empty(&obj->lut_list));
+
+		if (obj->ops->release)
+			obj->ops->release(obj);
+
+		if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
+			atomic_set(&obj->mm.pages_pin_count, 0);
+		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+		GEM_BUG_ON(i915_gem_object_has_pages(obj));
+
+		if (obj->base.import_attach)
+			drm_prime_gem_destroy(&obj->base, NULL);
+
+		reservation_object_fini(&obj->__builtin_resv);
+		drm_gem_object_release(&obj->base);
+		i915_gem_info_remove_obj(i915, obj->base.size);
+
+		bitmap_free(obj->bit_17);
+		i915_gem_object_free(obj);
+
+		GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
+		atomic_dec(&i915->mm.free_count);
+
+		if (on)
+			cond_resched();
+	}
+	intel_runtime_pm_put(i915, wakeref);
+}
+
+void i915_gem_flush_free_objects(struct drm_i915_private *i915)
+{
+	struct llist_node *freed;
+
+	/* Free the oldest, most stale object to keep the free_list short */
+	freed = NULL;
+	if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
+		/* Only one consumer of llist_del_first() allowed */
+		spin_lock(&i915->mm.free_lock);
+		freed = llist_del_first(&i915->mm.free_list);
+		spin_unlock(&i915->mm.free_lock);
+	}
+	if (unlikely(freed)) {
+		freed->next = NULL;
+		__i915_gem_free_objects(i915, freed);
+	}
+}
+
+static void __i915_gem_free_work(struct work_struct *work)
+{
+	struct drm_i915_private *i915 =
+		container_of(work, struct drm_i915_private, mm.free_work);
+	struct llist_node *freed;
+
+	/*
+	 * All file-owned VMA should have been released by this point through
+	 * i915_gem_close_object(), or earlier by i915_gem_context_close().
+	 * However, the object may also be bound into the global GTT (e.g.
+	 * older GPUs without per-process support, or for direct access through
+	 * the GTT either for the user or for scanout). Those VMA still need to
+	 * be unbound now.
+	 */
+
+	spin_lock(&i915->mm.free_lock);
+	while ((freed = llist_del_all(&i915->mm.free_list))) {
+		spin_unlock(&i915->mm.free_lock);
+
+		__i915_gem_free_objects(i915, freed);
+		if (need_resched())
+			return;
+
+		spin_lock(&i915->mm.free_lock);
+	}
+	spin_unlock(&i915->mm.free_lock);
+}
+
+static void __i915_gem_free_object_rcu(struct rcu_head *head)
+{
+	struct drm_i915_gem_object *obj =
+		container_of(head, typeof(*obj), rcu);
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+	/*
+	 * We reuse obj->rcu for the freed list, so we had better not treat
+	 * it like a rcu_head from this point forwards. And we expect all
+	 * objects to be freed via this path.
+	 */
+	destroy_rcu_head(&obj->rcu);
+
+	/*
+	 * Since we require blocking on struct_mutex to unbind the freed
+	 * object from the GPU before releasing resources back to the
+	 * system, we can not do that directly from the RCU callback (which may
+	 * be a softirq context), but must instead then defer that work onto a
+	 * kthread. We use the RCU callback rather than move the freed object
+	 * directly onto the work queue so that we can mix between using the
+	 * worker and performing frees directly from subsequent allocations for
+	 * crude but effective memory throttling.
+	 */
+	if (llist_add(&obj->freed, &i915->mm.free_list))
+		queue_work(i915->wq, &i915->mm.free_work);
+}
+
+void i915_gem_free_object(struct drm_gem_object *gem_obj)
+{
+	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
+
+	if (obj->mm.quirked)
+		__i915_gem_object_unpin_pages(obj);
+
+	if (discard_backing_storage(obj))
+		obj->mm.madv = I915_MADV_DONTNEED;
+
+	/*
+	 * Before we free the object, make sure any pure RCU-only
+	 * read-side critical sections are complete, e.g.
+	 * i915_gem_busy_ioctl(). For the corresponding synchronized
+	 * lookup see i915_gem_object_lookup_rcu().
+	 */
+	atomic_inc(&to_i915(obj->base.dev)->mm.free_count);
+	call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
+}
+
+void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
+{
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	if (!i915_gem_object_has_active_reference(obj) &&
+	    i915_gem_object_is_active(obj))
+		i915_gem_object_set_active_reference(obj);
+	else
+		i915_gem_object_put(obj);
+}
+
+void i915_gem_init__objects(struct drm_i915_private *i915)
+{
+	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
+}
+
 static void i915_global_objects_shrink(void)
 {
 	kmem_cache_shrink(global.slab_objects);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 061b20c3da5b..ad82303f741a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -15,9 +15,29 @@
 
 #include "i915_gem_object_types.h"
 
+void i915_gem_init__objects(struct drm_i915_private *i915);
+
 struct drm_i915_gem_object *i915_gem_object_alloc(void);
 void i915_gem_object_free(struct drm_i915_gem_object *obj);
 
+void i915_gem_object_init(struct drm_i915_gem_object *obj,
+			  const struct drm_i915_gem_object_ops *ops);
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size);
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
+				       const void *data, size_t size);
+
+extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops;
+void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
+				     struct sg_table *pages,
+				     bool needs_clflush);
+
+void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
+void i915_gem_free_object(struct drm_gem_object *obj);
+
+void i915_gem_flush_free_objects(struct drm_i915_private *i915);
+
 /**
  * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
  * @filp: DRM file private date
@@ -338,8 +358,23 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 					 unsigned int cache_level);
 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
 
-void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
-				     struct sg_table *pages,
-				     bool needs_clflush);
+static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+	if (obj->cache_dirty)
+		return false;
+
+	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
+		return true;
+
+	return obj->pin_global; /* currently in use by HW, keep flushed */
+}
+
+static inline void __start_cpu_write(struct drm_i915_gem_object *obj)
+{
+	obj->read_domains = I915_GEM_DOMAIN_CPU;
+	obj->write_domain = I915_GEM_DOMAIN_CPU;
+	if (cpu_write_needs_clflush(obj))
+		obj->cache_dirty = true;
+}
 
 #endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
new file mode 100644
index 000000000000..9d885b5e6643
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -0,0 +1,512 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/pagevec.h>
+#include <linux/swap.h>
+
+#include "i915_gem_object.h"
+
+#include "../i915_drv.h"
+
+/*
+ * Move pages to appropriate lru and release the pagevec, decrementing the
+ * ref count of those pages.
+ */
+static void check_release_pagevec(struct pagevec *pvec)
+{
+	check_move_unevictable_pages(pvec);
+	__pagevec_release(pvec);
+	cond_resched();
+}
+
+static int shmem_get_pages(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	const unsigned long page_count = obj->base.size / PAGE_SIZE;
+	unsigned long i;
+	struct address_space *mapping;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	struct sgt_iter sgt_iter;
+	struct page *page;
+	unsigned long last_pfn = 0;	/* suppress gcc warning */
+	unsigned int max_segment = i915_sg_segment_size();
+	unsigned int sg_page_sizes;
+	struct pagevec pvec;
+	gfp_t noreclaim;
+	int ret;
+
+	/*
+	 * Assert that the object is not currently in any GPU domain. As it
+	 * wasn't in the GTT, there shouldn't be any way it could have been in
+	 * a GPU cache
+	 */
+	GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
+	GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
+
+	/*
+	 * If there's no chance of allocating enough pages for the whole
+	 * object, bail early.
+	 */
+	if (page_count > totalram_pages())
+		return -ENOMEM;
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return -ENOMEM;
+
+rebuild_st:
+	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
+		kfree(st);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Get the list of pages out of our struct file.  They'll be pinned
+	 * at this point until we release them.
+	 *
+	 * Fail silently without starting the shrinker
+	 */
+	mapping = obj->base.filp->f_mapping;
+	mapping_set_unevictable(mapping);
+	noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
+	noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
+
+	sg = st->sgl;
+	st->nents = 0;
+	sg_page_sizes = 0;
+	for (i = 0; i < page_count; i++) {
+		const unsigned int shrink[] = {
+			(I915_SHRINK_BOUND |
+			 I915_SHRINK_UNBOUND |
+			 I915_SHRINK_PURGEABLE),
+			0,
+		}, *s = shrink;
+		gfp_t gfp = noreclaim;
+
+		do {
+			cond_resched();
+			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
+			if (!IS_ERR(page))
+				break;
+
+			if (!*s) {
+				ret = PTR_ERR(page);
+				goto err_sg;
+			}
+
+			i915_gem_shrink(i915, 2 * page_count, NULL, *s++);
+
+			/*
+			 * We've tried hard to allocate the memory by reaping
+			 * our own buffer, now let the real VM do its job and
+			 * go down in flames if truly OOM.
+			 *
+			 * However, since graphics tend to be disposable,
+			 * defer the oom here by reporting the ENOMEM back
+			 * to userspace.
+			 */
+			if (!*s) {
+				/* reclaim and warn, but no oom */
+				gfp = mapping_gfp_mask(mapping);
+
+				/*
+				 * Our bo are always dirty and so we require
+				 * kswapd to reclaim our pages (direct reclaim
+				 * does not effectively begin pageout of our
+				 * buffers on its own). However, direct reclaim
+				 * only waits for kswapd when under allocation
+				 * congestion. So as a result __GFP_RECLAIM is
+				 * unreliable and fails to actually reclaim our
+				 * dirty pages -- unless you try over and over
+				 * again with !__GFP_NORETRY. However, we still
+				 * want to fail this allocation rather than
+				 * trigger the out-of-memory killer and for
+				 * this we want __GFP_RETRY_MAYFAIL.
+				 */
+				gfp |= __GFP_RETRY_MAYFAIL;
+			}
+		} while (1);
+
+		if (!i ||
+		    sg->length >= max_segment ||
+		    page_to_pfn(page) != last_pfn + 1) {
+			if (i) {
+				sg_page_sizes |= sg->length;
+				sg = sg_next(sg);
+			}
+			st->nents++;
+			sg_set_page(sg, page, PAGE_SIZE, 0);
+		} else {
+			sg->length += PAGE_SIZE;
+		}
+		last_pfn = page_to_pfn(page);
+
+		/* Check that the i965g/gm workaround works. */
+		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
+	}
+	if (sg) { /* loop terminated early; short sg table */
+		sg_page_sizes |= sg->length;
+		sg_mark_end(sg);
+	}
+
+	/* Trim unused sg entries to avoid wasting memory. */
+	i915_sg_trim(st);
+
+	ret = i915_gem_gtt_prepare_pages(obj, st);
+	if (ret) {
+		/*
+		 * DMA remapping failed? One possible cause is that
+		 * it could not reserve enough large entries, asking
+		 * for PAGE_SIZE chunks instead may be helpful.
+		 */
+		if (max_segment > PAGE_SIZE) {
+			for_each_sgt_page(page, sgt_iter, st)
+				put_page(page);
+			sg_free_table(st);
+
+			max_segment = PAGE_SIZE;
+			goto rebuild_st;
+		} else {
+			dev_warn(&i915->drm.pdev->dev,
+				 "Failed to DMA remap %lu pages\n",
+				 page_count);
+			goto err_pages;
+		}
+	}
+
+	if (i915_gem_object_needs_bit17_swizzle(obj))
+		i915_gem_object_do_bit_17_swizzle(obj, st);
+
+	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
+
+	return 0;
+
+err_sg:
+	sg_mark_end(sg);
+err_pages:
+	mapping_clear_unevictable(mapping);
+	pagevec_init(&pvec);
+	for_each_sgt_page(page, sgt_iter, st) {
+		if (!pagevec_add(&pvec, page))
+			check_release_pagevec(&pvec);
+	}
+	if (pagevec_count(&pvec))
+		check_release_pagevec(&pvec);
+	sg_free_table(st);
+	kfree(st);
+
+	/*
+	 * shmemfs first checks if there is enough memory to allocate the page
+	 * and reports ENOSPC should there be insufficient memory, along with
+	 * the usual ENOMEM for a genuine allocation failure.
+	 *
+	 * We use ENOSPC in our driver to mean that we have run out of aperture
+	 * space and so want to translate the error from shmemfs back to our
+	 * usual understanding of ENOMEM.
+	 */
+	if (ret == -ENOSPC)
+		ret = -ENOMEM;
+
+	return ret;
+}
+
+void
+__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
+				struct sg_table *pages,
+				bool needs_clflush)
+{
+	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
+
+	if (obj->mm.madv == I915_MADV_DONTNEED)
+		obj->mm.dirty = false;
+
+	if (needs_clflush &&
+	    (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
+	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
+		drm_clflush_sg(pages);
+
+	__start_cpu_write(obj);
+}
+
+static void
+shmem_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages)
+{
+	struct sgt_iter sgt_iter;
+	struct pagevec pvec;
+	struct page *page;
+
+	__i915_gem_object_release_shmem(obj, pages, true);
+
+	i915_gem_gtt_finish_pages(obj, pages);
+
+	if (i915_gem_object_needs_bit17_swizzle(obj))
+		i915_gem_object_save_bit_17_swizzle(obj, pages);
+
+	mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
+
+	pagevec_init(&pvec);
+	for_each_sgt_page(page, sgt_iter, pages) {
+		if (obj->mm.dirty)
+			set_page_dirty(page);
+
+		if (obj->mm.madv == I915_MADV_WILLNEED)
+			mark_page_accessed(page);
+
+		if (!pagevec_add(&pvec, page))
+			check_release_pagevec(&pvec);
+	}
+	if (pagevec_count(&pvec))
+		check_release_pagevec(&pvec);
+	obj->mm.dirty = false;
+
+	sg_free_table(pages);
+	kfree(pages);
+}
+
+static int
+shmem_pwrite(struct drm_i915_gem_object *obj,
+	     const struct drm_i915_gem_pwrite *arg)
+{
+	struct address_space *mapping = obj->base.filp->f_mapping;
+	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
+	u64 remain, offset;
+	unsigned int pg;
+
+	/* Caller already validated user args */
+	GEM_BUG_ON(!access_ok(user_data, arg->size));
+
+	/*
+	 * Before we instantiate/pin the backing store for our use, we
+	 * can prepopulate the shmemfs filp efficiently using a write into
+	 * the pagecache. We avoid the penalty of instantiating all the
+	 * pages, important if the user is just writing to a few and never
+	 * uses the object on the GPU, and using a direct write into shmemfs
+	 * allows it to avoid the cost of retrieving a page (either swapin
+	 * or clearing-before-use) before it is overwritten.
+	 */
+	if (i915_gem_object_has_pages(obj))
+		return -ENODEV;
+
+	if (obj->mm.madv != I915_MADV_WILLNEED)
+		return -EFAULT;
+
+	/*
+	 * Before the pages are instantiated the object is treated as being
+	 * in the CPU domain. The pages will be clflushed as required before
+	 * use, and we can freely write into the pages directly. If userspace
+	 * races pwrite with any other operation; corruption will ensue -
+	 * that is userspace's prerogative!
+	 */
+
+	remain = arg->size;
+	offset = arg->offset;
+	pg = offset_in_page(offset);
+
+	do {
+		unsigned int len, unwritten;
+		struct page *page;
+		void *data, *vaddr;
+		int err;
+		char c;
+
+		len = PAGE_SIZE - pg;
+		if (len > remain)
+			len = remain;
+
+		/* Prefault the user page to reduce potential recursion */
+		err = __get_user(c, user_data);
+		if (err)
+			return err;
+
+		err = __get_user(c, user_data + len - 1);
+		if (err)
+			return err;
+
+		err = pagecache_write_begin(obj->base.filp, mapping,
+					    offset, len, 0,
+					    &page, &data);
+		if (err < 0)
+			return err;
+
+		vaddr = kmap_atomic(page);
+		unwritten = __copy_from_user_inatomic(vaddr + pg,
+						      user_data,
+						      len);
+		kunmap_atomic(vaddr);
+
+		err = pagecache_write_end(obj->base.filp, mapping,
+					  offset, len, len - unwritten,
+					  page, data);
+		if (err < 0)
+			return err;
+
+		/* We don't handle -EFAULT, leave it to the caller to check */
+		if (unwritten)
+			return -ENODEV;
+
+		remain -= len;
+		user_data += len;
+		offset += len;
+		pg = 0;
+	} while (remain);
+
+	return 0;
+}
+
+const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
+	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
+		 I915_GEM_OBJECT_IS_SHRINKABLE,
+
+	.get_pages = shmem_get_pages,
+	.put_pages = shmem_put_pages,
+
+	.pwrite = shmem_pwrite,
+};
+
+static int create_shmem(struct drm_i915_private *i915,
+			struct drm_gem_object *obj,
+			size_t size)
+{
+	unsigned long flags = VM_NORESERVE;
+	struct file *filp;
+
+	drm_gem_private_object_init(&i915->drm, obj, size);
+
+	if (i915->mm.gemfs)
+		filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
+						 flags);
+	else
+		filp = shmem_file_setup("i915", size, flags);
+	if (IS_ERR(filp))
+		return PTR_ERR(filp);
+
+	obj->filp = filp;
+	return 0;
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size)
+{
+	struct drm_i915_gem_object *obj;
+	struct address_space *mapping;
+	unsigned int cache_level;
+	gfp_t mask;
+	int ret;
+
+	/* There is a prevalence of the assumption that we fit the object's
+	 * page count inside a 32bit _signed_ variable. Let's document this and
+	 * catch if we ever need to fix it. In the meantime, if you do spot
+	 * such a local variable, please consider fixing!
+	 */
+	if (size >> PAGE_SHIFT > INT_MAX)
+		return ERR_PTR(-E2BIG);
+
+	if (overflows_type(size, obj->base.size))
+		return ERR_PTR(-E2BIG);
+
+	obj = i915_gem_object_alloc();
+	if (!obj)
+		return ERR_PTR(-ENOMEM);
+
+	ret = create_shmem(i915, &obj->base, size);
+	if (ret)
+		goto fail;
+
+	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
+	if (IS_I965GM(i915) || IS_I965G(i915)) {
+		/* 965gm cannot relocate objects above 4GiB. */
+		mask &= ~__GFP_HIGHMEM;
+		mask |= __GFP_DMA32;
+	}
+
+	mapping = obj->base.filp->f_mapping;
+	mapping_set_gfp_mask(mapping, mask);
+	GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
+
+	i915_gem_object_init(obj, &i915_gem_shmem_ops);
+
+	obj->write_domain = I915_GEM_DOMAIN_CPU;
+	obj->read_domains = I915_GEM_DOMAIN_CPU;
+
+	if (HAS_LLC(i915))
+		/* On some devices, we can have the GPU use the LLC (the CPU
+		 * cache) for about a 10% performance improvement
+		 * compared to uncached.  Graphics requests other than
+		 * display scanout are coherent with the CPU in
+		 * accessing this cache.  This means in this mode we
+		 * don't need to clflush on the CPU side, and on the
+		 * GPU side we only need to flush internal caches to
+		 * get data visible to the CPU.
+		 *
+		 * However, we maintain the display planes as UC, and so
+		 * need to rebind when first used as such.
+		 */
+		cache_level = I915_CACHE_LLC;
+	else
+		cache_level = I915_CACHE_NONE;
+
+	i915_gem_object_set_cache_coherency(obj, cache_level);
+
+	trace_i915_gem_object_create(obj);
+
+	return obj;
+
+fail:
+	i915_gem_object_free(obj);
+	return ERR_PTR(ret);
+}
+
+/* Allocate a new GEM object and fill it with the supplied data */
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
+				       const void *data, size_t size)
+{
+	struct drm_i915_gem_object *obj;
+	struct file *file;
+	size_t offset;
+	int err;
+
+	obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE));
+	if (IS_ERR(obj))
+		return obj;
+
+	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
+
+	file = obj->base.filp;
+	offset = 0;
+	do {
+		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
+		struct page *page;
+		void *pgdata, *vaddr;
+
+		err = pagecache_write_begin(file, file->f_mapping,
+					    offset, len, 0,
+					    &page, &pgdata);
+		if (err < 0)
+			goto fail;
+
+		vaddr = kmap(page);
+		memcpy(vaddr, data, len);
+		kunmap(page);
+
+		err = pagecache_write_end(file, file->f_mapping,
+					  offset, len, len,
+					  page, pgdata);
+		if (err < 0)
+			goto fail;
+
+		size -= len;
+		data += len;
+		offset += len;
+	} while (size);
+
+	return obj;
+
+fail:
+	i915_gem_object_put(obj);
+	return ERR_PTR(err);
+}
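
The exported helpers above are drop-in replacements for the old
i915_gem_object_create() and i915_gem_object_create_from_data(); the hunks
below simply rename the call sites. A minimal sketch of the expected call
pattern, mirroring those converted callers (the i915 pointer and size used
here are placeholders, not taken from the patch):

	struct drm_i915_gem_object *obj;

	/* Allocate a shmemfs-backed object; callers check with IS_ERR() */
	obj = i915_gem_object_create_shmem(i915, size);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	/* ... bind/map/use the object ... */

	i915_gem_object_put(obj);	/* drop the reference when finished */
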
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index d793d976402d..a833b6c14516 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1938,7 +1938,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
 	struct i915_vma *vma;
 	int err;
 
-	obj = i915_gem_object_create(engine->i915, CTX_WA_BB_OBJ_SIZE);
+	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -3035,7 +3035,7 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
 	 */
 	context_size += LRC_HEADER_PAGES * PAGE_SIZE;
 
-	ctx_obj = i915_gem_object_create(engine->i915, context_size);
+	ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
 	if (IS_ERR(ctx_obj))
 		return PTR_ERR(ctx_obj);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 2edeedb748ed..0b503b82db0f 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1410,7 +1410,7 @@ alloc_context_vma(struct intel_engine_cs *engine)
 	struct i915_vma *vma;
 	int err;
 
-	obj = i915_gem_object_create(i915, engine->context_size);
+	obj = i915_gem_object_create_shmem(i915, engine->context_size);
 	if (IS_ERR(obj))
 		return ERR_CAST(obj);
 
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index ab002cfd3cab..fe66285f0236 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -1721,7 +1721,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 	int ret = 0;
 	struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ?
 		s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm;
-	unsigned long gma_start_offset = 0;
+	unsigned long start_offset = 0;
 
 	/* get the start gm address of the batch buffer */
 	gma = get_gma_bb_from_cmd(s, 1);
@@ -1738,7 +1738,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 
 	bb->ppgtt = (s->buf_addr_type == GTT_BUFFER) ? false : true;
 
-	/* the gma_start_offset stores the batch buffer's start gma's
+	/* the start_offset stores the batch buffer's start gma's
 	 * offset relative to page boundary. so for non-privileged batch
 	 * buffer, the shadowed gem object holds exactly the same page
 	 * layout as original gem object. This is for the convenience of
@@ -1750,10 +1750,11 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 	 * that of shadowed page.
 	 */
 	if (bb->ppgtt)
-		gma_start_offset = gma & ~I915_GTT_PAGE_MASK;
+		start_offset = gma & ~I915_GTT_PAGE_MASK;
 
-	bb->obj = i915_gem_object_create(s->vgpu->gvt->dev_priv,
-			 roundup(bb_size + gma_start_offset, PAGE_SIZE));
+	bb->obj = i915_gem_object_create_shmem(s->vgpu->gvt->dev_priv,
+					       round_up(bb_size + start_offset,
+							PAGE_SIZE));
 	if (IS_ERR(bb->obj)) {
 		ret = PTR_ERR(bb->obj);
 		goto err_free_bb;
@@ -1776,7 +1777,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 
 	ret = copy_gma_to_hva(s->vgpu, mm,
 			      gma, gma + bb_size,
-			      bb->va + gma_start_offset);
+			      bb->va + start_offset);
 	if (ret < 0) {
 		gvt_vgpu_err("fail to copy guest ring buffer\n");
 		ret = -EFAULT;
@@ -1802,7 +1803,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 	 * buffer's gma in pair. After all, we don't want to pin the shadow
 	 * buffer here (too early).
 	 */
-	s->ip_va = bb->va + gma_start_offset;
+	s->ip_va = bb->va + start_offset;
 	s->ip_gma = gma;
 	return 0;
 err_unmap:
@@ -2825,9 +2826,9 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 	int ret = 0;
 	void *map;
 
-	obj = i915_gem_object_create(workload->vgpu->gvt->dev_priv,
-				     roundup(ctx_size + CACHELINE_BYTES,
-					     PAGE_SIZE));
+	obj = i915_gem_object_create_shmem(workload->vgpu->gvt->dev_priv,
+					   roundup(ctx_size + CACHELINE_BYTES,
+						   PAGE_SIZE));
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 86ca50ea9b5d..007334e892a3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2833,16 +2833,6 @@ void i915_gem_load_init_fences(struct drm_i915_private *dev_priv);
 int i915_gem_freeze(struct drm_i915_private *dev_priv);
 int i915_gem_freeze_late(struct drm_i915_private *dev_priv);
 
-void i915_gem_object_init(struct drm_i915_gem_object *obj,
-			 const struct drm_i915_gem_object_ops *ops);
-struct drm_i915_gem_object *
-i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size);
-struct drm_i915_gem_object *
-i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
-				 const void *data, size_t size);
-void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
-void i915_gem_free_object(struct drm_gem_object *obj);
-
 static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915)
 {
 	if (!atomic_read(&i915->mm.free_count))
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 78ca69dcfaa6..4c5fba0c7bf6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -57,19 +57,6 @@
 #include "intel_frontbuffer.h"
 #include "intel_pm.h"
 
-static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
-
-static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
-{
-	if (obj->cache_dirty)
-		return false;
-
-	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
-		return true;
-
-	return obj->pin_global; /* currently in use by HW, keep flushed */
-}
-
 static int
 insert_mappable_node(struct i915_ggtt *ggtt,
                      struct drm_mm_node *node, u32 size)
@@ -87,25 +74,6 @@ remove_mappable_node(struct drm_mm_node *node)
 	drm_mm_remove_node(node);
 }
 
-/* some bookkeeping */
-static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
-				  u64 size)
-{
-	spin_lock(&dev_priv->mm.object_stat_lock);
-	dev_priv->mm.object_count++;
-	dev_priv->mm.object_memory += size;
-	spin_unlock(&dev_priv->mm.object_stat_lock);
-}
-
-static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
-				     u64 size)
-{
-	spin_lock(&dev_priv->mm.object_stat_lock);
-	dev_priv->mm.object_count--;
-	dev_priv->mm.object_memory -= size;
-	spin_unlock(&dev_priv->mm.object_stat_lock);
-}
-
 int
 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file)
@@ -206,32 +174,6 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 	return err;
 }
 
-static void __start_cpu_write(struct drm_i915_gem_object *obj)
-{
-	obj->read_domains = I915_GEM_DOMAIN_CPU;
-	obj->write_domain = I915_GEM_DOMAIN_CPU;
-	if (cpu_write_needs_clflush(obj))
-		obj->cache_dirty = true;
-}
-
-void
-__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
-				struct sg_table *pages,
-				bool needs_clflush)
-{
-	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
-
-	if (obj->mm.madv == I915_MADV_DONTNEED)
-		obj->mm.dirty = false;
-
-	if (needs_clflush &&
-	    (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
-	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
-		drm_clflush_sg(pages);
-
-	__start_cpu_write(obj);
-}
-
 static void
 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
 			       struct sg_table *pages)
@@ -283,8 +225,6 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
 	.release = i915_gem_object_release_phys,
 };
 
-static const struct drm_i915_gem_object_ops i915_gem_object_ops;
-
 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
@@ -541,7 +481,7 @@ i915_gem_create(struct drm_file *file,
 		return -EINVAL;
 
 	/* Allocate the new object */
-	obj = i915_gem_object_create(dev_priv, size);
+	obj = i915_gem_object_create_shmem(dev_priv, size);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -2065,52 +2005,6 @@ void __i915_gem_object_truncate(struct drm_i915_gem_object *obj)
 	obj->mm.pages = ERR_PTR(-EFAULT);
 }
 
-/*
- * Move pages to appropriate lru and release the pagevec, decrementing the
- * ref count of those pages.
- */
-static void check_release_pagevec(struct pagevec *pvec)
-{
-	check_move_unevictable_pages(pvec);
-	__pagevec_release(pvec);
-	cond_resched();
-}
-
-static void
-i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
-			      struct sg_table *pages)
-{
-	struct sgt_iter sgt_iter;
-	struct pagevec pvec;
-	struct page *page;
-
-	__i915_gem_object_release_shmem(obj, pages, true);
-	i915_gem_gtt_finish_pages(obj, pages);
-
-	if (i915_gem_object_needs_bit17_swizzle(obj))
-		i915_gem_object_save_bit_17_swizzle(obj, pages);
-
-	mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
-
-	pagevec_init(&pvec);
-	for_each_sgt_page(page, sgt_iter, pages) {
-		if (obj->mm.dirty)
-			set_page_dirty(page);
-
-		if (obj->mm.madv == I915_MADV_WILLNEED)
-			mark_page_accessed(page);
-
-		if (!pagevec_add(&pvec, page))
-			check_release_pagevec(&pvec);
-	}
-	if (pagevec_count(&pvec))
-		check_release_pagevec(&pvec);
-	obj->mm.dirty = false;
-
-	sg_free_table(pages);
-	kfree(pages);
-}
-
 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
 {
 	struct radix_tree_iter iter;
@@ -2226,196 +2120,6 @@ bool i915_sg_trim(struct sg_table *orig_st)
 	return true;
 }
 
-static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	const unsigned long page_count = obj->base.size / PAGE_SIZE;
-	unsigned long i;
-	struct address_space *mapping;
-	struct sg_table *st;
-	struct scatterlist *sg;
-	struct sgt_iter sgt_iter;
-	struct page *page;
-	unsigned long last_pfn = 0;	/* suppress gcc warning */
-	unsigned int max_segment = i915_sg_segment_size();
-	unsigned int sg_page_sizes;
-	struct pagevec pvec;
-	gfp_t noreclaim;
-	int ret;
-
-	/*
-	 * Assert that the object is not currently in any GPU domain. As it
-	 * wasn't in the GTT, there shouldn't be any way it could have been in
-	 * a GPU cache
-	 */
-	GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
-	GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
-
-	/*
-	 * If there's no chance of allocating enough pages for the whole
-	 * object, bail early.
-	 */
-	if (page_count > totalram_pages())
-		return -ENOMEM;
-
-	st = kmalloc(sizeof(*st), GFP_KERNEL);
-	if (st == NULL)
-		return -ENOMEM;
-
-rebuild_st:
-	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
-		kfree(st);
-		return -ENOMEM;
-	}
-
-	/*
-	 * Get the list of pages out of our struct file.  They'll be pinned
-	 * at this point until we release them.
-	 *
-	 * Fail silently without starting the shrinker
-	 */
-	mapping = obj->base.filp->f_mapping;
-	mapping_set_unevictable(mapping);
-	noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
-	noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
-
-	sg = st->sgl;
-	st->nents = 0;
-	sg_page_sizes = 0;
-	for (i = 0; i < page_count; i++) {
-		const unsigned int shrink[] = {
-			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
-			0,
-		}, *s = shrink;
-		gfp_t gfp = noreclaim;
-
-		do {
-			cond_resched();
-			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
-			if (!IS_ERR(page))
-				break;
-
-			if (!*s) {
-				ret = PTR_ERR(page);
-				goto err_sg;
-			}
-
-			i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++);
-
-			/*
-			 * We've tried hard to allocate the memory by reaping
-			 * our own buffer, now let the real VM do its job and
-			 * go down in flames if truly OOM.
-			 *
-			 * However, since graphics tend to be disposable,
-			 * defer the oom here by reporting the ENOMEM back
-			 * to userspace.
-			 */
-			if (!*s) {
-				/* reclaim and warn, but no oom */
-				gfp = mapping_gfp_mask(mapping);
-
-				/*
-				 * Our bo are always dirty and so we require
-				 * kswapd to reclaim our pages (direct reclaim
-				 * does not effectively begin pageout of our
-				 * buffers on its own). However, direct reclaim
-				 * only waits for kswapd when under allocation
-				 * congestion. So as a result __GFP_RECLAIM is
-				 * unreliable and fails to actually reclaim our
-				 * dirty pages -- unless you try over and over
-				 * again with !__GFP_NORETRY. However, we still
-				 * want to fail this allocation rather than
-				 * trigger the out-of-memory killer and for
-				 * this we want __GFP_RETRY_MAYFAIL.
-				 */
-				gfp |= __GFP_RETRY_MAYFAIL;
-			}
-		} while (1);
-
-		if (!i ||
-		    sg->length >= max_segment ||
-		    page_to_pfn(page) != last_pfn + 1) {
-			if (i) {
-				sg_page_sizes |= sg->length;
-				sg = sg_next(sg);
-			}
-			st->nents++;
-			sg_set_page(sg, page, PAGE_SIZE, 0);
-		} else {
-			sg->length += PAGE_SIZE;
-		}
-		last_pfn = page_to_pfn(page);
-
-		/* Check that the i965g/gm workaround works. */
-		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
-	}
-	if (sg) { /* loop terminated early; short sg table */
-		sg_page_sizes |= sg->length;
-		sg_mark_end(sg);
-	}
-
-	/* Trim unused sg entries to avoid wasting memory. */
-	i915_sg_trim(st);
-
-	ret = i915_gem_gtt_prepare_pages(obj, st);
-	if (ret) {
-		/*
-		 * DMA remapping failed? One possible cause is that
-		 * it could not reserve enough large entries, asking
-		 * for PAGE_SIZE chunks instead may be helpful.
-		 */
-		if (max_segment > PAGE_SIZE) {
-			for_each_sgt_page(page, sgt_iter, st)
-				put_page(page);
-			sg_free_table(st);
-
-			max_segment = PAGE_SIZE;
-			goto rebuild_st;
-		} else {
-			dev_warn(&dev_priv->drm.pdev->dev,
-				 "Failed to DMA remap %lu pages\n",
-				 page_count);
-			goto err_pages;
-		}
-	}
-
-	if (i915_gem_object_needs_bit17_swizzle(obj))
-		i915_gem_object_do_bit_17_swizzle(obj, st);
-
-	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
-
-	return 0;
-
-err_sg:
-	sg_mark_end(sg);
-err_pages:
-	mapping_clear_unevictable(mapping);
-	pagevec_init(&pvec);
-	for_each_sgt_page(page, sgt_iter, st) {
-		if (!pagevec_add(&pvec, page))
-			check_release_pagevec(&pvec);
-	}
-	if (pagevec_count(&pvec))
-		check_release_pagevec(&pvec);
-	sg_free_table(st);
-	kfree(st);
-
-	/*
-	 * shmemfs first checks if there is enough memory to allocate the page
-	 * and reports ENOSPC should there be insufficient, along with the usual
-	 * ENOMEM for a genuine allocation failure.
-	 *
-	 * We use ENOSPC in our driver to mean that we have run out of aperture
-	 * space and so want to translate the error from shmemfs back to our
-	 * usual understanding of ENOMEM.
-	 */
-	if (ret == -ENOSPC)
-		ret = -ENOMEM;
-
-	return ret;
-}
-
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 				 struct sg_table *pages,
 				 unsigned int sg_page_sizes)
@@ -2662,133 +2366,6 @@ void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
 	}
 }
 
-static int
-i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
-			   const struct drm_i915_gem_pwrite *arg)
-{
-	struct address_space *mapping = obj->base.filp->f_mapping;
-	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
-	u64 remain, offset;
-	unsigned int pg;
-
-	/* Caller already validated user args */
-	GEM_BUG_ON(!access_ok(user_data, arg->size));
-
-	/*
-	 * Before we instantiate/pin the backing store for our use, we
-	 * can prepopulate the shmemfs filp efficiently using a write into
-	 * the pagecache. We avoid the penalty of instantiating all the
-	 * pages, important if the user is just writing to a few and never
-	 * uses the object on the GPU, and using a direct write into shmemfs
-	 * allows it to avoid the cost of retrieving a page (either swapin
-	 * or clearing-before-use) before it is overwritten.
-	 */
-	if (i915_gem_object_has_pages(obj))
-		return -ENODEV;
-
-	if (obj->mm.madv != I915_MADV_WILLNEED)
-		return -EFAULT;
-
-	/*
-	 * Before the pages are instantiated the object is treated as being
-	 * in the CPU domain. The pages will be clflushed as required before
-	 * use, and we can freely write into the pages directly. If userspace
-	 * races pwrite with any other operation; corruption will ensue -
-	 * that is userspace's prerogative!
-	 */
-
-	remain = arg->size;
-	offset = arg->offset;
-	pg = offset_in_page(offset);
-
-	do {
-		unsigned int len, unwritten;
-		struct page *page;
-		void *data, *vaddr;
-		int err;
-		char c;
-
-		len = PAGE_SIZE - pg;
-		if (len > remain)
-			len = remain;
-
-		/* Prefault the user page to reduce potential recursion */
-		err = __get_user(c, user_data);
-		if (err)
-			return err;
-
-		err = __get_user(c, user_data + len - 1);
-		if (err)
-			return err;
-
-		err = pagecache_write_begin(obj->base.filp, mapping,
-					    offset, len, 0,
-					    &page, &data);
-		if (err < 0)
-			return err;
-
-		vaddr = kmap_atomic(page);
-		unwritten = __copy_from_user_inatomic(vaddr + pg,
-						      user_data,
-						      len);
-		kunmap_atomic(vaddr);
-
-		err = pagecache_write_end(obj->base.filp, mapping,
-					  offset, len, len - unwritten,
-					  page, data);
-		if (err < 0)
-			return err;
-
-		/* We don't handle -EFAULT, leave it to the caller to check */
-		if (unwritten)
-			return -ENODEV;
-
-		remain -= len;
-		user_data += len;
-		offset += len;
-		pg = 0;
-	} while (remain);
-
-	return 0;
-}
-
-void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
-{
-	struct drm_i915_private *i915 = to_i915(gem->dev);
-	struct drm_i915_gem_object *obj = to_intel_bo(gem);
-	struct drm_i915_file_private *fpriv = file->driver_priv;
-	struct i915_lut_handle *lut, *ln;
-
-	mutex_lock(&i915->drm.struct_mutex);
-
-	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
-		struct i915_gem_context *ctx = lut->ctx;
-		struct i915_vma *vma;
-
-		GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
-		if (ctx->file_priv != fpriv)
-			continue;
-
-		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
-		GEM_BUG_ON(vma->obj != obj);
-
-		/* We allow the process to have multiple handles to the same
-		 * vma, in the same fd namespace, by virtue of flink/open.
-		 */
-		GEM_BUG_ON(!vma->open_count);
-		if (!--vma->open_count && !i915_vma_is_ggtt(vma))
-			i915_vma_close(vma);
-
-		list_del(&lut->obj_link);
-		list_del(&lut->ctx_link);
-
-		i915_lut_handle_free(lut);
-		__i915_gem_object_release_unless_active(obj);
-	}
-
-	mutex_unlock(&i915->drm.struct_mutex);
-}
-
 static unsigned long to_wait_timeout(s64 timeout_ns)
 {
 	if (timeout_ns < 0)
@@ -3787,348 +3364,6 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 	return err;
 }
 
-static void
-frontbuffer_retire(struct i915_active_request *active,
-		   struct i915_request *request)
-{
-	struct drm_i915_gem_object *obj =
-		container_of(active, typeof(*obj), frontbuffer_write);
-
-	intel_fb_obj_flush(obj, ORIGIN_CS);
-}
-
-void i915_gem_object_init(struct drm_i915_gem_object *obj,
-			  const struct drm_i915_gem_object_ops *ops)
-{
-	mutex_init(&obj->mm.lock);
-
-	spin_lock_init(&obj->vma.lock);
-	INIT_LIST_HEAD(&obj->vma.list);
-
-	INIT_LIST_HEAD(&obj->lut_list);
-	INIT_LIST_HEAD(&obj->batch_pool_link);
-
-	init_rcu_head(&obj->rcu);
-
-	obj->ops = ops;
-
-	reservation_object_init(&obj->__builtin_resv);
-	obj->resv = &obj->__builtin_resv;
-
-	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
-	i915_active_request_init(&obj->frontbuffer_write,
-				 NULL, frontbuffer_retire);
-
-	obj->mm.madv = I915_MADV_WILLNEED;
-	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
-	mutex_init(&obj->mm.get_page.lock);
-
-	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
-}
-
-static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
-	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
-		 I915_GEM_OBJECT_IS_SHRINKABLE,
-
-	.get_pages = i915_gem_object_get_pages_gtt,
-	.put_pages = i915_gem_object_put_pages_gtt,
-
-	.pwrite = i915_gem_object_pwrite_gtt,
-};
-
-static int i915_gem_object_create_shmem(struct drm_device *dev,
-					struct drm_gem_object *obj,
-					size_t size)
-{
-	struct drm_i915_private *i915 = to_i915(dev);
-	unsigned long flags = VM_NORESERVE;
-	struct file *filp;
-
-	drm_gem_private_object_init(dev, obj, size);
-
-	if (i915->mm.gemfs)
-		filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
-						 flags);
-	else
-		filp = shmem_file_setup("i915", size, flags);
-
-	if (IS_ERR(filp))
-		return PTR_ERR(filp);
-
-	obj->filp = filp;
-
-	return 0;
-}
-
-struct drm_i915_gem_object *
-i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
-{
-	struct drm_i915_gem_object *obj;
-	struct address_space *mapping;
-	unsigned int cache_level;
-	gfp_t mask;
-	int ret;
-
-	/* There is a prevalence of the assumption that we fit the object's
-	 * page count inside a 32bit _signed_ variable. Let's document this and
-	 * catch if we ever need to fix it. In the meantime, if you do spot
-	 * such a local variable, please consider fixing!
-	 */
-	if (size >> PAGE_SHIFT > INT_MAX)
-		return ERR_PTR(-E2BIG);
-
-	if (overflows_type(size, obj->base.size))
-		return ERR_PTR(-E2BIG);
-
-	obj = i915_gem_object_alloc();
-	if (obj == NULL)
-		return ERR_PTR(-ENOMEM);
-
-	ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size);
-	if (ret)
-		goto fail;
-
-	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
-	if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) {
-		/* 965gm cannot relocate objects above 4GiB. */
-		mask &= ~__GFP_HIGHMEM;
-		mask |= __GFP_DMA32;
-	}
-
-	mapping = obj->base.filp->f_mapping;
-	mapping_set_gfp_mask(mapping, mask);
-	GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
-
-	i915_gem_object_init(obj, &i915_gem_object_ops);
-
-	obj->write_domain = I915_GEM_DOMAIN_CPU;
-	obj->read_domains = I915_GEM_DOMAIN_CPU;
-
-	if (HAS_LLC(dev_priv))
-		/* On some devices, we can have the GPU use the LLC (the CPU
-		 * cache) for about a 10% performance improvement
-		 * compared to uncached.  Graphics requests other than
-		 * display scanout are coherent with the CPU in
-		 * accessing this cache.  This means in this mode we
-		 * don't need to clflush on the CPU side, and on the
-		 * GPU side we only need to flush internal caches to
-		 * get data visible to the CPU.
-		 *
-		 * However, we maintain the display planes as UC, and so
-		 * need to rebind when first used as such.
-		 */
-		cache_level = I915_CACHE_LLC;
-	else
-		cache_level = I915_CACHE_NONE;
-
-	i915_gem_object_set_cache_coherency(obj, cache_level);
-
-	trace_i915_gem_object_create(obj);
-
-	return obj;
-
-fail:
-	i915_gem_object_free(obj);
-	return ERR_PTR(ret);
-}
-
-static bool discard_backing_storage(struct drm_i915_gem_object *obj)
-{
-	/* If we are the last user of the backing storage (be it shmemfs
-	 * pages or stolen etc), we know that the pages are going to be
-	 * immediately released. In this case, we can then skip copying
-	 * back the contents from the GPU.
-	 */
-
-	if (obj->mm.madv != I915_MADV_WILLNEED)
-		return false;
-
-	if (obj->base.filp == NULL)
-		return true;
-
-	/* At first glance, this looks racy, but then again so would be
-	 * userspace racing mmap against close. However, the first external
-	 * reference to the filp can only be obtained through the
-	 * i915_gem_mmap_ioctl() which safeguards us against the user
-	 * acquiring such a reference whilst we are in the middle of
-	 * freeing the object.
-	 */
-	return atomic_long_read(&obj->base.filp->f_count) == 1;
-}
-
-static void __i915_gem_free_objects(struct drm_i915_private *i915,
-				    struct llist_node *freed)
-{
-	struct drm_i915_gem_object *obj, *on;
-	intel_wakeref_t wakeref;
-
-	wakeref = intel_runtime_pm_get(i915);
-	llist_for_each_entry_safe(obj, on, freed, freed) {
-		struct i915_vma *vma, *vn;
-
-		trace_i915_gem_object_destroy(obj);
-
-		mutex_lock(&i915->drm.struct_mutex);
-
-		GEM_BUG_ON(i915_gem_object_is_active(obj));
-		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
-			GEM_BUG_ON(i915_vma_is_active(vma));
-			vma->flags &= ~I915_VMA_PIN_MASK;
-			i915_vma_destroy(vma);
-		}
-		GEM_BUG_ON(!list_empty(&obj->vma.list));
-		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
-
-		/* This serializes freeing with the shrinker. Since the free
-		 * is delayed, first by RCU then by the workqueue, we want the
-		 * shrinker to be able to free pages of unreferenced objects,
-		 * or else we may oom whilst there are plenty of deferred
-		 * freed objects.
-		 */
-		if (i915_gem_object_has_pages(obj)) {
-			spin_lock(&i915->mm.obj_lock);
-			list_del_init(&obj->mm.link);
-			spin_unlock(&i915->mm.obj_lock);
-		}
-
-		mutex_unlock(&i915->drm.struct_mutex);
-
-		GEM_BUG_ON(obj->bind_count);
-		GEM_BUG_ON(obj->userfault_count);
-		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
-		GEM_BUG_ON(!list_empty(&obj->lut_list));
-
-		if (obj->ops->release)
-			obj->ops->release(obj);
-
-		if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
-			atomic_set(&obj->mm.pages_pin_count, 0);
-		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
-		GEM_BUG_ON(i915_gem_object_has_pages(obj));
-
-		if (obj->base.import_attach)
-			drm_prime_gem_destroy(&obj->base, NULL);
-
-		reservation_object_fini(&obj->__builtin_resv);
-		drm_gem_object_release(&obj->base);
-		i915_gem_info_remove_obj(i915, obj->base.size);
-
-		bitmap_free(obj->bit_17);
-		i915_gem_object_free(obj);
-
-		GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
-		atomic_dec(&i915->mm.free_count);
-
-		if (on)
-			cond_resched();
-	}
-	intel_runtime_pm_put(i915, wakeref);
-}
-
-static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
-{
-	struct llist_node *freed;
-
-	/* Free the oldest, most stale object to keep the free_list short */
-	freed = NULL;
-	if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
-		/* Only one consumer of llist_del_first() allowed */
-		spin_lock(&i915->mm.free_lock);
-		freed = llist_del_first(&i915->mm.free_list);
-		spin_unlock(&i915->mm.free_lock);
-	}
-	if (unlikely(freed)) {
-		freed->next = NULL;
-		__i915_gem_free_objects(i915, freed);
-	}
-}
-
-static void __i915_gem_free_work(struct work_struct *work)
-{
-	struct drm_i915_private *i915 =
-		container_of(work, struct drm_i915_private, mm.free_work);
-	struct llist_node *freed;
-
-	/*
-	 * All file-owned VMA should have been released by this point through
-	 * i915_gem_close_object(), or earlier by i915_gem_context_close().
-	 * However, the object may also be bound into the global GTT (e.g.
-	 * older GPUs without per-process support, or for direct access through
-	 * the GTT either for the user or for scanout). Those VMA still need to
-	 * unbound now.
-	 */
-
-	spin_lock(&i915->mm.free_lock);
-	while ((freed = llist_del_all(&i915->mm.free_list))) {
-		spin_unlock(&i915->mm.free_lock);
-
-		__i915_gem_free_objects(i915, freed);
-		if (need_resched())
-			return;
-
-		spin_lock(&i915->mm.free_lock);
-	}
-	spin_unlock(&i915->mm.free_lock);
-}
-
-static void __i915_gem_free_object_rcu(struct rcu_head *head)
-{
-	struct drm_i915_gem_object *obj =
-		container_of(head, typeof(*obj), rcu);
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-
-	/*
-	 * We reuse obj->rcu for the freed list, so we had better not treat
-	 * it like a rcu_head from this point forwards. And we expect all
-	 * objects to be freed via this path.
-	 */
-	destroy_rcu_head(&obj->rcu);
-
-	/*
-	 * Since we require blocking on struct_mutex to unbind the freed
-	 * object from the GPU before releasing resources back to the
-	 * system, we can not do that directly from the RCU callback (which may
-	 * be a softirq context), but must instead then defer that work onto a
-	 * kthread. We use the RCU callback rather than move the freed object
-	 * directly onto the work queue so that we can mix between using the
-	 * worker and performing frees directly from subsequent allocations for
-	 * crude but effective memory throttling.
-	 */
-	if (llist_add(&obj->freed, &i915->mm.free_list))
-		queue_work(i915->wq, &i915->mm.free_work);
-}
-
-void i915_gem_free_object(struct drm_gem_object *gem_obj)
-{
-	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
-
-	if (obj->mm.quirked)
-		__i915_gem_object_unpin_pages(obj);
-
-	if (discard_backing_storage(obj))
-		obj->mm.madv = I915_MADV_DONTNEED;
-
-	/*
-	 * Before we free the object, make sure any pure RCU-only
-	 * read-side critical sections are complete, e.g.
-	 * i915_gem_busy_ioctl(). For the corresponding synchronized
-	 * lookup see i915_gem_object_lookup_rcu().
-	 */
-	atomic_inc(&to_i915(obj->base.dev)->mm.free_count);
-	call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
-}
-
-void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
-{
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	if (!i915_gem_object_has_active_reference(obj) &&
-	    i915_gem_object_is_active(obj))
-		i915_gem_object_set_active_reference(obj);
-	else
-		i915_gem_object_put(obj);
-}
-
 void i915_gem_sanitize(struct drm_i915_private *i915)
 {
 	intel_wakeref_t wakeref;
@@ -4725,7 +3960,7 @@ static void i915_gem_init__mm(struct drm_i915_private *i915)
 	INIT_LIST_HEAD(&i915->mm.fence_list);
 	INIT_LIST_HEAD(&i915->mm.userfault_list);
 
-	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
+	i915_gem_init__objects(i915);
 }
 
 int i915_gem_init_early(struct drm_i915_private *dev_priv)
@@ -4891,57 +4126,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 	}
 }
 
-/* Allocate a new GEM object and fill it with the supplied data */
-struct drm_i915_gem_object *
-i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
-			         const void *data, size_t size)
-{
-	struct drm_i915_gem_object *obj;
-	struct file *file;
-	size_t offset;
-	int err;
-
-	obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
-	if (IS_ERR(obj))
-		return obj;
-
-	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
-
-	file = obj->base.filp;
-	offset = 0;
-	do {
-		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
-		struct page *page;
-		void *pgdata, *vaddr;
-
-		err = pagecache_write_begin(file, file->f_mapping,
-					    offset, len, 0,
-					    &page, &pgdata);
-		if (err < 0)
-			goto fail;
-
-		vaddr = kmap(page);
-		memcpy(vaddr, data, len);
-		kunmap(page);
-
-		err = pagecache_write_end(file, file->f_mapping,
-					  offset, len, len,
-					  page, pgdata);
-		if (err < 0)
-			goto fail;
-
-		size -= len;
-		data += len;
-		offset += len;
-	} while (size);
-
-	return obj;
-
-fail:
-	i915_gem_object_put(obj);
-	return ERR_PTR(err);
-}
-
 struct scatterlist *
 i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
 		       unsigned int n,
@@ -5103,7 +4287,7 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
 	if (obj->ops == &i915_gem_phys_ops)
 		return 0;
 
-	if (obj->ops != &i915_gem_object_ops)
+	if (obj->ops != &i915_gem_shmem_ops)
 		return -EINVAL;
 
 	err = i915_gem_object_unbind(obj);
@@ -5139,12 +4323,12 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
 	__i915_gem_object_pin_pages(obj);
 
 	if (!IS_ERR_OR_NULL(pages))
-		i915_gem_object_ops.put_pages(obj, pages);
+		i915_gem_shmem_ops.put_pages(obj, pages);
 	mutex_unlock(&obj->mm.lock);
 	return 0;
 
 err_xfer:
-	obj->ops = &i915_gem_object_ops;
+	obj->ops = &i915_gem_shmem_ops;
 	if (!IS_ERR_OR_NULL(pages)) {
 		unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c4995d5a16d2..379fd89a180f 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1510,7 +1510,7 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
 	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
 	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
 
-	bo = i915_gem_object_create(dev_priv, OA_BUFFER_SIZE);
+	bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE);
 	if (IS_ERR(bo)) {
 		DRM_ERROR("Failed to allocate OA buffer\n");
 		ret = PTR_ERR(bo);
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 89db71996148..0d3a6fa674e6 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -144,7 +144,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
 	if (size * 2 < dev_priv->stolen_usable_size)
 		obj = i915_gem_object_create_stolen(dev_priv, size);
 	if (obj == NULL)
-		obj = i915_gem_object_create(dev_priv, size);
+		obj = i915_gem_object_create_shmem(dev_priv, size);
 	if (IS_ERR(obj)) {
 		DRM_ERROR("failed to allocate framebuffer\n");
 		ret = PTR_ERR(obj);
diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c
index c4ac29309fcc..98e45a500a15 100644
--- a/drivers/gpu/drm/i915/intel_guc.c
+++ b/drivers/gpu/drm/i915/intel_guc.c
@@ -690,7 +690,7 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
 	u64 flags;
 	int ret;
 
-	obj = i915_gem_object_create(dev_priv, size);
+	obj = i915_gem_object_create_shmem(dev_priv, size);
 	if (IS_ERR(obj))
 		return ERR_CAST(obj);
 
diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c
index b9cb6fea9332..3257a054cb0b 100644
--- a/drivers/gpu/drm/i915/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/intel_uc_fw.c
@@ -159,7 +159,8 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
 		goto fail;
 	}
 
-	obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size);
+	obj = i915_gem_object_create_shmem_from_data(dev_priv,
+						     fw->data, fw->size);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		DRM_DEBUG_DRIVER("%s fw object_create err=%d\n",
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index 1e1f83326a96..ce4ec87698f6 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -1390,7 +1390,7 @@ static int igt_ppgtt_gemfs_huge(void *arg)
 	for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
 		unsigned int size = sizes[i];
 
-		obj = i915_gem_object_create(i915, size);
+		obj = i915_gem_object_create_shmem(i915, size);
 		if (IS_ERR(obj))
 			return PTR_ERR(obj);
 
@@ -1568,7 +1568,7 @@ static int igt_tmpfs_fallback(void *arg)
 
 	i915->mm.gemfs = NULL;
 
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		goto out_restore;
@@ -1628,7 +1628,7 @@ static int igt_shrink_thp(void *arg)
 		return 0;
 	}
 
-	obj = i915_gem_object_create(i915, SZ_2M);
+	obj = i915_gem_object_create_shmem(i915, SZ_2M);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
index 2b943ee246c9..cc65a503e2f0 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
@@ -33,7 +33,7 @@ static int igt_dmabuf_export(void *arg)
 	struct drm_i915_gem_object *obj;
 	struct dma_buf *dmabuf;
 
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -57,7 +57,7 @@ static int igt_dmabuf_import_self(void *arg)
 	struct dma_buf *dmabuf;
 	int err;
 
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -232,7 +232,7 @@ static int igt_dmabuf_export_vmap(void *arg)
 	void *ptr;
 	int err;
 
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -279,7 +279,7 @@ static int igt_dmabuf_export_kmap(void *arg)
 	void *ptr;
 	int err;
 
-	obj = i915_gem_object_create(i915, 2*PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, 2 * PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 088b2aa05dcd..ff31fe0cecec 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -36,7 +36,7 @@ static int igt_gem_object(void *arg)
 
 	/* Basic test to ensure we can create an object */
 
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		pr_err("i915_gem_object_create failed, err=%d\n", err);
@@ -59,7 +59,7 @@ static int igt_phys_object(void *arg)
 	 * i.e. exercise the i915_gem_object_phys API.
 	 */
 
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		pr_err("i915_gem_object_create failed, err=%d\n", err);
-- 
2.20.1


* [PATCH 28/45] drm/i915: Move phys objects to its own file
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (25 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 27/45] drm/i915: Move shmem object setup to its own file Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 29/45] drm/i915: Move mmap and friends " Chris Wilson
                   ` (24 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

Continuing the decluttering of i915_gem.c, this time the legacy physical
object.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   2 +
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |  11 +-
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   2 +
 drivers/gpu/drm/i915/gem/i915_gem_pages.c     | 510 +++++++++++++
 drivers/gpu/drm/i915/gem/i915_gem_phys.c      | 212 ++++++
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c     |  61 ++
 .../drm/i915/gem/selftests/i915_gem_phys.c    |  80 ++
 drivers/gpu/drm/i915/i915_drv.h               |   2 -
 drivers/gpu/drm/i915/i915_gem.c               | 699 +-----------------
 drivers/gpu/drm/i915/i915_gem_shrinker.c      |  59 +-
 .../gpu/drm/i915/selftests/i915_gem_object.c  |  54 --
 .../drm/i915/selftests/i915_mock_selftests.h  |   1 +
 12 files changed, 885 insertions(+), 808 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_pages.c
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_phys.c
 create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
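
Besides moving code, the diff below adds two optional per-backend hooks,
truncate and writeback, to struct drm_i915_gem_object_ops, dispatched from
the common i915_gem_object_truncate() and i915_gem_object_writeback()
helpers. A rough sketch of a backend wiring them up (the example_* names
are hypothetical and not part of the patch):

/* Hypothetical backend sketch for the new optional hooks. */
static void example_truncate(struct drm_i915_gem_object *obj)
{
	/* Drop the backing store immediately, e.g. once purged by madvise. */
}

static void example_writeback(struct drm_i915_gem_object *obj)
{
	/* Called under obj->mm.lock with no pages left: start paging out. */
}

static const struct drm_i915_gem_object_ops example_ops = {
	.truncate  = example_truncate,
	.writeback = example_writeback,
	/* .get_pages/.put_pages and friends omitted from this sketch */
};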

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 447143fadb05..84277b29389c 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -88,6 +88,8 @@ i915-y += $(gt-y)
 obj-y += gem/
 gem-y += \
 	gem/i915_gem_object.o \
+	gem/i915_gem_pages.o \
+	gem/i915_gem_phys.o \
 	gem/i915_gem_shmem.o
 i915-y += \
 	  $(gem-y) \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index ad82303f741a..2e963a593245 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -33,11 +33,17 @@ void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
 				     struct sg_table *pages,
 				     bool needs_clflush);
 
+int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align);
+
 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
 void i915_gem_free_object(struct drm_gem_object *obj);
 
 void i915_gem_flush_free_objects(struct drm_i915_private *i915);
 
+struct sg_table *
+__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj);
+void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
+
 /**
  * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
  * @filp: DRM file private date
@@ -231,6 +237,8 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 				 struct sg_table *pages,
 				 unsigned int sg_page_sizes);
+
+int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 
 static inline int __must_check
@@ -286,7 +294,8 @@ enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
 
 int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
 				enum i915_mm_subclass subclass);
-void __i915_gem_object_truncate(struct drm_i915_gem_object *obj);
+void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
+void i915_gem_object_writeback(struct drm_i915_gem_object *obj);
 
 enum i915_map_type {
 	I915_MAP_WB = 0,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index e4b50944f553..24abdd6999b5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -52,6 +52,8 @@ struct drm_i915_gem_object_ops {
 	int (*get_pages)(struct drm_i915_gem_object *obj);
 	void (*put_pages)(struct drm_i915_gem_object *obj,
 			  struct sg_table *pages);
+	void (*truncate)(struct drm_i915_gem_object *obj);
+	void (*writeback)(struct drm_i915_gem_object *obj);
 
 	int (*pwrite)(struct drm_i915_gem_object *obj,
 		      const struct drm_i915_gem_pwrite *arg);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
new file mode 100644
index 000000000000..452aba0a3359
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -0,0 +1,510 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include "i915_gem_object.h"
+
+#include "../i915_drv.h"
+
+void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+				 struct sg_table *pages,
+				 unsigned int sg_page_sizes)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	unsigned long supported = INTEL_INFO(i915)->page_sizes;
+	int i;
+
+	lockdep_assert_held(&obj->mm.lock);
+
+	/* Make the pages coherent with the GPU (flushing any swapin). */
+	if (obj->cache_dirty) {
+		obj->write_domain = 0;
+		if (i915_gem_object_has_struct_page(obj))
+			drm_clflush_sg(pages);
+		obj->cache_dirty = false;
+	}
+
+	obj->mm.get_page.sg_pos = pages->sgl;
+	obj->mm.get_page.sg_idx = 0;
+
+	obj->mm.pages = pages;
+
+	if (i915_gem_object_is_tiled(obj) &&
+	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
+		GEM_BUG_ON(obj->mm.quirked);
+		__i915_gem_object_pin_pages(obj);
+		obj->mm.quirked = true;
+	}
+
+	GEM_BUG_ON(!sg_page_sizes);
+	obj->mm.page_sizes.phys = sg_page_sizes;
+
+	/*
+	 * Calculate the supported page-sizes which fit into the given
+	 * sg_page_sizes. This will give us the page-sizes which we may be able
+	 * to use opportunistically when later inserting into the GTT. For
+	 * example if phys=2G, then in theory we should be able to use 1G, 2M,
+	 * 64K or 4K pages, although in practice this will depend on a number of
+	 * other factors.
+	 */
+	obj->mm.page_sizes.sg = 0;
+	for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
+		if (obj->mm.page_sizes.phys & ~0u << i)
+			obj->mm.page_sizes.sg |= BIT(i);
+	}
+	GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
+
+	spin_lock(&i915->mm.obj_lock);
+	list_add(&obj->mm.link, &i915->mm.unbound_list);
+	spin_unlock(&i915->mm.obj_lock);
+}
+
+int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
+{
+	int err;
+
+	if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
+		DRM_DEBUG("Attempting to obtain a purgeable object\n");
+		return -EFAULT;
+	}
+
+	err = obj->ops->get_pages(obj);
+	GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
+
+	return err;
+}
+
+/* Ensure that the associated pages are gathered from the backing storage
+ * and pinned into our object. i915_gem_object_pin_pages() may be called
+ * multiple times before they are released by a single call to
+ * i915_gem_object_unpin_pages() - once the pages are no longer referenced
+ * either as a result of memory pressure (reaping pages under the shrinker)
+ * or as the object is itself released.
+ */
+int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
+{
+	int err;
+
+	err = mutex_lock_interruptible(&obj->mm.lock);
+	if (err)
+		return err;
+
+	if (unlikely(!i915_gem_object_has_pages(obj))) {
+		GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
+
+		err = ____i915_gem_object_get_pages(obj);
+		if (err)
+			goto unlock;
+
+		smp_mb__before_atomic();
+	}
+	atomic_inc(&obj->mm.pages_pin_count);
+
+unlock:
+	mutex_unlock(&obj->mm.lock);
+	return err;
+}
+
+/* Immediately discard the backing storage */
+void i915_gem_object_truncate(struct drm_i915_gem_object *obj)
+{
+	drm_gem_free_mmap_offset(&obj->base);
+	if (obj->ops->truncate)
+		obj->ops->truncate(obj);
+}
+
+/* Try to discard unwanted pages */
+void i915_gem_object_writeback(struct drm_i915_gem_object *obj)
+{
+	lockdep_assert_held(&obj->mm.lock);
+	GEM_BUG_ON(i915_gem_object_has_pages(obj));
+
+	if (obj->ops->writeback)
+		obj->ops->writeback(obj);
+}
+
+static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
+{
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+
+	rcu_read_lock();
+	radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
+		radix_tree_delete(&obj->mm.get_page.radix, iter.index);
+	rcu_read_unlock();
+}
+
+struct sg_table *
+__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct sg_table *pages;
+
+	pages = fetch_and_zero(&obj->mm.pages);
+	if (IS_ERR_OR_NULL(pages))
+		return pages;
+
+	spin_lock(&i915->mm.obj_lock);
+	list_del(&obj->mm.link);
+	spin_unlock(&i915->mm.obj_lock);
+
+	if (obj->mm.mapping) {
+		void *ptr;
+
+		ptr = page_mask_bits(obj->mm.mapping);
+		if (is_vmalloc_addr(ptr))
+			vunmap(ptr);
+		else
+			kunmap(kmap_to_page(ptr));
+
+		obj->mm.mapping = NULL;
+	}
+
+	__i915_gem_object_reset_page_iter(obj);
+	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
+
+	return pages;
+}
+
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
+				enum i915_mm_subclass subclass)
+{
+	struct sg_table *pages;
+	int err;
+
+	if (i915_gem_object_has_pinned_pages(obj))
+		return -EBUSY;
+
+	GEM_BUG_ON(obj->bind_count);
+
+	/* May be called by shrinker from within get_pages() (on another bo) */
+	mutex_lock_nested(&obj->mm.lock, subclass);
+	if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
+		err = -EBUSY;
+		goto unlock;
+	}
+
+	/*
+	 * ->put_pages might need to allocate memory for the bit17 swizzle
+	 * array, hence protect them from being reaped by removing them from gtt
+	 * lists early.
+	 */
+	pages = __i915_gem_object_unset_pages(obj);
+
+	/*
+	 * XXX Temporary hijinx to avoid updating all backends to handle
+	 * NULL pages. In the future, when we have more asynchronous
+	 * get_pages backends we should be better able to handle the
+	 * cancellation of the async task in a more uniform manner.
+	 */
+	if (!pages && !i915_gem_object_needs_async_cancel(obj))
+		pages = ERR_PTR(-EINVAL);
+
+	if (!IS_ERR(pages))
+		obj->ops->put_pages(obj, pages);
+
+	err = 0;
+unlock:
+	mutex_unlock(&obj->mm.lock);
+
+	return err;
+}
+
+/* The 'mapping' part of i915_gem_object_pin_map() below */
+static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
+				 enum i915_map_type type)
+{
+	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
+	struct sg_table *sgt = obj->mm.pages;
+	struct sgt_iter sgt_iter;
+	struct page *page;
+	struct page *stack_pages[32];
+	struct page **pages = stack_pages;
+	unsigned long i = 0;
+	pgprot_t pgprot;
+	void *addr;
+
+	/* A single page can always be kmapped */
+	if (n_pages == 1 && type == I915_MAP_WB)
+		return kmap(sg_page(sgt->sgl));
+
+	if (n_pages > ARRAY_SIZE(stack_pages)) {
+		/* Too big for stack -- allocate temporary array instead */
+		pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
+		if (!pages)
+			return NULL;
+	}
+
+	for_each_sgt_page(page, sgt_iter, sgt)
+		pages[i++] = page;
+
+	/* Check that we have the expected number of pages */
+	GEM_BUG_ON(i != n_pages);
+
+	switch (type) {
+	default:
+		MISSING_CASE(type);
+		/* fallthrough to use PAGE_KERNEL anyway */
+	case I915_MAP_WB:
+		pgprot = PAGE_KERNEL;
+		break;
+	case I915_MAP_WC:
+		pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
+		break;
+	}
+	addr = vmap(pages, n_pages, 0, pgprot);
+
+	if (pages != stack_pages)
+		kvfree(pages);
+
+	return addr;
+}
+
+/* get, pin, and map the pages of the object into kernel space */
+void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
+			      enum i915_map_type type)
+{
+	enum i915_map_type has_type;
+	bool pinned;
+	void *ptr;
+	int err;
+
+	if (unlikely(!i915_gem_object_has_struct_page(obj)))
+		return ERR_PTR(-ENXIO);
+
+	err = mutex_lock_interruptible(&obj->mm.lock);
+	if (err)
+		return ERR_PTR(err);
+
+	pinned = !(type & I915_MAP_OVERRIDE);
+	type &= ~I915_MAP_OVERRIDE;
+
+	if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
+		if (unlikely(!i915_gem_object_has_pages(obj))) {
+			GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
+
+			err = ____i915_gem_object_get_pages(obj);
+			if (err)
+				goto err_unlock;
+
+			smp_mb__before_atomic();
+		}
+		atomic_inc(&obj->mm.pages_pin_count);
+		pinned = false;
+	}
+	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+
+	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
+	if (ptr && has_type != type) {
+		if (pinned) {
+			err = -EBUSY;
+			goto err_unpin;
+		}
+
+		if (is_vmalloc_addr(ptr))
+			vunmap(ptr);
+		else
+			kunmap(kmap_to_page(ptr));
+
+		ptr = obj->mm.mapping = NULL;
+	}
+
+	if (!ptr) {
+		ptr = i915_gem_object_map(obj, type);
+		if (!ptr) {
+			err = -ENOMEM;
+			goto err_unpin;
+		}
+
+		obj->mm.mapping = page_pack_bits(ptr, type);
+	}
+
+out_unlock:
+	mutex_unlock(&obj->mm.lock);
+	return ptr;
+
+err_unpin:
+	atomic_dec(&obj->mm.pages_pin_count);
+err_unlock:
+	ptr = ERR_PTR(err);
+	goto out_unlock;
+}
+
+void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
+				 unsigned long offset,
+				 unsigned long size)
+{
+	enum i915_map_type has_type;
+	void *ptr;
+
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+	GEM_BUG_ON(range_overflows_t(typeof(obj->base.size),
+				     offset, size, obj->base.size));
+
+	obj->mm.dirty = true;
+
+	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)
+		return;
+
+	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
+	if (has_type == I915_MAP_WC)
+		return;
+
+	drm_clflush_virt_range(ptr + offset, size);
+	if (size == obj->base.size) {
+		obj->write_domain &= ~I915_GEM_DOMAIN_CPU;
+		obj->cache_dirty = false;
+	}
+}
+
+struct scatterlist *
+i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+		       unsigned int n,
+		       unsigned int *offset)
+{
+	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
+	struct scatterlist *sg;
+	unsigned int idx, count;
+
+	might_sleep();
+	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+	/* As we iterate forward through the sg, we record each entry in a
+	 * radixtree for quick repeated (backwards) lookups. If we have seen
+	 * this index previously, we will have an entry for it.
+	 *
+	 * Initial lookup is O(N), but this is amortized to O(1) for
+	 * sequential page access (where each new request is consecutive
+	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
+	 * i.e. O(1) with a large constant!
+	 */
+	if (n < READ_ONCE(iter->sg_idx))
+		goto lookup;
+
+	mutex_lock(&iter->lock);
+
+	/* We prefer to reuse the last sg so that repeated lookup of this
+	 * (or the subsequent) sg are fast - comparing against the last
+	 * sg is faster than going through the radixtree.
+	 */
+
+	sg = iter->sg_pos;
+	idx = iter->sg_idx;
+	count = __sg_page_count(sg);
+
+	while (idx + count <= n) {
+		void *entry;
+		unsigned long i;
+		int ret;
+
+		/* If we cannot allocate and insert this entry, or the
+		 * individual pages from this range, cancel updating the
+		 * sg_idx so that on this lookup we are forced to linearly
+		 * scan onwards, but on future lookups we will try the
+		 * insertion again (in which case we need to be careful of
+		 * the error return reporting that we have already inserted
+		 * this index).
+		 */
+		ret = radix_tree_insert(&iter->radix, idx, sg);
+		if (ret && ret != -EEXIST)
+			goto scan;
+
+		entry = xa_mk_value(idx);
+		for (i = 1; i < count; i++) {
+			ret = radix_tree_insert(&iter->radix, idx + i, entry);
+			if (ret && ret != -EEXIST)
+				goto scan;
+		}
+
+		idx += count;
+		sg = ____sg_next(sg);
+		count = __sg_page_count(sg);
+	}
+
+scan:
+	iter->sg_pos = sg;
+	iter->sg_idx = idx;
+
+	mutex_unlock(&iter->lock);
+
+	if (unlikely(n < idx)) /* insertion completed by another thread */
+		goto lookup;
+
+	/* In case we failed to insert the entry into the radixtree, we need
+	 * to look beyond the current sg.
+	 */
+	while (idx + count <= n) {
+		idx += count;
+		sg = ____sg_next(sg);
+		count = __sg_page_count(sg);
+	}
+
+	*offset = n - idx;
+	return sg;
+
+lookup:
+	rcu_read_lock();
+
+	sg = radix_tree_lookup(&iter->radix, n);
+	GEM_BUG_ON(!sg);
+
+	/* If this index is in the middle of multi-page sg entry,
+	 * the radix tree will contain a value entry that points
+	 * to the start of that range. We will return the pointer to
+	 * the base page and the offset of this page within the
+	 * sg entry's range.
+	 */
+	*offset = 0;
+	if (unlikely(xa_is_value(sg))) {
+		unsigned long base = xa_to_value(sg);
+
+		sg = radix_tree_lookup(&iter->radix, base);
+		GEM_BUG_ON(!sg);
+
+		*offset = n - base;
+	}
+
+	rcu_read_unlock();
+
+	return sg;
+}
+
+struct page *
+i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
+{
+	struct scatterlist *sg;
+	unsigned int offset;
+
+	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
+
+	sg = i915_gem_object_get_sg(obj, n, &offset);
+	return nth_page(sg_page(sg), offset);
+}
+
+/* Like i915_gem_object_get_page(), but mark the returned page dirty */
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+			       unsigned int n)
+{
+	struct page *page;
+
+	page = i915_gem_object_get_page(obj, n);
+	if (!obj->mm.dirty)
+		set_page_dirty(page);
+
+	return page;
+}
+
+dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
+				unsigned long n)
+{
+	struct scatterlist *sg;
+	unsigned int offset;
+
+	sg = i915_gem_object_get_sg(obj, n, &offset);
+	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
+}
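
A rough usage sketch (hypothetical, not part of the patch): once a caller
holds a pages pin, the radix-tree cache built by i915_gem_object_get_sg()
above keeps repeated and backwards page lookups cheap, so walking the
backing pages one by one stays amortised O(1) per lookup.

/* Hypothetical caller: peek at the first byte of each backing page. */
static void example_peek_pages(struct drm_i915_gem_object *obj, u8 *out)
{
	unsigned long n, count = obj->base.size >> PAGE_SHIFT;

	for (n = 0; n < count; n++) {
		struct page *page = i915_gem_object_get_page(obj, n);
		void *vaddr = kmap_atomic(page);

		out[n] = *(u8 *)vaddr;
		kunmap_atomic(vaddr);
	}
}
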
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
new file mode 100644
index 000000000000..1bf3e0afcba2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -0,0 +1,212 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/highmem.h>
+#include <linux/shmem_fs.h>
+#include <linux/swap.h>
+
+#include <drm/drm.h> /* for drm_legacy.h! */
+#include <drm/drm_cache.h>
+#include <drm/drm_legacy.h> /* for drm_pci.h! */
+#include <drm/drm_pci.h>
+
+#include "i915_gem_object.h"
+
+#include "../i915_drv.h"
+
+static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
+{
+	struct address_space *mapping = obj->base.filp->f_mapping;
+	struct drm_dma_handle *phys;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	char *vaddr;
+	int i;
+	int err;
+
+	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
+		return -EINVAL;
+
+	/* Always aligning to the object size, allows a single allocation
+	 * to handle all possible callers, and given typical object sizes,
+	 * the alignment of the buddy allocation will naturally match.
+	 */
+	phys = drm_pci_alloc(obj->base.dev,
+			     roundup_pow_of_two(obj->base.size),
+			     roundup_pow_of_two(obj->base.size));
+	if (!phys)
+		return -ENOMEM;
+
+	vaddr = phys->vaddr;
+	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+		struct page *page;
+		char *src;
+
+		page = shmem_read_mapping_page(mapping, i);
+		if (IS_ERR(page)) {
+			err = PTR_ERR(page);
+			goto err_phys;
+		}
+
+		src = kmap_atomic(page);
+		memcpy(vaddr, src, PAGE_SIZE);
+		drm_clflush_virt_range(vaddr, PAGE_SIZE);
+		kunmap_atomic(src);
+
+		put_page(page);
+		vaddr += PAGE_SIZE;
+	}
+
+	i915_gem_chipset_flush(to_i915(obj->base.dev));
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st) {
+		err = -ENOMEM;
+		goto err_phys;
+	}
+
+	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
+		kfree(st);
+		err = -ENOMEM;
+		goto err_phys;
+	}
+
+	sg = st->sgl;
+	sg->offset = 0;
+	sg->length = obj->base.size;
+
+	sg_dma_address(sg) = phys->busaddr;
+	sg_dma_len(sg) = obj->base.size;
+
+	obj->phys_handle = phys;
+
+	__i915_gem_object_set_pages(obj, st, sg->length);
+
+	return 0;
+
+err_phys:
+	drm_pci_free(obj->base.dev, phys);
+
+	return err;
+}
+
+static void
+i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
+			       struct sg_table *pages)
+{
+	__i915_gem_object_release_shmem(obj, pages, false);
+
+	if (obj->mm.dirty) {
+		struct address_space *mapping = obj->base.filp->f_mapping;
+		char *vaddr = obj->phys_handle->vaddr;
+		int i;
+
+		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+			struct page *page;
+			char *dst;
+
+			page = shmem_read_mapping_page(mapping, i);
+			if (IS_ERR(page))
+				continue;
+
+			dst = kmap_atomic(page);
+			drm_clflush_virt_range(vaddr, PAGE_SIZE);
+			memcpy(dst, vaddr, PAGE_SIZE);
+			kunmap_atomic(dst);
+
+			set_page_dirty(page);
+			if (obj->mm.madv == I915_MADV_WILLNEED)
+				mark_page_accessed(page);
+			put_page(page);
+			vaddr += PAGE_SIZE;
+		}
+		obj->mm.dirty = false;
+	}
+
+	sg_free_table(pages);
+	kfree(pages);
+
+	drm_pci_free(obj->base.dev, obj->phys_handle);
+}
+
+static void
+i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
+{
+	i915_gem_object_unpin_pages(obj);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
+	.get_pages = i915_gem_object_get_pages_phys,
+	.put_pages = i915_gem_object_put_pages_phys,
+	.release = i915_gem_object_release_phys,
+};
+
+int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
+{
+	struct sg_table *pages;
+	int err;
+
+	if (align > obj->base.size)
+		return -EINVAL;
+
+	if (obj->ops == &i915_gem_phys_ops)
+		return 0;
+
+	if (obj->ops != &i915_gem_shmem_ops)
+		return -EINVAL;
+
+	err = i915_gem_object_unbind(obj);
+	if (err)
+		return err;
+
+	mutex_lock(&obj->mm.lock);
+
+	if (obj->mm.madv != I915_MADV_WILLNEED) {
+		err = -EFAULT;
+		goto err_unlock;
+	}
+
+	if (obj->mm.quirked) {
+		err = -EFAULT;
+		goto err_unlock;
+	}
+
+	if (obj->mm.mapping) {
+		err = -EBUSY;
+		goto err_unlock;
+	}
+
+	pages = __i915_gem_object_unset_pages(obj);
+
+	obj->ops = &i915_gem_phys_ops;
+
+	err = ____i915_gem_object_get_pages(obj);
+	if (err)
+		goto err_xfer;
+
+	/* Perma-pin (until release) the physical set of pages */
+	__i915_gem_object_pin_pages(obj);
+
+	if (!IS_ERR_OR_NULL(pages))
+		i915_gem_shmem_ops.put_pages(obj, pages);
+	mutex_unlock(&obj->mm.lock);
+	return 0;
+
+err_xfer:
+	obj->ops = &i915_gem_shmem_ops;
+	if (!IS_ERR_OR_NULL(pages)) {
+		unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);
+
+		__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
+	}
+err_unlock:
+	mutex_unlock(&obj->mm.lock);
+	return err;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_phys.c"
+#endif
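
For context, a hypothetical caller of the interface this file exports (not
taken from the patch): the phys backend is meant for hardware that needs a
physically contiguous buffer, such as the old cursor planes that take a
physical address, and the conversion perma-pins the pages until release.

/* Hypothetical helper: swap shmem backing for one contiguous DMA block. */
static int example_pin_as_phys(struct drm_i915_gem_object *obj, int align)
{
	int err;

	err = i915_gem_object_attach_phys(obj, align);
	if (err)
		return err;

	/*
	 * obj->phys_handle->busaddr now holds the bus address of the single
	 * contiguous allocation backing the object.
	 */
	return 0;
}
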
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 9d885b5e6643..78df86d1db7a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -214,6 +214,65 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
 	return ret;
 }
 
+static void
+shmem_truncate(struct drm_i915_gem_object *obj)
+{
+	/*
+	 * Our goal here is to return as much of the memory as
+	 * is possible back to the system as we are called from OOM.
+	 * To do this we must instruct the shmfs to drop all of its
+	 * backing pages, *now*.
+	 */
+	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
+	obj->mm.madv = __I915_MADV_PURGED;
+	obj->mm.pages = ERR_PTR(-EFAULT);
+}
+
+static void
+shmem_writeback(struct drm_i915_gem_object *obj)
+{
+	struct address_space *mapping;
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_NONE,
+		.nr_to_write = SWAP_CLUSTER_MAX,
+		.range_start = 0,
+		.range_end = LLONG_MAX,
+		.for_reclaim = 1,
+	};
+	unsigned long i;
+
+	/*
+	 * Leave mmapings intact (GTT will have been revoked on unbinding,
+	 * leaving only CPU mmapings around) and add those pages to the LRU
+	 * instead of invoking writeback so they are aged and paged out
+	 * as normal.
+	 */
+	mapping = obj->base.filp->f_mapping;
+
+	/* Begin writeback on each dirty page */
+	for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) {
+		struct page *page;
+
+		page = find_lock_entry(mapping, i);
+		if (!page || xa_is_value(page))
+			continue;
+
+		if (!page_mapped(page) && clear_page_dirty_for_io(page)) {
+			int ret;
+
+			SetPageReclaim(page);
+			ret = mapping->a_ops->writepage(page, &wbc);
+			if (!PageWriteback(page))
+				ClearPageReclaim(page);
+			if (!ret)
+				goto put;
+		}
+		unlock_page(page);
+put:
+		put_page(page);
+	}
+}
+
 void
 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
 				struct sg_table *pages,
@@ -363,6 +422,8 @@ const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
 
 	.get_pages = shmem_get_pages,
 	.put_pages = shmem_put_pages,
+	.truncate = shmem_truncate,
+	.writeback = shmem_writeback,
 
 	.pwrite = shmem_pwrite,
 };
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
new file mode 100644
index 000000000000..b76b503b3999
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
@@ -0,0 +1,80 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include "../../i915_selftest.h"
+
+#include "../../selftests/mock_gem_device.h"
+
+static int mock_phys_object(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	int err;
+
+	/* Create an object and bind it to a contiguous set of physical pages,
+	 * i.e. exercise the i915_gem_object_phys API.
+	 */
+
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		pr_err("i915_gem_object_create failed, err=%d\n", err);
+		goto out;
+	}
+
+	mutex_lock(&i915->drm.struct_mutex);
+	err = i915_gem_object_attach_phys(obj, PAGE_SIZE);
+	mutex_unlock(&i915->drm.struct_mutex);
+	if (err) {
+		pr_err("i915_gem_object_attach_phys failed, err=%d\n", err);
+		goto out_obj;
+	}
+
+	if (obj->ops != &i915_gem_phys_ops) {
+		pr_err("i915_gem_object_attach_phys did not create a phys object\n");
+		err = -EINVAL;
+		goto out_obj;
+	}
+
+	if (!atomic_read(&obj->mm.pages_pin_count)) {
+		pr_err("i915_gem_object_attach_phys did not pin its phys pages\n");
+		err = -EINVAL;
+		goto out_obj;
+	}
+
+	/* Make the object dirty so that put_pages must do copy back the data */
+	mutex_lock(&i915->drm.struct_mutex);
+	err = i915_gem_object_set_to_gtt_domain(obj, true);
+	mutex_unlock(&i915->drm.struct_mutex);
+	if (err) {
+		pr_err("i915_gem_object_set_to_gtt_domain failed with err=%d\n",
+		       err);
+		goto out_obj;
+	}
+
+out_obj:
+	i915_gem_object_put(obj);
+out:
+	return err;
+}
+
+int i915_gem_phys_mock_selftests(void)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(mock_phys_object),
+	};
+	struct drm_i915_private *i915;
+	int err;
+
+	i915 = mock_gem_device();
+	if (!i915)
+		return -ENOMEM;
+
+	err = i915_subtests(tests, i915);
+
+	drm_dev_put(&i915->drm);
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 007334e892a3..ac9723466488 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2975,8 +2975,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     const struct i915_ggtt_view *view,
 				     unsigned int flags);
 void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
-int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
-				int align);
 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file);
 void i915_gem_release(struct drm_device *dev, struct drm_file *file);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4c5fba0c7bf6..0549b73f01ff 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -26,7 +26,6 @@
  */
 
 #include <drm/drm_vma_manager.h>
-#include <drm/drm_pci.h>
 #include <drm/i915_drm.h>
 #include <linux/dma-fence-array.h>
 #include <linux/kthread.h>
@@ -98,133 +97,6 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
-{
-	struct address_space *mapping = obj->base.filp->f_mapping;
-	drm_dma_handle_t *phys;
-	struct sg_table *st;
-	struct scatterlist *sg;
-	char *vaddr;
-	int i;
-	int err;
-
-	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
-		return -EINVAL;
-
-	/* Always aligning to the object size, allows a single allocation
-	 * to handle all possible callers, and given typical object sizes,
-	 * the alignment of the buddy allocation will naturally match.
-	 */
-	phys = drm_pci_alloc(obj->base.dev,
-			     roundup_pow_of_two(obj->base.size),
-			     roundup_pow_of_two(obj->base.size));
-	if (!phys)
-		return -ENOMEM;
-
-	vaddr = phys->vaddr;
-	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
-		struct page *page;
-		char *src;
-
-		page = shmem_read_mapping_page(mapping, i);
-		if (IS_ERR(page)) {
-			err = PTR_ERR(page);
-			goto err_phys;
-		}
-
-		src = kmap_atomic(page);
-		memcpy(vaddr, src, PAGE_SIZE);
-		drm_clflush_virt_range(vaddr, PAGE_SIZE);
-		kunmap_atomic(src);
-
-		put_page(page);
-		vaddr += PAGE_SIZE;
-	}
-
-	i915_gem_chipset_flush(to_i915(obj->base.dev));
-
-	st = kmalloc(sizeof(*st), GFP_KERNEL);
-	if (!st) {
-		err = -ENOMEM;
-		goto err_phys;
-	}
-
-	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
-		kfree(st);
-		err = -ENOMEM;
-		goto err_phys;
-	}
-
-	sg = st->sgl;
-	sg->offset = 0;
-	sg->length = obj->base.size;
-
-	sg_dma_address(sg) = phys->busaddr;
-	sg_dma_len(sg) = obj->base.size;
-
-	obj->phys_handle = phys;
-
-	__i915_gem_object_set_pages(obj, st, sg->length);
-
-	return 0;
-
-err_phys:
-	drm_pci_free(obj->base.dev, phys);
-
-	return err;
-}
-
-static void
-i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
-			       struct sg_table *pages)
-{
-	__i915_gem_object_release_shmem(obj, pages, false);
-
-	if (obj->mm.dirty) {
-		struct address_space *mapping = obj->base.filp->f_mapping;
-		char *vaddr = obj->phys_handle->vaddr;
-		int i;
-
-		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
-			struct page *page;
-			char *dst;
-
-			page = shmem_read_mapping_page(mapping, i);
-			if (IS_ERR(page))
-				continue;
-
-			dst = kmap_atomic(page);
-			drm_clflush_virt_range(vaddr, PAGE_SIZE);
-			memcpy(dst, vaddr, PAGE_SIZE);
-			kunmap_atomic(dst);
-
-			set_page_dirty(page);
-			if (obj->mm.madv == I915_MADV_WILLNEED)
-				mark_page_accessed(page);
-			put_page(page);
-			vaddr += PAGE_SIZE;
-		}
-		obj->mm.dirty = false;
-	}
-
-	sg_free_table(pages);
-	kfree(pages);
-
-	drm_pci_free(obj->base.dev, obj->phys_handle);
-}
-
-static void
-i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
-{
-	i915_gem_object_unpin_pages(obj);
-}
-
-static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
-	.get_pages = i915_gem_object_get_pages_phys,
-	.put_pages = i915_gem_object_put_pages_phys,
-	.release = i915_gem_object_release_phys,
-};
-
 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
@@ -1937,11 +1809,6 @@ static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
 	return err;
 }
 
-static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
-{
-	drm_gem_free_mmap_offset(&obj->base);
-}
-
 int
 i915_gem_mmap_gtt(struct drm_file *file,
 		  struct drm_device *dev,
@@ -1987,111 +1854,6 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
 	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
 }
 
-/* Immediately discard the backing storage */
-void __i915_gem_object_truncate(struct drm_i915_gem_object *obj)
-{
-	i915_gem_object_free_mmap_offset(obj);
-
-	if (obj->base.filp == NULL)
-		return;
-
-	/* Our goal here is to return as much of the memory as
-	 * is possible back to the system as we are called from OOM.
-	 * To do this we must instruct the shmfs to drop all of its
-	 * backing pages, *now*.
-	 */
-	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
-	obj->mm.madv = __I915_MADV_PURGED;
-	obj->mm.pages = ERR_PTR(-EFAULT);
-}
-
-static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
-{
-	struct radix_tree_iter iter;
-	void __rcu **slot;
-
-	rcu_read_lock();
-	radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
-		radix_tree_delete(&obj->mm.get_page.radix, iter.index);
-	rcu_read_unlock();
-}
-
-static struct sg_table *
-__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct sg_table *pages;
-
-	pages = fetch_and_zero(&obj->mm.pages);
-	if (IS_ERR_OR_NULL(pages))
-		return pages;
-
-	spin_lock(&i915->mm.obj_lock);
-	list_del(&obj->mm.link);
-	spin_unlock(&i915->mm.obj_lock);
-
-	if (obj->mm.mapping) {
-		void *ptr;
-
-		ptr = page_mask_bits(obj->mm.mapping);
-		if (is_vmalloc_addr(ptr))
-			vunmap(ptr);
-		else
-			kunmap(kmap_to_page(ptr));
-
-		obj->mm.mapping = NULL;
-	}
-
-	__i915_gem_object_reset_page_iter(obj);
-	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
-
-	return pages;
-}
-
-int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
-				enum i915_mm_subclass subclass)
-{
-	struct sg_table *pages;
-	int ret;
-
-	if (i915_gem_object_has_pinned_pages(obj))
-		return -EBUSY;
-
-	GEM_BUG_ON(obj->bind_count);
-
-	/* May be called by shrinker from within get_pages() (on another bo) */
-	mutex_lock_nested(&obj->mm.lock, subclass);
-	if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
-		ret = -EBUSY;
-		goto unlock;
-	}
-
-	/*
-	 * ->put_pages might need to allocate memory for the bit17 swizzle
-	 * array, hence protect them from being reaped by removing them from gtt
-	 * lists early.
-	 */
-	pages = __i915_gem_object_unset_pages(obj);
-
-	/*
-	 * XXX Temporary hijinx to avoid updating all backends to handle
-	 * NULL pages. In the future, when we have more asynchronous
-	 * get_pages backends we should be better able to handle the
-	 * cancellation of the async task in a more uniform manner.
-	 */
-	if (!pages && !i915_gem_object_needs_async_cancel(obj))
-		pages = ERR_PTR(-EINVAL);
-
-	if (!IS_ERR(pages))
-		obj->ops->put_pages(obj, pages);
-
-	ret = 0;
-unlock:
-	mutex_unlock(&obj->mm.lock);
-
-	return ret;
-}
-
 bool i915_sg_trim(struct sg_table *orig_st)
 {
 	struct sg_table new_st;
@@ -2120,252 +1882,6 @@ bool i915_sg_trim(struct sg_table *orig_st)
 	return true;
 }
 
-void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
-				 struct sg_table *pages,
-				 unsigned int sg_page_sizes)
-{
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	unsigned long supported = INTEL_INFO(i915)->page_sizes;
-	int i;
-
-	lockdep_assert_held(&obj->mm.lock);
-
-	/* Make the pages coherent with the GPU (flushing any swapin). */
-	if (obj->cache_dirty) {
-		obj->write_domain = 0;
-		if (i915_gem_object_has_struct_page(obj))
-			drm_clflush_sg(pages);
-		obj->cache_dirty = false;
-	}
-
-	obj->mm.get_page.sg_pos = pages->sgl;
-	obj->mm.get_page.sg_idx = 0;
-
-	obj->mm.pages = pages;
-
-	if (i915_gem_object_is_tiled(obj) &&
-	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
-		GEM_BUG_ON(obj->mm.quirked);
-		__i915_gem_object_pin_pages(obj);
-		obj->mm.quirked = true;
-	}
-
-	GEM_BUG_ON(!sg_page_sizes);
-	obj->mm.page_sizes.phys = sg_page_sizes;
-
-	/*
-	 * Calculate the supported page-sizes which fit into the given
-	 * sg_page_sizes. This will give us the page-sizes which we may be able
-	 * to use opportunistically when later inserting into the GTT. For
-	 * example if phys=2G, then in theory we should be able to use 1G, 2M,
-	 * 64K or 4K pages, although in practice this will depend on a number of
-	 * other factors.
-	 */
-	obj->mm.page_sizes.sg = 0;
-	for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
-		if (obj->mm.page_sizes.phys & ~0u << i)
-			obj->mm.page_sizes.sg |= BIT(i);
-	}
-	GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
-
-	spin_lock(&i915->mm.obj_lock);
-	list_add(&obj->mm.link, &i915->mm.unbound_list);
-	spin_unlock(&i915->mm.obj_lock);
-}
-
-static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
-{
-	int err;
-
-	if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
-		DRM_DEBUG("Attempting to obtain a purgeable object\n");
-		return -EFAULT;
-	}
-
-	err = obj->ops->get_pages(obj);
-	GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
-
-	return err;
-}
-
-/* Ensure that the associated pages are gathered from the backing storage
- * and pinned into our object. i915_gem_object_pin_pages() may be called
- * multiple times before they are released by a single call to
- * i915_gem_object_unpin_pages() - once the pages are no longer referenced
- * either as a result of memory pressure (reaping pages under the shrinker)
- * or as the object is itself released.
- */
-int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
-{
-	int err;
-
-	err = mutex_lock_interruptible(&obj->mm.lock);
-	if (err)
-		return err;
-
-	if (unlikely(!i915_gem_object_has_pages(obj))) {
-		GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
-
-		err = ____i915_gem_object_get_pages(obj);
-		if (err)
-			goto unlock;
-
-		smp_mb__before_atomic();
-	}
-	atomic_inc(&obj->mm.pages_pin_count);
-
-unlock:
-	mutex_unlock(&obj->mm.lock);
-	return err;
-}
-
-/* The 'mapping' part of i915_gem_object_pin_map() below */
-static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
-				 enum i915_map_type type)
-{
-	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
-	struct sg_table *sgt = obj->mm.pages;
-	struct sgt_iter sgt_iter;
-	struct page *page;
-	struct page *stack_pages[32];
-	struct page **pages = stack_pages;
-	unsigned long i = 0;
-	pgprot_t pgprot;
-	void *addr;
-
-	/* A single page can always be kmapped */
-	if (n_pages == 1 && type == I915_MAP_WB)
-		return kmap(sg_page(sgt->sgl));
-
-	if (n_pages > ARRAY_SIZE(stack_pages)) {
-		/* Too big for stack -- allocate temporary array instead */
-		pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
-		if (!pages)
-			return NULL;
-	}
-
-	for_each_sgt_page(page, sgt_iter, sgt)
-		pages[i++] = page;
-
-	/* Check that we have the expected number of pages */
-	GEM_BUG_ON(i != n_pages);
-
-	switch (type) {
-	default:
-		MISSING_CASE(type);
-		/* fallthrough to use PAGE_KERNEL anyway */
-	case I915_MAP_WB:
-		pgprot = PAGE_KERNEL;
-		break;
-	case I915_MAP_WC:
-		pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
-		break;
-	}
-	addr = vmap(pages, n_pages, 0, pgprot);
-
-	if (pages != stack_pages)
-		kvfree(pages);
-
-	return addr;
-}
-
-/* get, pin, and map the pages of the object into kernel space */
-void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
-			      enum i915_map_type type)
-{
-	enum i915_map_type has_type;
-	bool pinned;
-	void *ptr;
-	int ret;
-
-	if (unlikely(!i915_gem_object_has_struct_page(obj)))
-		return ERR_PTR(-ENXIO);
-
-	ret = mutex_lock_interruptible(&obj->mm.lock);
-	if (ret)
-		return ERR_PTR(ret);
-
-	pinned = !(type & I915_MAP_OVERRIDE);
-	type &= ~I915_MAP_OVERRIDE;
-
-	if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
-		if (unlikely(!i915_gem_object_has_pages(obj))) {
-			GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
-
-			ret = ____i915_gem_object_get_pages(obj);
-			if (ret)
-				goto err_unlock;
-
-			smp_mb__before_atomic();
-		}
-		atomic_inc(&obj->mm.pages_pin_count);
-		pinned = false;
-	}
-	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
-
-	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
-	if (ptr && has_type != type) {
-		if (pinned) {
-			ret = -EBUSY;
-			goto err_unpin;
-		}
-
-		if (is_vmalloc_addr(ptr))
-			vunmap(ptr);
-		else
-			kunmap(kmap_to_page(ptr));
-
-		ptr = obj->mm.mapping = NULL;
-	}
-
-	if (!ptr) {
-		ptr = i915_gem_object_map(obj, type);
-		if (!ptr) {
-			ret = -ENOMEM;
-			goto err_unpin;
-		}
-
-		obj->mm.mapping = page_pack_bits(ptr, type);
-	}
-
-out_unlock:
-	mutex_unlock(&obj->mm.lock);
-	return ptr;
-
-err_unpin:
-	atomic_dec(&obj->mm.pages_pin_count);
-err_unlock:
-	ptr = ERR_PTR(ret);
-	goto out_unlock;
-}
-
-void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
-				 unsigned long offset,
-				 unsigned long size)
-{
-	enum i915_map_type has_type;
-	void *ptr;
-
-	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
-	GEM_BUG_ON(range_overflows_t(typeof(obj->base.size),
-				     offset, size, obj->base.size));
-
-	obj->mm.dirty = true;
-
-	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)
-		return;
-
-	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
-	if (has_type == I915_MAP_WC)
-		return;
-
-	drm_clflush_virt_range(ptr + offset, size);
-	if (size == obj->base.size) {
-		obj->write_domain &= ~I915_GEM_DOMAIN_CPU;
-		obj->cache_dirty = false;
-	}
-}
-
 static unsigned long to_wait_timeout(s64 timeout_ns)
 {
 	if (timeout_ns < 0)
@@ -3354,7 +2870,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 	/* if the object is no longer attached, discard its backing storage */
 	if (obj->mm.madv == I915_MADV_DONTNEED &&
 	    !i915_gem_object_has_pages(obj))
-		__i915_gem_object_truncate(obj);
+		i915_gem_object_truncate(obj);
 
 	args->retained = obj->mm.madv != __I915_MADV_PURGED;
 	mutex_unlock(&obj->mm.lock);
@@ -4126,219 +3642,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 	}
 }
 
-struct scatterlist *
-i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
-		       unsigned int n,
-		       unsigned int *offset)
-{
-	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
-	struct scatterlist *sg;
-	unsigned int idx, count;
-
-	might_sleep();
-	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
-	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
-
-	/* As we iterate forward through the sg, we record each entry in a
-	 * radixtree for quick repeated (backwards) lookups. If we have seen
-	 * this index previously, we will have an entry for it.
-	 *
-	 * Initial lookup is O(N), but this is amortized to O(1) for
-	 * sequential page access (where each new request is consecutive
-	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
-	 * i.e. O(1) with a large constant!
-	 */
-	if (n < READ_ONCE(iter->sg_idx))
-		goto lookup;
-
-	mutex_lock(&iter->lock);
-
-	/* We prefer to reuse the last sg so that repeated lookup of this
-	 * (or the subsequent) sg are fast - comparing against the last
-	 * sg is faster than going through the radixtree.
-	 */
-
-	sg = iter->sg_pos;
-	idx = iter->sg_idx;
-	count = __sg_page_count(sg);
-
-	while (idx + count <= n) {
-		void *entry;
-		unsigned long i;
-		int ret;
-
-		/* If we cannot allocate and insert this entry, or the
-		 * individual pages from this range, cancel updating the
-		 * sg_idx so that on this lookup we are forced to linearly
-		 * scan onwards, but on future lookups we will try the
-		 * insertion again (in which case we need to be careful of
-		 * the error return reporting that we have already inserted
-		 * this index).
-		 */
-		ret = radix_tree_insert(&iter->radix, idx, sg);
-		if (ret && ret != -EEXIST)
-			goto scan;
-
-		entry = xa_mk_value(idx);
-		for (i = 1; i < count; i++) {
-			ret = radix_tree_insert(&iter->radix, idx + i, entry);
-			if (ret && ret != -EEXIST)
-				goto scan;
-		}
-
-		idx += count;
-		sg = ____sg_next(sg);
-		count = __sg_page_count(sg);
-	}
-
-scan:
-	iter->sg_pos = sg;
-	iter->sg_idx = idx;
-
-	mutex_unlock(&iter->lock);
-
-	if (unlikely(n < idx)) /* insertion completed by another thread */
-		goto lookup;
-
-	/* In case we failed to insert the entry into the radixtree, we need
-	 * to look beyond the current sg.
-	 */
-	while (idx + count <= n) {
-		idx += count;
-		sg = ____sg_next(sg);
-		count = __sg_page_count(sg);
-	}
-
-	*offset = n - idx;
-	return sg;
-
-lookup:
-	rcu_read_lock();
-
-	sg = radix_tree_lookup(&iter->radix, n);
-	GEM_BUG_ON(!sg);
-
-	/* If this index is in the middle of multi-page sg entry,
-	 * the radix tree will contain a value entry that points
-	 * to the start of that range. We will return the pointer to
-	 * the base page and the offset of this page within the
-	 * sg entry's range.
-	 */
-	*offset = 0;
-	if (unlikely(xa_is_value(sg))) {
-		unsigned long base = xa_to_value(sg);
-
-		sg = radix_tree_lookup(&iter->radix, base);
-		GEM_BUG_ON(!sg);
-
-		*offset = n - base;
-	}
-
-	rcu_read_unlock();
-
-	return sg;
-}
-
-struct page *
-i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
-{
-	struct scatterlist *sg;
-	unsigned int offset;
-
-	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
-
-	sg = i915_gem_object_get_sg(obj, n, &offset);
-	return nth_page(sg_page(sg), offset);
-}
-
-/* Like i915_gem_object_get_page(), but mark the returned page dirty */
-struct page *
-i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
-			       unsigned int n)
-{
-	struct page *page;
-
-	page = i915_gem_object_get_page(obj, n);
-	if (!obj->mm.dirty)
-		set_page_dirty(page);
-
-	return page;
-}
-
-dma_addr_t
-i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
-				unsigned long n)
-{
-	struct scatterlist *sg;
-	unsigned int offset;
-
-	sg = i915_gem_object_get_sg(obj, n, &offset);
-	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
-}
-
-int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
-{
-	struct sg_table *pages;
-	int err;
-
-	if (align > obj->base.size)
-		return -EINVAL;
-
-	if (obj->ops == &i915_gem_phys_ops)
-		return 0;
-
-	if (obj->ops != &i915_gem_shmem_ops)
-		return -EINVAL;
-
-	err = i915_gem_object_unbind(obj);
-	if (err)
-		return err;
-
-	mutex_lock(&obj->mm.lock);
-
-	if (obj->mm.madv != I915_MADV_WILLNEED) {
-		err = -EFAULT;
-		goto err_unlock;
-	}
-
-	if (obj->mm.quirked) {
-		err = -EFAULT;
-		goto err_unlock;
-	}
-
-	if (obj->mm.mapping) {
-		err = -EBUSY;
-		goto err_unlock;
-	}
-
-	pages = __i915_gem_object_unset_pages(obj);
-
-	obj->ops = &i915_gem_phys_ops;
-
-	err = ____i915_gem_object_get_pages(obj);
-	if (err)
-		goto err_xfer;
-
-	/* Perma-pin (until release) the physical set of pages */
-	__i915_gem_object_pin_pages(obj);
-
-	if (!IS_ERR_OR_NULL(pages))
-		i915_gem_shmem_ops.put_pages(obj, pages);
-	mutex_unlock(&obj->mm.lock);
-	return 0;
-
-err_xfer:
-	obj->ops = &i915_gem_shmem_ops;
-	if (!IS_ERR_OR_NULL(pages)) {
-		unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);
-
-		__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
-	}
-err_unlock:
-	mutex_unlock(&obj->mm.lock);
-	return err;
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/scatterlist.c"
 #include "selftests/mock_gem_device.c"
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 588e3898b120..2c7aefb3e101 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -114,65 +114,18 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
 	return !i915_gem_object_has_pages(obj);
 }
 
-static void __start_writeback(struct drm_i915_gem_object *obj,
-			      unsigned int flags)
+static void try_to_writeback(struct drm_i915_gem_object *obj,
+			     unsigned int flags)
 {
-	struct address_space *mapping;
-	struct writeback_control wbc = {
-		.sync_mode = WB_SYNC_NONE,
-		.nr_to_write = SWAP_CLUSTER_MAX,
-		.range_start = 0,
-		.range_end = LLONG_MAX,
-		.for_reclaim = 1,
-	};
-	unsigned long i;
-
-	lockdep_assert_held(&obj->mm.lock);
-	GEM_BUG_ON(i915_gem_object_has_pages(obj));
-
 	switch (obj->mm.madv) {
 	case I915_MADV_DONTNEED:
-		__i915_gem_object_truncate(obj);
+		i915_gem_object_truncate(obj);
 	case __I915_MADV_PURGED:
 		return;
 	}
 
-	if (!obj->base.filp)
-		return;
-
-	if (!(flags & I915_SHRINK_WRITEBACK))
-		return;
-
-	/*
-	 * Leave mmapings intact (GTT will have been revoked on unbinding,
-	 * leaving only CPU mmapings around) and add those pages to the LRU
-	 * instead of invoking writeback so they are aged and paged out
-	 * as normal.
-	 */
-	mapping = obj->base.filp->f_mapping;
-
-	/* Begin writeback on each dirty page */
-	for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) {
-		struct page *page;
-
-		page = find_lock_entry(mapping, i);
-		if (!page || xa_is_value(page))
-			continue;
-
-		if (!page_mapped(page) && clear_page_dirty_for_io(page)) {
-			int ret;
-
-			SetPageReclaim(page);
-			ret = mapping->a_ops->writepage(page, &wbc);
-			if (!PageWriteback(page))
-				ClearPageReclaim(page);
-			if (!ret)
-				goto put;
-		}
-		unlock_page(page);
-put:
-		put_page(page);
-	}
+	if (flags & I915_SHRINK_WRITEBACK)
+		i915_gem_object_writeback(obj);
 }
 
 /**
@@ -315,7 +268,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
 				mutex_lock_nested(&obj->mm.lock,
 						  I915_MM_SHRINKER);
 				if (!i915_gem_object_has_pages(obj)) {
-					__start_writeback(obj, flags);
+					try_to_writeback(obj, flags);
 					count += obj->base.size >> PAGE_SHIFT;
 				}
 				mutex_unlock(&obj->mm.lock);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index ff31fe0cecec..37f32c2eca19 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -49,59 +49,6 @@ static int igt_gem_object(void *arg)
 	return err;
 }
 
-static int igt_phys_object(void *arg)
-{
-	struct drm_i915_private *i915 = arg;
-	struct drm_i915_gem_object *obj;
-	int err;
-
-	/* Create an object and bind it to a contiguous set of physical pages,
-	 * i.e. exercise the i915_gem_object_phys API.
-	 */
-
-	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
-	if (IS_ERR(obj)) {
-		err = PTR_ERR(obj);
-		pr_err("i915_gem_object_create failed, err=%d\n", err);
-		goto out;
-	}
-
-	mutex_lock(&i915->drm.struct_mutex);
-	err = i915_gem_object_attach_phys(obj, PAGE_SIZE);
-	mutex_unlock(&i915->drm.struct_mutex);
-	if (err) {
-		pr_err("i915_gem_object_attach_phys failed, err=%d\n", err);
-		goto out_obj;
-	}
-
-	if (obj->ops != &i915_gem_phys_ops) {
-		pr_err("i915_gem_object_attach_phys did not create a phys object\n");
-		err = -EINVAL;
-		goto out_obj;
-	}
-
-	if (!atomic_read(&obj->mm.pages_pin_count)) {
-		pr_err("i915_gem_object_attach_phys did not pin its phys pages\n");
-		err = -EINVAL;
-		goto out_obj;
-	}
-
-	/* Make the object dirty so that put_pages must do copy back the data */
-	mutex_lock(&i915->drm.struct_mutex);
-	err = i915_gem_object_set_to_gtt_domain(obj, true);
-	mutex_unlock(&i915->drm.struct_mutex);
-	if (err) {
-		pr_err("i915_gem_object_set_to_gtt_domain failed with err=%d\n",
-		       err);
-		goto out_obj;
-	}
-
-out_obj:
-	i915_gem_object_put(obj);
-out:
-	return err;
-}
-
 static int igt_gem_huge(void *arg)
 {
 	const unsigned int nreal = 509; /* just to be awkward */
@@ -631,7 +578,6 @@ int i915_gem_object_mock_selftests(void)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_gem_object),
-		SUBTEST(igt_phys_object),
 	};
 	struct drm_i915_private *i915;
 	int err;
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index 88e5ab586337..510eb176bb2c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -18,6 +18,7 @@ selftest(engine, intel_engine_cs_mock_selftests)
 selftest(timelines, i915_timeline_mock_selftests)
 selftest(requests, i915_request_mock_selftests)
 selftest(objects, i915_gem_object_mock_selftests)
+selftest(phys, i915_gem_phys_mock_selftests)
 selftest(dmabuf, i915_gem_dmabuf_mock_selftests)
 selftest(vma, i915_vma_mock_selftests)
 selftest(evict, i915_gem_evict_mock_selftests)
-- 
2.20.1


* [PATCH 29/45] drm/i915: Move mmap and friends to its own file
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (26 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 28/45] drm/i915: Move phys objects " Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 30/45] drm/i915: Move GEM domain management " Chris Wilson
                   ` (23 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

Continuing the decluttering of i915_gem.c, now it is the turn of do_mmap
and the fault handlers.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_mman.c      | 505 ++++++++++++++++
 drivers/gpu/drm/i915/gem/i915_gem_object.c    |  56 ++
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |   7 +
 .../drm/i915/gem/selftests/i915_gem_mman.c    | 503 ++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.h               |   1 -
 drivers/gpu/drm/i915/i915_gem.c               | 561 +-----------------
 drivers/gpu/drm/i915/i915_gem_tiling.c        |   2 +-
 .../gpu/drm/i915/selftests/i915_gem_object.c  | 487 ---------------
 .../drm/i915/selftests/i915_live_selftests.h  |   1 +
 10 files changed, 1088 insertions(+), 1036 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_mman.c
 create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
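
For orientation, a hedged userspace-side sketch (not part of the patch;
include paths and error handling simplified): i915_gem_mmap_ioctl(), moved
below, services the legacy DRM_IOCTL_I915_GEM_MMAP path that maps an
object's shmem pages directly into the caller's address space.

/* Hypothetical userspace caller of the legacy CPU-mmap ioctl. */
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static void *example_legacy_cpu_mmap(int drm_fd, uint32_t handle,
				     uint64_t size)
{
	struct drm_i915_gem_mmap arg = {
		.handle = handle,
		.size = size,
		.flags = 0,	/* or I915_MMAP_WC for write-combining */
	};

	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_MMAP, &arg))
		return NULL;

	return (void *)(uintptr_t)arg.addr_ptr;
}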

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 84277b29389c..b86af182b1ac 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -88,6 +88,7 @@ i915-y += $(gt-y)
 obj-y += gem/
 gem-y += \
 	gem/i915_gem_object.o \
+	gem/i915_gem_mman.o \
 	gem/i915_gem_pages.o \
 	gem/i915_gem_phys.o \
 	gem/i915_gem_shmem.o
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
new file mode 100644
index 000000000000..1bcc6e1091e9
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -0,0 +1,505 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/mman.h>
+#include <linux/sizes.h>
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+#include "../i915_gem_gtt.h"
+#include "../i915_vma.h"
+#include "../i915_drv.h"
+#include "../intel_drv.h"
+
+static inline bool
+__vma_matches(struct vm_area_struct *vma, struct file *filp,
+	      unsigned long addr, unsigned long size)
+{
+	if (vma->vm_file != filp)
+		return false;
+
+	return vma->vm_start == addr &&
+	       (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
+}
+
+/**
+ * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
+ *			 it is mapped to.
+ * @dev: drm device
+ * @data: ioctl data blob
+ * @file: drm file
+ *
+ * While the mapping holds a reference on the contents of the object, it doesn't
+ * imply a ref on the object itself.
+ *
+ * IMPORTANT:
+ *
+ * DRM driver writers who look at this function as an example for how to do GEM
+ * mmap support, please don't implement mmap support like this. The modern way
+ * to implement DRM mmap support is with an mmap offset ioctl (like
+ * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
+ * That way debug tooling like valgrind will understand what's going on; hiding
+ * the mmap call in a driver-private ioctl will break that. The i915 driver only
+ * does cpu mmaps this way because we didn't know better.
+ */
+int
+i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
+		    struct drm_file *file)
+{
+	struct drm_i915_gem_mmap *args = data;
+	struct drm_i915_gem_object *obj;
+	unsigned long addr;
+
+	if (args->flags & ~(I915_MMAP_WC))
+		return -EINVAL;
+
+	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
+		return -ENODEV;
+
+	obj = i915_gem_object_lookup(file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	/* prime objects have no backing filp to GEM mmap
+	 * pages from.
+	 */
+	if (!obj->base.filp) {
+		addr = -ENXIO;
+		goto err;
+	}
+
+	if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
+		addr = -EINVAL;
+		goto err;
+	}
+
+	addr = vm_mmap(obj->base.filp, 0, args->size,
+		       PROT_READ | PROT_WRITE, MAP_SHARED,
+		       args->offset);
+	if (IS_ERR_VALUE(addr))
+		goto err;
+
+	if (args->flags & I915_MMAP_WC) {
+		struct mm_struct *mm = current->mm;
+		struct vm_area_struct *vma;
+
+		if (down_write_killable(&mm->mmap_sem)) {
+			addr = -EINTR;
+			goto err;
+		}
+		vma = find_vma(mm, addr);
+		if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
+			vma->vm_page_prot =
+				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+		else
+			addr = -ENOMEM;
+		up_write(&mm->mmap_sem);
+		if (IS_ERR_VALUE(addr))
+			goto err;
+
+		/* This may race, but that's ok, it only gets set */
+		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
+	}
+	i915_gem_object_put(obj);
+
+	args->addr_ptr = (u64)addr;
+	return 0;
+
+err:
+	i915_gem_object_put(obj);
+	return addr;
+}
+
+static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
+{
+	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
+}
+
+/**
+ * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
+ *
+ * A history of the GTT mmap interface:
+ *
+ * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
+ *     be aligned and suitable for fencing, and still fit into the available
+ *     mappable space left by the pinned display objects. A classic problem
+ *     we called the page-fault-of-doom where we would ping-pong between
+ *     two objects that could not fit inside the GTT and so the memcpy
+ *     would page one object in at the expense of the other between every
+ *     single byte.
+ *
+ * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
+ *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
+ *     object is too large for the available space (or simply too large
+ *     for the mappable aperture!), a view is created instead and faulted
+ *     into userspace. (This view is aligned and sized appropriately for
+ *     fenced access.)
+ *
+ * 2 - Recognise WC as a separate cache domain so that we can flush the
+ *     delayed writes via GTT before performing direct access via WC.
+ *
+ * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
+ *     pagefault; swapin remains transparent.
+ *
+ * Restrictions:
+ *
+ *  * snoopable objects cannot be accessed via the GTT. Doing so can cause machine
+ *    hangs on some architectures, corruption on others. An attempt to service
+ *    a GTT page fault from a snoopable object will generate a SIGBUS.
+ *
+ *  * the object must be able to fit into RAM (physical memory, though not
+ *    limited to the mappable aperture).
+ *
+ *
+ * Caveats:
+ *
+ *  * a new GTT page fault will synchronize rendering from the GPU and flush
+ *    all data to system memory. Subsequent access will not be synchronized.
+ *
+ *  * all mappings are revoked on runtime device suspend.
+ *
+ *  * there are only 8, 16 or 32 fence registers to share between all users
+ *    (older machines require a fence register for display and blitter access
+ *    as well). Contention of the fence registers will cause the previous users
+ *    to be unmapped and any new access will generate new page faults.
+ *
+ *  * running out of memory while servicing a fault may generate a SIGBUS,
+ *    rather than the expected SIGSEGV.
+ */
+int i915_gem_mmap_gtt_version(void)
+{
+	return 3;
+}
+
+static inline struct i915_ggtt_view
+compute_partial_view(const struct drm_i915_gem_object *obj,
+		     pgoff_t page_offset,
+		     unsigned int chunk)
+{
+	struct i915_ggtt_view view;
+
+	if (i915_gem_object_is_tiled(obj))
+		chunk = roundup(chunk, tile_row_pages(obj));
+
+	view.type = I915_GGTT_VIEW_PARTIAL;
+	view.partial.offset = rounddown(page_offset, chunk);
+	view.partial.size =
+		min_t(unsigned int, chunk,
+		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
+
+	/* If the partial covers the entire object, just create a normal VMA. */
+	if (chunk >= obj->base.size >> PAGE_SHIFT)
+		view.type = I915_GGTT_VIEW_NORMAL;
+
+	return view;
+}
+
+/**
+ * i915_gem_fault - fault a page into the GTT
+ * @vmf: fault info
+ *
+ * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
+ * from userspace.  The fault handler takes care of binding the object to
+ * the GTT (if needed), allocating and programming a fence register (again,
+ * only if needed based on whether the old reg is still valid or the object
+ * is tiled) and inserting a new PTE into the faulting process.
+ *
+ * Note that the faulting process may involve evicting existing objects
+ * from the GTT and/or fence registers to make room.  So performance may
+ * suffer if the GTT working set is large or there are few fence registers
+ * left.
+ *
+ * The current feature set supported by i915_gem_fault() and thus GTT mmaps
+ * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
+ */
+vm_fault_t i915_gem_fault(struct vm_fault *vmf)
+{
+#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
+	struct vm_area_struct *area = vmf->vma;
+	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct i915_ggtt *ggtt = &i915->ggtt;
+	bool write = area->vm_flags & VM_WRITE;
+	intel_wakeref_t wakeref;
+	struct i915_vma *vma;
+	pgoff_t page_offset;
+	int srcu;
+	int ret;
+
+	/* Sanity check that we allow writing into this object */
+	if (i915_gem_object_is_readonly(obj) && write)
+		return VM_FAULT_SIGBUS;
+
+	/* We don't use vmf->pgoff since that has the fake offset */
+	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
+
+	trace_i915_gem_object_fault(obj, page_offset, true, write);
+
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		goto err;
+
+	wakeref = intel_runtime_pm_get(i915);
+
+	srcu = i915_reset_trylock(i915);
+	if (srcu < 0) {
+		ret = srcu;
+		goto err_rpm;
+	}
+
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		goto err_reset;
+
+	/* Access to snoopable pages through the GTT is incoherent. */
+	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
+		ret = -EFAULT;
+		goto err_unlock;
+	}
+
+	/* Now pin it into the GTT as needed */
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+				       PIN_MAPPABLE |
+				       PIN_NONBLOCK |
+				       PIN_NONFAULT);
+	if (IS_ERR(vma)) {
+		/* Use a partial view if it is bigger than available space */
+		struct i915_ggtt_view view =
+			compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
+		unsigned int flags;
+
+		flags = PIN_MAPPABLE;
+		if (view.type == I915_GGTT_VIEW_NORMAL)
+			flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
+
+		/*
+		 * Userspace is now writing through an untracked VMA, abandon
+		 * all hope that the hardware is able to track future writes.
+		 */
+		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
+
+		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+		if (IS_ERR(vma) && !view.type) {
+			flags = PIN_MAPPABLE;
+			view.type = I915_GGTT_VIEW_PARTIAL;
+			vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+		}
+	}
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_unlock;
+	}
+
+	ret = i915_vma_pin_fence(vma);
+	if (ret)
+		goto err_unpin;
+
+	/* Finally, remap it using the new GTT offset */
+	ret = remap_io_mapping(area,
+			       area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
+			       (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
+			       min_t(u64, vma->size, area->vm_end - area->vm_start),
+			       &ggtt->iomap);
+	if (ret)
+		goto err_fence;
+
+	/* Mark as being mmapped into userspace for later revocation */
+	assert_rpm_wakelock_held(i915);
+	if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
+		list_add(&obj->userfault_link, &i915->mm.userfault_list);
+	GEM_BUG_ON(!obj->userfault_count);
+
+	i915_vma_set_ggtt_write(vma);
+
+err_fence:
+	i915_vma_unpin_fence(vma);
+err_unpin:
+	__i915_vma_unpin(vma);
+err_unlock:
+	mutex_unlock(&dev->struct_mutex);
+err_reset:
+	i915_reset_unlock(i915, srcu);
+err_rpm:
+	intel_runtime_pm_put(i915, wakeref);
+	i915_gem_object_unpin_pages(obj);
+err:
+	switch (ret) {
+	case -EIO:
+		/*
+		 * We eat errors when the gpu is terminally wedged to avoid
+		 * userspace unduly crashing (gl has no provisions for mmaps to
+		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
+		 * and so needs to be reported.
+		 */
+		if (!i915_terminally_wedged(i915))
+			return VM_FAULT_SIGBUS;
+		/* else: fall through */
+	case -EAGAIN:
+		/*
+		 * EAGAIN means the gpu is hung and we'll wait for the error
+		 * handler to reset everything when re-faulting in
+		 * i915_mutex_lock_interruptible.
+		 */
+	case 0:
+	case -ERESTARTSYS:
+	case -EINTR:
+	case -EBUSY:
+		/*
+		 * EBUSY is ok: this just means that another thread
+		 * already did the job.
+		 */
+		return VM_FAULT_NOPAGE;
+	case -ENOMEM:
+		return VM_FAULT_OOM;
+	case -ENOSPC:
+	case -EFAULT:
+		return VM_FAULT_SIGBUS;
+	default:
+		WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret);
+		return VM_FAULT_SIGBUS;
+	}
+}
+
+void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *vma;
+
+	GEM_BUG_ON(!obj->userfault_count);
+
+	obj->userfault_count = 0;
+	list_del(&obj->userfault_link);
+	drm_vma_node_unmap(&obj->base.vma_node,
+			   obj->base.dev->anon_inode->i_mapping);
+
+	for_each_ggtt_vma(vma, obj)
+		i915_vma_unset_userfault(vma);
+}
+
+/**
+ * i915_gem_object_release_mmap - remove physical page mappings
+ * @obj: obj in question
+ *
+ * Preserve the reservation of the mmapping with the DRM core code, but
+ * relinquish ownership of the pages back to the system.
+ *
+ * It is vital that we remove the page mapping if we have mapped a tiled
+ * object through the GTT and then lose the fence register due to
+ * resource pressure. Similarly if the object has been moved out of the
+ * aperture, then pages mapped into userspace must be revoked. Removing the
+ * mapping will then trigger a page fault on the next user access, allowing
+ * fixup by i915_gem_fault().
+ */
+void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	intel_wakeref_t wakeref;
+
+	/* Serialisation between user GTT access and our code depends upon
+	 * revoking the CPU's PTE whilst the mutex is held. The next user
+	 * pagefault then has to wait until we release the mutex.
+	 *
+	 * Note that RPM complicates somewhat by adding an additional
+	 * requirement that operations to the GGTT be made holding the RPM
+	 * wakeref.
+	 */
+	lockdep_assert_held(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(i915);
+
+	if (!obj->userfault_count)
+		goto out;
+
+	__i915_gem_object_release_mmap(obj);
+
+	/* Ensure that the CPU's PTEs are revoked and there are no outstanding
+	 * memory transactions from userspace before we return. The TLB
+	 * flushing implied by changing the PTE above *should* be
+	 * sufficient; an extra barrier here just provides us with a bit
+	 * of paranoid documentation about our requirement to serialise
+	 * memory writes before touching registers / GSM.
+	 */
+	wmb();
+
+out:
+	intel_runtime_pm_put(i915, wakeref);
+}
+
+static int create_mmap_offset(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	int err;
+
+	err = drm_gem_create_mmap_offset(&obj->base);
+	if (likely(!err))
+		return 0;
+
+	/* Attempt to reap some mmap space from dead objects */
+	do {
+		err = i915_gem_wait_for_idle(i915,
+					     I915_WAIT_INTERRUPTIBLE,
+					     MAX_SCHEDULE_TIMEOUT);
+		if (err)
+			break;
+
+		i915_gem_drain_freed_objects(i915);
+		err = drm_gem_create_mmap_offset(&obj->base);
+		if (!err)
+			break;
+
+	} while (flush_delayed_work(&i915->gem.retire_work));
+
+	return err;
+}
+
+int
+i915_gem_mmap_gtt(struct drm_file *file,
+		  struct drm_device *dev,
+		  u32 handle,
+		  u64 *offset)
+{
+	struct drm_i915_gem_object *obj;
+	int ret;
+
+	obj = i915_gem_object_lookup(file, handle);
+	if (!obj)
+		return -ENOENT;
+
+	ret = create_mmap_offset(obj);
+	if (ret == 0)
+		*offset = drm_vma_node_offset_addr(&obj->base.vma_node);
+
+	i915_gem_object_put(obj);
+	return ret;
+}
+
+/**
+ * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
+ * @dev: DRM device
+ * @data: GTT mapping ioctl data
+ * @file: GEM object info
+ *
+ * Simply returns the fake offset to userspace so it can mmap it.
+ * The mmap call will end up in drm_gem_mmap(), which will set things
+ * up so we can get faults in the handler above.
+ *
+ * The fault handler will take care of binding the object into the GTT
+ * (since it may have been evicted to make room for something), allocating
+ * a fence register, and mapping the appropriate aperture address into
+ * userspace.
+ */
+int
+i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file)
+{
+	struct drm_i915_gem_mmap_gtt *args = data;
+
+	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_mman.c"
+#endif
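
As an aside to the i915_gem_mmap_gtt_version() history documented in the new
file above: userspace can query which of those behaviours it is running
against through the GETPARAM interface. A minimal, hypothetical probe (again
not part of this patch, but using the existing uAPI names) could look like:

    #include <sys/ioctl.h>
    #include <drm/i915_drm.h>

    /* Illustrative only: report the GTT mmap feature level. */
    static int gtt_mmap_version(int drm_fd)
    {
        int value = 0;
        struct drm_i915_getparam gp = {
            .param = I915_PARAM_MMAP_GTT_VERSION,
            .value = &value,
        };

        /* Version 3 (as returned by the code above) means the implicit
         * set-domain(GTT) on the first pagefault has been removed. */
        if (ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp))
            return -1;
        return value;
    }
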
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 86e7e88817af..005e17b3acce 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -26,6 +26,7 @@
 
 #include "../i915_drv.h"
 #include "../i915_globals.h"
+#include "../i915_gem_clflush.h"
 #include "../intel_frontbuffer.h"
 
 static struct i915_global_object {
@@ -357,6 +358,61 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
 		i915_gem_object_put(obj);
 }
 
+static inline enum fb_op_origin
+fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
+{
+	return (domain == I915_GEM_DOMAIN_GTT ?
+		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
+}
+
+static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+	return !(obj->cache_level == I915_CACHE_NONE ||
+		 obj->cache_level == I915_CACHE_WT);
+}
+
+void
+i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
+				   unsigned int flush_domains)
+{
+	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+	struct i915_vma *vma;
+
+	if (!(obj->write_domain & flush_domains))
+		return;
+
+	switch (obj->write_domain) {
+	case I915_GEM_DOMAIN_GTT:
+		i915_gem_flush_ggtt_writes(dev_priv);
+
+		intel_fb_obj_flush(obj,
+				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+
+		for_each_ggtt_vma(vma, obj) {
+			if (vma->iomap)
+				continue;
+
+			i915_vma_unset_ggtt_write(vma);
+		}
+		break;
+
+	case I915_GEM_DOMAIN_WC:
+		wmb();
+		break;
+
+	case I915_GEM_DOMAIN_CPU:
+		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+		break;
+
+	case I915_GEM_DOMAIN_RENDER:
+		if (gpu_write_needs_clflush(obj))
+			obj->cache_dirty = true;
+		break;
+	}
+
+	obj->write_domain = 0;
+}
+
 void i915_gem_init__objects(struct drm_i915_private *i915)
 {
 	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 2e963a593245..9bf8155d27de 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -346,6 +346,13 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
 	i915_gem_object_unpin_pages(obj);
 }
 
+void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj);
+void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj);
+
+void
+i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
+				   unsigned int flush_domains);
+
 static inline struct intel_engine_cs *
 i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
new file mode 100644
index 000000000000..031e7ad875e3
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -0,0 +1,503 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include <linux/prime_numbers.h>
+
+#include "gt/intel_gt_pm.h"
+#include "i915_selftest.h"
+#include "selftests/huge_gem_object.h"
+#include "selftests/igt_flush_test.h"
+
+struct tile {
+	unsigned int width;
+	unsigned int height;
+	unsigned int stride;
+	unsigned int size;
+	unsigned int tiling;
+	unsigned int swizzle;
+};
+
+static u64 swizzle_bit(unsigned int bit, u64 offset)
+{
+	return (offset & BIT_ULL(bit)) >> (bit - 6);
+}
+
+static u64 tiled_offset(const struct tile *tile, u64 v)
+{
+	u64 x, y;
+
+	if (tile->tiling == I915_TILING_NONE)
+		return v;
+
+	y = div64_u64_rem(v, tile->stride, &x);
+	v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height;
+
+	if (tile->tiling == I915_TILING_X) {
+		v += y * tile->width;
+		v += div64_u64_rem(x, tile->width, &x) << tile->size;
+		v += x;
+	} else if (tile->width == 128) {
+		const unsigned int ytile_span = 16;
+		const unsigned int ytile_height = 512;
+
+		v += y * ytile_span;
+		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
+		v += x;
+	} else {
+		const unsigned int ytile_span = 32;
+		const unsigned int ytile_height = 256;
+
+		v += y * ytile_span;
+		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
+		v += x;
+	}
+
+	switch (tile->swizzle) {
+	case I915_BIT_6_SWIZZLE_9:
+		v ^= swizzle_bit(9, v);
+		break;
+	case I915_BIT_6_SWIZZLE_9_10:
+		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
+		break;
+	case I915_BIT_6_SWIZZLE_9_11:
+		v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
+		break;
+	case I915_BIT_6_SWIZZLE_9_10_11:
+		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
+		break;
+	}
+
+	return v;
+}
+
+static int check_partial_mapping(struct drm_i915_gem_object *obj,
+				 const struct tile *tile,
+				 unsigned long end_time)
+{
+	const unsigned int nreal = obj->scratch / PAGE_SIZE;
+	const unsigned long npages = obj->base.size / PAGE_SIZE;
+	struct i915_vma *vma;
+	unsigned long page;
+	int err;
+
+	if (igt_timeout(end_time,
+			"%s: timed out before tiling=%d stride=%d\n",
+			__func__, tile->tiling, tile->stride))
+		return -EINTR;
+
+	err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride);
+	if (err) {
+		pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n",
+		       tile->tiling, tile->stride, err);
+		return err;
+	}
+
+	GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
+	GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
+
+	for_each_prime_number_from(page, 1, npages) {
+		struct i915_ggtt_view view =
+			compute_partial_view(obj, page, MIN_CHUNK_PAGES);
+		u32 __iomem *io;
+		struct page *p;
+		unsigned int n;
+		u64 offset;
+		u32 *cpu;
+
+		GEM_BUG_ON(view.partial.size > nreal);
+		cond_resched();
+
+		err = i915_gem_object_set_to_gtt_domain(obj, true);
+		if (err) {
+			pr_err("Failed to flush to GTT write domain; err=%d\n",
+			       err);
+			return err;
+		}
+
+		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
+		if (IS_ERR(vma)) {
+			pr_err("Failed to pin partial view: offset=%lu; err=%d\n",
+			       page, (int)PTR_ERR(vma));
+			return PTR_ERR(vma);
+		}
+
+		n = page - view.partial.offset;
+		GEM_BUG_ON(n >= view.partial.size);
+
+		io = i915_vma_pin_iomap(vma);
+		i915_vma_unpin(vma);
+		if (IS_ERR(io)) {
+			pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
+			       page, (int)PTR_ERR(io));
+			return PTR_ERR(io);
+		}
+
+		iowrite32(page, io + n * PAGE_SIZE / sizeof(*io));
+		i915_vma_unpin_iomap(vma);
+
+		offset = tiled_offset(tile, page << PAGE_SHIFT);
+		if (offset >= obj->base.size)
+			continue;
+
+		i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+		p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
+		cpu = kmap(p) + offset_in_page(offset);
+		drm_clflush_virt_range(cpu, sizeof(*cpu));
+		if (*cpu != (u32)page) {
+			pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n",
+			       page, n,
+			       view.partial.offset,
+			       view.partial.size,
+			       vma->size >> PAGE_SHIFT,
+			       tile->tiling ? tile_row_pages(obj) : 0,
+			       vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride,
+			       offset >> PAGE_SHIFT,
+			       (unsigned int)offset_in_page(offset),
+			       offset,
+			       (u32)page, *cpu);
+			err = -EINVAL;
+		}
+		*cpu = 0;
+		drm_clflush_virt_range(cpu, sizeof(*cpu));
+		kunmap(p);
+		if (err)
+			return err;
+
+		i915_vma_destroy(vma);
+	}
+
+	return 0;
+}
+
+static int igt_partial_tiling(void *arg)
+{
+	const unsigned int nreal = 1 << 12; /* largest tile row x2 */
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	intel_wakeref_t wakeref;
+	int tiling;
+	int err;
+
+	/* We want to check the page mapping and fencing of a large object
+	 * mmapped through the GTT. The object we create is larger than can
+	 * possibly be mmaped as a whole, and so we must use partial GGTT vma.
+	 * We then check that a write through each partial GGTT vma ends up
+	 * in the right set of pages within the object, and with the expected
+	 * tiling, which we verify by manual swizzling.
+	 */
+
+	obj = huge_gem_object(i915,
+			      nreal << PAGE_SHIFT,
+			      (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	err = i915_gem_object_pin_pages(obj);
+	if (err) {
+		pr_err("Failed to allocate %u pages (%lu total), err=%d\n",
+		       nreal, obj->base.size / PAGE_SIZE, err);
+		goto out;
+	}
+
+	mutex_lock(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(i915);
+
+	if (1) {
+		IGT_TIMEOUT(end);
+		struct tile tile;
+
+		tile.height = 1;
+		tile.width = 1;
+		tile.size = 0;
+		tile.stride = 0;
+		tile.swizzle = I915_BIT_6_SWIZZLE_NONE;
+		tile.tiling = I915_TILING_NONE;
+
+		err = check_partial_mapping(obj, &tile, end);
+		if (err && err != -EINTR)
+			goto out_unlock;
+	}
+
+	for (tiling = I915_TILING_X; tiling <= I915_TILING_Y; tiling++) {
+		IGT_TIMEOUT(end);
+		unsigned int max_pitch;
+		unsigned int pitch;
+		struct tile tile;
+
+		if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
+			/*
+			 * The swizzling pattern is actually unknown as it
+			 * varies based on physical address of each page.
+			 * See i915_gem_detect_bit_6_swizzle().
+			 */
+			break;
+
+		tile.tiling = tiling;
+		switch (tiling) {
+		case I915_TILING_X:
+			tile.swizzle = i915->mm.bit_6_swizzle_x;
+			break;
+		case I915_TILING_Y:
+			tile.swizzle = i915->mm.bit_6_swizzle_y;
+			break;
+		}
+
+		GEM_BUG_ON(tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN);
+		if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 ||
+		    tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17)
+			continue;
+
+		if (INTEL_GEN(i915) <= 2) {
+			tile.height = 16;
+			tile.width = 128;
+			tile.size = 11;
+		} else if (tile.tiling == I915_TILING_Y &&
+			   HAS_128_BYTE_Y_TILING(i915)) {
+			tile.height = 32;
+			tile.width = 128;
+			tile.size = 12;
+		} else {
+			tile.height = 8;
+			tile.width = 512;
+			tile.size = 12;
+		}
+
+		if (INTEL_GEN(i915) < 4)
+			max_pitch = 8192 / tile.width;
+		else if (INTEL_GEN(i915) < 7)
+			max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width;
+		else
+			max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width;
+
+		for (pitch = max_pitch; pitch; pitch >>= 1) {
+			tile.stride = tile.width * pitch;
+			err = check_partial_mapping(obj, &tile, end);
+			if (err == -EINTR)
+				goto next_tiling;
+			if (err)
+				goto out_unlock;
+
+			if (pitch > 2 && INTEL_GEN(i915) >= 4) {
+				tile.stride = tile.width * (pitch - 1);
+				err = check_partial_mapping(obj, &tile, end);
+				if (err == -EINTR)
+					goto next_tiling;
+				if (err)
+					goto out_unlock;
+			}
+
+			if (pitch < max_pitch && INTEL_GEN(i915) >= 4) {
+				tile.stride = tile.width * (pitch + 1);
+				err = check_partial_mapping(obj, &tile, end);
+				if (err == -EINTR)
+					goto next_tiling;
+				if (err)
+					goto out_unlock;
+			}
+		}
+
+		if (INTEL_GEN(i915) >= 4) {
+			for_each_prime_number(pitch, max_pitch) {
+				tile.stride = tile.width * pitch;
+				err = check_partial_mapping(obj, &tile, end);
+				if (err == -EINTR)
+					goto next_tiling;
+				if (err)
+					goto out_unlock;
+			}
+		}
+
+next_tiling: ;
+	}
+
+out_unlock:
+	intel_runtime_pm_put(i915, wakeref);
+	mutex_unlock(&i915->drm.struct_mutex);
+	i915_gem_object_unpin_pages(obj);
+out:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+static int make_obj_busy(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_request *rq;
+	struct i915_vma *vma;
+	int err;
+
+	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (err)
+		return err;
+
+	rq = i915_request_create(i915->engine[RCS0]->kernel_context);
+	if (IS_ERR(rq)) {
+		i915_vma_unpin(vma);
+		return PTR_ERR(rq);
+	}
+
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+
+	i915_request_add(rq);
+
+	__i915_gem_object_release_unless_active(obj);
+	i915_vma_unpin(vma);
+
+	return err;
+}
+
+static bool assert_mmap_offset(struct drm_i915_private *i915,
+			       unsigned long size,
+			       int expected)
+{
+	struct drm_i915_gem_object *obj;
+	int err;
+
+	obj = i915_gem_object_create_internal(i915, size);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	err = create_mmap_offset(obj);
+	i915_gem_object_put(obj);
+
+	return err == expected;
+}
+
+static void disable_retire_worker(struct drm_i915_private *i915)
+{
+	i915_gem_shrinker_unregister(i915);
+
+	intel_gt_pm_get(i915);
+
+	cancel_delayed_work_sync(&i915->gem.retire_work);
+	cancel_delayed_work_sync(&i915->gem.idle_work);
+}
+
+static void restore_retire_worker(struct drm_i915_private *i915)
+{
+	intel_gt_pm_put(i915);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	igt_flush_test(i915, I915_WAIT_LOCKED);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	i915_gem_shrinker_register(i915);
+}
+
+static int igt_mmap_offset_exhaustion(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm;
+	struct drm_i915_gem_object *obj;
+	struct drm_mm_node resv, *hole;
+	u64 hole_start, hole_end;
+	int loop, err;
+
+	/* Disable background reaper */
+	disable_retire_worker(i915);
+	GEM_BUG_ON(!i915->gt.awake);
+
+	/* Trim the device mmap space to only a page */
+	memset(&resv, 0, sizeof(resv));
+	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
+		resv.start = hole_start;
+		resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */
+		err = drm_mm_reserve_node(mm, &resv);
+		if (err) {
+			pr_err("Failed to trim VMA manager, err=%d\n", err);
+			goto out_park;
+		}
+		break;
+	}
+
+	/* Just fits! */
+	if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) {
+		pr_err("Unable to insert object into single page hole\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Too large */
+	if (!assert_mmap_offset(i915, 2 * PAGE_SIZE, -ENOSPC)) {
+		pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Fill the hole, further allocation attempts should then fail */
+	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		goto out;
+	}
+
+	err = create_mmap_offset(obj);
+	if (err) {
+		pr_err("Unable to insert object into reclaimed hole\n");
+		goto err_obj;
+	}
+
+	if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) {
+		pr_err("Unexpectedly succeeded in inserting object into no holes!\n");
+		err = -EINVAL;
+		goto err_obj;
+	}
+
+	i915_gem_object_put(obj);
+
+	/* Now fill with busy dead objects that we expect to reap */
+	for (loop = 0; loop < 3; loop++) {
+		if (i915_terminally_wedged(i915))
+			break;
+
+		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+		if (IS_ERR(obj)) {
+			err = PTR_ERR(obj);
+			goto out;
+		}
+
+		mutex_lock(&i915->drm.struct_mutex);
+		err = make_obj_busy(obj);
+		mutex_unlock(&i915->drm.struct_mutex);
+		if (err) {
+			pr_err("[loop %d] Failed to busy the object\n", loop);
+			goto err_obj;
+		}
+
+		/* NB we rely on the _active_ reference to access obj now */
+		GEM_BUG_ON(!i915_gem_object_is_active(obj));
+		err = create_mmap_offset(obj);
+		if (err) {
+			pr_err("[loop %d] create_mmap_offset failed with err=%d\n",
+			       loop, err);
+			goto out;
+		}
+	}
+
+out:
+	drm_mm_remove_node(&resv);
+out_park:
+	restore_retire_worker(i915);
+	return err;
+err_obj:
+	i915_gem_object_put(obj);
+	goto out;
+}
+
+int i915_gem_mman_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_partial_tiling),
+		SUBTEST(igt_mmap_offset_exhaustion),
+	};
+
+	return i915_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ac9723466488..c4893dd9489f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2878,7 +2878,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 u64 flags);
 
 int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
-void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
 
 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0549b73f01ff..78d99841cf76 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -380,12 +380,6 @@ i915_gem_dumb_create(struct drm_file *file,
 			       &args->size, &args->handle);
 }
 
-static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
-{
-	return !(obj->cache_level == I915_CACHE_NONE ||
-		 obj->cache_level == I915_CACHE_WT);
-}
-
 /**
  * Creates a new mm object and returns a handle to it.
  * @dev: drm device pointer
@@ -405,13 +399,6 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
 			       &args->size, &args->handle);
 }
 
-static inline enum fb_op_origin
-fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
-{
-	return (domain == I915_GEM_DOMAIN_GTT ?
-		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
-}
-
 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 {
 	intel_wakeref_t wakeref;
@@ -451,47 +438,6 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 	}
 }
 
-static void
-flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
-{
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	struct i915_vma *vma;
-
-	if (!(obj->write_domain & flush_domains))
-		return;
-
-	switch (obj->write_domain) {
-	case I915_GEM_DOMAIN_GTT:
-		i915_gem_flush_ggtt_writes(dev_priv);
-
-		intel_fb_obj_flush(obj,
-				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
-
-		for_each_ggtt_vma(vma, obj) {
-			if (vma->iomap)
-				continue;
-
-			i915_vma_unset_ggtt_write(vma);
-		}
-		break;
-
-	case I915_GEM_DOMAIN_WC:
-		wmb();
-		break;
-
-	case I915_GEM_DOMAIN_CPU:
-		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
-		break;
-
-	case I915_GEM_DOMAIN_RENDER:
-		if (gpu_write_needs_clflush(obj))
-			obj->cache_dirty = true;
-		break;
-	}
-
-	obj->write_domain = 0;
-}
-
 /*
  * Pins the specified object's pages and synchronizes the object with
  * GPU accesses. Sets needs_clflush to non-zero if the caller should
@@ -528,7 +474,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 			goto out;
 	}
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 	/* If we're not in the cpu read domain, set ourself into the gtt
 	 * read domain and manually flush cachelines (if required). This
@@ -580,7 +526,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 			goto out;
 	}
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 	/* If we're not in the cpu write domain, set ourself into the
 	 * gtt write domain and manually flush cachelines (as required).
@@ -1183,6 +1129,13 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 	spin_unlock(&i915->mm.obj_lock);
 }
 
+static inline enum fb_op_origin
+fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
+{
+	return (domain == I915_GEM_DOMAIN_GTT ?
+		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
+}
+
 /**
  * Called when user space prepares to use an object with the CPU, either
  * through the mmap ioctl's mapping or a GTT mapping.
@@ -1326,420 +1279,6 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-static inline bool
-__vma_matches(struct vm_area_struct *vma, struct file *filp,
-	      unsigned long addr, unsigned long size)
-{
-	if (vma->vm_file != filp)
-		return false;
-
-	return vma->vm_start == addr &&
-	       (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
-}
-
-/**
- * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
- *			 it is mapped to.
- * @dev: drm device
- * @data: ioctl data blob
- * @file: drm file
- *
- * While the mapping holds a reference on the contents of the object, it doesn't
- * imply a ref on the object itself.
- *
- * IMPORTANT:
- *
- * DRM driver writers who look a this function as an example for how to do GEM
- * mmap support, please don't implement mmap support like here. The modern way
- * to implement DRM mmap support is with an mmap offset ioctl (like
- * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
- * That way debug tooling like valgrind will understand what's going on, hiding
- * the mmap call in a driver private ioctl will break that. The i915 driver only
- * does cpu mmaps this way because we didn't know better.
- */
-int
-i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
-		    struct drm_file *file)
-{
-	struct drm_i915_gem_mmap *args = data;
-	struct drm_i915_gem_object *obj;
-	unsigned long addr;
-
-	if (args->flags & ~(I915_MMAP_WC))
-		return -EINVAL;
-
-	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
-		return -ENODEV;
-
-	obj = i915_gem_object_lookup(file, args->handle);
-	if (!obj)
-		return -ENOENT;
-
-	/* prime objects have no backing filp to GEM mmap
-	 * pages from.
-	 */
-	if (!obj->base.filp) {
-		addr = -ENXIO;
-		goto err;
-	}
-
-	if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
-		addr = -EINVAL;
-		goto err;
-	}
-
-	addr = vm_mmap(obj->base.filp, 0, args->size,
-		       PROT_READ | PROT_WRITE, MAP_SHARED,
-		       args->offset);
-	if (IS_ERR_VALUE(addr))
-		goto err;
-
-	if (args->flags & I915_MMAP_WC) {
-		struct mm_struct *mm = current->mm;
-		struct vm_area_struct *vma;
-
-		if (down_write_killable(&mm->mmap_sem)) {
-			addr = -EINTR;
-			goto err;
-		}
-		vma = find_vma(mm, addr);
-		if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
-			vma->vm_page_prot =
-				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
-		else
-			addr = -ENOMEM;
-		up_write(&mm->mmap_sem);
-		if (IS_ERR_VALUE(addr))
-			goto err;
-
-		/* This may race, but that's ok, it only gets set */
-		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
-	}
-	i915_gem_object_put(obj);
-
-	args->addr_ptr = (u64)addr;
-	return 0;
-
-err:
-	i915_gem_object_put(obj);
-	return addr;
-}
-
-static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
-{
-	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
-}
-
-/**
- * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
- *
- * A history of the GTT mmap interface:
- *
- * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
- *     aligned and suitable for fencing, and still fit into the available
- *     mappable space left by the pinned display objects. A classic problem
- *     we called the page-fault-of-doom where we would ping-pong between
- *     two objects that could not fit inside the GTT and so the memcpy
- *     would page one object in at the expense of the other between every
- *     single byte.
- *
- * 1 - Objects can be any size, and have any compatible fencing (X Y, or none
- *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
- *     object is too large for the available space (or simply too large
- *     for the mappable aperture!), a view is created instead and faulted
- *     into userspace. (This view is aligned and sized appropriately for
- *     fenced access.)
- *
- * 2 - Recognise WC as a separate cache domain so that we can flush the
- *     delayed writes via GTT before performing direct access via WC.
- *
- * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
- *     pagefault; swapin remains transparent.
- *
- * Restrictions:
- *
- *  * snoopable objects cannot be accessed via the GTT. It can cause machine
- *    hangs on some architectures, corruption on others. An attempt to service
- *    a GTT page fault from a snoopable object will generate a SIGBUS.
- *
- *  * the object must be able to fit into RAM (physical memory, though no
- *    limited to the mappable aperture).
- *
- *
- * Caveats:
- *
- *  * a new GTT page fault will synchronize rendering from the GPU and flush
- *    all data to system memory. Subsequent access will not be synchronized.
- *
- *  * all mappings are revoked on runtime device suspend.
- *
- *  * there are only 8, 16 or 32 fence registers to share between all users
- *    (older machines require fence register for display and blitter access
- *    as well). Contention of the fence registers will cause the previous users
- *    to be unmapped and any new access will generate new page faults.
- *
- *  * running out of memory while servicing a fault may generate a SIGBUS,
- *    rather than the expected SIGSEGV.
- */
-int i915_gem_mmap_gtt_version(void)
-{
-	return 3;
-}
-
-static inline struct i915_ggtt_view
-compute_partial_view(const struct drm_i915_gem_object *obj,
-		     pgoff_t page_offset,
-		     unsigned int chunk)
-{
-	struct i915_ggtt_view view;
-
-	if (i915_gem_object_is_tiled(obj))
-		chunk = roundup(chunk, tile_row_pages(obj));
-
-	view.type = I915_GGTT_VIEW_PARTIAL;
-	view.partial.offset = rounddown(page_offset, chunk);
-	view.partial.size =
-		min_t(unsigned int, chunk,
-		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
-
-	/* If the partial covers the entire object, just create a normal VMA. */
-	if (chunk >= obj->base.size >> PAGE_SHIFT)
-		view.type = I915_GGTT_VIEW_NORMAL;
-
-	return view;
-}
-
-/**
- * i915_gem_fault - fault a page into the GTT
- * @vmf: fault info
- *
- * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
- * from userspace.  The fault handler takes care of binding the object to
- * the GTT (if needed), allocating and programming a fence register (again,
- * only if needed based on whether the old reg is still valid or the object
- * is tiled) and inserting a new PTE into the faulting process.
- *
- * Note that the faulting process may involve evicting existing objects
- * from the GTT and/or fence registers to make room.  So performance may
- * suffer if the GTT working set is large or there are few fence registers
- * left.
- *
- * The current feature set supported by i915_gem_fault() and thus GTT mmaps
- * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
- */
-vm_fault_t i915_gem_fault(struct vm_fault *vmf)
-{
-#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
-	struct vm_area_struct *area = vmf->vma;
-	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	bool write = area->vm_flags & VM_WRITE;
-	intel_wakeref_t wakeref;
-	struct i915_vma *vma;
-	pgoff_t page_offset;
-	int srcu;
-	int ret;
-
-	/* Sanity check that we allow writing into this object */
-	if (i915_gem_object_is_readonly(obj) && write)
-		return VM_FAULT_SIGBUS;
-
-	/* We don't use vmf->pgoff since that has the fake offset */
-	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
-
-	trace_i915_gem_object_fault(obj, page_offset, true, write);
-
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		goto err;
-
-	wakeref = intel_runtime_pm_get(dev_priv);
-
-	srcu = i915_reset_trylock(dev_priv);
-	if (srcu < 0) {
-		ret = srcu;
-		goto err_rpm;
-	}
-
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		goto err_reset;
-
-	/* Access to snoopable pages through the GTT is incoherent. */
-	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
-		ret = -EFAULT;
-		goto err_unlock;
-	}
-
-	/* Now pin it into the GTT as needed */
-	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
-				       PIN_MAPPABLE |
-				       PIN_NONBLOCK |
-				       PIN_NONFAULT);
-	if (IS_ERR(vma)) {
-		/* Use a partial view if it is bigger than available space */
-		struct i915_ggtt_view view =
-			compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
-		unsigned int flags;
-
-		flags = PIN_MAPPABLE;
-		if (view.type == I915_GGTT_VIEW_NORMAL)
-			flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
-
-		/*
-		 * Userspace is now writing through an untracked VMA, abandon
-		 * all hope that the hardware is able to track future writes.
-		 */
-		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
-
-		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
-		if (IS_ERR(vma) && !view.type) {
-			flags = PIN_MAPPABLE;
-			view.type = I915_GGTT_VIEW_PARTIAL;
-			vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
-		}
-	}
-	if (IS_ERR(vma)) {
-		ret = PTR_ERR(vma);
-		goto err_unlock;
-	}
-
-	ret = i915_vma_pin_fence(vma);
-	if (ret)
-		goto err_unpin;
-
-	/* Finally, remap it using the new GTT offset */
-	ret = remap_io_mapping(area,
-			       area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
-			       (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
-			       min_t(u64, vma->size, area->vm_end - area->vm_start),
-			       &ggtt->iomap);
-	if (ret)
-		goto err_fence;
-
-	/* Mark as being mmapped into userspace for later revocation */
-	assert_rpm_wakelock_held(dev_priv);
-	if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
-		list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
-	GEM_BUG_ON(!obj->userfault_count);
-
-	i915_vma_set_ggtt_write(vma);
-
-err_fence:
-	i915_vma_unpin_fence(vma);
-err_unpin:
-	__i915_vma_unpin(vma);
-err_unlock:
-	mutex_unlock(&dev->struct_mutex);
-err_reset:
-	i915_reset_unlock(dev_priv, srcu);
-err_rpm:
-	intel_runtime_pm_put(dev_priv, wakeref);
-	i915_gem_object_unpin_pages(obj);
-err:
-	switch (ret) {
-	case -EIO:
-		/*
-		 * We eat errors when the gpu is terminally wedged to avoid
-		 * userspace unduly crashing (gl has no provisions for mmaps to
-		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
-		 * and so needs to be reported.
-		 */
-		if (!i915_terminally_wedged(dev_priv))
-			return VM_FAULT_SIGBUS;
-		/* else: fall through */
-	case -EAGAIN:
-		/*
-		 * EAGAIN means the gpu is hung and we'll wait for the error
-		 * handler to reset everything when re-faulting in
-		 * i915_mutex_lock_interruptible.
-		 */
-	case 0:
-	case -ERESTARTSYS:
-	case -EINTR:
-	case -EBUSY:
-		/*
-		 * EBUSY is ok: this just means that another thread
-		 * already did the job.
-		 */
-		return VM_FAULT_NOPAGE;
-	case -ENOMEM:
-		return VM_FAULT_OOM;
-	case -ENOSPC:
-	case -EFAULT:
-		return VM_FAULT_SIGBUS;
-	default:
-		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
-		return VM_FAULT_SIGBUS;
-	}
-}
-
-static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
-{
-	struct i915_vma *vma;
-
-	GEM_BUG_ON(!obj->userfault_count);
-
-	obj->userfault_count = 0;
-	list_del(&obj->userfault_link);
-	drm_vma_node_unmap(&obj->base.vma_node,
-			   obj->base.dev->anon_inode->i_mapping);
-
-	for_each_ggtt_vma(vma, obj)
-		i915_vma_unset_userfault(vma);
-}
-
-/**
- * i915_gem_release_mmap - remove physical page mappings
- * @obj: obj in question
- *
- * Preserve the reservation of the mmapping with the DRM core code, but
- * relinquish ownership of the pages back to the system.
- *
- * It is vital that we remove the page mapping if we have mapped a tiled
- * object through the GTT and then lose the fence register due to
- * resource pressure. Similarly if the object has been moved out of the
- * aperture, than pages mapped into userspace must be revoked. Removing the
- * mapping will then trigger a page fault on the next user access, allowing
- * fixup by i915_gem_fault().
- */
-void
-i915_gem_release_mmap(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	intel_wakeref_t wakeref;
-
-	/* Serialisation between user GTT access and our code depends upon
-	 * revoking the CPU's PTE whilst the mutex is held. The next user
-	 * pagefault then has to wait until we release the mutex.
-	 *
-	 * Note that RPM complicates somewhat by adding an additional
-	 * requirement that operations to the GGTT be made holding the RPM
-	 * wakeref.
-	 */
-	lockdep_assert_held(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
-
-	if (!obj->userfault_count)
-		goto out;
-
-	__i915_gem_object_release_mmap(obj);
-
-	/* Ensure that the CPU's PTE are revoked and there are not outstanding
-	 * memory transactions from userspace before we return. The TLB
-	 * flushing implied above by changing the PTE above *should* be
-	 * sufficient, an extra barrier here just provides us with a bit
-	 * of paranoid documentation about our requirement to serialise
-	 * memory writes before touching registers / GSM.
-	 */
-	wmb();
-
-out:
-	intel_runtime_pm_put(i915, wakeref);
-}
-
 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 {
 	struct drm_i915_gem_object *obj, *on;
@@ -1782,78 +1321,6 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 	}
 }
 
-static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	int err;
-
-	err = drm_gem_create_mmap_offset(&obj->base);
-	if (likely(!err))
-		return 0;
-
-	/* Attempt to reap some mmap space from dead objects */
-	do {
-		err = i915_gem_wait_for_idle(dev_priv,
-					     I915_WAIT_INTERRUPTIBLE,
-					     MAX_SCHEDULE_TIMEOUT);
-		if (err)
-			break;
-
-		i915_gem_drain_freed_objects(dev_priv);
-		err = drm_gem_create_mmap_offset(&obj->base);
-		if (!err)
-			break;
-
-	} while (flush_delayed_work(&dev_priv->gem.retire_work));
-
-	return err;
-}
-
-int
-i915_gem_mmap_gtt(struct drm_file *file,
-		  struct drm_device *dev,
-		  u32 handle,
-		  u64 *offset)
-{
-	struct drm_i915_gem_object *obj;
-	int ret;
-
-	obj = i915_gem_object_lookup(file, handle);
-	if (!obj)
-		return -ENOENT;
-
-	ret = i915_gem_object_create_mmap_offset(obj);
-	if (ret == 0)
-		*offset = drm_vma_node_offset_addr(&obj->base.vma_node);
-
-	i915_gem_object_put(obj);
-	return ret;
-}
-
-/**
- * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
- * @dev: DRM device
- * @data: GTT mapping ioctl data
- * @file: GEM object info
- *
- * Simply returns the fake offset to userspace so it can mmap it.
- * The mmap call will end up in drm_gem_mmap(), which will set things
- * up so we can get faults in the handler above.
- *
- * The fault handler will take care of binding the object into the GTT
- * (since it may have been evicted to make room for something), allocating
- * a fence register, and mapping the appropriate aperture address into
- * userspace.
- */
-int
-i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file)
-{
-	struct drm_i915_gem_mmap_gtt *args = data;
-
-	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
-}
-
 bool i915_sg_trim(struct sg_table *orig_st)
 {
 	struct sg_table new_st;
@@ -2057,7 +1524,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
 	 * We manually flush the CPU domain so that we can override and
 	 * force the flush for the display, and perform it asyncrhonously.
 	 */
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 	if (obj->cache_dirty)
 		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
 	obj->write_domain = 0;
@@ -2111,7 +1578,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
 
 	/* Serialise direct access to this object with the barriers for
 	 * coherent writes from the GPU, by effectively invalidating the
@@ -2173,7 +1640,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
 
 	/* Serialise direct access to this object with the barriers for
 	 * coherent writes from the GPU, by effectively invalidating the
@@ -2282,7 +1749,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 			 * then double check if the GTT mapping is still
 			 * valid for that pointer access.
 			 */
-			i915_gem_release_mmap(obj);
+			i915_gem_object_release_mmap(obj);
 
 			/* As we no longer need a fence for GTT access,
 			 * we can relinquish it now (and so prevent having
@@ -2537,7 +2004,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 	/* Flush the CPU cache if it's still invalid. */
 	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 7d5cc9ccf6dd..86d6d92ccbc9 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -299,7 +299,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
 	i915_gem_object_unlock(obj);
 
 	/* Force the fence to be reacquired for GTT access */
-	i915_gem_release_mmap(obj);
+	i915_gem_object_release_mmap(obj);
 
 	/* Try to preallocate memory required to save swizzling on put-pages */
 	if (i915_gem_object_needs_bit17_swizzle(obj)) {
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 37f32c2eca19..a3dd2f1be95b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -89,491 +89,6 @@ static int igt_gem_huge(void *arg)
 	return err;
 }
 
-struct tile {
-	unsigned int width;
-	unsigned int height;
-	unsigned int stride;
-	unsigned int size;
-	unsigned int tiling;
-	unsigned int swizzle;
-};
-
-static u64 swizzle_bit(unsigned int bit, u64 offset)
-{
-	return (offset & BIT_ULL(bit)) >> (bit - 6);
-}
-
-static u64 tiled_offset(const struct tile *tile, u64 v)
-{
-	u64 x, y;
-
-	if (tile->tiling == I915_TILING_NONE)
-		return v;
-
-	y = div64_u64_rem(v, tile->stride, &x);
-	v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height;
-
-	if (tile->tiling == I915_TILING_X) {
-		v += y * tile->width;
-		v += div64_u64_rem(x, tile->width, &x) << tile->size;
-		v += x;
-	} else if (tile->width == 128) {
-		const unsigned int ytile_span = 16;
-		const unsigned int ytile_height = 512;
-
-		v += y * ytile_span;
-		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
-		v += x;
-	} else {
-		const unsigned int ytile_span = 32;
-		const unsigned int ytile_height = 256;
-
-		v += y * ytile_span;
-		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
-		v += x;
-	}
-
-	switch (tile->swizzle) {
-	case I915_BIT_6_SWIZZLE_9:
-		v ^= swizzle_bit(9, v);
-		break;
-	case I915_BIT_6_SWIZZLE_9_10:
-		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
-		break;
-	case I915_BIT_6_SWIZZLE_9_11:
-		v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
-		break;
-	case I915_BIT_6_SWIZZLE_9_10_11:
-		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
-		break;
-	}
-
-	return v;
-}
-
-static int check_partial_mapping(struct drm_i915_gem_object *obj,
-				 const struct tile *tile,
-				 unsigned long end_time)
-{
-	const unsigned int nreal = obj->scratch / PAGE_SIZE;
-	const unsigned long npages = obj->base.size / PAGE_SIZE;
-	struct i915_vma *vma;
-	unsigned long page;
-	int err;
-
-	if (igt_timeout(end_time,
-			"%s: timed out before tiling=%d stride=%d\n",
-			__func__, tile->tiling, tile->stride))
-		return -EINTR;
-
-	err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride);
-	if (err) {
-		pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n",
-		       tile->tiling, tile->stride, err);
-		return err;
-	}
-
-	GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
-	GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
-
-	for_each_prime_number_from(page, 1, npages) {
-		struct i915_ggtt_view view =
-			compute_partial_view(obj, page, MIN_CHUNK_PAGES);
-		u32 __iomem *io;
-		struct page *p;
-		unsigned int n;
-		u64 offset;
-		u32 *cpu;
-
-		GEM_BUG_ON(view.partial.size > nreal);
-		cond_resched();
-
-		err = i915_gem_object_set_to_gtt_domain(obj, true);
-		if (err) {
-			pr_err("Failed to flush to GTT write domain; err=%d\n",
-			       err);
-			return err;
-		}
-
-		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
-		if (IS_ERR(vma)) {
-			pr_err("Failed to pin partial view: offset=%lu; err=%d\n",
-			       page, (int)PTR_ERR(vma));
-			return PTR_ERR(vma);
-		}
-
-		n = page - view.partial.offset;
-		GEM_BUG_ON(n >= view.partial.size);
-
-		io = i915_vma_pin_iomap(vma);
-		i915_vma_unpin(vma);
-		if (IS_ERR(io)) {
-			pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
-			       page, (int)PTR_ERR(io));
-			return PTR_ERR(io);
-		}
-
-		iowrite32(page, io + n * PAGE_SIZE/sizeof(*io));
-		i915_vma_unpin_iomap(vma);
-
-		offset = tiled_offset(tile, page << PAGE_SHIFT);
-		if (offset >= obj->base.size)
-			continue;
-
-		flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-
-		p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
-		cpu = kmap(p) + offset_in_page(offset);
-		drm_clflush_virt_range(cpu, sizeof(*cpu));
-		if (*cpu != (u32)page) {
-			pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n",
-			       page, n,
-			       view.partial.offset,
-			       view.partial.size,
-			       vma->size >> PAGE_SHIFT,
-			       tile->tiling ? tile_row_pages(obj) : 0,
-			       vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride,
-			       offset >> PAGE_SHIFT,
-			       (unsigned int)offset_in_page(offset),
-			       offset,
-			       (u32)page, *cpu);
-			err = -EINVAL;
-		}
-		*cpu = 0;
-		drm_clflush_virt_range(cpu, sizeof(*cpu));
-		kunmap(p);
-		if (err)
-			return err;
-
-		i915_vma_destroy(vma);
-	}
-
-	return 0;
-}
-
-static int igt_partial_tiling(void *arg)
-{
-	const unsigned int nreal = 1 << 12; /* largest tile row x2 */
-	struct drm_i915_private *i915 = arg;
-	struct drm_i915_gem_object *obj;
-	intel_wakeref_t wakeref;
-	int tiling;
-	int err;
-
-	/* We want to check the page mapping and fencing of a large object
-	 * mmapped through the GTT. The object we create is larger than can
-	 * possibly be mmaped as a whole, and so we must use partial GGTT vma.
-	 * We then check that a write through each partial GGTT vma ends up
-	 * in the right set of pages within the object, and with the expected
-	 * tiling, which we verify by manual swizzling.
-	 */
-
-	obj = huge_gem_object(i915,
-			      nreal << PAGE_SHIFT,
-			      (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
-
-	err = i915_gem_object_pin_pages(obj);
-	if (err) {
-		pr_err("Failed to allocate %u pages (%lu total), err=%d\n",
-		       nreal, obj->base.size / PAGE_SIZE, err);
-		goto out;
-	}
-
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
-
-	if (1) {
-		IGT_TIMEOUT(end);
-		struct tile tile;
-
-		tile.height = 1;
-		tile.width = 1;
-		tile.size = 0;
-		tile.stride = 0;
-		tile.swizzle = I915_BIT_6_SWIZZLE_NONE;
-		tile.tiling = I915_TILING_NONE;
-
-		err = check_partial_mapping(obj, &tile, end);
-		if (err && err != -EINTR)
-			goto out_unlock;
-	}
-
-	for (tiling = I915_TILING_X; tiling <= I915_TILING_Y; tiling++) {
-		IGT_TIMEOUT(end);
-		unsigned int max_pitch;
-		unsigned int pitch;
-		struct tile tile;
-
-		if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
-			/*
-			 * The swizzling pattern is actually unknown as it
-			 * varies based on physical address of each page.
-			 * See i915_gem_detect_bit_6_swizzle().
-			 */
-			break;
-
-		tile.tiling = tiling;
-		switch (tiling) {
-		case I915_TILING_X:
-			tile.swizzle = i915->mm.bit_6_swizzle_x;
-			break;
-		case I915_TILING_Y:
-			tile.swizzle = i915->mm.bit_6_swizzle_y;
-			break;
-		}
-
-		GEM_BUG_ON(tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN);
-		if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 ||
-		    tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17)
-			continue;
-
-		if (INTEL_GEN(i915) <= 2) {
-			tile.height = 16;
-			tile.width = 128;
-			tile.size = 11;
-		} else if (tile.tiling == I915_TILING_Y &&
-			   HAS_128_BYTE_Y_TILING(i915)) {
-			tile.height = 32;
-			tile.width = 128;
-			tile.size = 12;
-		} else {
-			tile.height = 8;
-			tile.width = 512;
-			tile.size = 12;
-		}
-
-		if (INTEL_GEN(i915) < 4)
-			max_pitch = 8192 / tile.width;
-		else if (INTEL_GEN(i915) < 7)
-			max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width;
-		else
-			max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width;
-
-		for (pitch = max_pitch; pitch; pitch >>= 1) {
-			tile.stride = tile.width * pitch;
-			err = check_partial_mapping(obj, &tile, end);
-			if (err == -EINTR)
-				goto next_tiling;
-			if (err)
-				goto out_unlock;
-
-			if (pitch > 2 && INTEL_GEN(i915) >= 4) {
-				tile.stride = tile.width * (pitch - 1);
-				err = check_partial_mapping(obj, &tile, end);
-				if (err == -EINTR)
-					goto next_tiling;
-				if (err)
-					goto out_unlock;
-			}
-
-			if (pitch < max_pitch && INTEL_GEN(i915) >= 4) {
-				tile.stride = tile.width * (pitch + 1);
-				err = check_partial_mapping(obj, &tile, end);
-				if (err == -EINTR)
-					goto next_tiling;
-				if (err)
-					goto out_unlock;
-			}
-		}
-
-		if (INTEL_GEN(i915) >= 4) {
-			for_each_prime_number(pitch, max_pitch) {
-				tile.stride = tile.width * pitch;
-				err = check_partial_mapping(obj, &tile, end);
-				if (err == -EINTR)
-					goto next_tiling;
-				if (err)
-					goto out_unlock;
-			}
-		}
-
-next_tiling: ;
-	}
-
-out_unlock:
-	intel_runtime_pm_put(i915, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-	i915_gem_object_unpin_pages(obj);
-out:
-	i915_gem_object_put(obj);
-	return err;
-}
-
-static int make_obj_busy(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct i915_request *rq;
-	struct i915_vma *vma;
-	int err;
-
-	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
-	if (IS_ERR(vma))
-		return PTR_ERR(vma);
-
-	err = i915_vma_pin(vma, 0, 0, PIN_USER);
-	if (err)
-		return err;
-
-	rq = i915_request_create(i915->engine[RCS0]->kernel_context);
-	if (IS_ERR(rq)) {
-		i915_vma_unpin(vma);
-		return PTR_ERR(rq);
-	}
-
-	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
-
-	i915_request_add(rq);
-
-	__i915_gem_object_release_unless_active(obj);
-	i915_vma_unpin(vma);
-
-	return err;
-}
-
-static bool assert_mmap_offset(struct drm_i915_private *i915,
-			       unsigned long size,
-			       int expected)
-{
-	struct drm_i915_gem_object *obj;
-	int err;
-
-	obj = i915_gem_object_create_internal(i915, size);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
-
-	err = i915_gem_object_create_mmap_offset(obj);
-	i915_gem_object_put(obj);
-
-	return err == expected;
-}
-
-static void disable_retire_worker(struct drm_i915_private *i915)
-{
-	i915_gem_shrinker_unregister(i915);
-
-	intel_gt_pm_get(i915);
-
-	cancel_delayed_work_sync(&i915->gem.retire_work);
-	cancel_delayed_work_sync(&i915->gem.idle_work);
-}
-
-static void restore_retire_worker(struct drm_i915_private *i915)
-{
-	intel_gt_pm_put(i915);
-
-	mutex_lock(&i915->drm.struct_mutex);
-	igt_flush_test(i915, I915_WAIT_LOCKED);
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	i915_gem_shrinker_register(i915);
-}
-
-static int igt_mmap_offset_exhaustion(void *arg)
-{
-	struct drm_i915_private *i915 = arg;
-	struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm;
-	struct drm_i915_gem_object *obj;
-	struct drm_mm_node resv, *hole;
-	u64 hole_start, hole_end;
-	int loop, err;
-
-	/* Disable background reaper */
-	disable_retire_worker(i915);
-	GEM_BUG_ON(!i915->gt.awake);
-
-	/* Trim the device mmap space to only a page */
-	memset(&resv, 0, sizeof(resv));
-	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
-		resv.start = hole_start;
-		resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */
-		err = drm_mm_reserve_node(mm, &resv);
-		if (err) {
-			pr_err("Failed to trim VMA manager, err=%d\n", err);
-			goto out_park;
-		}
-		break;
-	}
-
-	/* Just fits! */
-	if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) {
-		pr_err("Unable to insert object into single page hole\n");
-		err = -EINVAL;
-		goto out;
-	}
-
-	/* Too large */
-	if (!assert_mmap_offset(i915, 2*PAGE_SIZE, -ENOSPC)) {
-		pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n");
-		err = -EINVAL;
-		goto out;
-	}
-
-	/* Fill the hole, further allocation attempts should then fail */
-	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
-	if (IS_ERR(obj)) {
-		err = PTR_ERR(obj);
-		goto out;
-	}
-
-	err = i915_gem_object_create_mmap_offset(obj);
-	if (err) {
-		pr_err("Unable to insert object into reclaimed hole\n");
-		goto err_obj;
-	}
-
-	if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) {
-		pr_err("Unexpectedly succeeded in inserting object into no holes!\n");
-		err = -EINVAL;
-		goto err_obj;
-	}
-
-	i915_gem_object_put(obj);
-
-	/* Now fill with busy dead objects that we expect to reap */
-	for (loop = 0; loop < 3; loop++) {
-		intel_wakeref_t wakeref;
-
-		if (i915_terminally_wedged(i915))
-			break;
-
-		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
-		if (IS_ERR(obj)) {
-			err = PTR_ERR(obj);
-			goto out;
-		}
-
-		err = 0;
-		mutex_lock(&i915->drm.struct_mutex);
-		with_intel_runtime_pm(i915, wakeref)
-			err = make_obj_busy(obj);
-		mutex_unlock(&i915->drm.struct_mutex);
-		if (err) {
-			pr_err("[loop %d] Failed to busy the object\n", loop);
-			goto err_obj;
-		}
-
-		/* NB we rely on the _active_ reference to access obj now */
-		GEM_BUG_ON(!i915_gem_object_is_active(obj));
-		err = i915_gem_object_create_mmap_offset(obj);
-		if (err) {
-			pr_err("[loop %d] i915_gem_object_create_mmap_offset failed with err=%d\n",
-			       loop, err);
-			goto out;
-		}
-	}
-
-out:
-	drm_mm_remove_node(&resv);
-out_park:
-	restore_retire_worker(i915);
-	return err;
-err_obj:
-	i915_gem_object_put(obj);
-	goto out;
-}
-
 int i915_gem_object_mock_selftests(void)
 {
 	static const struct i915_subtest tests[] = {
@@ -596,8 +111,6 @@ int i915_gem_object_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_gem_huge),
-		SUBTEST(igt_partial_tiling),
-		SUBTEST(igt_mmap_offset_exhaustion),
 	};
 
 	return i915_subtests(tests, i915);
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index 6d766925ad04..bbf387de8db3 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -16,6 +16,7 @@ selftest(timelines, i915_timeline_live_selftests)
 selftest(requests, i915_request_live_selftests)
 selftest(active, i915_active_live_selftests)
 selftest(objects, i915_gem_object_live_selftests)
+selftest(mman, i915_gem_mman_live_selftests)
 selftest(dmabuf, i915_gem_dmabuf_live_selftests)
 selftest(coherency, i915_gem_coherency_live_selftests)
 selftest(gtt, i915_gem_gtt_live_selftests)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 74+ messages in thread

* [PATCH 30/45] drm/i915: Move GEM domain management to its own file
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (27 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 29/45] drm/i915: Move mmap and friends " Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 31/45] drm/i915: Move more GEM objects under gem/ Chris Wilson
                   ` (22 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

Continuing the decluttering of i915_gem.c, this time the read/write
domain management, perhaps the biggest of GEM's follies?
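
As a rough reference, a minimal user-space sketch of the set-domain uAPI
serviced by the code being moved; it assumes an already-open DRM fd, a
valid GEM handle, and the uapi header available as <drm/i915_drm.h>:

	#include <errno.h>
	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	/* Hypothetical helper: move a GEM object into the GTT read/write domain. */
	static int gem_set_domain_gtt(int fd, uint32_t handle)
	{
		struct drm_i915_gem_set_domain arg;

		memset(&arg, 0, sizeof(arg));
		arg.handle = handle;
		arg.read_domains = I915_GEM_DOMAIN_GTT;
		/* A write domain implies the matching read domain. */
		arg.write_domain = I915_GEM_DOMAIN_GTT;

		return ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg) ? -errno : 0;
	}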

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_domain.c    | 784 ++++++++++++++++++
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |  29 +
 drivers/gpu/drm/i915/gvt/cmd_parser.c         |   4 +-
 drivers/gpu/drm/i915/gvt/scheduler.c          |   6 +-
 drivers/gpu/drm/i915/i915_cmd_parser.c        |   8 +-
 drivers/gpu/drm/i915/i915_drv.h               |  26 -
 drivers/gpu/drm/i915/i915_gem.c               | 777 +----------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    |   4 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c  |   4 +-
 drivers/gpu/drm/i915/selftests/huge_pages.c   |   4 +-
 .../drm/i915/selftests/i915_gem_coherency.c   |   8 +-
 .../gpu/drm/i915/selftests/i915_gem_context.c |   8 +-
 13 files changed, 841 insertions(+), 822 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_domain.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index b86af182b1ac..35561c0d80f9 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -87,6 +87,7 @@ i915-y += $(gt-y)
 # GEM (Graphics Execution Management) code
 obj-y += gem/
 gem-y += \
+	gem/i915_gem_domain.o \
 	gem/i915_gem_object.o \
 	gem/i915_gem_mman.o \
 	gem/i915_gem_pages.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
new file mode 100644
index 000000000000..eee421e3021c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -0,0 +1,784 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+#include "../i915_drv.h"
+#include "../i915_gem_clflush.h"
+#include "../i915_gem_gtt.h"
+#include "../i915_vma.h"
+
+#include "../intel_frontbuffer.h"
+
+static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
+{
+	/*
+	 * We manually flush the CPU domain so that we can override and
+	 * force the flush for the display, and perform it asynchronously.
+	 */
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	if (obj->cache_dirty)
+		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
+	obj->write_domain = 0;
+}
+
+void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
+{
+	if (!READ_ONCE(obj->pin_global))
+		return;
+
+	mutex_lock(&obj->base.dev->struct_mutex);
+	__i915_gem_object_flush_for_display(obj);
+	mutex_unlock(&obj->base.dev->struct_mutex);
+}
+
+/**
+ * Moves a single object to the WC read, and possibly write domain.
+ * @obj: object to act on
+ * @write: ask for write access or read only
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
+{
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
+
+	if (obj->write_domain == I915_GEM_DOMAIN_WC)
+		return 0;
+
+	/* Flush and acquire obj->pages so that we are coherent through
+	 * direct access in memory with previous cached writes through
+	 * shmemfs and that our cache domain tracking remains valid.
+	 * For example, if the obj->filp was moved to swap without us
+	 * being notified and releasing the pages, we would mistakenly
+	 * continue to assume that the obj remained out of the CPU cached
+	 * domain.
+	 */
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		return ret;
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
+
+	/* Serialise direct access to this object with the barriers for
+	 * coherent writes from the GPU, by effectively invalidating the
+	 * WC domain upon first access.
+	 */
+	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
+		mb();
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
+	obj->read_domains |= I915_GEM_DOMAIN_WC;
+	if (write) {
+		obj->read_domains = I915_GEM_DOMAIN_WC;
+		obj->write_domain = I915_GEM_DOMAIN_WC;
+		obj->mm.dirty = true;
+	}
+
+	i915_gem_object_unpin_pages(obj);
+	return 0;
+}
+
+/**
+ * Moves a single object to the GTT read, and possibly write domain.
+ * @obj: object to act on
+ * @write: ask for write access or read only
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
+{
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
+
+	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
+		return 0;
+
+	/* Flush and acquire obj->pages so that we are coherent through
+	 * direct access in memory with previous cached writes through
+	 * shmemfs and that our cache domain tracking remains valid.
+	 * For example, if the obj->filp was moved to swap without us
+	 * being notified and releasing the pages, we would mistakenly
+	 * continue to assume that the obj remained out of the CPU cached
+	 * domain.
+	 */
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		return ret;
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
+
+	/* Serialise direct access to this object with the barriers for
+	 * coherent writes from the GPU, by effectively invalidating the
+	 * GTT domain upon first access.
+	 */
+	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
+		mb();
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
+	obj->read_domains |= I915_GEM_DOMAIN_GTT;
+	if (write) {
+		obj->read_domains = I915_GEM_DOMAIN_GTT;
+		obj->write_domain = I915_GEM_DOMAIN_GTT;
+		obj->mm.dirty = true;
+	}
+
+	i915_gem_object_unpin_pages(obj);
+	return 0;
+}
+
+/**
+ * Changes the cache-level of an object across all VMA.
+ * @obj: object to act on
+ * @cache_level: new cache level to set for the object
+ *
+ * After this function returns, the object will be in the new cache-level
+ * across all GTT and the contents of the backing storage will be coherent,
+ * with respect to the new cache-level. In order to keep the backing storage
+ * coherent for all users, we only allow a single cache level to be set
+ * globally on the object and prevent it from being changed whilst the
+ * hardware is reading from the object. That is if the object is currently
+ * on the scanout it will be set to uncached (or equivalent display
+ * cache coherency) and all non-MOCS GPU access will also be uncached so
+ * that all direct access to the scanout remains coherent.
+ */
+int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
+				    enum i915_cache_level cache_level)
+{
+	struct i915_vma *vma;
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	if (obj->cache_level == cache_level)
+		return 0;
+
+	/* Inspect the list of currently bound VMA and unbind any that would
+	 * be invalid given the new cache-level. This is principally to
+	 * catch the issue of the CS prefetch crossing page boundaries and
+	 * reading an invalid PTE on older architectures.
+	 */
+restart:
+	list_for_each_entry(vma, &obj->vma.list, obj_link) {
+		if (!drm_mm_node_allocated(&vma->node))
+			continue;
+
+		if (i915_vma_is_pinned(vma)) {
+			DRM_DEBUG("can not change the cache level of pinned objects\n");
+			return -EBUSY;
+		}
+
+		if (!i915_vma_is_closed(vma) &&
+		    i915_gem_valid_gtt_space(vma, cache_level))
+			continue;
+
+		ret = i915_vma_unbind(vma);
+		if (ret)
+			return ret;
+
+		/* As unbinding may affect other elements in the
+		 * obj->vma_list (due to side-effects from retiring
+		 * an active vma), play safe and restart the iterator.
+		 */
+		goto restart;
+	}
+
+	/* We can reuse the existing drm_mm nodes but need to change the
+	 * cache-level on the PTE. We could simply unbind them all and
+	 * rebind with the correct cache-level on next use. However since
+	 * we already have a valid slot, dma mapping, pages etc, we may as
+	 * rewrite the PTE in the belief that doing so tramples upon less
+	 * state and so involves less work.
+	 */
+	if (obj->bind_count) {
+		/* Before we change the PTE, the GPU must not be accessing it.
+		 * If we wait upon the object, we know that all the bound
+		 * VMA are no longer active.
+		 */
+		ret = i915_gem_object_wait(obj,
+					   I915_WAIT_INTERRUPTIBLE |
+					   I915_WAIT_LOCKED |
+					   I915_WAIT_ALL,
+					   MAX_SCHEDULE_TIMEOUT);
+		if (ret)
+			return ret;
+
+		if (!HAS_LLC(to_i915(obj->base.dev)) &&
+		    cache_level != I915_CACHE_NONE) {
+			/* Access to snoopable pages through the GTT is
+			 * incoherent and on some machines causes a hard
+			 * lockup. Relinquish the CPU mmapping to force
+			 * userspace to refault in the pages and we can
+			 * then double check if the GTT mapping is still
+			 * valid for that pointer access.
+			 */
+			i915_gem_object_release_mmap(obj);
+
+			/* As we no longer need a fence for GTT access,
+			 * we can relinquish it now (and so prevent having
+			 * to steal a fence from someone else on the next
+			 * fence request). Note GPU activity would have
+			 * dropped the fence as all snoopable access is
+			 * supposed to be linear.
+			 */
+			for_each_ggtt_vma(vma, obj) {
+				ret = i915_vma_put_fence(vma);
+				if (ret)
+					return ret;
+			}
+		} else {
+			/* We either have incoherent backing store and
+			 * so no GTT access or the architecture is fully
+			 * coherent. In such cases, existing GTT mmaps
+			 * ignore the cache bit in the PTE and we can
+			 * rewrite it without confusing the GPU or having
+			 * to force userspace to fault back in its mmaps.
+			 */
+		}
+
+		list_for_each_entry(vma, &obj->vma.list, obj_link) {
+			if (!drm_mm_node_allocated(&vma->node))
+				continue;
+
+			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
+			if (ret)
+				return ret;
+		}
+	}
+
+	list_for_each_entry(vma, &obj->vma.list, obj_link)
+		vma->node.color = cache_level;
+	i915_gem_object_set_cache_coherency(obj, cache_level);
+	obj->cache_dirty = true; /* Always invalidate stale cachelines */
+
+	return 0;
+}
+
+int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file)
+{
+	struct drm_i915_gem_caching *args = data;
+	struct drm_i915_gem_object *obj;
+	int err = 0;
+
+	rcu_read_lock();
+	obj = i915_gem_object_lookup_rcu(file, args->handle);
+	if (!obj) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	switch (obj->cache_level) {
+	case I915_CACHE_LLC:
+	case I915_CACHE_L3_LLC:
+		args->caching = I915_CACHING_CACHED;
+		break;
+
+	case I915_CACHE_WT:
+		args->caching = I915_CACHING_DISPLAY;
+		break;
+
+	default:
+		args->caching = I915_CACHING_NONE;
+		break;
+	}
+out:
+	rcu_read_unlock();
+	return err;
+}
+
+int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file)
+{
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct drm_i915_gem_caching *args = data;
+	struct drm_i915_gem_object *obj;
+	enum i915_cache_level level;
+	int ret = 0;
+
+	switch (args->caching) {
+	case I915_CACHING_NONE:
+		level = I915_CACHE_NONE;
+		break;
+	case I915_CACHING_CACHED:
+		/*
+		 * Due to a HW issue on BXT A stepping, GPU stores via a
+		 * snooped mapping may leave stale data in a corresponding CPU
+		 * cacheline, whereas normally such cachelines would get
+		 * invalidated.
+		 */
+		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
+			return -ENODEV;
+
+		level = I915_CACHE_LLC;
+		break;
+	case I915_CACHING_DISPLAY:
+		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	obj = i915_gem_object_lookup(file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	/*
+	 * The caching mode of proxy object is handled by its generator, and
+	 * not allowed to be changed by userspace.
+	 */
+	if (i915_gem_object_is_proxy(obj)) {
+		ret = -ENXIO;
+		goto out;
+	}
+
+	if (obj->cache_level == level)
+		goto out;
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE,
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		goto out;
+
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		goto out;
+
+	ret = i915_gem_object_set_cache_level(obj, level);
+	mutex_unlock(&dev->struct_mutex);
+
+out:
+	i915_gem_object_put(obj);
+	return ret;
+}
+
+/*
+ * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
+ * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
+ * (for pageflips). We only flush the caches while preparing the buffer for
+ * display, the callers are responsible for frontbuffer flush.
+ */
+struct i915_vma *
+i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+				     u32 alignment,
+				     const struct i915_ggtt_view *view,
+				     unsigned int flags)
+{
+	struct i915_vma *vma;
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	/* Mark the global pin early so that we account for the
+	 * display coherency whilst setting up the cache domains.
+	 */
+	obj->pin_global++;
+
+	/* The display engine is not coherent with the LLC cache on gen6.  As
+	 * a result, we make sure that the pinning that is about to occur is
+	 * done with uncached PTEs. This is lowest common denominator for all
+	 * chipsets.
+	 *
+	 * However for gen6+, we could do better by using the GFDT bit instead
+	 * of uncaching, which would allow us to flush all the LLC-cached data
+	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
+	 */
+	ret = i915_gem_object_set_cache_level(obj,
+					      HAS_WT(to_i915(obj->base.dev)) ?
+					      I915_CACHE_WT : I915_CACHE_NONE);
+	if (ret) {
+		vma = ERR_PTR(ret);
+		goto err_unpin_global;
+	}
+
+	/* As the user may map the buffer once pinned in the display plane
+	 * (e.g. libkms for the bootup splash), we have to ensure that we
+	 * always use map_and_fenceable for all scanout buffers. However,
+	 * it may simply be too big to fit into mappable, in which case
+	 * put it anyway and hope that userspace can cope (but always first
+	 * try to preserve the existing ABI).
+	 */
+	vma = ERR_PTR(-ENOSPC);
+	if ((flags & PIN_MAPPABLE) == 0 &&
+	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
+		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
+					       flags |
+					       PIN_MAPPABLE |
+					       PIN_NONBLOCK);
+	if (IS_ERR(vma))
+		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
+	if (IS_ERR(vma))
+		goto err_unpin_global;
+
+	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
+
+	__i915_gem_object_flush_for_display(obj);
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	obj->read_domains |= I915_GEM_DOMAIN_GTT;
+
+	return vma;
+
+err_unpin_global:
+	obj->pin_global--;
+	return vma;
+}
+
+static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct list_head *list;
+	struct i915_vma *vma;
+
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+	mutex_lock(&i915->ggtt.vm.mutex);
+	for_each_ggtt_vma(vma, obj) {
+		if (!drm_mm_node_allocated(&vma->node))
+			continue;
+
+		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
+	}
+	mutex_unlock(&i915->ggtt.vm.mutex);
+
+	spin_lock(&i915->mm.obj_lock);
+	list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
+	list_move_tail(&obj->mm.link, list);
+	spin_unlock(&i915->mm.obj_lock);
+}
+
+void
+i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
+{
+	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+
+	if (WARN_ON(vma->obj->pin_global == 0))
+		return;
+
+	if (--vma->obj->pin_global == 0)
+		vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
+
+	/* Bump the LRU to try and avoid premature eviction whilst flipping  */
+	i915_gem_object_bump_inactive_ggtt(vma->obj);
+
+	i915_vma_unpin(vma);
+}
+
+/**
+ * Moves a single object to the CPU read, and possibly write domain.
+ * @obj: object to act on
+ * @write: requesting write or read-only access
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
+{
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+	/* Flush the CPU cache if it's still invalid. */
+	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
+		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+		obj->read_domains |= I915_GEM_DOMAIN_CPU;
+	}
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
+
+	/* If we're writing through the CPU, then the GPU read domains will
+	 * need to be invalidated at next use.
+	 */
+	if (write)
+		__start_cpu_write(obj);
+
+	return 0;
+}
+
+static inline enum fb_op_origin
+fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
+{
+	return (domain == I915_GEM_DOMAIN_GTT ?
+		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
+}
+
+/**
+ * Called when user space prepares to use an object with the CPU, either
+ * through the mmap ioctl's mapping or a GTT mapping.
+ * @dev: drm device
+ * @data: ioctl data blob
+ * @file: drm file
+ */
+int
+i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file)
+{
+	struct drm_i915_gem_set_domain *args = data;
+	struct drm_i915_gem_object *obj;
+	u32 read_domains = args->read_domains;
+	u32 write_domain = args->write_domain;
+	int err;
+
+	/* Only handle setting domains to types used by the CPU. */
+	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
+		return -EINVAL;
+
+	/*
+	 * Having something in the write domain implies it's in the read
+	 * domain, and only that read domain.  Enforce that in the request.
+	 */
+	if (write_domain && read_domains != write_domain)
+		return -EINVAL;
+
+	if (!read_domains)
+		return 0;
+
+	obj = i915_gem_object_lookup(file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	/*
+	 * Already in the desired write domain? Nothing for us to do!
+	 *
+	 * We apply a little bit of cunning here to catch a broader set of
+	 * no-ops. If obj->write_domain is set, we must be in the same
+	 * obj->read_domains, and only that domain. Therefore, if that
+	 * obj->write_domain matches the request read_domains, we are
+	 * already in the same read/write domain and can skip the operation,
+	 * without having to further check the requested write_domain.
+	 */
+	if (READ_ONCE(obj->write_domain) == read_domains) {
+		err = 0;
+		goto out;
+	}
+
+	/*
+	 * Try to flush the object off the GPU without holding the lock.
+	 * We will repeat the flush holding the lock in the normal manner
+	 * to catch cases where we are gazumped.
+	 */
+	err = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
+				   (write_domain ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (err)
+		goto out;
+
+	/*
+	 * Proxy objects do not control access to the backing storage, ergo
+	 * they cannot be used as a means to manipulate the cache domain
+	 * tracking for that backing storage. The proxy object is always
+	 * considered to be outside of any cache domain.
+	 */
+	if (i915_gem_object_is_proxy(obj)) {
+		err = -ENXIO;
+		goto out;
+	}
+
+	/*
+	 * Flush and acquire obj->pages so that we are coherent through
+	 * direct access in memory with previous cached writes through
+	 * shmemfs and that our cache domain tracking remains valid.
+	 * For example, if the obj->filp was moved to swap without us
+	 * being notified and releasing the pages, we would mistakenly
+	 * continue to assume that the obj remained out of the CPU cached
+	 * domain.
+	 */
+	err = i915_gem_object_pin_pages(obj);
+	if (err)
+		goto out;
+
+	err = i915_mutex_lock_interruptible(dev);
+	if (err)
+		goto out_unpin;
+
+	if (read_domains & I915_GEM_DOMAIN_WC)
+		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
+	else if (read_domains & I915_GEM_DOMAIN_GTT)
+		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
+	else
+		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
+
+	/* And bump the LRU for this access */
+	i915_gem_object_bump_inactive_ggtt(obj);
+
+	mutex_unlock(&dev->struct_mutex);
+
+	if (write_domain != 0)
+		intel_fb_obj_invalidate(obj,
+					fb_write_origin(obj, write_domain));
+
+out_unpin:
+	i915_gem_object_unpin_pages(obj);
+out:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+/*
+ * Pins the specified object's pages and synchronizes the object with
+ * GPU accesses. Sets needs_clflush to non-zero if the caller should
+ * flush the object from the CPU cache.
+ */
+int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
+				 unsigned int *needs_clflush)
+{
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	*needs_clflush = 0;
+	if (!i915_gem_object_has_struct_page(obj))
+		return -ENODEV;
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED,
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		return ret;
+
+	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
+	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+		ret = i915_gem_object_set_to_cpu_domain(obj, false);
+		if (ret)
+			goto err_unpin;
+		else
+			goto out;
+	}
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+	/* If we're not in the cpu read domain, set ourself into the gtt
+	 * read domain and manually flush cachelines (if required). This
+	 * optimizes for the case when the gpu will dirty the data
+	 * anyway again before the next pread happens.
+	 */
+	if (!obj->cache_dirty &&
+	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
+		*needs_clflush = CLFLUSH_BEFORE;
+
+out:
+	/* return with the pages pinned */
+	return 0;
+
+err_unpin:
+	i915_gem_object_unpin_pages(obj);
+	return ret;
+}
+
+int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
+				  unsigned int *needs_clflush)
+{
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	*needs_clflush = 0;
+	if (!i915_gem_object_has_struct_page(obj))
+		return -ENODEV;
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED |
+				   I915_WAIT_ALL,
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		return ret;
+
+	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
+	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+		ret = i915_gem_object_set_to_cpu_domain(obj, true);
+		if (ret)
+			goto err_unpin;
+		else
+			goto out;
+	}
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+	/* If we're not in the cpu write domain, set ourself into the
+	 * gtt write domain and manually flush cachelines (as required).
+	 * This optimizes for the case when the gpu will use the data
+	 * right away and we therefore have to clflush anyway.
+	 */
+	if (!obj->cache_dirty) {
+		*needs_clflush |= CLFLUSH_AFTER;
+
+		/*
+		 * Same trick applies to invalidate partially written
+		 * cachelines read before writing.
+		 */
+		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
+			*needs_clflush |= CLFLUSH_BEFORE;
+	}
+
+out:
+	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
+	obj->mm.dirty = true;
+	/* return with the pages pinned */
+	return 0;
+
+err_unpin:
+	i915_gem_object_unpin_pages(obj);
+	return ret;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 9bf8155d27de..980587e420e6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -15,6 +15,8 @@
 
 #include "i915_gem_object_types.h"
 
+#include "../i915_gem_gtt.h"
+
 void i915_gem_init__objects(struct drm_i915_private *i915);
 
 struct drm_i915_gem_object *i915_gem_object_alloc(void);
@@ -353,6 +355,20 @@ void
 i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
 				   unsigned int flush_domains);
 
+int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
+				 unsigned int *needs_clflush);
+int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
+				  unsigned int *needs_clflush);
+#define CLFLUSH_BEFORE	BIT(0)
+#define CLFLUSH_AFTER	BIT(1)
+#define CLFLUSH_FLAGS	(CLFLUSH_BEFORE | CLFLUSH_AFTER)
+
+static inline void
+i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
+{
+	i915_gem_object_unpin_pages(obj);
+}
+
 static inline struct intel_engine_cs *
 i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
 {
@@ -374,6 +390,19 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 					 unsigned int cache_level);
 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
 
+int __must_check
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
+int __must_check
+i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write);
+int __must_check
+i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
+struct i915_vma * __must_check
+i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+				     u32 alignment,
+				     const struct i915_ggtt_view *view,
+				     unsigned int flags);
+void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
+
 static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 {
 	if (obj->cache_dirty)
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index fe66285f0236..1ee881825e38 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -1760,7 +1760,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 		goto err_free_bb;
 	}
 
-	ret = i915_gem_obj_prepare_shmem_write(bb->obj, &bb->clflush);
+	ret = i915_gem_object_prepare_write(bb->obj, &bb->clflush);
 	if (ret)
 		goto err_free_obj;
 
@@ -1809,7 +1809,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 err_unmap:
 	i915_gem_object_unpin_map(bb->obj);
 err_finish_shmem_access:
-	i915_gem_obj_finish_shmem_access(bb->obj);
+	i915_gem_object_finish_access(bb->obj);
 err_free_obj:
 	i915_gem_object_put(bb->obj);
 err_free_bb:
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 8b6574e1b495..2deffb3e6ab7 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -465,7 +465,7 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 						bb->obj->base.size);
 				bb->clflush &= ~CLFLUSH_AFTER;
 			}
-			i915_gem_obj_finish_shmem_access(bb->obj);
+			i915_gem_object_finish_access(bb->obj);
 			bb->accessing = false;
 
 		} else {
@@ -493,7 +493,7 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 			if (ret)
 				goto err;
 
-			i915_gem_obj_finish_shmem_access(bb->obj);
+			i915_gem_object_finish_access(bb->obj);
 			bb->accessing = false;
 
 			ret = i915_vma_move_to_active(bb->vma,
@@ -571,7 +571,7 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 	list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) {
 		if (bb->obj) {
 			if (bb->accessing)
-				i915_gem_obj_finish_shmem_access(bb->obj);
+				i915_gem_object_finish_access(bb->obj);
 
 			if (bb->va && !IS_ERR(bb->va))
 				i915_gem_object_unpin_map(bb->obj);
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index e9fadcb4d592..c893bd4eb2c8 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1058,11 +1058,11 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 	void *dst, *src;
 	int ret;
 
-	ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush);
+	ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
 	if (ret)
 		return ERR_PTR(ret);
 
-	ret = i915_gem_obj_prepare_shmem_write(dst_obj, &dst_needs_clflush);
+	ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
 	if (ret) {
 		dst = ERR_PTR(ret);
 		goto unpin_src;
@@ -1120,9 +1120,9 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 	*needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
 
 unpin_dst:
-	i915_gem_obj_finish_shmem_access(dst_obj);
+	i915_gem_object_finish_access(dst_obj);
 unpin_src:
-	i915_gem_obj_finish_shmem_access(src_obj);
+	i915_gem_object_finish_access(src_obj);
 	return dst;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c4893dd9489f..72f2f04d51d9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2886,20 +2886,6 @@ static inline int __sg_page_count(const struct scatterlist *sg)
 	return sg->length >> PAGE_SHIFT;
 }
 
-int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
-				    unsigned int *needs_clflush);
-int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
-				     unsigned int *needs_clflush);
-#define CLFLUSH_BEFORE	BIT(0)
-#define CLFLUSH_AFTER	BIT(1)
-#define CLFLUSH_FLAGS	(CLFLUSH_BEFORE | CLFLUSH_AFTER)
-
-static inline void
-i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
-{
-	i915_gem_object_unpin_pages(obj);
-}
-
 static inline int __must_check
 i915_mutex_lock_interruptible(struct drm_device *dev)
 {
@@ -2962,18 +2948,6 @@ int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 				  const struct i915_sched_attr *attr);
 #define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX)
 
-int __must_check
-i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
-int __must_check
-i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write);
-int __must_check
-i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
-struct i915_vma * __must_check
-i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
-				     u32 alignment,
-				     const struct i915_ggtt_view *view,
-				     unsigned int flags);
-void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file);
 void i915_gem_release(struct drm_device *dev, struct drm_file *file);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 78d99841cf76..fc9c9776999a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -438,123 +438,6 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 	}
 }
 
-/*
- * Pins the specified object's pages and synchronizes the object with
- * GPU accesses. Sets needs_clflush to non-zero if the caller should
- * flush the object from the CPU cache.
- */
-int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
-				    unsigned int *needs_clflush)
-{
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	*needs_clflush = 0;
-	if (!i915_gem_object_has_struct_page(obj))
-		return -ENODEV;
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED,
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		return ret;
-
-	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
-	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
-		ret = i915_gem_object_set_to_cpu_domain(obj, false);
-		if (ret)
-			goto err_unpin;
-		else
-			goto out;
-	}
-
-	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-
-	/* If we're not in the cpu read domain, set ourself into the gtt
-	 * read domain and manually flush cachelines (if required). This
-	 * optimizes for the case when the gpu will dirty the data
-	 * anyway again before the next pread happens.
-	 */
-	if (!obj->cache_dirty &&
-	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
-		*needs_clflush = CLFLUSH_BEFORE;
-
-out:
-	/* return with the pages pinned */
-	return 0;
-
-err_unpin:
-	i915_gem_object_unpin_pages(obj);
-	return ret;
-}
-
-int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
-				     unsigned int *needs_clflush)
-{
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	*needs_clflush = 0;
-	if (!i915_gem_object_has_struct_page(obj))
-		return -ENODEV;
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
-				   I915_WAIT_ALL,
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		return ret;
-
-	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
-	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
-		ret = i915_gem_object_set_to_cpu_domain(obj, true);
-		if (ret)
-			goto err_unpin;
-		else
-			goto out;
-	}
-
-	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-
-	/* If we're not in the cpu write domain, set ourself into the
-	 * gtt write domain and manually flush cachelines (as required).
-	 * This optimizes for the case when the gpu will use the data
-	 * right away and we therefore have to clflush anyway.
-	 */
-	if (!obj->cache_dirty) {
-		*needs_clflush |= CLFLUSH_AFTER;
-
-		/*
-		 * Same trick applies to invalidate partially written
-		 * cachelines read before writing.
-		 */
-		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
-			*needs_clflush |= CLFLUSH_BEFORE;
-	}
-
-out:
-	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
-	obj->mm.dirty = true;
-	/* return with the pages pinned */
-	return 0;
-
-err_unpin:
-	i915_gem_object_unpin_pages(obj);
-	return ret;
-}
-
 static int
 shmem_pread(struct page *page, int offset, int len, char __user *user_data,
 	    bool needs_clflush)
@@ -588,7 +471,7 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
+	ret = i915_gem_object_prepare_read(obj, &needs_clflush);
 	mutex_unlock(&obj->base.dev->struct_mutex);
 	if (ret)
 		return ret;
@@ -610,7 +493,7 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
 		offset = 0;
 	}
 
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 	return ret;
 }
 
@@ -985,7 +868,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
+	ret = i915_gem_object_prepare_write(obj, &needs_clflush);
 	mutex_unlock(&i915->drm.struct_mutex);
 	if (ret)
 		return ret;
@@ -1017,7 +900,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 	}
 
 	intel_fb_obj_flush(obj, ORIGIN_CPU);
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 	return ret;
 }
 
@@ -1106,150 +989,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
-static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct list_head *list;
-	struct i915_vma *vma;
-
-	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
-
-	mutex_lock(&i915->ggtt.vm.mutex);
-	for_each_ggtt_vma(vma, obj) {
-		if (!drm_mm_node_allocated(&vma->node))
-			continue;
-
-		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
-	}
-	mutex_unlock(&i915->ggtt.vm.mutex);
-
-	spin_lock(&i915->mm.obj_lock);
-	list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
-	list_move_tail(&obj->mm.link, list);
-	spin_unlock(&i915->mm.obj_lock);
-}
-
-static inline enum fb_op_origin
-fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
-{
-	return (domain == I915_GEM_DOMAIN_GTT ?
-		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
-}
-
-/**
- * Called when user space prepares to use an object with the CPU, either
- * through the mmap ioctl's mapping or a GTT mapping.
- * @dev: drm device
- * @data: ioctl data blob
- * @file: drm file
- */
-int
-i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
-			  struct drm_file *file)
-{
-	struct drm_i915_gem_set_domain *args = data;
-	struct drm_i915_gem_object *obj;
-	u32 read_domains = args->read_domains;
-	u32 write_domain = args->write_domain;
-	int err;
-
-	/* Only handle setting domains to types used by the CPU. */
-	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
-		return -EINVAL;
-
-	/*
-	 * Having something in the write domain implies it's in the read
-	 * domain, and only that read domain.  Enforce that in the request.
-	 */
-	if (write_domain && read_domains != write_domain)
-		return -EINVAL;
-
-	if (!read_domains)
-		return 0;
-
-	obj = i915_gem_object_lookup(file, args->handle);
-	if (!obj)
-		return -ENOENT;
-
-	/*
-	 * Already in the desired write domain? Nothing for us to do!
-	 *
-	 * We apply a little bit of cunning here to catch a broader set of
-	 * no-ops. If obj->write_domain is set, we must be in the same
-	 * obj->read_domains, and only that domain. Therefore, if that
-	 * obj->write_domain matches the request read_domains, we are
-	 * already in the same read/write domain and can skip the operation,
-	 * without having to further check the requested write_domain.
-	 */
-	if (READ_ONCE(obj->write_domain) == read_domains) {
-		err = 0;
-		goto out;
-	}
-
-	/*
-	 * Try to flush the object off the GPU without holding the lock.
-	 * We will repeat the flush holding the lock in the normal manner
-	 * to catch cases where we are gazumped.
-	 */
-	err = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_PRIORITY |
-				   (write_domain ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (err)
-		goto out;
-
-	/*
-	 * Proxy objects do not control access to the backing storage, ergo
-	 * they cannot be used as a means to manipulate the cache domain
-	 * tracking for that backing storage. The proxy object is always
-	 * considered to be outside of any cache domain.
-	 */
-	if (i915_gem_object_is_proxy(obj)) {
-		err = -ENXIO;
-		goto out;
-	}
-
-	/*
-	 * Flush and acquire obj->pages so that we are coherent through
-	 * direct access in memory with previous cached writes through
-	 * shmemfs and that our cache domain tracking remains valid.
-	 * For example, if the obj->filp was moved to swap without us
-	 * being notified and releasing the pages, we would mistakenly
-	 * continue to assume that the obj remained out of the CPU cached
-	 * domain.
-	 */
-	err = i915_gem_object_pin_pages(obj);
-	if (err)
-		goto out;
-
-	err = i915_mutex_lock_interruptible(dev);
-	if (err)
-		goto out_unpin;
-
-	if (read_domains & I915_GEM_DOMAIN_WC)
-		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
-	else if (read_domains & I915_GEM_DOMAIN_GTT)
-		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
-	else
-		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
-
-	/* And bump the LRU for this access */
-	i915_gem_object_bump_inactive_ggtt(obj);
-
-	mutex_unlock(&dev->struct_mutex);
-
-	if (write_domain != 0)
-		intel_fb_obj_invalidate(obj,
-					fb_write_origin(obj, write_domain));
-
-out_unpin:
-	i915_gem_object_unpin_pages(obj);
-out:
-	i915_gem_object_put(obj);
-	return err;
-}
-
 /**
  * Called when user space has done writes to this buffer
  * @dev: drm device
@@ -1518,514 +1257,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 	return 0;
 }
 
-static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
-{
-	/*
-	 * We manually flush the CPU domain so that we can override and
-	 * force the flush for the display, and perform it asyncrhonously.
-	 */
-	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-	if (obj->cache_dirty)
-		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
-	obj->write_domain = 0;
-}
-
-void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
-{
-	if (!READ_ONCE(obj->pin_global))
-		return;
-
-	mutex_lock(&obj->base.dev->struct_mutex);
-	__i915_gem_object_flush_for_display(obj);
-	mutex_unlock(&obj->base.dev->struct_mutex);
-}
-
-/**
- * Moves a single object to the WC read, and possibly write domain.
- * @obj: object to act on
- * @write: ask for write access or read only
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
- */
-int
-i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
-{
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
-				   (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
-	if (obj->write_domain == I915_GEM_DOMAIN_WC)
-		return 0;
-
-	/* Flush and acquire obj->pages so that we are coherent through
-	 * direct access in memory with previous cached writes through
-	 * shmemfs and that our cache domain tracking remains valid.
-	 * For example, if the obj->filp was moved to swap without us
-	 * being notified and releasing the pages, we would mistakenly
-	 * continue to assume that the obj remained out of the CPU cached
-	 * domain.
-	 */
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		return ret;
-
-	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
-
-	/* Serialise direct access to this object with the barriers for
-	 * coherent writes from the GPU, by effectively invalidating the
-	 * WC domain upon first access.
-	 */
-	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
-		mb();
-
-	/* It should now be out of any other write domains, and we can update
-	 * the domain values for our changes.
-	 */
-	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
-	obj->read_domains |= I915_GEM_DOMAIN_WC;
-	if (write) {
-		obj->read_domains = I915_GEM_DOMAIN_WC;
-		obj->write_domain = I915_GEM_DOMAIN_WC;
-		obj->mm.dirty = true;
-	}
-
-	i915_gem_object_unpin_pages(obj);
-	return 0;
-}
-
-/**
- * Moves a single object to the GTT read, and possibly write domain.
- * @obj: object to act on
- * @write: ask for write access or read only
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
- */
-int
-i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
-{
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
-				   (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
-	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
-		return 0;
-
-	/* Flush and acquire obj->pages so that we are coherent through
-	 * direct access in memory with previous cached writes through
-	 * shmemfs and that our cache domain tracking remains valid.
-	 * For example, if the obj->filp was moved to swap without us
-	 * being notified and releasing the pages, we would mistakenly
-	 * continue to assume that the obj remained out of the CPU cached
-	 * domain.
-	 */
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		return ret;
-
-	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
-
-	/* Serialise direct access to this object with the barriers for
-	 * coherent writes from the GPU, by effectively invalidating the
-	 * GTT domain upon first access.
-	 */
-	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
-		mb();
-
-	/* It should now be out of any other write domains, and we can update
-	 * the domain values for our changes.
-	 */
-	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
-	obj->read_domains |= I915_GEM_DOMAIN_GTT;
-	if (write) {
-		obj->read_domains = I915_GEM_DOMAIN_GTT;
-		obj->write_domain = I915_GEM_DOMAIN_GTT;
-		obj->mm.dirty = true;
-	}
-
-	i915_gem_object_unpin_pages(obj);
-	return 0;
-}
-
-/**
- * Changes the cache-level of an object across all VMA.
- * @obj: object to act on
- * @cache_level: new cache level to set for the object
- *
- * After this function returns, the object will be in the new cache-level
- * across all GTT and the contents of the backing storage will be coherent,
- * with respect to the new cache-level. In order to keep the backing storage
- * coherent for all users, we only allow a single cache level to be set
- * globally on the object and prevent it from being changed whilst the
- * hardware is reading from the object. That is if the object is currently
- * on the scanout it will be set to uncached (or equivalent display
- * cache coherency) and all non-MOCS GPU access will also be uncached so
- * that all direct access to the scanout remains coherent.
- */
-int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
-				    enum i915_cache_level cache_level)
-{
-	struct i915_vma *vma;
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	if (obj->cache_level == cache_level)
-		return 0;
-
-	/* Inspect the list of currently bound VMA and unbind any that would
-	 * be invalid given the new cache-level. This is principally to
-	 * catch the issue of the CS prefetch crossing page boundaries and
-	 * reading an invalid PTE on older architectures.
-	 */
-restart:
-	list_for_each_entry(vma, &obj->vma.list, obj_link) {
-		if (!drm_mm_node_allocated(&vma->node))
-			continue;
-
-		if (i915_vma_is_pinned(vma)) {
-			DRM_DEBUG("can not change the cache level of pinned objects\n");
-			return -EBUSY;
-		}
-
-		if (!i915_vma_is_closed(vma) &&
-		    i915_gem_valid_gtt_space(vma, cache_level))
-			continue;
-
-		ret = i915_vma_unbind(vma);
-		if (ret)
-			return ret;
-
-		/* As unbinding may affect other elements in the
-		 * obj->vma_list (due to side-effects from retiring
-		 * an active vma), play safe and restart the iterator.
-		 */
-		goto restart;
-	}
-
-	/* We can reuse the existing drm_mm nodes but need to change the
-	 * cache-level on the PTE. We could simply unbind them all and
-	 * rebind with the correct cache-level on next use. However since
-	 * we already have a valid slot, dma mapping, pages etc, we may as
-	 * rewrite the PTE in the belief that doing so tramples upon less
-	 * state and so involves less work.
-	 */
-	if (obj->bind_count) {
-		/* Before we change the PTE, the GPU must not be accessing it.
-		 * If we wait upon the object, we know that all the bound
-		 * VMA are no longer active.
-		 */
-		ret = i915_gem_object_wait(obj,
-					   I915_WAIT_INTERRUPTIBLE |
-					   I915_WAIT_LOCKED |
-					   I915_WAIT_ALL,
-					   MAX_SCHEDULE_TIMEOUT);
-		if (ret)
-			return ret;
-
-		if (!HAS_LLC(to_i915(obj->base.dev)) &&
-		    cache_level != I915_CACHE_NONE) {
-			/* Access to snoopable pages through the GTT is
-			 * incoherent and on some machines causes a hard
-			 * lockup. Relinquish the CPU mmaping to force
-			 * userspace to refault in the pages and we can
-			 * then double check if the GTT mapping is still
-			 * valid for that pointer access.
-			 */
-			i915_gem_object_release_mmap(obj);
-
-			/* As we no longer need a fence for GTT access,
-			 * we can relinquish it now (and so prevent having
-			 * to steal a fence from someone else on the next
-			 * fence request). Note GPU activity would have
-			 * dropped the fence as all snoopable access is
-			 * supposed to be linear.
-			 */
-			for_each_ggtt_vma(vma, obj) {
-				ret = i915_vma_put_fence(vma);
-				if (ret)
-					return ret;
-			}
-		} else {
-			/* We either have incoherent backing store and
-			 * so no GTT access or the architecture is fully
-			 * coherent. In such cases, existing GTT mmaps
-			 * ignore the cache bit in the PTE and we can
-			 * rewrite it without confusing the GPU or having
-			 * to force userspace to fault back in its mmaps.
-			 */
-		}
-
-		list_for_each_entry(vma, &obj->vma.list, obj_link) {
-			if (!drm_mm_node_allocated(&vma->node))
-				continue;
-
-			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
-			if (ret)
-				return ret;
-		}
-	}
-
-	list_for_each_entry(vma, &obj->vma.list, obj_link)
-		vma->node.color = cache_level;
-	i915_gem_object_set_cache_coherency(obj, cache_level);
-	obj->cache_dirty = true; /* Always invalidate stale cachelines */
-
-	return 0;
-}
-
-int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
-			       struct drm_file *file)
-{
-	struct drm_i915_gem_caching *args = data;
-	struct drm_i915_gem_object *obj;
-	int err = 0;
-
-	rcu_read_lock();
-	obj = i915_gem_object_lookup_rcu(file, args->handle);
-	if (!obj) {
-		err = -ENOENT;
-		goto out;
-	}
-
-	switch (obj->cache_level) {
-	case I915_CACHE_LLC:
-	case I915_CACHE_L3_LLC:
-		args->caching = I915_CACHING_CACHED;
-		break;
-
-	case I915_CACHE_WT:
-		args->caching = I915_CACHING_DISPLAY;
-		break;
-
-	default:
-		args->caching = I915_CACHING_NONE;
-		break;
-	}
-out:
-	rcu_read_unlock();
-	return err;
-}
-
-int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
-			       struct drm_file *file)
-{
-	struct drm_i915_private *i915 = to_i915(dev);
-	struct drm_i915_gem_caching *args = data;
-	struct drm_i915_gem_object *obj;
-	enum i915_cache_level level;
-	int ret = 0;
-
-	switch (args->caching) {
-	case I915_CACHING_NONE:
-		level = I915_CACHE_NONE;
-		break;
-	case I915_CACHING_CACHED:
-		/*
-		 * Due to a HW issue on BXT A stepping, GPU stores via a
-		 * snooped mapping may leave stale data in a corresponding CPU
-		 * cacheline, whereas normally such cachelines would get
-		 * invalidated.
-		 */
-		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
-			return -ENODEV;
-
-		level = I915_CACHE_LLC;
-		break;
-	case I915_CACHING_DISPLAY:
-		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	obj = i915_gem_object_lookup(file, args->handle);
-	if (!obj)
-		return -ENOENT;
-
-	/*
-	 * The caching mode of proxy object is handled by its generator, and
-	 * not allowed to be changed by userspace.
-	 */
-	if (i915_gem_object_is_proxy(obj)) {
-		ret = -ENXIO;
-		goto out;
-	}
-
-	if (obj->cache_level == level)
-		goto out;
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE,
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		goto out;
-
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		goto out;
-
-	ret = i915_gem_object_set_cache_level(obj, level);
-	mutex_unlock(&dev->struct_mutex);
-
-out:
-	i915_gem_object_put(obj);
-	return ret;
-}
-
-/*
- * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
- * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
- * (for pageflips). We only flush the caches while preparing the buffer for
- * display, the callers are responsible for frontbuffer flush.
- */
-struct i915_vma *
-i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
-				     u32 alignment,
-				     const struct i915_ggtt_view *view,
-				     unsigned int flags)
-{
-	struct i915_vma *vma;
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	/* Mark the global pin early so that we account for the
-	 * display coherency whilst setting up the cache domains.
-	 */
-	obj->pin_global++;
-
-	/* The display engine is not coherent with the LLC cache on gen6.  As
-	 * a result, we make sure that the pinning that is about to occur is
-	 * done with uncached PTEs. This is lowest common denominator for all
-	 * chipsets.
-	 *
-	 * However for gen6+, we could do better by using the GFDT bit instead
-	 * of uncaching, which would allow us to flush all the LLC-cached data
-	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
-	 */
-	ret = i915_gem_object_set_cache_level(obj,
-					      HAS_WT(to_i915(obj->base.dev)) ?
-					      I915_CACHE_WT : I915_CACHE_NONE);
-	if (ret) {
-		vma = ERR_PTR(ret);
-		goto err_unpin_global;
-	}
-
-	/* As the user may map the buffer once pinned in the display plane
-	 * (e.g. libkms for the bootup splash), we have to ensure that we
-	 * always use map_and_fenceable for all scanout buffers. However,
-	 * it may simply be too big to fit into mappable, in which case
-	 * put it anyway and hope that userspace can cope (but always first
-	 * try to preserve the existing ABI).
-	 */
-	vma = ERR_PTR(-ENOSPC);
-	if ((flags & PIN_MAPPABLE) == 0 &&
-	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
-		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
-					       flags |
-					       PIN_MAPPABLE |
-					       PIN_NONBLOCK);
-	if (IS_ERR(vma))
-		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
-	if (IS_ERR(vma))
-		goto err_unpin_global;
-
-	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
-
-	__i915_gem_object_flush_for_display(obj);
-
-	/* It should now be out of any other write domains, and we can update
-	 * the domain values for our changes.
-	 */
-	obj->read_domains |= I915_GEM_DOMAIN_GTT;
-
-	return vma;
-
-err_unpin_global:
-	obj->pin_global--;
-	return vma;
-}
-
-void
-i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
-{
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-
-	if (WARN_ON(vma->obj->pin_global == 0))
-		return;
-
-	if (--vma->obj->pin_global == 0)
-		vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
-
-	/* Bump the LRU to try and avoid premature eviction whilst flipping  */
-	i915_gem_object_bump_inactive_ggtt(vma->obj);
-
-	i915_vma_unpin(vma);
-}
-
-/**
- * Moves a single object to the CPU read, and possibly write domain.
- * @obj: object to act on
- * @write: requesting write or read-only access
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
- */
-int
-i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
-{
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
-				   (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
-	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-
-	/* Flush the CPU cache if it's still invalid. */
-	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
-		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
-		obj->read_domains |= I915_GEM_DOMAIN_CPU;
-	}
-
-	/* It should now be out of any other write domains, and we can update
-	 * the domain values for our changes.
-	 */
-	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
-
-	/* If we're writing through the CPU, then the GPU read domains will
-	 * need to be invalidated at next use.
-	 */
-	if (write)
-		__start_cpu_write(obj);
-
-	return 0;
-}
-
 /* Throttle our rendering by waiting until the ring has completed our requests
  * emitted over 20 msec ago.
  *
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 27391300507e..fc68c9096e94 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1026,7 +1026,7 @@ static void reloc_cache_reset(struct reloc_cache *cache)
 			mb();
 
 		kunmap_atomic(vaddr);
-		i915_gem_obj_finish_shmem_access((struct drm_i915_gem_object *)cache->node.mm);
+		i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm);
 	} else {
 		wmb();
 		io_mapping_unmap_atomic((void __iomem *)vaddr);
@@ -1058,7 +1058,7 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
 		unsigned int flushes;
 		int err;
 
-		err = i915_gem_obj_prepare_shmem_write(obj, &flushes);
+		err = i915_gem_object_prepare_write(obj, &flushes);
 		if (err)
 			return ERR_PTR(err);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 9440024c763f..f3b42b026fff 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -84,7 +84,7 @@ static int render_state_setup(struct intel_render_state *so,
 	u32 *d;
 	int ret;
 
-	ret = i915_gem_obj_prepare_shmem_write(so->obj, &needs_clflush);
+	ret = i915_gem_object_prepare_write(so->obj, &needs_clflush);
 	if (ret)
 		return ret;
 
@@ -166,7 +166,7 @@ static int render_state_setup(struct intel_render_state *so,
 
 	ret = 0;
 out:
-	i915_gem_obj_finish_shmem_access(so->obj);
+	i915_gem_object_finish_access(so->obj);
 	return ret;
 
 err:
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index ce4ec87698f6..b22b8249dfbd 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -1017,7 +1017,7 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 	unsigned long n;
 	int err;
 
-	err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush);
+	err = i915_gem_object_prepare_read(obj, &needs_flush);
 	if (err)
 		return err;
 
@@ -1038,7 +1038,7 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 		kunmap_atomic(ptr);
 	}
 
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
index 046a38743152..cb25b5fc8027 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
@@ -37,7 +37,7 @@ static int cpu_set(struct drm_i915_gem_object *obj,
 	u32 *cpu;
 	int err;
 
-	err = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
+	err = i915_gem_object_prepare_write(obj, &needs_clflush);
 	if (err)
 		return err;
 
@@ -54,7 +54,7 @@ static int cpu_set(struct drm_i915_gem_object *obj,
 		drm_clflush_virt_range(cpu, sizeof(*cpu));
 
 	kunmap_atomic(map);
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 
 	return 0;
 }
@@ -69,7 +69,7 @@ static int cpu_get(struct drm_i915_gem_object *obj,
 	u32 *cpu;
 	int err;
 
-	err = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
+	err = i915_gem_object_prepare_read(obj, &needs_clflush);
 	if (err)
 		return err;
 
@@ -83,7 +83,7 @@ static int cpu_get(struct drm_i915_gem_object *obj,
 	*v = *cpu;
 
 	kunmap_atomic(map);
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 43d014472ac6..d66c6c3acb2d 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -354,7 +354,7 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
 	unsigned int n, m, need_flush;
 	int err;
 
-	err = i915_gem_obj_prepare_shmem_write(obj, &need_flush);
+	err = i915_gem_object_prepare_write(obj, &need_flush);
 	if (err)
 		return err;
 
@@ -369,7 +369,7 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
 		kunmap_atomic(map);
 	}
 
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
 	obj->write_domain = 0;
 	return 0;
@@ -381,7 +381,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
 	unsigned int n, m, needs_flush;
 	int err;
 
-	err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush);
+	err = i915_gem_object_prepare_read(obj, &needs_flush);
 	if (err)
 		return err;
 
@@ -419,7 +419,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
 			break;
 	}
 
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 	return err;
 }
 
-- 
2.20.1



* [PATCH 31/45] drm/i915: Move more GEM objects under gem/
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (28 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 30/45] drm/i915: Move GEM domain management " Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 32/45] drm/i915: Pull scatterlist utils out of i915_gem.h Chris Wilson
                   ` (21 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

Continuing the theme of separating out the GEM clutter.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 | 26 +++++------
 drivers/gpu/drm/i915/Makefile.header-test     |  2 -
 .../gpu/drm/i915/{ => gem}/i915_gem_clflush.c | 27 +++--------
 drivers/gpu/drm/i915/gem/i915_gem_clflush.h   | 20 +++++++++
 .../gpu/drm/i915/{ => gem}/i915_gem_context.c | 27 ++---------
 .../gpu/drm/i915/{ => gem}/i915_gem_context.h | 22 +--------
 .../i915/{ => gem}/i915_gem_context_types.h   |  0
 .../gpu/drm/i915/{ => gem}/i915_gem_dmabuf.c  | 28 +++---------
 drivers/gpu/drm/i915/gem/i915_gem_domain.c    |  2 +-
 .../drm/i915/{ => gem}/i915_gem_execbuffer.c  | 37 ++++-----------
 .../drm/i915/{ => gem}/i915_gem_internal.c    | 33 +++++---------
 drivers/gpu/drm/i915/gem/i915_gem_object.c    | 10 ++++-
 drivers/gpu/drm/i915/{ => gem}/i915_gem_pm.c  |  2 +-
 drivers/gpu/drm/i915/{ => gem}/i915_gem_pm.h  |  0
 .../drm/i915/{ => gem}/i915_gem_shrinker.c    | 25 ++---------
 .../gpu/drm/i915/{ => gem}/i915_gem_stolen.c  | 33 ++++----------
 .../gpu/drm/i915/{ => gem}/i915_gem_tiling.c  | 31 +++----------
 .../gpu/drm/i915/{ => gem}/i915_gem_userptr.c | 30 +++----------
 drivers/gpu/drm/i915/{ => gem}/i915_gemfs.c   | 25 ++---------
 drivers/gpu/drm/i915/gem/i915_gemfs.h         | 16 +++++++
 .../{ => gem}/selftests/huge_gem_object.c     | 22 +--------
 .../drm/i915/gem/selftests/huge_gem_object.h  | 27 +++++++++++
 .../drm/i915/{ => gem}/selftests/huge_pages.c | 34 +++++---------
 .../{ => gem}/selftests/i915_gem_coherency.c  | 26 ++---------
 .../{ => gem}/selftests/i915_gem_context.c    | 40 +++++------------
 .../{ => gem}/selftests/i915_gem_dmabuf.c     | 26 ++---------
 .../drm/i915/gem/selftests/i915_gem_mman.c    |  2 +-
 .../{ => gem}/selftests/i915_gem_object.c     | 28 +++---------
 .../i915/{ => gem}/selftests/igt_gem_utils.c  |  2 +-
 .../i915/{ => gem}/selftests/igt_gem_utils.h  |  0
 .../i915/{ => gem}/selftests/mock_context.c   | 24 ++--------
 .../gpu/drm/i915/gem/selftests/mock_context.h | 24 ++++++++++
 .../i915/{ => gem}/selftests/mock_dmabuf.c    | 22 +--------
 .../gpu/drm/i915/gem/selftests/mock_dmabuf.h  | 22 +++++++++
 .../{ => gem}/selftests/mock_gem_object.h     |  7 ++-
 drivers/gpu/drm/i915/gt/intel_context.c       |  4 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  3 ++
 drivers/gpu/drm/i915/gt/intel_lrc.c           |  2 +
 drivers/gpu/drm/i915/gt/intel_lrc.h           | 14 +++---
 drivers/gpu/drm/i915/gt/intel_reset.c         |  2 +
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |  3 ++
 drivers/gpu/drm/i915/gt/mock_engine.c         |  3 +-
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |  7 +--
 drivers/gpu/drm/i915/gt/selftest_lrc.c        |  7 ++-
 .../gpu/drm/i915/gt/selftest_workarounds.c    |  6 ++-
 drivers/gpu/drm/i915/gvt/mmio_context.c       |  1 +
 drivers/gpu/drm/i915/gvt/scheduler.c          |  5 ++-
 drivers/gpu/drm/i915/i915_debugfs.c           |  2 +-
 drivers/gpu/drm/i915/i915_drv.c               |  1 +
 drivers/gpu/drm/i915/i915_drv.h               |  2 +-
 drivers/gpu/drm/i915/i915_gem.c               | 11 ++---
 drivers/gpu/drm/i915/i915_gem_clflush.h       | 36 ---------------
 drivers/gpu/drm/i915/i915_gem_evict.c         |  2 +
 drivers/gpu/drm/i915/i915_gemfs.h             | 34 --------------
 drivers/gpu/drm/i915/i915_globals.c           |  2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c         |  2 +
 drivers/gpu/drm/i915/i915_perf.c              |  2 +
 drivers/gpu/drm/i915/i915_request.c           |  3 ++
 drivers/gpu/drm/i915/intel_display.c          |  1 -
 drivers/gpu/drm/i915/intel_guc_submission.c   |  2 +
 drivers/gpu/drm/i915/intel_overlay.c          |  2 +
 .../gpu/drm/i915/selftests/huge_gem_object.h  | 45 -------------------
 drivers/gpu/drm/i915/selftests/i915_active.c  |  2 +
 drivers/gpu/drm/i915/selftests/i915_gem.c     |  6 ++-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  6 ++-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |  3 +-
 drivers/gpu/drm/i915/selftests/i915_request.c |  5 ++-
 .../gpu/drm/i915/selftests/i915_timeline.c    |  1 +
 drivers/gpu/drm/i915/selftests/i915_vma.c     |  3 +-
 .../gpu/drm/i915/selftests/igt_flush_test.c   |  2 +
 drivers/gpu/drm/i915/selftests/igt_spinner.c  |  3 +-
 drivers/gpu/drm/i915/selftests/igt_spinner.h  |  2 +-
 drivers/gpu/drm/i915/selftests/intel_guc.c    |  1 +
 drivers/gpu/drm/i915/selftests/mock_context.h | 42 -----------------
 drivers/gpu/drm/i915/selftests/mock_dmabuf.h  | 41 -----------------
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  5 ++-
 drivers/gpu/drm/i915/selftests/mock_request.c |  2 +-
 77 files changed, 338 insertions(+), 692 deletions(-)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_clflush.c (77%)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_clflush.h
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_context.c (98%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_context.h (85%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_context_types.h (100%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_dmabuf.c (86%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_execbuffer.c (98%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_internal.c (81%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_pm.c (99%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_pm.h (100%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_shrinker.c (93%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_stolen.c (93%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_tiling.c (90%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_userptr.c (94%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gemfs.c (50%)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gemfs.h
 rename drivers/gpu/drm/i915/{ => gem}/selftests/huge_gem_object.c (70%)
 create mode 100644 drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
 rename drivers/gpu/drm/i915/{ => gem}/selftests/huge_pages.c (97%)
 rename drivers/gpu/drm/i915/{ => gem}/selftests/i915_gem_coherency.c (87%)
 rename drivers/gpu/drm/i915/{ => gem}/selftests/i915_gem_context.c (96%)
 rename drivers/gpu/drm/i915/{ => gem}/selftests/i915_gem_dmabuf.c (87%)
 rename drivers/gpu/drm/i915/{ => gem}/selftests/i915_gem_object.c (61%)
 rename drivers/gpu/drm/i915/{ => gem}/selftests/igt_gem_utils.c (95%)
 rename drivers/gpu/drm/i915/{ => gem}/selftests/igt_gem_utils.h (100%)
 rename drivers/gpu/drm/i915/{ => gem}/selftests/mock_context.c (63%)
 create mode 100644 drivers/gpu/drm/i915/gem/selftests/mock_context.h
 rename drivers/gpu/drm/i915/{ => gem}/selftests/mock_dmabuf.c (73%)
 create mode 100644 drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
 rename drivers/gpu/drm/i915/{ => gem}/selftests/mock_gem_object.h (65%)
 delete mode 100644 drivers/gpu/drm/i915/i915_gem_clflush.h
 delete mode 100644 drivers/gpu/drm/i915/i915_gemfs.h
 delete mode 100644 drivers/gpu/drm/i915/selftests/huge_gem_object.h
 delete mode 100644 drivers/gpu/drm/i915/selftests/mock_context.h
 delete mode 100644 drivers/gpu/drm/i915/selftests/mock_dmabuf.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 35561c0d80f9..cfefb544b613 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -87,33 +87,33 @@ i915-y += $(gt-y)
 # GEM (Graphics Execution Management) code
 obj-y += gem/
 gem-y += \
+	gem/i915_gem_clflush.o \
+	gem/i915_gem_context.o \
+	gem/i915_gem_dmabuf.o \
 	gem/i915_gem_domain.o \
+	gem/i915_gem_execbuffer.o \
+	gem/i915_gem_internal.o \
 	gem/i915_gem_object.o \
 	gem/i915_gem_mman.o \
 	gem/i915_gem_pages.o \
 	gem/i915_gem_phys.o \
-	gem/i915_gem_shmem.o
+	gem/i915_gem_pm.o \
+	gem/i915_gem_shmem.o \
+	gem/i915_gem_shrinker.o \
+	gem/i915_gem_stolen.o \
+	gem/i915_gem_tiling.o \
+	gem/i915_gem_userptr.o \
+	gem/i915_gemfs.o
 i915-y += \
 	  $(gem-y) \
 	  i915_active.o \
 	  i915_cmd_parser.o \
 	  i915_gem_batch_pool.o \
-	  i915_gem_clflush.o \
-	  i915_gem_context.o \
-	  i915_gem_dmabuf.o \
 	  i915_gem_evict.o \
-	  i915_gem_execbuffer.o \
 	  i915_gem_fence_reg.o \
 	  i915_gem_gtt.o \
-	  i915_gem_internal.o \
 	  i915_gem.o \
-	  i915_gem_pm.o \
 	  i915_gem_render_state.o \
-	  i915_gem_shrinker.o \
-	  i915_gem_stolen.o \
-	  i915_gem_tiling.o \
-	  i915_gem_userptr.o \
-	  i915_gemfs.o \
 	  i915_globals.o \
 	  i915_query.o \
 	  i915_request.o \
@@ -198,10 +198,10 @@ i915-y += dvo_ch7017.o \
 # Post-mortem debug and GPU hang state capture
 i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
 i915-$(CONFIG_DRM_I915_SELFTEST) += \
+	gem/selftests/igt_gem_utils.o \
 	selftests/i915_random.o \
 	selftests/i915_selftest.o \
 	selftests/igt_flush_test.o \
-	selftests/igt_gem_utils.o \
 	selftests/igt_live_test.o \
 	selftests/igt_reset.o \
 	selftests/igt_spinner.o
diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test
index 702e3a7ade4c..735f81a41514 100644
--- a/drivers/gpu/drm/i915/Makefile.header-test
+++ b/drivers/gpu/drm/i915/Makefile.header-test
@@ -4,8 +4,6 @@
 # Test the headers are compilable as standalone units
 header_test := \
 	i915_active_types.h \
-	i915_gem_context_types.h \
-	i915_gem_pm.h \
 	i915_priolist_types.h \
 	i915_scheduler_types.h \
 	i915_timeline_types.h \
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
similarity index 77%
rename from drivers/gpu/drm/i915/i915_gem_clflush.c
rename to drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index 8e74c23cbd91..093bfff55a96 100644
--- a/drivers/gpu/drm/i915/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -1,31 +1,14 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
-#include "i915_drv.h"
-#include "intel_frontbuffer.h"
 #include "i915_gem_clflush.h"
 
+#include "../i915_drv.h"
+#include "../intel_frontbuffer.h"
+
 static DEFINE_SPINLOCK(clflush_lock);
 
 struct clflush {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h
new file mode 100644
index 000000000000..e6c382973129
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h
@@ -0,0 +1,20 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __I915_GEM_CLFLUSH_H__
+#define __I915_GEM_CLFLUSH_H__
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct drm_i915_gem_object;
+
+bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+			     unsigned int flags);
+#define I915_CLFLUSH_FORCE BIT(0)
+#define I915_CLFLUSH_SYNC BIT(1)
+
+#endif /* __I915_GEM_CLFLUSH_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
similarity index 98%
rename from drivers/gpu/drm/i915/i915_gem_context.c
rename to drivers/gpu/drm/i915/gem/i915_gem_context.c
index cbba8f4bd786..378040732797 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1,28 +1,7 @@
 /*
- * Copyright © 2011-2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Ben Widawsky <ben@bwidawsk.net>
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2011-2012 Intel Corporation
  */
 
 /*
@@ -92,7 +71,7 @@
 
 #include "gt/intel_lrc_reg.h"
 
-#include "i915_drv.h"
+#include "i915_gem_context.h"
 #include "i915_globals.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
similarity index 85%
rename from drivers/gpu/drm/i915/i915_gem_context.h
rename to drivers/gpu/drm/i915/gem/i915_gem_context.h
index 9ad4a6362438..630392c77e48 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -1,25 +1,7 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
 #ifndef __I915_GEM_CONTEXT_H__
diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
similarity index 100%
rename from drivers/gpu/drm/i915/i915_gem_context_types.h
rename to drivers/gpu/drm/i915/gem/i915_gem_context_types.h
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
similarity index 86%
rename from drivers/gpu/drm/i915/i915_gem_dmabuf.c
rename to drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 5a101a9462d8..9b75dd8c267d 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -1,34 +1,16 @@
 /*
- * Copyright 2012 Red Hat Inc
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
+ * SPDX-License-Identifier: MIT
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *	Dave Airlie <airlied@redhat.com>
+ * Copyright 2012 Red Hat Inc
  */
 
 #include <linux/dma-buf.h>
+#include <linux/highmem.h>
 #include <linux/reservation.h>
 
+#include "i915_gem_object.h"
 
-#include "i915_drv.h"
+#include "../i915_drv.h"
 
 static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index eee421e3021c..5e1429f8e910 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -4,11 +4,11 @@
  * Copyright © 2014-2016 Intel Corporation
  */
 
+#include "i915_gem_clflush.h"
 #include "i915_gem_ioctls.h"
 #include "i915_gem_object.h"
 
 #include "../i915_drv.h"
-#include "../i915_gem_clflush.h"
 #include "../i915_gem_gtt.h"
 #include "../i915_vma.h"
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
similarity index 98%
rename from drivers/gpu/drm/i915/i915_gem_execbuffer.c
rename to drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index fc68c9096e94..a2761029f0c3 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1,29 +1,7 @@
 /*
- * Copyright © 2008,2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
- *    Chris Wilson <chris@chris-wilson.co.uk>
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2008,2010 Intel Corporation
  */
 
 #include <linux/intel-iommu.h>
@@ -35,13 +13,16 @@
 #include <drm/i915_drm.h>
 
 #include "gem/i915_gem_ioctls.h"
+#include "gt/intel_context.h"
 #include "gt/intel_gt_pm.h"
 
-#include "i915_drv.h"
+#include "i915_gem_ioctls.h"
 #include "i915_gem_clflush.h"
-#include "i915_trace.h"
-#include "intel_drv.h"
-#include "intel_frontbuffer.h"
+#include "i915_gem_context.h"
+
+#include "../i915_trace.h"
+#include "../intel_drv.h"
+#include "../intel_frontbuffer.h"
 
 enum {
 	FORCE_CPU_RELOC = 1,
diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
similarity index 81%
rename from drivers/gpu/drm/i915/i915_gem_internal.c
rename to drivers/gpu/drm/i915/gem/i915_gem_internal.c
index ab627ed1269c..d40adb3bbe29 100644
--- a/drivers/gpu/drm/i915/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -1,29 +1,20 @@
 /*
- * Copyright © 2014-2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2014-2016 Intel Corporation
  */
 
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/swiotlb.h>
+
 #include <drm/i915_drm.h>
-#include "i915_drv.h"
+
+#include "i915_gem_object.h"
+
+#include "../i915_drv.h"
+#include "../i915_gem.h"
+#include "../i915_utils.h"
 
 #define QUIET (__GFP_NORETRY | __GFP_NOWARN)
 #define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 005e17b3acce..04928ae415eb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -22,11 +22,12 @@
  *
  */
 
+#include "i915_gem_context.h"
+#include "i915_gem_clflush.h"
 #include "i915_gem_object.h"
 
 #include "../i915_drv.h"
 #include "../i915_globals.h"
-#include "../i915_gem_clflush.h"
 #include "../intel_frontbuffer.h"
 
 static struct i915_global_object {
@@ -443,3 +444,10 @@ int __init i915_global_objects_init(void)
 	i915_global_register(&global.base);
 	return 0;
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/huge_gem_object.c"
+#include "selftests/huge_pages.c"
+#include "selftests/i915_gem_object.c"
+#include "selftests/i915_gem_coherency.c"
+#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
similarity index 99%
rename from drivers/gpu/drm/i915/i915_gem_pm.c
rename to drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 3554d55dae35..052cafd96223 100644
--- a/drivers/gpu/drm/i915/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -4,10 +4,10 @@
  * Copyright © 2019 Intel Corporation
  */
 
+#include "gem/i915_gem_pm.h"
 #include "gt/intel_gt_pm.h"
 
 #include "i915_drv.h"
-#include "i915_gem_pm.h"
 #include "i915_globals.h"
 
 static void i915_gem_park(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/i915_gem_pm.h b/drivers/gpu/drm/i915/gem/i915_gem_pm.h
similarity index 100%
rename from drivers/gpu/drm/i915/i915_gem_pm.h
rename to drivers/gpu/drm/i915/gem/i915_gem_pm.h
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
similarity index 93%
rename from drivers/gpu/drm/i915/i915_gem_shrinker.c
rename to drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index 2c7aefb3e101..d551b78af65d 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -1,25 +1,7 @@
 /*
- * Copyright © 2008-2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2008-2015 Intel Corporation
  */
 
 #include <linux/oom.h>
@@ -32,8 +14,7 @@
 #include <linux/vmalloc.h>
 #include <drm/i915_drm.h>
 
-#include "i915_drv.h"
-#include "i915_trace.h"
+#include "../i915_trace.h"
 
 static bool shrinker_lock(struct drm_i915_private *i915,
 			  unsigned int flags,
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
similarity index 93%
rename from drivers/gpu/drm/i915/i915_gem_stolen.c
rename to drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 0a8082cfc761..b0961f2c568b 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -1,33 +1,16 @@
 /*
- * Copyright © 2008-2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
- *    Chris Wilson <chris@chris-wilson.co.uk>
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2008-2012 Intel Corporation
  */
 
+#include <linux/errno.h>
+#include <linux/mutex.h>
+
+#include <drm/drm_mm.h>
 #include <drm/i915_drm.h>
-#include "i915_drv.h"
+
+#include "../i915_drv.h"
 
 /*
  * The BIOS typically reserves some of the system's memory for the exclusive
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
similarity index 90%
rename from drivers/gpu/drm/i915/i915_gem_tiling.c
rename to drivers/gpu/drm/i915/gem/i915_gem_tiling.c
index 86d6d92ccbc9..90f6305a8aac 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -1,37 +1,18 @@
 /*
- * Copyright © 2008 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2008 Intel Corporation
  */
 
 #include <linux/string.h>
 #include <linux/bitops.h>
 #include <drm/i915_drm.h>
 
-#include "gem/i915_gem_ioctls.h"
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
 
-#include "i915_drv.h"
+#include "../i915_drv.h"
+#include "../i915_gem.h"
 
 /**
  * DOC: buffer object tiling
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
similarity index 94%
rename from drivers/gpu/drm/i915/i915_gem_userptr.c
rename to drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index b9c753f413da..007a2b8cac8b 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -1,25 +1,7 @@
 /*
- * Copyright © 2012-2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2012-2014 Intel Corporation
  */
 
 #include <linux/mmu_context.h>
@@ -30,11 +12,11 @@
 
 #include <drm/i915_drm.h>
 
-#include "gem/i915_gem_ioctls.h"
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
 
-#include "i915_drv.h"
-#include "i915_trace.h"
-#include "intel_drv.h"
+#include "../i915_trace.h"
+#include "../intel_drv.h"
 
 struct i915_mm_struct {
 	struct mm_struct *mm;
diff --git a/drivers/gpu/drm/i915/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c
similarity index 50%
rename from drivers/gpu/drm/i915/i915_gemfs.c
rename to drivers/gpu/drm/i915/gem/i915_gemfs.c
index 888b7d3f04c3..fecdbc19def4 100644
--- a/drivers/gpu/drm/i915/i915_gemfs.c
+++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c
@@ -1,34 +1,17 @@
 /*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2017 Intel Corporation
  */
 
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/pagemap.h>
 
-#include "i915_drv.h"
 #include "i915_gemfs.h"
 
+#include "../i915_drv.h"
+
 int i915_gemfs_init(struct drm_i915_private *i915)
 {
 	struct file_system_type *type;
diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.h b/drivers/gpu/drm/i915/gem/i915_gemfs.h
new file mode 100644
index 000000000000..2a1e59af3e4a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gemfs.h
@@ -0,0 +1,16 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017 Intel Corporation
+ */
+
+#ifndef __I915_GEMFS_H__
+#define __I915_GEMFS_H__
+
+struct drm_i915_private;
+
+int i915_gemfs_init(struct drm_i915_private *i915);
+
+void i915_gemfs_fini(struct drm_i915_private *i915);
+
+#endif
diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
similarity index 70%
rename from drivers/gpu/drm/i915/selftests/huge_gem_object.c
rename to drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
index 419fd4d6a8f0..824f3761314c 100644
--- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
@@ -1,25 +1,7 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
 #include "huge_gem_object.h"
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
new file mode 100644
index 000000000000..549c1394bcdc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
@@ -0,0 +1,27 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __HUGE_GEM_OBJECT_H
+#define __HUGE_GEM_OBJECT_H
+
+struct drm_i915_gem_object *
+huge_gem_object(struct drm_i915_private *i915,
+		phys_addr_t phys_size,
+		dma_addr_t dma_size);
+
+static inline phys_addr_t
+huge_gem_object_phys_size(struct drm_i915_gem_object *obj)
+{
+	return obj->scratch;
+}
+
+static inline dma_addr_t
+huge_gem_object_dma_size(struct drm_i915_gem_object *obj)
+{
+	return obj->base.size;
+}
+
+#endif /* !__HUGE_GEM_OBJECT_H */
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
similarity index 97%
rename from drivers/gpu/drm/i915/selftests/huge_pages.c
rename to drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index b22b8249dfbd..88ca4ed430ee 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -1,34 +1,20 @@
 /*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2017 Intel Corporation
  */
 
-#include "../i915_selftest.h"
-
 #include <linux/prime_numbers.h>
 
+#include "../../i915_selftest.h"
+#include "../i915_gem_pm.h"
+
 #include "igt_gem_utils.h"
-#include "mock_drm.h"
-#include "i915_random.h"
+#include "mock_context.h"
+
+#include "../../selftests/mock_drm.h"
+#include "../../selftests/mock_gem_device.h"
+#include "../../selftests/i915_random.h"
 
 static const unsigned int page_sizes[] = {
 	I915_GTT_PAGE_SIZE_2M,
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
similarity index 87%
rename from drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
rename to drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index cb25b5fc8027..b6613caa6f45 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -1,31 +1,13 @@
 /*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2017 Intel Corporation
  */
 
 #include <linux/prime_numbers.h>
 
-#include "../i915_selftest.h"
-#include "i915_random.h"
+#include "../../i915_selftest.h"
+#include "../../selftests/i915_random.h"
 
 static int cpu_set(struct drm_i915_gem_object *obj,
 		   unsigned long offset,
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
similarity index 96%
rename from drivers/gpu/drm/i915/selftests/i915_gem_context.c
rename to drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index d66c6c3acb2d..39be30b9ef7b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -1,42 +1,26 @@
 /*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2017 Intel Corporation
  */
 
 #include <linux/prime_numbers.h>
 
+#include "gem/i915_gem_pm.h"
 #include "gt/intel_reset.h"
 #include "i915_selftest.h"
 
-#include "i915_random.h"
-#include "igt_flush_test.h"
-#include "igt_gem_utils.h"
-#include "igt_live_test.h"
-#include "igt_reset.h"
-#include "igt_spinner.h"
+#include "gem/selftests/igt_gem_utils.h"
+#include "selftests/i915_random.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/igt_live_test.h"
+#include "selftests/igt_reset.h"
+#include "selftests/igt_spinner.h"
+#include "selftests/mock_drm.h"
+#include "selftests/mock_gem_device.h"
 
-#include "mock_drm.h"
-#include "mock_gem_device.h"
 #include "huge_gem_object.h"
+#include "igt_gem_utils.h"
 
 #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
similarity index 87%
rename from drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
rename to drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index cc65a503e2f0..fc4dd92f4496 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -1,30 +1,12 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
-#include "../i915_selftest.h"
+#include "../../i915_selftest.h"
 
-#include "mock_gem_device.h"
+#include "../../selftests/mock_gem_device.h"
 #include "mock_dmabuf.h"
 
 static int igt_dmabuf_export(void *arg)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 031e7ad875e3..b3ee4ee08b18 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -7,8 +7,8 @@
 #include <linux/prime_numbers.h>
 
 #include "gt/intel_gt_pm.h"
+#include "huge_gem_object.h"
 #include "i915_selftest.h"
-#include "selftests/huge_gem_object.h"
 #include "selftests/igt_flush_test.h"
 
 struct tile {
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
similarity index 61%
rename from drivers/gpu/drm/i915/selftests/i915_gem_object.c
rename to drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
index a3dd2f1be95b..4266ad03ee47 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
@@ -1,32 +1,14 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
-#include "../i915_selftest.h"
+#include "../../i915_selftest.h"
 
-#include "igt_flush_test.h"
-#include "mock_gem_device.h"
 #include "huge_gem_object.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/mock_gem_device.h"
 
 static int igt_gem_object(void *arg)
 {
diff --git a/drivers/gpu/drm/i915/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
similarity index 95%
rename from drivers/gpu/drm/i915/selftests/igt_gem_utils.c
rename to drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
index 16891b1a3e50..b96713ed13e8 100644
--- a/drivers/gpu/drm/i915/selftests/igt_gem_utils.c
+++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
@@ -7,10 +7,10 @@
 #include "igt_gem_utils.h"
 
 #include "gt/intel_context.h"
+#include "i915_request.h"
 
 #include "../i915_gem_context.h"
 #include "../i915_gem_pm.h"
-#include "../i915_request.h"
 
 struct i915_request *
 igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/selftests/igt_gem_utils.h b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h
similarity index 100%
rename from drivers/gpu/drm/i915/selftests/igt_gem_utils.h
rename to drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
similarity index 63%
rename from drivers/gpu/drm/i915/selftests/mock_context.c
rename to drivers/gpu/drm/i915/gem/selftests/mock_context.c
index 10e67c931ed1..d373d7238622 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -1,29 +1,11 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
 #include "mock_context.h"
-#include "mock_gtt.h"
+#include "../../selftests/mock_gtt.h"
 
 struct i915_gem_context *
 mock_context(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.h b/drivers/gpu/drm/i915/gem/selftests/mock_context.h
new file mode 100644
index 000000000000..0b926653914f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.h
@@ -0,0 +1,24 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __MOCK_CONTEXT_H
+#define __MOCK_CONTEXT_H
+
+void mock_init_contexts(struct drm_i915_private *i915);
+
+struct i915_gem_context *
+mock_context(struct drm_i915_private *i915,
+	     const char *name);
+
+void mock_context_close(struct i915_gem_context *ctx);
+
+struct i915_gem_context *
+live_context(struct drm_i915_private *i915, struct drm_file *file);
+
+struct i915_gem_context *kernel_context(struct drm_i915_private *i915);
+void kernel_context_close(struct i915_gem_context *ctx);
+
+#endif /* !__MOCK_CONTEXT_H */
diff --git a/drivers/gpu/drm/i915/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
similarity index 73%
rename from drivers/gpu/drm/i915/selftests/mock_dmabuf.c
rename to drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
index ca682caf1062..b9e059d4328a 100644
--- a/drivers/gpu/drm/i915/selftests/mock_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
@@ -1,25 +1,7 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
 #include "mock_dmabuf.h"
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
new file mode 100644
index 000000000000..f0f8bbd82dfc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
@@ -0,0 +1,22 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __MOCK_DMABUF_H__
+#define __MOCK_DMABUF_H__
+
+#include <linux/dma-buf.h>
+
+struct mock_dmabuf {
+	int npages;
+	struct page *pages[];
+};
+
+static struct mock_dmabuf *to_mock(struct dma_buf *buf)
+{
+	return buf->priv;
+}
+
+#endif /* !__MOCK_DMABUF_H__ */
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
similarity index 65%
rename from drivers/gpu/drm/i915/selftests/mock_gem_object.h
rename to drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
index 20acdbee7bd0..370360b4a148 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
@@ -1,4 +1,9 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
 #ifndef __MOCK_GEM_OBJECT_H__
 #define __MOCK_GEM_OBJECT_H__
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 1f1761fc6597..7e2b18ddda19 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -4,8 +4,10 @@
  * Copyright © 2019 Intel Corporation
  */
 
+#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_pm.h"
+
 #include "i915_drv.h"
-#include "i915_gem_context.h"
 #include "i915_globals.h"
 
 #include "intel_context.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index b2cd6c6785be..25a1328d9d27 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -24,10 +24,13 @@
 
 #include <drm/drm_print.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
 
 #include "intel_engine.h"
 #include "intel_engine_pm.h"
+#include "intel_context.h"
 #include "intel_lrc.h"
 #include "intel_reset.h"
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index a833b6c14516..524de9eaa44c 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -133,6 +133,8 @@
  */
 #include <linux/interrupt.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
 #include "i915_vgpu.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index e029aee87adf..c2bba82bcc16 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -24,7 +24,15 @@
 #ifndef _INTEL_LRC_H_
 #define _INTEL_LRC_H_
 
-#include "intel_engine.h"
+#include <linux/types.h>
+
+struct drm_printer;
+
+struct drm_i915_private;
+struct i915_gem_context;
+struct i915_request;
+struct intel_context;
+struct intel_engine_cs;
 
 /* Execlists regs */
 #define RING_ELSP(base)				_MMIO((base) + 0x230)
@@ -96,10 +104,6 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine);
  */
 #define LRC_HEADER_PAGES LRC_PPHWSP_PN
 
-struct drm_printer;
-
-struct drm_i915_private;
-
 void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
 
 void intel_lr_context_reset(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 3424d28650af..59baa20a3299 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -7,6 +7,8 @@
 #include <linux/sched/mm.h>
 #include <linux/stop_machine.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
 #include "i915_gpu_error.h"
 #include "intel_engine_pm.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 0b503b82db0f..8faa9bd9edc2 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -31,9 +31,12 @@
 
 #include <drm/i915_drm.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
 #include "i915_trace.h"
+#include "intel_context.h"
 #include "intel_reset.h"
 #include "intel_workarounds.h"
 
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 2941916b37bf..6d7562769eb2 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -22,8 +22,9 @@
  *
  */
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
-#include "i915_gem_context.h"
 #include "intel_context.h"
 #include "intel_engine_pm.h"
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index dab3d30c9c73..4b4589b92521 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -24,18 +24,19 @@
 
 #include <linux/kthread.h>
 
+#include "gem/i915_gem_context.h"
 #include "intel_engine_pm.h"
 
 #include "i915_selftest.h"
 #include "selftests/i915_random.h"
 #include "selftests/igt_flush_test.h"
-#include "selftests/igt_gem_utils.h"
 #include "selftests/igt_reset.h"
 #include "selftests/igt_wedge_me.h"
-
-#include "selftests/mock_context.h"
 #include "selftests/mock_drm.h"
 
+#include "gem/selftests/mock_context.h"
+#include "gem/selftests/igt_gem_utils.h"
+
 #define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */
 
 struct hang {
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 68d10038f3c9..fb32b265a49e 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -6,15 +6,18 @@
 
 #include <linux/prime_numbers.h>
 
+#include "gem/i915_gem_pm.h"
 #include "gt/intel_reset.h"
+
 #include "i915_selftest.h"
 #include "selftests/i915_random.h"
 #include "selftests/igt_flush_test.h"
-#include "selftests/igt_gem_utils.h"
 #include "selftests/igt_live_test.h"
 #include "selftests/igt_spinner.h"
 #include "selftests/lib_sw_fence.h"
-#include "selftests/mock_context.h"
+
+#include "gem/selftests/igt_gem_utils.h"
+#include "gem/selftests/mock_context.h"
 
 static int live_sanitycheck(void *arg)
 {
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 9f7680b9984b..40b6df911d8d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -4,17 +4,19 @@
  * Copyright © 2018 Intel Corporation
  */
 
+#include "gem/i915_gem_pm.h"
 #include "i915_selftest.h"
 #include "intel_reset.h"
 
 #include "selftests/igt_flush_test.h"
-#include "selftests/igt_gem_utils.h"
 #include "selftests/igt_reset.h"
 #include "selftests/igt_spinner.h"
 #include "selftests/igt_wedge_me.h"
-#include "selftests/mock_context.h"
 #include "selftests/mock_drm.h"
 
+#include "gem/selftests/igt_gem_utils.h"
+#include "gem/selftests/mock_context.h"
+
 static const struct wo_register {
 	enum intel_platform platform;
 	u32 reg;
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index b8823495022b..40af340f6863 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -34,6 +34,7 @@
  */
 
 #include "i915_drv.h"
+#include "gt/intel_context.h"
 #include "gvt.h"
 #include "trace.h"
 
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 2deffb3e6ab7..c9d3b52d776f 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -35,8 +35,11 @@
 
 #include <linux/kthread.h>
 
+#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_pm.h"
+#include "gt/intel_context.h"
+
 #include "i915_drv.h"
-#include "i915_gem_pm.h"
 #include "gvt.h"
 
 #define RING_CTX_OFF(x) \
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 466becbb99c6..b2a77f2f777f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -32,9 +32,9 @@
 #include <drm/drm_debugfs.h>
 #include <drm/drm_fourcc.h>
 
+#include "gem/i915_gem_context.h"
 #include "gt/intel_reset.h"
 
-#include "i915_gem_context.h"
 #include "intel_dp.h"
 #include "intel_drv.h"
 #include "intel_fbc.h"
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index cd8d0bd38367..cab18ab9e19f 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -47,6 +47,7 @@
 #include <drm/drm_probe_helper.h>
 #include <drm/i915_drm.h>
 
+#include "gem/i915_gem_context.h"
 #include "gem/i915_gem_ioctls.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_reset.h"
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 72f2f04d51d9..65f474a824fb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -78,7 +78,7 @@
 #include "intel_wopcm.h"
 
 #include "i915_gem.h"
-#include "i915_gem_context.h"
+#include "gem/i915_gem_context_types.h"
 #include "i915_gem_fence_reg.h"
 #include "i915_gem_gtt.h"
 #include "i915_gpu_error.h"
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fc9c9776999a..343a8a837363 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -38,7 +38,11 @@
 #include <linux/dma-buf.h>
 #include <linux/mman.h>
 
+#include "gem/i915_gem_clflush.h"
+#include "gem/i915_gem_context.h"
 #include "gem/i915_gem_ioctls.h"
+#include "gem/i915_gem_pm.h"
+#include "gem/i915_gemfs.h"
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_mocs.h"
@@ -46,9 +50,6 @@
 #include "gt/intel_workarounds.h"
 
 #include "i915_drv.h"
-#include "i915_gem_clflush.h"
-#include "i915_gemfs.h"
-#include "i915_gem_pm.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
 
@@ -2343,9 +2344,5 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/scatterlist.c"
 #include "selftests/mock_gem_device.c"
-#include "selftests/huge_gem_object.c"
-#include "selftests/huge_pages.c"
-#include "selftests/i915_gem_object.c"
-#include "selftests/i915_gem_coherency.c"
 #include "selftests/i915_gem.c"
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.h b/drivers/gpu/drm/i915/i915_gem_clflush.h
deleted file mode 100644
index f390247561b3..000000000000
--- a/drivers/gpu/drm/i915/i915_gem_clflush.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef __I915_GEM_CLFLUSH_H__
-#define __I915_GEM_CLFLUSH_H__
-
-struct drm_i915_private;
-struct drm_i915_gem_object;
-
-bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
-			     unsigned int flags);
-#define I915_CLFLUSH_FORCE BIT(0)
-#define I915_CLFLUSH_SYNC BIT(1)
-
-#endif /* __I915_GEM_CLFLUSH_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 0bdb3e072ba5..a5783c4cb98b 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -28,6 +28,8 @@
 
 #include <drm/i915_drm.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
 #include "intel_drv.h"
 #include "i915_trace.h"
diff --git a/drivers/gpu/drm/i915/i915_gemfs.h b/drivers/gpu/drm/i915/i915_gemfs.h
deleted file mode 100644
index cca8bdc5b93e..000000000000
--- a/drivers/gpu/drm/i915/i915_gemfs.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef __I915_GEMFS_H__
-#define __I915_GEMFS_H__
-
-struct drm_i915_private;
-
-int i915_gemfs_init(struct drm_i915_private *i915);
-
-void i915_gemfs_fini(struct drm_i915_private *i915);
-
-#endif
diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c
index db52a58eadcc..2d5fcba98841 100644
--- a/drivers/gpu/drm/i915/i915_globals.c
+++ b/drivers/gpu/drm/i915/i915_globals.c
@@ -8,7 +8,7 @@
 #include <linux/workqueue.h>
 
 #include "i915_active.h"
-#include "i915_gem_context.h"
+#include "gem/i915_gem_context.h"
 #include "gem/i915_gem_object.h"
 #include "i915_globals.h"
 #include "i915_request.h"
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f51ff683dd2e..fb424c8d7262 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -36,6 +36,8 @@
 
 #include <drm/drm_print.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_gpu_error.h"
 #include "i915_drv.h"
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 379fd89a180f..2e33a9b4eae7 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -195,6 +195,8 @@
 #include <linux/sizes.h>
 #include <linux/uuid.h>
 
+#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_pm.h"
 #include "gt/intel_lrc_reg.h"
 
 #include "i915_drv.h"
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 55357b4a5fff..dc5bce2c0504 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -29,6 +29,9 @@
 #include <linux/sched/clock.h>
 #include <linux/sched/signal.h>
 
+#include "gem/i915_gem_context.h"
+#include "gt/intel_context.h"
+
 #include "i915_active.h"
 #include "i915_drv.h"
 #include "i915_globals.h"
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index c3d1d38ccf4d..78c3587ebabf 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -45,7 +45,6 @@
 #include <drm/i915_drm.h>
 
 #include "i915_drv.h"
-#include "i915_gem_clflush.h"
 #include "i915_trace.h"
 #include "intel_atomic_plane.h"
 #include "intel_color.h"
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index ed94001028f2..16226b2ab608 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -26,6 +26,8 @@
 #include <trace/events/dma_fence.h>
 
 #include "gt/intel_lrc_reg.h"
+#include "gt/intel_context.h"
+#include "gem/i915_gem_context.h"
 
 #include "intel_guc_submission.h"
 #include "i915_drv.h"
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 5c496b11ab5c..81a0db52c588 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -28,6 +28,8 @@
 #include <drm/i915_drm.h>
 #include <drm/drm_fourcc.h>
 
+#include "gem/i915_gem_pm.h"
+
 #include "i915_drv.h"
 #include "i915_reg.h"
 #include "intel_drv.h"
diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/selftests/huge_gem_object.h
deleted file mode 100644
index a6133a9e8029..000000000000
--- a/drivers/gpu/drm/i915/selftests/huge_gem_object.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef __HUGE_GEM_OBJECT_H
-#define __HUGE_GEM_OBJECT_H
-
-struct drm_i915_gem_object *
-huge_gem_object(struct drm_i915_private *i915,
-		phys_addr_t phys_size,
-		dma_addr_t dma_size);
-
-static inline phys_addr_t
-huge_gem_object_phys_size(struct drm_i915_gem_object *obj)
-{
-	return obj->scratch;
-}
-
-static inline dma_addr_t
-huge_gem_object_dma_size(struct drm_i915_gem_object *obj)
-{
-	return obj->base.size;
-}
-
-#endif /* !__HUGE_GEM_OBJECT_H */
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index eee838dc0634..2af2b0ba9147 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -9,6 +9,8 @@
 #include "igt_flush_test.h"
 #include "lib_sw_fence.h"
 
+#include "../gem/i915_gem_pm.h"
+
 struct live_active {
 	struct i915_active base;
 	bool retired;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index c6a9bff85311..96221972c3f8 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -8,9 +8,11 @@
 
 #include "../i915_selftest.h"
 
-#include "igt_gem_utils.h"
 #include "igt_flush_test.h"
-#include "mock_context.h"
+#include "mock_drm.h"
+
+#include "../gem/selftests/igt_gem_utils.h"
+#include "../gem/selftests/mock_context.h"
 
 static int switch_to_context(struct drm_i915_private *i915,
 			     struct i915_gem_context *ctx)
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 4fc6e5445dd1..f0c320694277 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -24,12 +24,14 @@
 
 #include "../i915_selftest.h"
 
-#include "igt_gem_utils.h"
 #include "lib_sw_fence.h"
-#include "mock_context.h"
 #include "mock_drm.h"
 #include "mock_gem_device.h"
 
+#include "../gem/i915_gem_pm.h"
+#include "../gem/selftests/igt_gem_utils.h"
+#include "../gem/selftests/mock_context.h"
+
 static void quirk_add(struct drm_i915_gem_object *obj,
 		      struct list_head *objects)
 {
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 9cca66e4420a..e1c8e34b8405 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -28,10 +28,11 @@
 #include "../i915_selftest.h"
 #include "i915_random.h"
 
-#include "mock_context.h"
 #include "mock_drm.h"
 #include "mock_gem_device.h"
 
+#include "../gem/selftests/mock_context.h"
+
 static void cleanup_freed_objects(struct drm_i915_private *i915)
 {
 	/*
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 8fe9a43c99d9..66c79965bde8 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -29,10 +29,13 @@
 #include "igt_live_test.h"
 #include "lib_sw_fence.h"
 
-#include "mock_context.h"
+#include "gem/i915_gem_pm.h"
+
 #include "mock_drm.h"
 #include "mock_gem_device.h"
 
+#include "../gem/selftests/mock_context.h"
+
 static int igt_add_request(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c
index ff9ebe50fae8..82c9be5be92f 100644
--- a/drivers/gpu/drm/i915/selftests/i915_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c
@@ -6,6 +6,7 @@
 
 #include <linux/prime_numbers.h>
 
+#include "../gem/i915_gem_pm.h"
 #include "../i915_selftest.h"
 #include "i915_random.h"
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index fc594b030f5a..131d0d9a6cd1 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -27,9 +27,10 @@
 #include "../i915_selftest.h"
 
 #include "mock_gem_device.h"
-#include "mock_context.h"
 #include "mock_gtt.h"
 
+#include "../gem/selftests/mock_context.h"
+
 static bool assert_vma(struct i915_vma *vma,
 		       struct drm_i915_gem_object *obj,
 		       struct i915_gem_context *ctx)
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index e42f3c58536a..34af282c3c93 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -9,6 +9,8 @@
 #include "../i915_selftest.h"
 #include "igt_flush_test.h"
 
+#include "../gem/i915_gem_context.h"
+
 int igt_flush_test(struct drm_i915_private *i915, unsigned int flags)
 {
 	int ret = i915_terminally_wedged(i915) ? -EIO : 0;
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c
index ece8a8a0d3b0..45688e6698b7 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.c
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c
@@ -4,9 +4,10 @@
  * Copyright © 2018 Intel Corporation
  */
 
-#include "igt_gem_utils.h"
 #include "igt_spinner.h"
 
+#include "../gem/selftests/igt_gem_utils.h"
+
 int igt_spinner_init(struct igt_spinner *spin, struct drm_i915_private *i915)
 {
 	unsigned int mode;
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.h b/drivers/gpu/drm/i915/selftests/igt_spinner.h
index d312e7cdab68..2b10c78da26b 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.h
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.h
@@ -9,11 +9,11 @@
 
 #include "../i915_selftest.h"
 
+#include "gem/i915_gem_context.h"
 #include "gt/intel_engine.h"
 
 #include "../i915_drv.h"
 #include "../i915_request.h"
-#include "../i915_gem_context.h"
 
 struct igt_spinner {
 	struct drm_i915_private *i915;
diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c
index b05a21eaa8f4..f7eb483f9eae 100644
--- a/drivers/gpu/drm/i915/selftests/intel_guc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_guc.c
@@ -23,6 +23,7 @@
  */
 
 #include "../i915_selftest.h"
+#include "../gem/i915_gem_pm.h"
 
 /* max doorbell number + negative test for each client type */
 #define ATTEMPTS (GUC_NUM_DOORBELLS + GUC_CLIENT_PRIORITY_NUM)
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.h b/drivers/gpu/drm/i915/selftests/mock_context.h
deleted file mode 100644
index 29b9d60a158b..000000000000
--- a/drivers/gpu/drm/i915/selftests/mock_context.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef __MOCK_CONTEXT_H
-#define __MOCK_CONTEXT_H
-
-void mock_init_contexts(struct drm_i915_private *i915);
-
-struct i915_gem_context *
-mock_context(struct drm_i915_private *i915,
-	     const char *name);
-
-void mock_context_close(struct i915_gem_context *ctx);
-
-struct i915_gem_context *
-live_context(struct drm_i915_private *i915, struct drm_file *file);
-
-struct i915_gem_context *kernel_context(struct drm_i915_private *i915);
-void kernel_context_close(struct i915_gem_context *ctx);
-
-#endif /* !__MOCK_CONTEXT_H */
diff --git a/drivers/gpu/drm/i915/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/selftests/mock_dmabuf.h
deleted file mode 100644
index ec80613159b9..000000000000
--- a/drivers/gpu/drm/i915/selftests/mock_dmabuf.h
+++ /dev/null
@@ -1,41 +0,0 @@
-
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef __MOCK_DMABUF_H__
-#define __MOCK_DMABUF_H__
-
-#include <linux/dma-buf.h>
-
-struct mock_dmabuf {
-	int npages;
-	struct page *pages[];
-};
-
-static struct mock_dmabuf *to_mock(struct dma_buf *buf)
-{
-	return buf->priv;
-}
-
-#endif /* !__MOCK_DMABUF_H__ */
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index e4033d0576c4..576bb11b7858 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -27,13 +27,14 @@
 
 #include "gt/mock_engine.h"
 
-#include "mock_context.h"
 #include "mock_request.h"
 #include "mock_gem_device.h"
-#include "mock_gem_object.h"
 #include "mock_gtt.h"
 #include "mock_uncore.h"
 
+#include "gem/selftests/mock_context.h"
+#include "gem/selftests/mock_gem_object.h"
+
 void mock_device_flush(struct drm_i915_private *i915)
 {
 	struct intel_engine_cs *engine;
diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c
index b99f7576153c..9390fc09984b 100644
--- a/drivers/gpu/drm/i915/selftests/mock_request.c
+++ b/drivers/gpu/drm/i915/selftests/mock_request.c
@@ -22,9 +22,9 @@
  *
  */
 
+#include "gem/selftests/igt_gem_utils.h"
 #include "gt/mock_engine.h"
 
-#include "igt_gem_utils.h"
 #include "mock_request.h"
 
 struct i915_request *
-- 
2.20.1

* [PATCH 32/45] drm/i915: Pull scatterlist utils out of i915_gem.h
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (29 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 31/45] drm/i915: Move more GEM objects under gem/ Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 33/45] lockdep: Swap storage for pin_count and references Chris Wilson
                   ` (20 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

Our scatterlist utility routines can be pulled out of i915_gem.h for a
bit more decluttering.

v2: Push I915_GTT_PAGE_SIZE out of i915_scatterlist itself and into the
caller.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
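A quick illustration of the resulting split (a sketch only, not part of
the patch; print_dma_pages() is a made-up helper): a caller that includes
i915_scatterlist.h directly now passes the step itself to
__for_each_sgt_dma(), while i915_gem_gtt.h keeps the for_each_sgt_dma()
spelling by fixing the step to I915_GTT_PAGE_SIZE.

/* Sketch only; print_dma_pages() is hypothetical, not from this series. */
#include <linux/printk.h>

#include "i915_scatterlist.h"

static void print_dma_pages(struct sg_table *pages)
{
	struct sgt_iter iter;
	dma_addr_t addr;

	/* Walk the object's DMA addresses; the caller now supplies the step. */
	__for_each_sgt_dma(addr, iter, pages, PAGE_SIZE)
		pr_info("dma chunk at %pad\n", &addr);
}

Callers that already go through i915_gem_gtt.h see no change in spelling:
for_each_sgt_dma() there expands to the same loop with I915_GTT_PAGE_SIZE
as the step.
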
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c    |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_internal.c  |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_pages.c     |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_phys.c      |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c     |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c   |   1 +
 .../drm/i915/gem/selftests/huge_gem_object.c  |   2 +
 .../drm/i915/gem/selftests/i915_gem_dmabuf.c  |   2 +
 drivers/gpu/drm/i915/i915_drv.h               | 110 ---------------
 drivers/gpu/drm/i915/i915_gem.c               |  30 +----
 drivers/gpu/drm/i915/i915_gem_fence_reg.c     |   2 +
 drivers/gpu/drm/i915/i915_gem_gtt.c           |   3 +-
 drivers/gpu/drm/i915/i915_gem_gtt.h           |   4 +-
 drivers/gpu/drm/i915/i915_gpu_error.c         |   1 +
 drivers/gpu/drm/i915/i915_scatterlist.c       |  39 ++++++
 drivers/gpu/drm/i915/i915_scatterlist.h       | 127 ++++++++++++++++++
 drivers/gpu/drm/i915/selftests/i915_vma.c     |   1 +
 drivers/gpu/drm/i915/selftests/scatterlist.c  |   1 +
 19 files changed, 188 insertions(+), 141 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_scatterlist.c
 create mode 100644 drivers/gpu/drm/i915/i915_scatterlist.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index cfefb544b613..8e70d5972195 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -44,6 +44,7 @@ i915-y += i915_drv.o \
 	  i915_irq.o \
 	  i915_params.o \
 	  i915_pci.o \
+	  i915_scatterlist.o \
 	  i915_suspend.o \
 	  i915_sysfs.o \
 	  intel_csr.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 9b75dd8c267d..4e7efe159531 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -11,6 +11,7 @@
 #include "i915_gem_object.h"
 
 #include "../i915_drv.h"
+#include "../i915_scatterlist.h"
 
 static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index d40adb3bbe29..d072d0cbce06 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -14,6 +14,7 @@
 
 #include "../i915_drv.h"
 #include "../i915_gem.h"
+#include "../i915_scatterlist.h"
 #include "../i915_utils.h"
 
 #define QUIET (__GFP_NORETRY | __GFP_NOWARN)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 452aba0a3359..21635d0ac95e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -7,6 +7,7 @@
 #include "i915_gem_object.h"
 
 #include "../i915_drv.h"
+#include "../i915_scatterlist.h"
 
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 				 struct sg_table *pages,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
index 1bf3e0afcba2..22d185301578 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -16,6 +16,7 @@
 #include "i915_gem_object.h"
 
 #include "../i915_drv.h"
+#include "../i915_scatterlist.h"
 
 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 78df86d1db7a..2eee1af88bcb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -10,6 +10,7 @@
 #include "i915_gem_object.h"
 
 #include "../i915_drv.h"
+#include "../i915_scatterlist.h"
 
 /*
  * Move pages to appropriate lru and release the pagevec, decrementing the
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 007a2b8cac8b..0137baa409e1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -15,6 +15,7 @@
 #include "i915_gem_ioctls.h"
 #include "i915_gem_object.h"
 
+#include "../i915_scatterlist.h"
 #include "../i915_trace.h"
 #include "../intel_drv.h"
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
index 824f3761314c..c03781f8b435 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
@@ -4,6 +4,8 @@
  * Copyright © 2016 Intel Corporation
  */
 
+#include "../../i915_scatterlist.h"
+
 #include "huge_gem_object.h"
 
 static void huge_free_pages(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index fc4dd92f4496..da6eb6ecdf68 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -9,6 +9,8 @@
 #include "../../selftests/mock_gem_device.h"
 #include "mock_dmabuf.h"
 
+#include "../../i915_drv.h"
+
 static int igt_dmabuf_export(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 65f474a824fb..cec8410ed80b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2167,111 +2167,6 @@ enum hdmi_force_audio {
 	GENMASK(INTEL_FRONTBUFFER_BITS_PER_PIPE * ((pipe) + 1) - 1, \
 		INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe))
 
-/*
- * Optimised SGL iterator for GEM objects
- */
-static __always_inline struct sgt_iter {
-	struct scatterlist *sgp;
-	union {
-		unsigned long pfn;
-		dma_addr_t dma;
-	};
-	unsigned int curr;
-	unsigned int max;
-} __sgt_iter(struct scatterlist *sgl, bool dma) {
-	struct sgt_iter s = { .sgp = sgl };
-
-	if (s.sgp) {
-		s.max = s.curr = s.sgp->offset;
-		s.max += s.sgp->length;
-		if (dma)
-			s.dma = sg_dma_address(s.sgp);
-		else
-			s.pfn = page_to_pfn(sg_page(s.sgp));
-	}
-
-	return s;
-}
-
-static inline struct scatterlist *____sg_next(struct scatterlist *sg)
-{
-	++sg;
-	if (unlikely(sg_is_chain(sg)))
-		sg = sg_chain_ptr(sg);
-	return sg;
-}
-
-/**
- * __sg_next - return the next scatterlist entry in a list
- * @sg:		The current sg entry
- *
- * Description:
- *   If the entry is the last, return NULL; otherwise, step to the next
- *   element in the array (@sg@+1). If that's a chain pointer, follow it;
- *   otherwise just return the pointer to the current element.
- **/
-static inline struct scatterlist *__sg_next(struct scatterlist *sg)
-{
-	return sg_is_last(sg) ? NULL : ____sg_next(sg);
-}
-
-/**
- * for_each_sgt_dma - iterate over the DMA addresses of the given sg_table
- * @__dmap:	DMA address (output)
- * @__iter:	'struct sgt_iter' (iterator state, internal)
- * @__sgt:	sg_table to iterate over (input)
- */
-#define for_each_sgt_dma(__dmap, __iter, __sgt)				\
-	for ((__iter) = __sgt_iter((__sgt)->sgl, true);			\
-	     ((__dmap) = (__iter).dma + (__iter).curr);			\
-	     (((__iter).curr += I915_GTT_PAGE_SIZE) >= (__iter).max) ?	\
-	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0 : 0)
-
-/**
- * for_each_sgt_page - iterate over the pages of the given sg_table
- * @__pp:	page pointer (output)
- * @__iter:	'struct sgt_iter' (iterator state, internal)
- * @__sgt:	sg_table to iterate over (input)
- */
-#define for_each_sgt_page(__pp, __iter, __sgt)				\
-	for ((__iter) = __sgt_iter((__sgt)->sgl, false);		\
-	     ((__pp) = (__iter).pfn == 0 ? NULL :			\
-	      pfn_to_page((__iter).pfn + ((__iter).curr >> PAGE_SHIFT))); \
-	     (((__iter).curr += PAGE_SIZE) >= (__iter).max) ?		\
-	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0 : 0)
-
-bool i915_sg_trim(struct sg_table *orig_st);
-
-static inline unsigned int i915_sg_page_sizes(struct scatterlist *sg)
-{
-	unsigned int page_sizes;
-
-	page_sizes = 0;
-	while (sg) {
-		GEM_BUG_ON(sg->offset);
-		GEM_BUG_ON(!IS_ALIGNED(sg->length, PAGE_SIZE));
-		page_sizes |= sg->length;
-		sg = __sg_next(sg);
-	}
-
-	return page_sizes;
-}
-
-static inline unsigned int i915_sg_segment_size(void)
-{
-	unsigned int size = swiotlb_max_segment();
-
-	if (size == 0)
-		return SCATTERLIST_MAX_SEGMENT;
-
-	size = rounddown(size, PAGE_SIZE);
-	/* swiotlb_max_segment_size can return 1 byte when it means one page. */
-	if (size < PAGE_SIZE)
-		size = PAGE_SIZE;
-
-	return size;
-}
-
 #define INTEL_INFO(dev_priv)	(&(dev_priv)->__info)
 #define RUNTIME_INFO(dev_priv)	(&(dev_priv)->__runtime)
 #define DRIVER_CAPS(dev_priv)	(&(dev_priv)->caps)
@@ -2881,11 +2776,6 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
 
 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
 
-static inline int __sg_page_count(const struct scatterlist *sg)
-{
-	return sg->length >> PAGE_SHIFT;
-}
-
 static inline int __must_check
 i915_mutex_lock_interruptible(struct drm_device *dev)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 343a8a837363..7fa1236677b9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -50,6 +50,7 @@
 #include "gt/intel_workarounds.h"
 
 #include "i915_drv.h"
+#include "i915_scatterlist.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
 
@@ -1061,34 +1062,6 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 	}
 }
 
-bool i915_sg_trim(struct sg_table *orig_st)
-{
-	struct sg_table new_st;
-	struct scatterlist *sg, *new_sg;
-	unsigned int i;
-
-	if (orig_st->nents == orig_st->orig_nents)
-		return false;
-
-	if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
-		return false;
-
-	new_sg = new_st.sgl;
-	for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
-		sg_set_page(new_sg, sg_page(sg), sg->length, 0);
-		sg_dma_address(new_sg) = sg_dma_address(sg);
-		sg_dma_len(new_sg) = sg_dma_len(sg);
-
-		new_sg = sg_next(new_sg);
-	}
-	GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
-
-	sg_free_table(orig_st);
-
-	*orig_st = new_st;
-	return true;
-}
-
 static unsigned long to_wait_timeout(s64 timeout_ns)
 {
 	if (timeout_ns < 0)
@@ -2342,7 +2315,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/scatterlist.c"
 #include "selftests/mock_gem_device.c"
 #include "selftests/i915_gem.c"
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 3084f52e3372..2e9e32330aaa 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -22,7 +22,9 @@
  */
 
 #include <drm/i915_drm.h>
+
 #include "i915_drv.h"
+#include "i915_scatterlist.h"
 
 /**
  * DOC: fence register handling
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 8f5db787b7f2..d3eba67d4bf9 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -36,8 +36,9 @@
 #include <drm/i915_drm.h>
 
 #include "i915_drv.h"
-#include "i915_vgpu.h"
+#include "i915_scatterlist.h"
 #include "i915_trace.h"
+#include "i915_vgpu.h"
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 593079f30fbe..8e661c76d235 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -40,6 +40,7 @@
 
 #include "gt/intel_reset.h"
 #include "i915_request.h"
+#include "i915_scatterlist.h"
 #include "i915_selftest.h"
 #include "i915_timeline.h"
 
@@ -162,7 +163,8 @@ typedef u64 gen8_ppgtt_pml4e_t;
 #define GEN8_PDE_IPS_64K BIT(11)
 #define GEN8_PDE_PS_2M   BIT(7)
 
-struct sg_table;
+#define for_each_sgt_dma(__dmap, __iter, __sgt) \
+	__for_each_sgt_dma(__dmap, __iter, __sgt, I915_GTT_PAGE_SIZE)
 
 struct intel_rotation_info {
 	struct intel_rotation_plane_info {
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index fb424c8d7262..f919cf1a8c41 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -40,6 +40,7 @@
 
 #include "i915_gpu_error.h"
 #include "i915_drv.h"
+#include "i915_scatterlist.h"
 
 static inline const struct intel_engine_cs *
 engine_lookup(const struct drm_i915_private *i915, unsigned int id)
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c
new file mode 100644
index 000000000000..cc6b3846a8c7
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_scatterlist.c
@@ -0,0 +1,39 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include "i915_scatterlist.h"
+
+bool i915_sg_trim(struct sg_table *orig_st)
+{
+	struct sg_table new_st;
+	struct scatterlist *sg, *new_sg;
+	unsigned int i;
+
+	if (orig_st->nents == orig_st->orig_nents)
+		return false;
+
+	if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
+		return false;
+
+	new_sg = new_st.sgl;
+	for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
+		sg_set_page(new_sg, sg_page(sg), sg->length, 0);
+		sg_dma_address(new_sg) = sg_dma_address(sg);
+		sg_dma_len(new_sg) = sg_dma_len(sg);
+
+		new_sg = sg_next(new_sg);
+	}
+	GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
+
+	sg_free_table(orig_st);
+
+	*orig_st = new_st;
+	return true;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/scatterlist.c"
+#endif
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h
new file mode 100644
index 000000000000..6617963df9ed
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_scatterlist.h
@@ -0,0 +1,127 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef I915_SCATTERLIST_H
+#define I915_SCATTERLIST_H
+
+#include <linux/pfn.h>
+#include <linux/scatterlist.h>
+#include <linux/swiotlb.h>
+
+#include "i915_gem.h"
+
+/*
+ * Optimised SGL iterator for GEM objects
+ */
+static __always_inline struct sgt_iter {
+	struct scatterlist *sgp;
+	union {
+		unsigned long pfn;
+		dma_addr_t dma;
+	};
+	unsigned int curr;
+	unsigned int max;
+} __sgt_iter(struct scatterlist *sgl, bool dma) {
+	struct sgt_iter s = { .sgp = sgl };
+
+	if (s.sgp) {
+		s.max = s.curr = s.sgp->offset;
+		s.max += s.sgp->length;
+		if (dma)
+			s.dma = sg_dma_address(s.sgp);
+		else
+			s.pfn = page_to_pfn(sg_page(s.sgp));
+	}
+
+	return s;
+}
+
+static inline int __sg_page_count(const struct scatterlist *sg)
+{
+	return sg->length >> PAGE_SHIFT;
+}
+
+static inline struct scatterlist *____sg_next(struct scatterlist *sg)
+{
+	++sg;
+	if (unlikely(sg_is_chain(sg)))
+		sg = sg_chain_ptr(sg);
+	return sg;
+}
+
+/**
+ * __sg_next - return the next scatterlist entry in a list
+ * @sg:		The current sg entry
+ *
+ * Description:
+ *   If the entry is the last, return NULL; otherwise, step to the next
+ *   element in the array (@sg@+1). If that's a chain pointer, follow it;
+ *   otherwise just return the pointer to the current element.
+ **/
+static inline struct scatterlist *__sg_next(struct scatterlist *sg)
+{
+	return sg_is_last(sg) ? NULL : ____sg_next(sg);
+}
+
+/**
+ * __for_each_sgt_dma - iterate over the DMA addresses of the given sg_table
+ * @__dmap:	DMA address (output)
+ * @__iter:	'struct sgt_iter' (iterator state, internal)
+ * @__sgt:	sg_table to iterate over (input)
+ * @__step:	step size
+ */
+#define __for_each_sgt_dma(__dmap, __iter, __sgt, __step)		\
+	for ((__iter) = __sgt_iter((__sgt)->sgl, true);			\
+	     ((__dmap) = (__iter).dma + (__iter).curr);			\
+	     (((__iter).curr += (__step)) >= (__iter).max) ?		\
+	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0 : 0)
+
+/**
+ * for_each_sgt_page - iterate over the pages of the given sg_table
+ * @__pp:	page pointer (output)
+ * @__iter:	'struct sgt_iter' (iterator state, internal)
+ * @__sgt:	sg_table to iterate over (input)
+ */
+#define for_each_sgt_page(__pp, __iter, __sgt)				\
+	for ((__iter) = __sgt_iter((__sgt)->sgl, false);		\
+	     ((__pp) = (__iter).pfn == 0 ? NULL :			\
+	      pfn_to_page((__iter).pfn + ((__iter).curr >> PAGE_SHIFT))); \
+	     (((__iter).curr += PAGE_SIZE) >= (__iter).max) ?		\
+	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0 : 0)
+
+static inline unsigned int i915_sg_page_sizes(struct scatterlist *sg)
+{
+	unsigned int page_sizes;
+
+	page_sizes = 0;
+	while (sg) {
+		GEM_BUG_ON(sg->offset);
+		GEM_BUG_ON(!IS_ALIGNED(sg->length, PAGE_SIZE));
+		page_sizes |= sg->length;
+		sg = __sg_next(sg);
+	}
+
+	return page_sizes;
+}
+
+static inline unsigned int i915_sg_segment_size(void)
+{
+	unsigned int size = swiotlb_max_segment();
+
+	if (size == 0)
+		return SCATTERLIST_MAX_SEGMENT;
+
+	size = rounddown(size, PAGE_SIZE);
+	/* swiotlb_max_segment() can return 1 byte when it means one page. */
+	if (size < PAGE_SIZE)
+		size = PAGE_SIZE;
+
+	return size;
+}
+
+bool i915_sg_trim(struct sg_table *orig_st);
+
+#endif
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index 131d0d9a6cd1..b0a4296a0504 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -30,6 +30,7 @@
 #include "mock_gtt.h"
 
 #include "../gem/selftests/mock_context.h"
+#include "../i915_scatterlist.h"
 
 static bool assert_vma(struct i915_vma *vma,
 		       struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/selftests/scatterlist.c b/drivers/gpu/drm/i915/selftests/scatterlist.c
index cd6d2a16071f..5f0c48a9f051 100644
--- a/drivers/gpu/drm/i915/selftests/scatterlist.c
+++ b/drivers/gpu/drm/i915/selftests/scatterlist.c
@@ -25,6 +25,7 @@
 #include <linux/random.h>
 
 #include "../i915_selftest.h"
+#include "../i915_utils.h"
 
 #define PFN_BIAS (1 << 10)
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 74+ messages in thread

* [PATCH 33/45] lockdep: Swap storage for pin_count and references
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (30 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 32/45] drm/i915: Pull scatterlist utils out of i915_gem.h Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 34/45] drm/i915: Move GEM object domain management from struct_mutex to local Chris Wilson
                   ` (19 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

As a lockdep map takes a reference for every ww_mutex nested under the
same acquire context, the count can be arbitrarily large and is under
the control of userspace -- easily overflowing the 12-bit limit of 4096.
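
A minimal sketch of the failure mode (illustrative only -- the helper
name, obj[] array and nr_objects are hypothetical; the lock and class
names match those used by the execbuffer path later in this series):

	static void pin_all_objects(struct drm_i915_gem_object **obj,
				    unsigned long nr_objects)
	{
		struct ww_acquire_ctx ctx;
		unsigned long i;

		ww_acquire_init(&ctx, &reservation_ww_class);
		/*
		 * Every ww_mutex of the same class taken under this context
		 * folds into a single held_lock, bumping hlock->references
		 * once per object; with nr_objects under userspace control
		 * the old 12-bit field overflows past 4096.
		 */
		for (i = 0; i < nr_objects; i++)
			ww_mutex_lock(&obj[i]->resv->lock, &ctx);
		ww_acquire_done(&ctx);
	}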

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 include/linux/lockdep.h  |  4 ++--
 kernel/locking/lockdep.c | 15 +++++++++------
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 79c3873d58ac..2dc4a61d7355 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -262,8 +262,8 @@ struct held_lock {
 	unsigned int read:2;        /* see lock_acquire() comment */
 	unsigned int check:1;       /* see lock_acquire() comment */
 	unsigned int hardirqs_off:1;
-	unsigned int references:12;					/* 32 bits */
-	unsigned int pin_count;
+	unsigned int pin_count:12;					/* 32 bits */
+	unsigned int references;
 };
 
 /*
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index e221be724fe8..e83bfc520a28 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3613,9 +3613,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 		if (hlock->class_idx == class_idx && nest_lock) {
 			if (hlock->references) {
 				/*
-				 * Check: unsigned int references:12, overflow.
+				 * Check: unsigned int references overflow.
 				 */
-				if (DEBUG_LOCKS_WARN_ON(hlock->references == (1 << 12)-1))
+				if (DEBUG_LOCKS_WARN_ON(hlock->references == UINT_MAX))
 					return 0;
 
 				hlock->references++;
@@ -4053,11 +4053,14 @@ static struct pin_cookie __lock_pin_lock(struct lockdep_map *lock)
 
 		if (match_held_lock(hlock, lock)) {
 			/*
-			 * Grab 16bits of randomness; this is sufficient to not
-			 * be guessable and still allows some pin nesting in
-			 * our u32 pin_count.
+			 * Grab 6bits of randomness; this is barely sufficient
+			 * to not be guessable and still allows some 32 levels
+			 * of pin nesting in our u12 pin_count.
 			 */
-			cookie.val = 1 + (prandom_u32() >> 16);
+			cookie.val = 1 + (prandom_u32() >> 26);
+			if (DEBUG_LOCKS_WARN_ON(hlock->pin_count + cookie.val >= 1 << 12))
+				return NIL_COOKIE;
+
 			hlock->pin_count += cookie.val;
 			return cookie;
 		}
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 74+ messages in thread

* [PATCH 34/45] drm/i915: Move GEM object domain management from struct_mutex to local
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (31 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 33/45] lockdep: Swap storage for pin_count and references Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 35/45] drm/i915: Move GEM object waiting to its own file Chris Wilson
                   ` (18 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

Use the per-object local lock, rather than struct_mutex, to control the
cache domain of each individual GEM object. This is a huge leap forward
for us in terms of object-level synchronisation: execbuffers are now
coordinated using the ww_mutex, and pread/pwrite are finally fully
serialised again.
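
For reference, the recurring pattern this patch introduces at each call
site looks like the following (a minimal sketch lifted from the hunks
below; error handling elided):

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_gtt_domain(obj, true);
	i915_gem_object_unlock(obj);

	i915_vma_lock(vma);
	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);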

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_clflush.c   |   4 +-
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c    |  10 +-
 drivers/gpu/drm/i915/gem/i915_gem_domain.c    |  70 +++++-----
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 123 ++++++++++++------
 drivers/gpu/drm/i915/gem/i915_gem_fence.c     |  97 ++++++++++++++
 drivers/gpu/drm/i915/gem/i915_gem_object.c    |   2 +
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |  14 ++
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        |   7 +-
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  12 +-
 .../i915/gem/selftests/i915_gem_coherency.c   |  12 ++
 .../drm/i915/gem/selftests/i915_gem_context.c |  20 +++
 .../drm/i915/gem/selftests/i915_gem_mman.c    |   6 +
 .../drm/i915/gem/selftests/i915_gem_phys.c    |   4 +-
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |   4 +
 drivers/gpu/drm/i915/gt/selftest_lrc.c        |   2 +
 .../gpu/drm/i915/gt/selftest_workarounds.c    |   6 +
 drivers/gpu/drm/i915/gvt/cmd_parser.c         |   2 +
 drivers/gpu/drm/i915/gvt/scheduler.c          |   8 +-
 drivers/gpu/drm/i915/i915_cmd_parser.c        |  23 ++--
 drivers/gpu/drm/i915/i915_gem.c               | 122 +++++++++--------
 drivers/gpu/drm/i915/i915_gem_gtt.c           |   5 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c  |   2 +
 drivers/gpu/drm/i915/i915_vma.c               |   8 +-
 drivers/gpu/drm/i915/i915_vma.h               |  12 ++
 drivers/gpu/drm/i915/intel_display.c          |   5 +
 drivers/gpu/drm/i915/intel_guc_log.c          |   6 +-
 drivers/gpu/drm/i915/intel_overlay.c          |  25 ++--
 drivers/gpu/drm/i915/intel_uc_fw.c            |   6 +-
 drivers/gpu/drm/i915/selftests/i915_request.c |   4 +
 drivers/gpu/drm/i915/selftests/igt_spinner.c  |   2 +
 31 files changed, 444 insertions(+), 180 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_fence.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 8e70d5972195..e30622229812 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -93,6 +93,7 @@ gem-y += \
 	gem/i915_gem_dmabuf.o \
 	gem/i915_gem_domain.o \
 	gem/i915_gem_execbuffer.o \
+	gem/i915_gem_fence.o \
 	gem/i915_gem_internal.o \
 	gem/i915_gem_object.o \
 	gem/i915_gem_mman.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index 093bfff55a96..efab47250588 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -96,6 +96,8 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 {
 	struct clflush *clflush;
 
+	assert_object_held(obj);
+
 	/*
 	 * Stolen memory is always coherent with the GPU as it is explicitly
 	 * marked as wc by the system, or the system is cache-coherent.
@@ -145,9 +147,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 						true, I915_FENCE_TIMEOUT,
 						I915_FENCE_GFP);
 
-		reservation_object_lock(obj->resv, NULL);
 		reservation_object_add_excl_fence(obj->resv, &clflush->dma);
-		reservation_object_unlock(obj->resv);
 
 		i915_sw_fence_commit(&clflush->wait);
 	} else if (obj->mm.pages) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 4e7efe159531..50981ea513f0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -152,7 +152,6 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *
 static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
 {
 	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
-	struct drm_device *dev = obj->base.dev;
 	bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE);
 	int err;
 
@@ -160,12 +159,12 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
 	if (err)
 		return err;
 
-	err = i915_mutex_lock_interruptible(dev);
+	err = i915_gem_object_lock_interruptible(obj);
 	if (err)
 		goto out;
 
 	err = i915_gem_object_set_to_cpu_domain(obj, write);
-	mutex_unlock(&dev->struct_mutex);
+	i915_gem_object_unlock(obj);
 
 out:
 	i915_gem_object_unpin_pages(obj);
@@ -175,19 +174,18 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
 static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
 {
 	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
-	struct drm_device *dev = obj->base.dev;
 	int err;
 
 	err = i915_gem_object_pin_pages(obj);
 	if (err)
 		return err;
 
-	err = i915_mutex_lock_interruptible(dev);
+	err = i915_gem_object_lock_interruptible(obj);
 	if (err)
 		goto out;
 
 	err = i915_gem_object_set_to_gtt_domain(obj, false);
-	mutex_unlock(&dev->struct_mutex);
+	i915_gem_object_unlock(obj);
 
 out:
 	i915_gem_object_unpin_pages(obj);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 5e1429f8e910..4c84bb911d6c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -31,9 +31,9 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
 	if (!READ_ONCE(obj->pin_global))
 		return;
 
-	mutex_lock(&obj->base.dev->struct_mutex);
+	i915_gem_object_lock(obj);
 	__i915_gem_object_flush_for_display(obj);
-	mutex_unlock(&obj->base.dev->struct_mutex);
+	i915_gem_object_unlock(obj);
 }
 
 /**
@@ -49,11 +49,10 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 {
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	assert_object_held(obj);
 
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
 				   (write ? I915_WAIT_ALL : 0),
 				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
@@ -111,11 +110,10 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 {
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	assert_object_held(obj);
 
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
 				   (write ? I915_WAIT_ALL : 0),
 				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
@@ -181,7 +179,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 	struct i915_vma *vma;
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	assert_object_held(obj);
 
 	if (obj->cache_level == cache_level)
 		return 0;
@@ -230,7 +228,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 		 */
 		ret = i915_gem_object_wait(obj,
 					   I915_WAIT_INTERRUPTIBLE |
-					   I915_WAIT_LOCKED |
 					   I915_WAIT_ALL,
 					   MAX_SCHEDULE_TIMEOUT);
 		if (ret)
@@ -374,12 +371,16 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto out;
 
-	ret = i915_mutex_lock_interruptible(dev);
+	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
 	if (ret)
 		goto out;
 
-	ret = i915_gem_object_set_cache_level(obj, level);
-	mutex_unlock(&dev->struct_mutex);
+	ret = i915_gem_object_lock_interruptible(obj);
+	if (ret == 0) {
+		ret = i915_gem_object_set_cache_level(obj, level);
+		i915_gem_object_unlock(obj);
+	}
+	mutex_unlock(&i915->drm.struct_mutex);
 
 out:
 	i915_gem_object_put(obj);
@@ -401,7 +402,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	struct i915_vma *vma;
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	assert_object_held(obj);
 
 	/* Mark the global pin early so that we account for the
 	 * display coherency whilst setting up the cache domains.
@@ -486,16 +487,18 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 void
 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
 {
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	struct drm_i915_gem_object *obj = vma->obj;
+
+	assert_object_held(obj);
 
-	if (WARN_ON(vma->obj->pin_global == 0))
+	if (WARN_ON(obj->pin_global == 0))
 		return;
 
-	if (--vma->obj->pin_global == 0)
+	if (--obj->pin_global == 0)
 		vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
 
 	/* Bump the LRU to try and avoid premature eviction whilst flipping  */
-	i915_gem_object_bump_inactive_ggtt(vma->obj);
+	i915_gem_object_bump_inactive_ggtt(obj);
 
 	i915_vma_unpin(vma);
 }
@@ -513,11 +516,10 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 {
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	assert_object_held(obj);
 
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
 				   (write ? I915_WAIT_ALL : 0),
 				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
@@ -639,7 +641,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	if (err)
 		goto out;
 
-	err = i915_mutex_lock_interruptible(dev);
+	err = i915_gem_object_lock_interruptible(obj);
 	if (err)
 		goto out_unpin;
 
@@ -653,7 +655,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	/* And bump the LRU for this access */
 	i915_gem_object_bump_inactive_ggtt(obj);
 
-	mutex_unlock(&dev->struct_mutex);
+	i915_gem_object_unlock(obj);
 
 	if (write_domain != 0)
 		intel_fb_obj_invalidate(obj,
@@ -676,22 +678,23 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
 {
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
 	*needs_clflush = 0;
 	if (!i915_gem_object_has_struct_page(obj))
 		return -ENODEV;
 
+	ret = i915_gem_object_lock_interruptible(obj);
+	if (ret)
+		return ret;
+
 	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED,
+				   I915_WAIT_INTERRUPTIBLE,
 				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
-		return ret;
+		goto err_unlock;
 
 	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
-		return ret;
+		goto err_unlock;
 
 	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
 	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
@@ -719,6 +722,8 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
 
 err_unpin:
 	i915_gem_object_unpin_pages(obj);
+err_unlock:
+	i915_gem_object_unlock(obj);
 	return ret;
 }
 
@@ -727,23 +732,24 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
 {
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
 	*needs_clflush = 0;
 	if (!i915_gem_object_has_struct_page(obj))
 		return -ENODEV;
 
+	ret = i915_gem_object_lock_interruptible(obj);
+	if (ret)
+		return ret;
+
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
 				   I915_WAIT_ALL,
 				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
-		return ret;
+		goto err_unlock;
 
 	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
-		return ret;
+		goto err_unlock;
 
 	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
 	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
@@ -780,5 +786,7 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
 
 err_unpin:
 	i915_gem_object_unpin_pages(obj);
+err_unlock:
+	i915_gem_object_unlock(obj);
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index a2761029f0c3..080f69358224 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1076,7 +1076,9 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 		if (use_cpu_reloc(cache, obj))
 			return NULL;
 
+		i915_gem_object_lock(obj);
 		err = i915_gem_object_set_to_gtt_domain(obj, true);
+		i915_gem_object_unlock(obj);
 		if (err)
 			return ERR_PTR(err);
 
@@ -1165,6 +1167,26 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
 		*addr = value;
 }
 
+static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+	int err;
+
+	i915_vma_lock(vma);
+
+	if (obj->cache_dirty & ~obj->cache_coherent)
+		i915_gem_clflush_object(obj, 0);
+	obj->write_domain = 0;
+
+	err = i915_request_await_object(rq, vma->obj, true);
+	if (err == 0)
+		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+
+	i915_vma_unlock(vma);
+
+	return err;
+}
+
 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 			     struct i915_vma *vma,
 			     unsigned int len)
@@ -1176,15 +1198,6 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	u32 *cmd;
 	int err;
 
-	if (DBG_FORCE_RELOC == FORCE_GPU_RELOC) {
-		obj = vma->obj;
-		if (obj->cache_dirty & ~obj->cache_coherent)
-			i915_gem_clflush_object(obj, 0);
-		obj->write_domain = 0;
-	}
-
-	GEM_BUG_ON(vma->obj->write_domain & I915_GEM_DOMAIN_CPU);
-
 	obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
@@ -1213,7 +1226,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 		goto err_unpin;
 	}
 
-	err = i915_request_await_object(rq, vma->obj, true);
+	err = reloc_move_to_gpu(rq, vma);
 	if (err)
 		goto err_request;
 
@@ -1221,14 +1234,12 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 					batch->node.start, PAGE_SIZE,
 					cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
 	if (err)
-		goto err_request;
+		goto skip_request;
 
+	i915_vma_lock(batch);
 	GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
 	err = i915_vma_move_to_active(batch, rq, 0);
-	if (err)
-		goto skip_request;
-
-	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(batch);
 	if (err)
 		goto skip_request;
 
@@ -1837,24 +1848,59 @@ static int eb_relocate(struct i915_execbuffer *eb)
 static int eb_move_to_gpu(struct i915_execbuffer *eb)
 {
 	const unsigned int count = eb->buffer_count;
+	struct ww_acquire_ctx acquire;
 	unsigned int i;
-	int err;
+	int err = 0;
+
+	ww_acquire_init(&acquire, &reservation_ww_class);
 
 	for (i = 0; i < count; i++) {
+		struct i915_vma *vma = eb->vma[i];
+
+		err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire);
+		if (!err)
+			continue;
+
+		GEM_BUG_ON(err == -EALREADY); /* No duplicate vma */
+
+		if (err == -EDEADLK) {
+			GEM_BUG_ON(i == 0);
+			do {
+				int j = i - 1;
+
+				ww_mutex_unlock(&eb->vma[j]->resv->lock);
+
+				swap(eb->flags[i], eb->flags[j]);
+				swap(eb->vma[i],  eb->vma[j]);
+				eb->vma[i]->exec_flags = &eb->flags[i];
+			} while (--i);
+			GEM_BUG_ON(vma != eb->vma[0]);
+			vma->exec_flags = &eb->flags[0];
+
+			err = ww_mutex_lock_slow_interruptible(&vma->resv->lock,
+							       &acquire);
+		}
+		if (err)
+			break;
+	}
+	ww_acquire_done(&acquire);
+
+	while (i--) {
 		unsigned int flags = eb->flags[i];
 		struct i915_vma *vma = eb->vma[i];
 		struct drm_i915_gem_object *obj = vma->obj;
 
+		assert_vma_held(vma);
+
 		if (flags & EXEC_OBJECT_CAPTURE) {
 			struct i915_capture_list *capture;
 
 			capture = kmalloc(sizeof(*capture), GFP_KERNEL);
-			if (unlikely(!capture))
-				return -ENOMEM;
-
-			capture->next = eb->request->capture_list;
-			capture->vma = eb->vma[i];
-			eb->request->capture_list = capture;
+			if (capture) {
+				capture->next = eb->request->capture_list;
+				capture->vma = vma;
+				eb->request->capture_list = capture;
+			}
 		}
 
 		/*
@@ -1874,24 +1920,15 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 				flags &= ~EXEC_OBJECT_ASYNC;
 		}
 
-		if (flags & EXEC_OBJECT_ASYNC)
-			continue;
-
-		err = i915_request_await_object
-			(eb->request, obj, flags & EXEC_OBJECT_WRITE);
-		if (err)
-			return err;
-	}
+		if (err == 0 && !(flags & EXEC_OBJECT_ASYNC)) {
+			err = i915_request_await_object
+				(eb->request, obj, flags & EXEC_OBJECT_WRITE);
+		}
 
-	for (i = 0; i < count; i++) {
-		unsigned int flags = eb->flags[i];
-		struct i915_vma *vma = eb->vma[i];
+		if (err == 0)
+			err = i915_vma_move_to_active(vma, eb->request, flags);
 
-		err = i915_vma_move_to_active(vma, eb->request, flags);
-		if (unlikely(err)) {
-			i915_request_skip(eb->request, err);
-			return err;
-		}
+		i915_vma_unlock(vma);
 
 		__eb_unreserve_vma(vma, flags);
 		vma->exec_flags = NULL;
@@ -1899,12 +1936,20 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 		if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
 			i915_vma_put(vma);
 	}
+	ww_acquire_fini(&acquire);
+
+	if (unlikely(err))
+		goto err_skip;
+
 	eb->exec = NULL;
 
 	/* Unconditionally flush any chipset caches (for streaming writes). */
 	i915_gem_chipset_flush(eb->i915);
-
 	return 0;
+
+err_skip:
+	i915_request_skip(eb->request, err);
+	return err;
 }
 
 static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_fence.c b/drivers/gpu/drm/i915/gem/i915_gem_fence.c
new file mode 100644
index 000000000000..aa2efdc57c92
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_fence.c
@@ -0,0 +1,97 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_gem_object.h"
+
+#include "../i915_drv.h"
+
+struct stub_fence {
+	struct dma_fence dma;
+	struct i915_sw_fence chain;
+};
+
+static int __i915_sw_fence_call
+stub_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+	struct stub_fence *stub = container_of(fence, typeof(*stub), chain);
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		dma_fence_signal(&stub->dma);
+		break;
+
+	case FENCE_FREE:
+		dma_fence_put(&stub->dma);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static const char *stub_driver_name(struct dma_fence *fence)
+{
+	return DRIVER_NAME;
+}
+
+static const char *stub_timeline_name(struct dma_fence *fence)
+{
+	return "object";
+}
+
+static void stub_release(struct dma_fence *fence)
+{
+	struct stub_fence *stub = container_of(fence, typeof(*stub), dma);
+
+	i915_sw_fence_fini(&stub->chain);
+
+	BUILD_BUG_ON(offsetof(typeof(*stub), dma));
+	dma_fence_free(&stub->dma);
+}
+
+static const struct dma_fence_ops stub_fence_ops = {
+	.get_driver_name = stub_driver_name,
+	.get_timeline_name = stub_timeline_name,
+	.release = stub_release,
+};
+
+struct dma_fence *
+i915_gem_object_lock_fence(struct drm_i915_gem_object *obj)
+{
+	struct stub_fence *stub;
+
+	assert_object_held(obj);
+
+	stub = kmalloc(sizeof(*stub), GFP_KERNEL);
+	if (!stub)
+		return NULL;
+
+	i915_sw_fence_init(&stub->chain, stub_notify);
+	dma_fence_init(&stub->dma, &stub_fence_ops, &stub->chain.wait.lock,
+		       to_i915(obj->base.dev)->mm.unordered_timeline,
+		       0);
+
+	if (i915_sw_fence_await_reservation(&stub->chain,
+					    obj->resv, NULL,
+					    true, I915_FENCE_TIMEOUT,
+					    I915_FENCE_GFP) < 0)
+		goto err;
+
+	reservation_object_add_excl_fence(obj->resv, &stub->dma);
+
+	return &stub->dma;
+
+err:
+	stub_release(&stub->dma);
+	return NULL;
+}
+
+void i915_gem_object_unlock_fence(struct drm_i915_gem_object *obj,
+				  struct dma_fence *fence)
+{
+	struct stub_fence *stub = container_of(fence, typeof(*stub), dma);
+
+	i915_sw_fence_commit(&stub->chain);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 04928ae415eb..df79e2eead62 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -379,6 +379,8 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	struct i915_vma *vma;
 
+	assert_object_held(obj);
+
 	if (!(obj->write_domain & flush_domains))
 		return;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 980587e420e6..509d145d808a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -99,16 +99,29 @@ i915_gem_object_put(struct drm_i915_gem_object *obj)
 	__drm_gem_object_put(&obj->base);
 }
 
+#define assert_object_held(obj) reservation_object_assert_held((obj)->resv)
+
 static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj)
 {
 	reservation_object_lock(obj->resv, NULL);
 }
 
+static inline int
+i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj)
+{
+	return reservation_object_lock_interruptible(obj->resv, NULL);
+}
+
 static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
 {
 	reservation_object_unlock(obj->resv);
 }
 
+struct dma_fence *
+i915_gem_object_lock_fence(struct drm_i915_gem_object *obj);
+void i915_gem_object_unlock_fence(struct drm_i915_gem_object *obj,
+				  struct dma_fence *fence);
+
 static inline void
 i915_gem_object_set_readonly(struct drm_i915_gem_object *obj)
 {
@@ -367,6 +380,7 @@ static inline void
 i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
 {
 	i915_gem_object_unpin_pages(obj);
+	i915_gem_object_unlock(obj);
 }
 
 static inline struct intel_engine_cs *
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 052cafd96223..7e3511773fc1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -209,12 +209,13 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
 	 * machine in an unusable condition.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
 	for (phase = phases; *phase; phase++) {
-		list_for_each_entry(obj, *phase, mm.link)
+		list_for_each_entry(obj, *phase, mm.link) {
+			i915_gem_object_lock(obj);
 			WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
+			i915_gem_object_unlock(obj);
+		}
 	}
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	intel_uc_sanitize(i915);
 	i915_gem_sanitize(i915);
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 88ca4ed430ee..5a89cbab3b2e 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -959,10 +959,6 @@ static int gpu_write(struct i915_vma *vma,
 
 	GEM_BUG_ON(!intel_engine_can_store_dword(engine));
 
-	err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
-	if (err)
-		return err;
-
 	batch = gpu_write_dw(vma, dword * sizeof(u32), value);
 	if (IS_ERR(batch))
 		return PTR_ERR(batch);
@@ -973,13 +969,19 @@ static int gpu_write(struct i915_vma *vma,
 		goto err_batch;
 	}
 
+	i915_vma_lock(batch);
 	err = i915_vma_move_to_active(batch, rq, 0);
+	i915_vma_unlock(batch);
 	if (err)
 		goto err_request;
 
 	i915_gem_object_set_active_reference(batch->obj);
 
-	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_lock(vma);
+	err = i915_gem_object_set_to_gtt_domain(vma->obj, false);
+	if (err == 0)
+		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 	if (err)
 		goto err_request;
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index b6613caa6f45..22fe4b6fd506 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -78,7 +78,9 @@ static int gtt_set(struct drm_i915_gem_object *obj,
 	u32 __iomem *map;
 	int err;
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, true);
+	i915_gem_object_unlock(obj);
 	if (err)
 		return err;
 
@@ -105,7 +107,9 @@ static int gtt_get(struct drm_i915_gem_object *obj,
 	u32 __iomem *map;
 	int err;
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		return err;
 
@@ -131,7 +135,9 @@ static int wc_set(struct drm_i915_gem_object *obj,
 	u32 *map;
 	int err;
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_wc_domain(obj, true);
+	i915_gem_object_unlock(obj);
 	if (err)
 		return err;
 
@@ -152,7 +158,9 @@ static int wc_get(struct drm_i915_gem_object *obj,
 	u32 *map;
 	int err;
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_wc_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		return err;
 
@@ -176,7 +184,9 @@ static int gpu_set(struct drm_i915_gem_object *obj,
 	u32 *cs;
 	int err;
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, true);
+	i915_gem_object_unlock(obj);
 	if (err)
 		return err;
 
@@ -215,7 +225,9 @@ static int gpu_set(struct drm_i915_gem_object *obj,
 	}
 	intel_ring_advance(rq, cs);
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 	i915_vma_unpin(vma);
 
 	i915_request_add(rq);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 39be30b9ef7b..73a448f0f3fe 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -209,7 +209,9 @@ gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value)
 	i915_gem_object_flush_map(obj);
 	i915_gem_object_unpin_map(obj);
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		goto err;
 
@@ -261,7 +263,9 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		return err;
 
@@ -302,11 +306,15 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 	if (err)
 		goto err_request;
 
+	i915_vma_lock(batch);
 	err = i915_vma_move_to_active(batch, rq, 0);
+	i915_vma_unlock(batch);
 	if (err)
 		goto skip_request;
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 	if (err)
 		goto skip_request;
 
@@ -754,7 +762,9 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		return err;
 
@@ -780,11 +790,15 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
 	if (err)
 		goto err_request;
 
+	i915_vma_lock(batch);
 	err = i915_vma_move_to_active(batch, rq, 0);
+	i915_vma_unlock(batch);
 	if (err)
 		goto skip_request;
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 	if (err)
 		goto skip_request;
 
@@ -1345,7 +1359,9 @@ static int write_to_scratch(struct i915_gem_context *ctx,
 	if (err)
 		goto err_request;
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, 0);
+	i915_vma_unlock(vma);
 	if (err)
 		goto skip_request;
 
@@ -1440,7 +1456,9 @@ static int read_from_scratch(struct i915_gem_context *ctx,
 	if (err)
 		goto err_request;
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 	if (err)
 		goto skip_request;
 
@@ -1449,7 +1467,9 @@ static int read_from_scratch(struct i915_gem_context *ctx,
 
 	i915_request_add(rq);
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_cpu_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		goto err;
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index b3ee4ee08b18..226abf516a24 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -110,7 +110,9 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
 		GEM_BUG_ON(view.partial.size > nreal);
 		cond_resched();
 
+		i915_gem_object_lock(obj);
 		err = i915_gem_object_set_to_gtt_domain(obj, true);
+		i915_gem_object_unlock(obj);
 		if (err) {
 			pr_err("Failed to flush to GTT write domain; err=%d\n",
 			       err);
@@ -142,7 +144,9 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
 		if (offset >= obj->base.size)
 			continue;
 
+		i915_gem_object_lock(obj);
 		i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+		i915_gem_object_unlock(obj);
 
 		p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
 		cpu = kmap(p) + offset_in_page(offset);
@@ -344,7 +348,9 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
 		return PTR_ERR(rq);
 	}
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 
 	i915_request_add(rq);
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
index b76b503b3999..8ecf8d7bd083 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
@@ -46,9 +46,9 @@ static int mock_phys_object(void *arg)
 	}
 
 	/* Make the object dirty so that put_pages must do copy back the data */
-	mutex_lock(&i915->drm.struct_mutex);
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, true);
-	mutex_unlock(&i915->drm.struct_mutex);
+	i915_gem_object_unlock(obj);
 	if (err) {
 		pr_err("i915_gem_object_set_to_gtt_domain failed with err=%d\n",
 		       err);
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 4b4589b92521..45745e36b5f4 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -115,7 +115,9 @@ static int move_to_active(struct i915_vma *vma,
 {
 	int err;
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, flags);
+	i915_vma_unlock(vma);
 	if (err)
 		return err;
 
@@ -1298,7 +1300,9 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,
 		}
 	}
 
+	i915_vma_lock(arg.vma);
 	err = i915_vma_move_to_active(arg.vma, rq, flags);
+	i915_vma_unlock(arg.vma);
 
 	if (flags & EXEC_OBJECT_NEEDS_FENCE)
 		i915_vma_unpin_fence(arg.vma);
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index fb32b265a49e..9143c3eda2ec 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1099,11 +1099,13 @@ static int smoke_submit(struct preempt_smoke *smoke,
 	}
 
 	if (vma) {
+		i915_vma_lock(vma);
 		err = rq->engine->emit_bb_start(rq,
 						vma->node.start,
 						PAGE_SIZE, 0);
 		if (!err)
 			err = i915_vma_move_to_active(vma, rq, 0);
+		i915_vma_unlock(vma);
 	}
 
 	i915_request_add(rq);
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 40b6df911d8d..d4ba5296b2c8 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -111,7 +111,9 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 		goto err_pin;
 	}
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 	if (err)
 		goto err_req;
 
@@ -188,8 +190,10 @@ static int check_whitelist(struct i915_gem_context *ctx,
 		return PTR_ERR(results);
 
 	err = 0;
+	i915_gem_object_lock(results);
 	igt_wedge_on_timeout(&wedge, ctx->i915, HZ / 5) /* a safety net! */
 		err = i915_gem_object_set_to_cpu_domain(results, false);
+	i915_gem_object_unlock(results);
 	if (i915_terminally_wedged(ctx->i915))
 		err = -EIO;
 	if (err)
@@ -360,7 +364,9 @@ static struct i915_vma *create_batch(struct i915_gem_context *ctx)
 	if (err)
 		goto err_obj;
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_wc_domain(obj, true);
+	i915_gem_object_unlock(obj);
 	if (err)
 		goto err_obj;
 
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 1ee881825e38..f6d15683868a 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -2840,7 +2840,9 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 		goto put_obj;
 	}
 
+	i915_gem_object_lock(obj);
 	ret = i915_gem_object_set_to_cpu_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (ret) {
 		gvt_vgpu_err("failed to set shadow indirect ctx to CPU\n");
 		goto unmap_src;
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index c9d3b52d776f..9f6c119983d4 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -492,18 +492,18 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 			}
 
 			ret = i915_gem_object_set_to_gtt_domain(bb->obj,
-					false);
+								false);
 			if (ret)
 				goto err;
 
-			i915_gem_object_finish_access(bb->obj);
-			bb->accessing = false;
-
 			ret = i915_vma_move_to_active(bb->vma,
 						      workload->req,
 						      0);
 			if (ret)
 				goto err;
+
+			i915_gem_object_finish_access(bb->obj);
+			bb->accessing = false;
 		}
 	}
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index c893bd4eb2c8..a28bcd2d7c09 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1058,19 +1058,20 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 	void *dst, *src;
 	int ret;
 
-	ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
+	ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
 	if (ret)
 		return ERR_PTR(ret);
 
-	ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
-	if (ret) {
-		dst = ERR_PTR(ret);
-		goto unpin_src;
-	}
-
 	dst = i915_gem_object_pin_map(dst_obj, I915_MAP_FORCE_WB);
+	i915_gem_object_finish_access(dst_obj);
 	if (IS_ERR(dst))
-		goto unpin_dst;
+		return dst;
+
+	ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
+	if (ret) {
+		i915_gem_object_unpin_map(dst_obj);
+		return ERR_PTR(ret);
+	}
 
 	src = ERR_PTR(-ENODEV);
 	if (src_needs_clflush &&
@@ -1116,13 +1117,11 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 		}
 	}
 
+	i915_gem_object_finish_access(src_obj);
+
 	/* dst_obj is returned with vmap pinned */
 	*needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
 
-unpin_dst:
-	i915_gem_object_finish_access(dst_obj);
-unpin_src:
-	i915_gem_object_finish_access(src_obj);
 	return dst;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7fa1236677b9..32fdc1977afe 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -103,19 +103,10 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
 	LIST_HEAD(still_in_list);
-	int ret;
+	int ret = 0;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
 
-	/* Closed vma are removed from the obj->vma_list - but they may
-	 * still have an active binding on the object. To remove those we
-	 * must wait for all rendering to complete to the object (as unbinding
-	 * must anyway), and retire the requests.
-	 */
-	ret = i915_gem_object_set_to_cpu_domain(obj, false);
-	if (ret)
-		return ret;
-
 	spin_lock(&obj->vma.lock);
 	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
 						       struct i915_vma,
@@ -138,29 +129,17 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
 			   unsigned int flags,
 			   long timeout)
 {
-	struct i915_request *rq;
-
 	BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
 
 	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
 		return timeout;
 
-	if (!dma_fence_is_i915(fence))
-		return dma_fence_wait_timeout(fence,
-					      flags & I915_WAIT_INTERRUPTIBLE,
-					      timeout);
+	if (dma_fence_is_i915(fence))
+		return i915_request_wait(to_request(fence), flags, timeout);
 
-	rq = to_request(fence);
-	if (i915_request_completed(rq))
-		goto out;
-
-	timeout = i915_request_wait(rq, flags, timeout);
-
-out:
-	if (flags & I915_WAIT_LOCKED && i915_request_completed(rq))
-		i915_request_retire_upto(rq);
-
-	return timeout;
+	return dma_fence_wait_timeout(fence,
+				      flags & I915_WAIT_INTERRUPTIBLE,
+				      timeout);
 }
 
 static long
@@ -463,21 +442,22 @@ static int
 i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
 		     struct drm_i915_gem_pread *args)
 {
-	char __user *user_data;
-	u64 remain;
 	unsigned int needs_clflush;
 	unsigned int idx, offset;
+	struct dma_fence *fence;
+	char __user *user_data;
+	u64 remain;
 	int ret;
 
-	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
-	if (ret)
-		return ret;
-
 	ret = i915_gem_object_prepare_read(obj, &needs_clflush);
-	mutex_unlock(&obj->base.dev->struct_mutex);
 	if (ret)
 		return ret;
 
+	fence = i915_gem_object_lock_fence(obj);
+	i915_gem_object_finish_access(obj);
+	if (!fence)
+		return -ENOMEM;
+
 	remain = args->size;
 	user_data = u64_to_user_ptr(args->data_ptr);
 	offset = offset_in_page(args->offset);
@@ -495,7 +475,7 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
 		offset = 0;
 	}
 
-	i915_gem_object_finish_access(obj);
+	i915_gem_object_unlock_fence(obj, fence);
 	return ret;
 }
 
@@ -531,8 +511,9 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 	struct i915_ggtt *ggtt = &i915->ggtt;
 	intel_wakeref_t wakeref;
 	struct drm_mm_node node;
-	struct i915_vma *vma;
+	struct dma_fence *fence;
 	void __user *user_data;
+	struct i915_vma *vma;
 	u64 remain, offset;
 	int ret;
 
@@ -561,11 +542,24 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 		GEM_BUG_ON(!node.allocated);
 	}
 
-	ret = i915_gem_object_set_to_gtt_domain(obj, false);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	ret = i915_gem_object_lock_interruptible(obj);
 	if (ret)
 		goto out_unpin;
 
-	mutex_unlock(&i915->drm.struct_mutex);
+	ret = i915_gem_object_set_to_gtt_domain(obj, false);
+	if (ret) {
+		i915_gem_object_unlock(obj);
+		goto out_unpin;
+	}
+
+	fence = i915_gem_object_lock_fence(obj);
+	i915_gem_object_unlock(obj);
+	if (!fence) {
+		ret = -ENOMEM;
+		goto out_unpin;
+	}
 
 	user_data = u64_to_user_ptr(args->data_ptr);
 	remain = args->size;
@@ -603,8 +597,9 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 		offset += page_length;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
+	i915_gem_object_unlock_fence(obj, fence);
 out_unpin:
+	mutex_lock(&i915->drm.struct_mutex);
 	if (node.allocated) {
 		wmb();
 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
@@ -715,6 +710,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 	struct i915_ggtt *ggtt = &i915->ggtt;
 	intel_wakeref_t wakeref;
 	struct drm_mm_node node;
+	struct dma_fence *fence;
 	struct i915_vma *vma;
 	u64 remain, offset;
 	void __user *user_data;
@@ -762,11 +758,24 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		GEM_BUG_ON(!node.allocated);
 	}
 
-	ret = i915_gem_object_set_to_gtt_domain(obj, true);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	ret = i915_gem_object_lock_interruptible(obj);
 	if (ret)
 		goto out_unpin;
 
-	mutex_unlock(&i915->drm.struct_mutex);
+	ret = i915_gem_object_set_to_gtt_domain(obj, true);
+	if (ret) {
+		i915_gem_object_unlock(obj);
+		goto out_unpin;
+	}
+
+	fence = i915_gem_object_lock_fence(obj);
+	i915_gem_object_unlock(obj);
+	if (!fence) {
+		ret = -ENOMEM;
+		goto out_unpin;
+	}
 
 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
 
@@ -811,8 +820,9 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 	}
 	intel_fb_obj_flush(obj, ORIGIN_CPU);
 
-	mutex_lock(&i915->drm.struct_mutex);
+	i915_gem_object_unlock_fence(obj, fence);
 out_unpin:
+	mutex_lock(&i915->drm.struct_mutex);
 	if (node.allocated) {
 		wmb();
 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
@@ -858,23 +868,23 @@ static int
 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 		      const struct drm_i915_gem_pwrite *args)
 {
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	void __user *user_data;
-	u64 remain;
 	unsigned int partial_cacheline_write;
 	unsigned int needs_clflush;
 	unsigned int offset, idx;
+	struct dma_fence *fence;
+	void __user *user_data;
+	u64 remain;
 	int ret;
 
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
 	ret = i915_gem_object_prepare_write(obj, &needs_clflush);
-	mutex_unlock(&i915->drm.struct_mutex);
 	if (ret)
 		return ret;
 
+	fence = i915_gem_object_lock_fence(obj);
+	i915_gem_object_finish_access(obj);
+	if (!fence)
+		return -ENOMEM;
+
 	/* If we don't overwrite a cacheline completely we need to be
 	 * careful to have up-to-date data by first clflushing. Don't
 	 * overcomplicate things and flush the entire patch.
@@ -902,7 +912,8 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 	}
 
 	intel_fb_obj_flush(obj, ORIGIN_CPU);
-	i915_gem_object_finish_access(obj);
+	i915_gem_object_unlock_fence(obj, fence);
+
 	return ret;
 }
 
@@ -1781,7 +1792,9 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 		if (err)
 			goto err_active;
 
+		i915_gem_object_lock(state->obj);
 		err = i915_gem_object_set_to_cpu_domain(state->obj, false);
+		i915_gem_object_unlock(state->obj);
 		if (err)
 			goto err_active;
 
@@ -2228,12 +2241,13 @@ int i915_gem_freeze_late(struct drm_i915_private *i915)
 	i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND);
 	i915_gem_drain_freed_objects(i915);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	for (phase = phases; *phase; phase++) {
-		list_for_each_entry(obj, *phase, mm.link)
+		list_for_each_entry(obj, *phase, mm.link) {
+			i915_gem_object_lock(obj);
 			WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
+			i915_gem_object_unlock(obj);
+		}
 	}
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index d3eba67d4bf9..56be4b091bc7 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3519,8 +3519,11 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
 		WARN_ON(i915_vma_bind(vma,
 				      obj ? obj->cache_level : 0,
 				      PIN_UPDATE));
-		if (obj)
+		if (obj) {
+			i915_gem_object_lock(obj);
 			WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
+			i915_gem_object_unlock(obj);
+		}
 
 lock:
 		mutex_lock(&ggtt->vm.mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index f3b42b026fff..706ed71468e8 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -222,7 +222,9 @@ int i915_gem_render_state_emit(struct i915_request *rq)
 			goto err_unpin;
 	}
 
+	i915_vma_lock(so.vma);
 	err = i915_vma_move_to_active(so.vma, rq, 0);
+	i915_vma_unlock(so.vma);
 err_unpin:
 	i915_vma_unpin(so.vma);
 err_vma:
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index d4d308b6d1d8..3620d39c0bc8 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -847,13 +847,14 @@ void i915_vma_destroy(struct i915_vma *vma)
 {
 	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
 
-	GEM_BUG_ON(i915_vma_is_active(vma));
 	GEM_BUG_ON(i915_vma_is_pinned(vma));
 
 	if (i915_vma_is_closed(vma))
 		list_del(&vma->closed_link);
 
 	WARN_ON(i915_vma_unbind(vma));
+	GEM_BUG_ON(i915_vma_is_active(vma));
+
 	__i915_vma_destroy(vma);
 }
 
@@ -915,12 +916,10 @@ static void export_fence(struct i915_vma *vma,
 	 * handle an error right now. Worst case should be missed
 	 * synchronisation leading to rendering corruption.
 	 */
-	reservation_object_lock(resv, NULL);
 	if (flags & EXEC_OBJECT_WRITE)
 		reservation_object_add_excl_fence(resv, &rq->fence);
 	else if (reservation_object_reserve_shared(resv, 1) == 0)
 		reservation_object_add_shared_fence(resv, &rq->fence);
-	reservation_object_unlock(resv);
 }
 
 int i915_vma_move_to_active(struct i915_vma *vma,
@@ -929,7 +928,8 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 {
 	struct drm_i915_gem_object *obj = vma->obj;
 
-	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	assert_vma_held(vma);
+	assert_object_held(obj);
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
 	/*
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 7950fa96ee86..71ac7ee8620a 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -295,6 +295,18 @@ void i915_vma_close(struct i915_vma *vma);
 void i915_vma_reopen(struct i915_vma *vma);
 void i915_vma_destroy(struct i915_vma *vma);
 
+#define assert_vma_held(vma) reservation_object_assert_held((vma)->resv)
+
+static inline void i915_vma_lock(struct i915_vma *vma)
+{
+	reservation_object_lock(vma->resv, NULL);
+}
+
+static inline void i915_vma_unlock(struct i915_vma *vma)
+{
+	reservation_object_unlock(vma->resv);
+}
+
 int __i915_vma_do_pin(struct i915_vma *vma,
 		      u64 size, u64 alignment, u64 flags);
 static inline int __must_check
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 78c3587ebabf..55312a481574 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2091,6 +2091,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 	 * pin/unpin/fence and not more.
 	 */
 	wakeref = intel_runtime_pm_get(dev_priv);
+	i915_gem_object_lock(obj);
 
 	atomic_inc(&dev_priv->gpu_error.pending_fb_pin);
 
@@ -2145,6 +2146,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 err:
 	atomic_dec(&dev_priv->gpu_error.pending_fb_pin);
 
+	i915_gem_object_unlock(obj);
 	intel_runtime_pm_put(dev_priv, wakeref);
 	return vma;
 }
@@ -2153,9 +2155,12 @@ void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
 {
 	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
 
+	i915_gem_object_lock(vma->obj);
 	if (flags & PLANE_HAS_FENCE)
 		i915_vma_unpin_fence(vma);
 	i915_gem_object_unpin_from_display_plane(vma);
+	i915_gem_object_unlock(vma->obj);
+
 	i915_vma_put(vma);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index 7146524264dd..67eadc82c396 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -343,8 +343,6 @@ static void capture_logs_work(struct work_struct *work)
 
 static int guc_log_map(struct intel_guc_log *log)
 {
-	struct intel_guc *guc = log_to_guc(log);
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	void *vaddr;
 	int ret;
 
@@ -353,9 +351,9 @@ static int guc_log_map(struct intel_guc_log *log)
 	if (!log->vma)
 		return -ENODEV;
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
+	i915_gem_object_lock(log->vma->obj);
 	ret = i915_gem_object_set_to_wc_domain(log->vma->obj, true);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	i915_gem_object_unlock(log->vma->obj);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 81a0db52c588..f05c545b5efc 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -763,8 +763,10 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 
 	atomic_inc(&dev_priv->gpu_error.pending_fb_pin);
 
+	i915_gem_object_lock(new_bo);
 	vma = i915_gem_object_pin_to_display_plane(new_bo,
 						   0, NULL, PIN_MAPPABLE);
+	i915_gem_object_unlock(new_bo);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto out_pin_section;
@@ -1303,15 +1305,20 @@ int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data,
 
 static int get_registers(struct intel_overlay *overlay, bool use_phys)
 {
+	struct drm_i915_private *i915 = overlay->i915;
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	int err;
 
-	obj = i915_gem_object_create_stolen(overlay->i915, PAGE_SIZE);
+	mutex_lock(&i915->drm.struct_mutex);
+
+	obj = i915_gem_object_create_stolen(i915, PAGE_SIZE);
 	if (obj == NULL)
-		obj = i915_gem_object_create_internal(overlay->i915, PAGE_SIZE);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
+		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		goto err_unlock;
+	}
 
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
 	if (IS_ERR(vma)) {
@@ -1332,10 +1339,13 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys)
 	}
 
 	overlay->reg_bo = obj;
+	mutex_unlock(&i915->drm.struct_mutex);
 	return 0;
 
 err_put_bo:
 	i915_gem_object_put(obj);
+err_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -1361,18 +1371,16 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
 
 	INIT_ACTIVE_REQUEST(&overlay->last_flip);
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
-
 	ret = get_registers(overlay, OVERLAY_NEEDS_PHYSICAL(dev_priv));
 	if (ret)
 		goto out_free;
 
+	i915_gem_object_lock(overlay->reg_bo);
 	ret = i915_gem_object_set_to_gtt_domain(overlay->reg_bo, true);
+	i915_gem_object_unlock(overlay->reg_bo);
 	if (ret)
 		goto out_reg_bo;
 
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-
 	memset_io(overlay->regs, 0, sizeof(struct overlay_registers));
 	update_polyphase_filter(overlay->regs);
 	update_reg_attrs(overlay, overlay->regs);
@@ -1384,7 +1392,6 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
 out_reg_bo:
 	i915_gem_object_put(overlay->reg_bo);
 out_free:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 	kfree(overlay);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c
index 3257a054cb0b..e0042650726a 100644
--- a/drivers/gpu/drm/i915/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/intel_uc_fw.c
@@ -246,15 +246,13 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
 			 intel_uc_fw_type_repr(uc_fw->type),
 			 intel_uc_fw_status_repr(uc_fw->load_status));
 
-	intel_uc_fw_ggtt_bind(uc_fw);
-
 	/* Call custom loader */
+	intel_uc_fw_ggtt_bind(uc_fw);
 	err = xfer(uc_fw);
+	intel_uc_fw_ggtt_unbind(uc_fw);
 	if (err)
 		goto fail;
 
-	intel_uc_fw_ggtt_unbind(uc_fw);
-
 	uc_fw->load_status = INTEL_UC_FIRMWARE_SUCCESS;
 	DRM_DEBUG_DRIVER("%s fw load %s\n",
 			 intel_uc_fw_type_repr(uc_fw->type),
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 66c79965bde8..5c9dbcaa3a33 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -875,7 +875,9 @@ static int live_all_engines(void *arg)
 			i915_gem_object_set_active_reference(batch->obj);
 		}
 
+		i915_vma_lock(batch);
 		err = i915_vma_move_to_active(batch, request[id], 0);
+		i915_vma_unlock(batch);
 		GEM_BUG_ON(err);
 
 		i915_request_get(request[id]);
@@ -990,7 +992,9 @@ static int live_sequential_engines(void *arg)
 		GEM_BUG_ON(err);
 		request[id]->batch = batch;
 
+		i915_vma_lock(batch);
 		err = i915_vma_move_to_active(batch, request[id], 0);
+		i915_vma_unlock(batch);
 		GEM_BUG_ON(err);
 
 		i915_gem_object_set_active_reference(batch->obj);
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c
index 45688e6698b7..d845d1dc7ce7 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.c
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c
@@ -76,7 +76,9 @@ static int move_to_active(struct i915_vma *vma,
 {
 	int err;
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, flags);
+	i915_vma_unlock(vma);
 	if (err)
 		return err;
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH 35/45] drm/i915: Move GEM object waiting to its own file
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (32 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 34/45] drm/i915: Move GEM object domain management from struct_mutex to local Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 36/45] drm/i915: Move GEM object busy checking " Chris Wilson
                   ` (17 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

Continuing the decluttering of i915_gem.c by moving the object wait
decomposition into its own file.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile              |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_object.h |   8 +
 drivers/gpu/drm/i915/gem/i915_gem_wait.c   | 276 +++++++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.h            |  17 --
 drivers/gpu/drm/i915/i915_gem.c            | 254 -------------------
 5 files changed, 285 insertions(+), 271 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_wait.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index e30622229812..c65f4829eb83 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -105,6 +105,7 @@ gem-y += \
 	gem/i915_gem_stolen.o \
 	gem/i915_gem_tiling.o \
 	gem/i915_gem_userptr.o \
+	gem/i915_gem_wait.o \
 	gem/i915_gemfs.o
 i915-y += \
 	  $(gem-y) \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 509d145d808a..23bca003fbfb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -436,4 +436,12 @@ static inline void __start_cpu_write(struct drm_i915_gem_object *obj)
 		obj->cache_dirty = true;
 }
 
+int i915_gem_object_wait(struct drm_i915_gem_object *obj,
+			 unsigned int flags,
+			 long timeout);
+int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
+				  unsigned int flags,
+				  const struct i915_sched_attr *attr);
+#define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX)
+
 #endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
new file mode 100644
index 000000000000..c1d966af879f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
@@ -0,0 +1,276 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include <linux/dma-fence-array.h>
+
+#include "gt/intel_engine.h"
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+static long
+i915_gem_object_wait_fence(struct dma_fence *fence,
+			   unsigned int flags,
+			   long timeout)
+{
+	BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
+
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		return timeout;
+
+	if (dma_fence_is_i915(fence))
+		return i915_request_wait(to_request(fence), flags, timeout);
+
+	return dma_fence_wait_timeout(fence,
+				      flags & I915_WAIT_INTERRUPTIBLE,
+				      timeout);
+}
+
+static long
+i915_gem_object_wait_reservation(struct reservation_object *resv,
+				 unsigned int flags,
+				 long timeout)
+{
+	unsigned int seq = __read_seqcount_begin(&resv->seq);
+	struct dma_fence *excl;
+	bool prune_fences = false;
+
+	if (flags & I915_WAIT_ALL) {
+		struct dma_fence **shared;
+		unsigned int count, i;
+		int ret;
+
+		ret = reservation_object_get_fences_rcu(resv,
+							&excl, &count, &shared);
+		if (ret)
+			return ret;
+
+		for (i = 0; i < count; i++) {
+			timeout = i915_gem_object_wait_fence(shared[i],
+							     flags, timeout);
+			if (timeout < 0)
+				break;
+
+			dma_fence_put(shared[i]);
+		}
+
+		for (; i < count; i++)
+			dma_fence_put(shared[i]);
+		kfree(shared);
+
+		/*
+		 * If both shared fences and an exclusive fence exist,
+		 * then by construction the shared fences must be later
+		 * than the exclusive fence. If we successfully wait for
+		 * all the shared fences, we know that the exclusive fence
+		 * must all be signaled. If all the shared fences are
+		 * signaled, we can prune the array and recover the
+		 * floating references on the fences/requests.
+		 */
+		prune_fences = count && timeout >= 0;
+	} else {
+		excl = reservation_object_get_excl_rcu(resv);
+	}
+
+	if (excl && timeout >= 0)
+		timeout = i915_gem_object_wait_fence(excl, flags, timeout);
+
+	dma_fence_put(excl);
+
+	/*
+	 * Opportunistically prune the fences iff we know they have *all* been
+	 * signaled and that the reservation object has not been changed (i.e.
+	 * no new fences have been added).
+	 */
+	if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
+		if (reservation_object_trylock(resv)) {
+			if (!__read_seqcount_retry(&resv->seq, seq))
+				reservation_object_add_excl_fence(resv, NULL);
+			reservation_object_unlock(resv);
+		}
+	}
+
+	return timeout;
+}
+
+static void __fence_set_priority(struct dma_fence *fence,
+				 const struct i915_sched_attr *attr)
+{
+	struct i915_request *rq;
+	struct intel_engine_cs *engine;
+
+	if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
+		return;
+
+	rq = to_request(fence);
+	engine = rq->engine;
+
+	local_bh_disable();
+	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
+	if (engine->schedule)
+		engine->schedule(rq, attr);
+	rcu_read_unlock();
+	local_bh_enable(); /* kick the tasklets if queues were reprioritised */
+}
+
+static void fence_set_priority(struct dma_fence *fence,
+			       const struct i915_sched_attr *attr)
+{
+	/* Recurse once into a fence-array */
+	if (dma_fence_is_array(fence)) {
+		struct dma_fence_array *array = to_dma_fence_array(fence);
+		int i;
+
+		for (i = 0; i < array->num_fences; i++)
+			__fence_set_priority(array->fences[i], attr);
+	} else {
+		__fence_set_priority(fence, attr);
+	}
+}
+
+int
+i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
+			      unsigned int flags,
+			      const struct i915_sched_attr *attr)
+{
+	struct dma_fence *excl;
+
+	if (flags & I915_WAIT_ALL) {
+		struct dma_fence **shared;
+		unsigned int count, i;
+		int ret;
+
+		ret = reservation_object_get_fences_rcu(obj->resv,
+							&excl, &count, &shared);
+		if (ret)
+			return ret;
+
+		for (i = 0; i < count; i++) {
+			fence_set_priority(shared[i], attr);
+			dma_fence_put(shared[i]);
+		}
+
+		kfree(shared);
+	} else {
+		excl = reservation_object_get_excl_rcu(obj->resv);
+	}
+
+	if (excl) {
+		fence_set_priority(excl, attr);
+		dma_fence_put(excl);
+	}
+	return 0;
+}
+
+/**
+ * Waits for rendering to the object to be completed
+ * @obj: i915 gem object
+ * @flags: how to wait (under a lock, for all rendering or just for writes etc)
+ * @timeout: how long to wait
+ */
+int
+i915_gem_object_wait(struct drm_i915_gem_object *obj,
+		     unsigned int flags,
+		     long timeout)
+{
+	might_sleep();
+	GEM_BUG_ON(timeout < 0);
+
+	timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout);
+	return timeout < 0 ? timeout : 0;
+}
+
+static inline unsigned long nsecs_to_jiffies_timeout(const u64 n)
+{
+	/* nsecs_to_jiffies64() does not guard against overflow */
+	if (NSEC_PER_SEC % HZ &&
+	    div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ)
+		return MAX_JIFFY_OFFSET;
+
+	return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1);
+}
+
+static unsigned long to_wait_timeout(s64 timeout_ns)
+{
+	if (timeout_ns < 0)
+		return MAX_SCHEDULE_TIMEOUT;
+
+	if (timeout_ns == 0)
+		return 0;
+
+	return nsecs_to_jiffies_timeout(timeout_ns);
+}
+
+/**
+ * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
+ * @dev: drm device pointer
+ * @data: ioctl data blob
+ * @file: drm file pointer
+ *
+ * Returns 0 if successful, else an error is returned with the remaining time in
+ * the timeout parameter.
+ *  -ETIME: object is still busy after timeout
+ *  -ERESTARTSYS: signal interrupted the wait
+ *  -ENOENT: object doesn't exist
+ * Also possible, but rare:
+ *  -EAGAIN: incomplete, restart syscall
+ *  -ENOMEM: damn
+ *  -ENODEV: Internal IRQ fail
+ *  -E?: The add request failed
+ *
+ * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
+ * non-zero timeout parameter the wait ioctl will wait for the given number of
+ * nanoseconds on an object becoming unbusy. Since the wait itself does so
+ * without holding struct_mutex the object may become re-busied before this
+ * function completes. A similar but shorter race condition exists in the busy
+ * ioctl.
+ */
+int
+i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_i915_gem_wait *args = data;
+	struct drm_i915_gem_object *obj;
+	ktime_t start;
+	long ret;
+
+	if (args->flags != 0)
+		return -EINVAL;
+
+	obj = i915_gem_object_lookup(file, args->bo_handle);
+	if (!obj)
+		return -ENOENT;
+
+	start = ktime_get();
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
+				   I915_WAIT_ALL,
+				   to_wait_timeout(args->timeout_ns));
+
+	if (args->timeout_ns > 0) {
+		args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
+		if (args->timeout_ns < 0)
+			args->timeout_ns = 0;
+
+		/*
+		 * Apparently ktime isn't accurate enough and occasionally has a
+		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
+		 * things up to make the test happy. We allow up to 1 jiffy.
+		 *
+		 * This is a regression from the timespec->ktime conversion.
+		 */
+		if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
+			args->timeout_ns = 0;
+
+		/* Asked to wait beyond the jiffie/scheduler precision? */
+		if (ret == -ETIME && args->timeout_ns)
+			ret = -EAGAIN;
+	}
+
+	i915_gem_object_put(obj);
+	return ret;
+}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cec8410ed80b..0d1ce4eb6699 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2830,13 +2830,6 @@ void i915_gem_suspend(struct drm_i915_private *dev_priv);
 void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
 void i915_gem_resume(struct drm_i915_private *dev_priv);
 vm_fault_t i915_gem_fault(struct vm_fault *vmf);
-int i915_gem_object_wait(struct drm_i915_gem_object *obj,
-			 unsigned int flags,
-			 long timeout);
-int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
-				  unsigned int flags,
-				  const struct i915_sched_attr *attr);
-#define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX)
 
 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file);
 void i915_gem_release(struct drm_device *dev, struct drm_file *file);
@@ -3288,16 +3281,6 @@ static inline unsigned long msecs_to_jiffies_timeout(const unsigned int m)
 	return min_t(unsigned long, MAX_JIFFY_OFFSET, j + 1);
 }
 
-static inline unsigned long nsecs_to_jiffies_timeout(const u64 n)
-{
-	/* nsecs_to_jiffies64() does not guard against overflow */
-	if (NSEC_PER_SEC % HZ &&
-	    div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ)
-		return MAX_JIFFY_OFFSET;
-
-        return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1);
-}
-
 /*
  * If you need to wait X milliseconds between events A and B, but event B
  * doesn't happen exactly after event A, you record the timestamp (jiffies) of
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 32fdc1977afe..467273dd2d4a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -124,178 +124,6 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	return ret;
 }
 
-static long
-i915_gem_object_wait_fence(struct dma_fence *fence,
-			   unsigned int flags,
-			   long timeout)
-{
-	BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
-
-	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
-		return timeout;
-
-	if (dma_fence_is_i915(fence))
-		return i915_request_wait(to_request(fence), flags, timeout);
-
-	return dma_fence_wait_timeout(fence,
-				      flags & I915_WAIT_INTERRUPTIBLE,
-				      timeout);
-}
-
-static long
-i915_gem_object_wait_reservation(struct reservation_object *resv,
-				 unsigned int flags,
-				 long timeout)
-{
-	unsigned int seq = __read_seqcount_begin(&resv->seq);
-	struct dma_fence *excl;
-	bool prune_fences = false;
-
-	if (flags & I915_WAIT_ALL) {
-		struct dma_fence **shared;
-		unsigned int count, i;
-		int ret;
-
-		ret = reservation_object_get_fences_rcu(resv,
-							&excl, &count, &shared);
-		if (ret)
-			return ret;
-
-		for (i = 0; i < count; i++) {
-			timeout = i915_gem_object_wait_fence(shared[i],
-							     flags, timeout);
-			if (timeout < 0)
-				break;
-
-			dma_fence_put(shared[i]);
-		}
-
-		for (; i < count; i++)
-			dma_fence_put(shared[i]);
-		kfree(shared);
-
-		/*
-		 * If both shared fences and an exclusive fence exist,
-		 * then by construction the shared fences must be later
-		 * than the exclusive fence. If we successfully wait for
-		 * all the shared fences, we know that the exclusive fence
-		 * must all be signaled. If all the shared fences are
-		 * signaled, we can prune the array and recover the
-		 * floating references on the fences/requests.
-		 */
-		prune_fences = count && timeout >= 0;
-	} else {
-		excl = reservation_object_get_excl_rcu(resv);
-	}
-
-	if (excl && timeout >= 0)
-		timeout = i915_gem_object_wait_fence(excl, flags, timeout);
-
-	dma_fence_put(excl);
-
-	/*
-	 * Opportunistically prune the fences iff we know they have *all* been
-	 * signaled and that the reservation object has not been changed (i.e.
-	 * no new fences have been added).
-	 */
-	if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
-		if (reservation_object_trylock(resv)) {
-			if (!__read_seqcount_retry(&resv->seq, seq))
-				reservation_object_add_excl_fence(resv, NULL);
-			reservation_object_unlock(resv);
-		}
-	}
-
-	return timeout;
-}
-
-static void __fence_set_priority(struct dma_fence *fence,
-				 const struct i915_sched_attr *attr)
-{
-	struct i915_request *rq;
-	struct intel_engine_cs *engine;
-
-	if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
-		return;
-
-	rq = to_request(fence);
-	engine = rq->engine;
-
-	local_bh_disable();
-	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
-	if (engine->schedule)
-		engine->schedule(rq, attr);
-	rcu_read_unlock();
-	local_bh_enable(); /* kick the tasklets if queues were reprioritised */
-}
-
-static void fence_set_priority(struct dma_fence *fence,
-			       const struct i915_sched_attr *attr)
-{
-	/* Recurse once into a fence-array */
-	if (dma_fence_is_array(fence)) {
-		struct dma_fence_array *array = to_dma_fence_array(fence);
-		int i;
-
-		for (i = 0; i < array->num_fences; i++)
-			__fence_set_priority(array->fences[i], attr);
-	} else {
-		__fence_set_priority(fence, attr);
-	}
-}
-
-int
-i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
-			      unsigned int flags,
-			      const struct i915_sched_attr *attr)
-{
-	struct dma_fence *excl;
-
-	if (flags & I915_WAIT_ALL) {
-		struct dma_fence **shared;
-		unsigned int count, i;
-		int ret;
-
-		ret = reservation_object_get_fences_rcu(obj->resv,
-							&excl, &count, &shared);
-		if (ret)
-			return ret;
-
-		for (i = 0; i < count; i++) {
-			fence_set_priority(shared[i], attr);
-			dma_fence_put(shared[i]);
-		}
-
-		kfree(shared);
-	} else {
-		excl = reservation_object_get_excl_rcu(obj->resv);
-	}
-
-	if (excl) {
-		fence_set_priority(excl, attr);
-		dma_fence_put(excl);
-	}
-	return 0;
-}
-
-/**
- * Waits for rendering to the object to be completed
- * @obj: i915 gem object
- * @flags: how to wait (under a lock, for all rendering or just for writes etc)
- * @timeout: how long to wait
- */
-int
-i915_gem_object_wait(struct drm_i915_gem_object *obj,
-		     unsigned int flags,
-		     long timeout)
-{
-	might_sleep();
-	GEM_BUG_ON(timeout < 0);
-
-	timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout);
-	return timeout < 0 ? timeout : 0;
-}
-
 static int
 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 		     struct drm_i915_gem_pwrite *args,
@@ -1073,88 +901,6 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 	}
 }
 
-static unsigned long to_wait_timeout(s64 timeout_ns)
-{
-	if (timeout_ns < 0)
-		return MAX_SCHEDULE_TIMEOUT;
-
-	if (timeout_ns == 0)
-		return 0;
-
-	return nsecs_to_jiffies_timeout(timeout_ns);
-}
-
-/**
- * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
- * @dev: drm device pointer
- * @data: ioctl data blob
- * @file: drm file pointer
- *
- * Returns 0 if successful, else an error is returned with the remaining time in
- * the timeout parameter.
- *  -ETIME: object is still busy after timeout
- *  -ERESTARTSYS: signal interrupted the wait
- *  -ENONENT: object doesn't exist
- * Also possible, but rare:
- *  -EAGAIN: incomplete, restart syscall
- *  -ENOMEM: damn
- *  -ENODEV: Internal IRQ fail
- *  -E?: The add request failed
- *
- * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
- * non-zero timeout parameter the wait ioctl will wait for the given number of
- * nanoseconds on an object becoming unbusy. Since the wait itself does so
- * without holding struct_mutex the object may become re-busied before this
- * function completes. A similar but shorter * race condition exists in the busy
- * ioctl
- */
-int
-i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
-{
-	struct drm_i915_gem_wait *args = data;
-	struct drm_i915_gem_object *obj;
-	ktime_t start;
-	long ret;
-
-	if (args->flags != 0)
-		return -EINVAL;
-
-	obj = i915_gem_object_lookup(file, args->bo_handle);
-	if (!obj)
-		return -ENOENT;
-
-	start = ktime_get();
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_PRIORITY |
-				   I915_WAIT_ALL,
-				   to_wait_timeout(args->timeout_ns));
-
-	if (args->timeout_ns > 0) {
-		args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
-		if (args->timeout_ns < 0)
-			args->timeout_ns = 0;
-
-		/*
-		 * Apparently ktime isn't accurate enough and occasionally has a
-		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
-		 * things up to make the test happy. We allow up to 1 jiffy.
-		 *
-		 * This is a regression from the timespec->ktime conversion.
-		 */
-		if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
-			args->timeout_ns = 0;
-
-		/* Asked to wait beyond the jiffie/scheduler precision? */
-		if (ret == -ETIME && args->timeout_ns)
-			ret = -EAGAIN;
-	}
-
-	i915_gem_object_put(obj);
-	return ret;
-}
-
 static int wait_for_engines(struct drm_i915_private *i915)
 {
 	if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH 36/45] drm/i915: Move GEM object busy checking to its own file
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (33 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 35/45] drm/i915: Move GEM object waiting to its own file Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 37/45] drm/i915: Move GEM client throttling " Chris Wilson
                   ` (16 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

Continuing the decluttering of i915_gem.c by moving the object busy
checking into its own file.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile            |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_busy.c | 138 +++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem.c          | 128 ---------------------
 3 files changed, 139 insertions(+), 128 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_busy.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index c65f4829eb83..655616b77abc 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -88,6 +88,7 @@ i915-y += $(gt-y)
 # GEM (Graphics Execution Management) code
 obj-y += gem/
 gem-y += \
+	gem/i915_gem_busy.o \
 	gem/i915_gem_clflush.o \
 	gem/i915_gem_context.o \
 	gem/i915_gem_dmabuf.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
new file mode 100644
index 000000000000..5a5eda3003e9
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
@@ -0,0 +1,138 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include "gt/intel_engine.h"
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+static __always_inline u32 __busy_read_flag(u8 id)
+{
+	if (id == (u8)I915_ENGINE_CLASS_INVALID)
+		return 0xffff0000u;
+
+	GEM_BUG_ON(id >= 16);
+	return 0x10000u << id;
+}
+
+static __always_inline u32 __busy_write_id(u8 id)
+{
+	/*
+	 * The uABI guarantees an active writer is also amongst the read
+	 * engines. This would be true if we accessed the activity tracking
+	 * under the lock, but as we perform the lookup of the object and
+	 * its activity locklessly we can not guarantee that the last_write
+	 * being active implies that we have set the same engine flag from
+	 * last_read - hence we always set both read and write busy for
+	 * last_write.
+	 */
+	if (id == (u8)I915_ENGINE_CLASS_INVALID)
+		return 0xffffffffu;
+
+	return (id + 1) | __busy_read_flag(id);
+}
+
+static __always_inline unsigned int
+__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u8 id))
+{
+	const struct i915_request *rq;
+
+	/*
+	 * We have to check the current hw status of the fence as the uABI
+	 * guarantees forward progress. We could rely on the idle worker
+	 * to eventually flush us, but to minimise latency just ask the
+	 * hardware.
+	 *
+	 * Note we only report on the status of native fences.
+	 */
+	if (!dma_fence_is_i915(fence))
+		return 0;
+
+	/* opencode to_request() in order to avoid const warnings */
+	rq = container_of(fence, const struct i915_request, fence);
+	if (i915_request_completed(rq))
+		return 0;
+
+	/* Beware type-expansion follies! */
+	BUILD_BUG_ON(!typecheck(u8, rq->engine->uabi_class));
+	return flag(rq->engine->uabi_class);
+}
+
+static __always_inline unsigned int
+busy_check_reader(const struct dma_fence *fence)
+{
+	return __busy_set_if_active(fence, __busy_read_flag);
+}
+
+static __always_inline unsigned int
+busy_check_writer(const struct dma_fence *fence)
+{
+	if (!fence)
+		return 0;
+
+	return __busy_set_if_active(fence, __busy_write_id);
+}
+
+int
+i915_gem_busy_ioctl(struct drm_device *dev, void *data,
+		    struct drm_file *file)
+{
+	struct drm_i915_gem_busy *args = data;
+	struct drm_i915_gem_object *obj;
+	struct reservation_object_list *list;
+	unsigned int seq;
+	int err;
+
+	err = -ENOENT;
+	rcu_read_lock();
+	obj = i915_gem_object_lookup_rcu(file, args->handle);
+	if (!obj)
+		goto out;
+
+	/*
+	 * A discrepancy here is that we do not report the status of
+	 * non-i915 fences, i.e. even though we may report the object as idle,
+	 * a call to set-domain may still stall waiting for foreign rendering.
+	 * This also means that wait-ioctl may report an object as busy,
+	 * where busy-ioctl considers it idle.
+	 *
+	 * We trade the ability to warn of foreign fences to report on which
+	 * i915 engines are active for the object.
+	 *
+	 * Alternatively, we can trade that extra information on read/write
+	 * activity with
+	 *	args->busy =
+	 *		!reservation_object_test_signaled_rcu(obj->resv, true);
+	 * to report the overall busyness. This is what the wait-ioctl does.
+	 *
+	 */
+retry:
+	seq = raw_read_seqcount(&obj->resv->seq);
+
+	/* Translate the exclusive fence to the READ *and* WRITE engine */
+	args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
+
+	/* Translate shared fences to READ set of engines */
+	list = rcu_dereference(obj->resv->fence);
+	if (list) {
+		unsigned int shared_count = list->shared_count, i;
+
+		for (i = 0; i < shared_count; ++i) {
+			struct dma_fence *fence =
+				rcu_dereference(list->shared[i]);
+
+			args->busy |= busy_check_reader(fence);
+		}
+	}
+
+	if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
+		goto retry;
+
+	err = 0;
+out:
+	rcu_read_unlock();
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 467273dd2d4a..2f1e6dd78dc1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1118,134 +1118,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 	return vma;
 }
 
-static __always_inline u32 __busy_read_flag(u8 id)
-{
-	if (id == (u8)I915_ENGINE_CLASS_INVALID)
-		return 0xffff0000u;
-
-	GEM_BUG_ON(id >= 16);
-	return 0x10000u << id;
-}
-
-static __always_inline u32 __busy_write_id(u8 id)
-{
-	/*
-	 * The uABI guarantees an active writer is also amongst the read
-	 * engines. This would be true if we accessed the activity tracking
-	 * under the lock, but as we perform the lookup of the object and
-	 * its activity locklessly we can not guarantee that the last_write
-	 * being active implies that we have set the same engine flag from
-	 * last_read - hence we always set both read and write busy for
-	 * last_write.
-	 */
-	if (id == (u8)I915_ENGINE_CLASS_INVALID)
-		return 0xffffffffu;
-
-	return (id + 1) | __busy_read_flag(id);
-}
-
-static __always_inline unsigned int
-__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u8 id))
-{
-	const struct i915_request *rq;
-
-	/*
-	 * We have to check the current hw status of the fence as the uABI
-	 * guarantees forward progress. We could rely on the idle worker
-	 * to eventually flush us, but to minimise latency just ask the
-	 * hardware.
-	 *
-	 * Note we only report on the status of native fences.
-	 */
-	if (!dma_fence_is_i915(fence))
-		return 0;
-
-	/* opencode to_request() in order to avoid const warnings */
-	rq = container_of(fence, const struct i915_request, fence);
-	if (i915_request_completed(rq))
-		return 0;
-
-	/* Beware type-expansion follies! */
-	BUILD_BUG_ON(!typecheck(u8, rq->engine->uabi_class));
-	return flag(rq->engine->uabi_class);
-}
-
-static __always_inline unsigned int
-busy_check_reader(const struct dma_fence *fence)
-{
-	return __busy_set_if_active(fence, __busy_read_flag);
-}
-
-static __always_inline unsigned int
-busy_check_writer(const struct dma_fence *fence)
-{
-	if (!fence)
-		return 0;
-
-	return __busy_set_if_active(fence, __busy_write_id);
-}
-
-int
-i915_gem_busy_ioctl(struct drm_device *dev, void *data,
-		    struct drm_file *file)
-{
-	struct drm_i915_gem_busy *args = data;
-	struct drm_i915_gem_object *obj;
-	struct reservation_object_list *list;
-	unsigned int seq;
-	int err;
-
-	err = -ENOENT;
-	rcu_read_lock();
-	obj = i915_gem_object_lookup_rcu(file, args->handle);
-	if (!obj)
-		goto out;
-
-	/*
-	 * A discrepancy here is that we do not report the status of
-	 * non-i915 fences, i.e. even though we may report the object as idle,
-	 * a call to set-domain may still stall waiting for foreign rendering.
-	 * This also means that wait-ioctl may report an object as busy,
-	 * where busy-ioctl considers it idle.
-	 *
-	 * We trade the ability to warn of foreign fences to report on which
-	 * i915 engines are active for the object.
-	 *
-	 * Alternatively, we can trade that extra information on read/write
-	 * activity with
-	 *	args->busy =
-	 *		!reservation_object_test_signaled_rcu(obj->resv, true);
-	 * to report the overall busyness. This is what the wait-ioctl does.
-	 *
-	 */
-retry:
-	seq = raw_read_seqcount(&obj->resv->seq);
-
-	/* Translate the exclusive fence to the READ *and* WRITE engine */
-	args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
-
-	/* Translate shared fences to READ set of engines */
-	list = rcu_dereference(obj->resv->fence);
-	if (list) {
-		unsigned int shared_count = list->shared_count, i;
-
-		for (i = 0; i < shared_count; ++i) {
-			struct dma_fence *fence =
-				rcu_dereference(list->shared[i]);
-
-			args->busy |= busy_check_reader(fence);
-		}
-	}
-
-	if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
-		goto retry;
-
-	err = 0;
-out:
-	rcu_read_unlock();
-	return err;
-}
-
 int
 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH 37/45] drm/i915: Move GEM client throttling to its own file
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (34 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 36/45] drm/i915: Move GEM object busy checking " Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 38/45] drm/i915: Drop the deferred active reference Chris Wilson
                   ` (15 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

Continuing the decluttering of i915_gem.c by moving the client self
throttling into its own file.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                |  1 +
 drivers/gpu/drm/i915/gem/i915_gem_throttle.c | 74 ++++++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.h              |  6 --
 drivers/gpu/drm/i915/i915_gem.c              | 58 ---------------
 4 files changed, 75 insertions(+), 64 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_throttle.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 655616b77abc..1ae067daafa3 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -104,6 +104,7 @@ gem-y += \
 	gem/i915_gem_shmem.o \
 	gem/i915_gem_shrinker.o \
 	gem/i915_gem_stolen.o \
+	gem/i915_gem_throttle.o \
 	gem/i915_gem_tiling.o \
 	gem/i915_gem_userptr.o \
 	gem/i915_gem_wait.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
new file mode 100644
index 000000000000..491bc28c175d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
@@ -0,0 +1,74 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/jiffies.h>
+
+#include <drm/drm_file.h>
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+#include "../i915_drv.h"
+
+/*
+ * 20ms is a fairly arbitrary limit (greater than the average frame time)
+ * chosen to prevent the CPU getting more than a frame ahead of the GPU
+ * (when using lax throttling for the frontbuffer). We also use it to
+ * offer free GPU waitboosts for severely congested workloads.
+ */
+#define DRM_I915_THROTTLE_JIFFIES msecs_to_jiffies(20)
+
+/*
+ * Throttle our rendering by waiting until the ring has completed our requests
+ * emitted over 20 msec ago.
+ *
+ * Note that if we were to use the current jiffies each time around the loop,
+ * we wouldn't escape the function with any frames outstanding if the time to
+ * render a frame was over 20ms.
+ *
+ * This should get us reasonable parallelism between CPU and GPU but also
+ * relatively low latency when blocking on a particular request to finish.
+ */
+int
+i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file)
+{
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
+	struct i915_request *request, *target = NULL;
+	long ret;
+
+	/* ABI: return -EIO if already wedged */
+	ret = i915_terminally_wedged(to_i915(dev));
+	if (ret)
+		return ret;
+
+	spin_lock(&file_priv->mm.lock);
+	list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
+		if (time_after_eq(request->emitted_jiffies, recent_enough))
+			break;
+
+		if (target) {
+			list_del(&target->client_link);
+			target->file_priv = NULL;
+		}
+
+		target = request;
+	}
+	if (target)
+		i915_request_get(target);
+	spin_unlock(&file_priv->mm.lock);
+
+	if (!target)
+		return 0;
+
+	ret = i915_request_wait(target,
+				I915_WAIT_INTERRUPTIBLE,
+				MAX_SCHEDULE_TIMEOUT);
+	i915_request_put(target);
+
+	return ret < 0 ? ret : 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0d1ce4eb6699..dcfc8a54d63c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -211,12 +211,6 @@ struct drm_i915_file_private {
 	struct {
 		spinlock_t lock;
 		struct list_head request_list;
-/* 20ms is a fairly arbitrary limit (greater than the average frame time)
- * chosen to prevent the CPU getting more than a frame ahead of the GPU
- * (when using lax throttling for the frontbuffer). We also use it to
- * offer free GPU waitboosts for severely congested workloads.
- */
-#define DRM_I915_THROTTLE_JIFFIES msecs_to_jiffies(20)
 	} mm;
 
 	struct idr context_idr;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2f1e6dd78dc1..2a9e8ecf2926 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -988,57 +988,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 	return 0;
 }
 
-/* Throttle our rendering by waiting until the ring has completed our requests
- * emitted over 20 msec ago.
- *
- * Note that if we were to use the current jiffies each time around the loop,
- * we wouldn't escape the function with any frames outstanding if the time to
- * render a frame was over 20ms.
- *
- * This should get us reasonable parallelism between CPU and GPU but also
- * relatively low latency when blocking on a particular request to finish.
- */
-static int
-i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
-{
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_i915_file_private *file_priv = file->driver_priv;
-	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
-	struct i915_request *request, *target = NULL;
-	long ret;
-
-	/* ABI: return -EIO if already wedged */
-	ret = i915_terminally_wedged(dev_priv);
-	if (ret)
-		return ret;
-
-	spin_lock(&file_priv->mm.lock);
-	list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
-		if (time_after_eq(request->emitted_jiffies, recent_enough))
-			break;
-
-		if (target) {
-			list_del(&target->client_link);
-			target->file_priv = NULL;
-		}
-
-		target = request;
-	}
-	if (target)
-		i915_request_get(target);
-	spin_unlock(&file_priv->mm.lock);
-
-	if (target == NULL)
-		return 0;
-
-	ret = i915_request_wait(target,
-				I915_WAIT_INTERRUPTIBLE,
-				MAX_SCHEDULE_TIMEOUT);
-	i915_request_put(target);
-
-	return ret < 0 ? ret : 0;
-}
-
 struct i915_vma *
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
@@ -1118,13 +1067,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 	return vma;
 }
 
-int
-i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv)
-{
-	return i915_gem_ring_throttle(dev, file_priv);
-}
-
 int
 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH 38/45] drm/i915: Drop the deferred active reference
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (35 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 37/45] drm/i915: Move GEM client throttling " Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-05-01 10:34   ` Matthew Auld
  2019-04-25  9:19 ` [PATCH 39/45] drm/i915: Move object close under its own lock Chris Wilson
                   ` (14 subsequent siblings)
  51 siblings, 1 reply; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

An old optimisation to reduce the number of atomics per batch sadly
relies on struct_mutex for coordination. In order to remove struct_mutex
from serialising object/context closing, always take an active reference
on first use and release it on last use; this greatly simplifies the
locking.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c    | 13 +---------
 drivers/gpu/drm/i915/gem/i915_gem_object.h    | 24 +------------------
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  8 -------
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  3 +--
 .../i915/gem/selftests/i915_gem_coherency.c   |  4 ++--
 .../drm/i915/gem/selftests/i915_gem_context.c | 11 +++++----
 .../drm/i915/gem/selftests/i915_gem_mman.c    |  2 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  2 +-
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |  3 +--
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |  9 +------
 .../gpu/drm/i915/gt/selftest_workarounds.c    |  3 ---
 drivers/gpu/drm/i915/gvt/scheduler.c          |  2 +-
 drivers/gpu/drm/i915/i915_gem_batch_pool.c    |  2 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c  |  2 +-
 drivers/gpu/drm/i915/i915_vma.c               | 15 +++++-------
 drivers/gpu/drm/i915/selftests/i915_request.c |  8 -------
 drivers/gpu/drm/i915/selftests/igt_spinner.c  |  9 +------
 18 files changed, 26 insertions(+), 96 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 378040732797..b61c63618b87 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -112,7 +112,7 @@ static void lut_close(struct i915_gem_context *ctx)
 		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
 
 		vma->open_count--;
-		__i915_gem_object_release_unless_active(vma->obj);
+		i915_vma_put(vma);
 	}
 	rcu_read_unlock();
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index df79e2eead62..c178cf7614c6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -156,7 +156,7 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
 		list_del(&lut->ctx_link);
 
 		i915_lut_handle_free(lut);
-		__i915_gem_object_release_unless_active(obj);
+		i915_gem_object_put(obj);
 	}
 
 	mutex_unlock(&i915->drm.struct_mutex);
@@ -348,17 +348,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
 }
 
-void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
-{
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	if (!i915_gem_object_has_active_reference(obj) &&
-	    i915_gem_object_is_active(obj))
-		i915_gem_object_set_active_reference(obj);
-	else
-		i915_gem_object_put(obj);
-}
-
 static inline enum fb_op_origin
 fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 23bca003fbfb..dcb765cd97a3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -161,31 +161,9 @@ i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
 static inline bool
 i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
 {
-	return obj->active_count;
+	return READ_ONCE(obj->active_count);
 }
 
-static inline bool
-i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj)
-{
-	return test_bit(I915_BO_ACTIVE_REF, &obj->flags);
-}
-
-static inline void
-i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj)
-{
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-	__set_bit(I915_BO_ACTIVE_REF, &obj->flags);
-}
-
-static inline void
-i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj)
-{
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-	__clear_bit(I915_BO_ACTIVE_REF, &obj->flags);
-}
-
-void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj);
-
 static inline bool
 i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 24abdd6999b5..133581339d81 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -120,14 +120,6 @@ struct drm_i915_gem_object {
 	struct list_head batch_pool_link;
 	I915_SELFTEST_DECLARE(struct list_head st_link);
 
-	unsigned long flags;
-
-	/**
-	 * Have we taken a reference for the object for incomplete GPU
-	 * activity?
-	 */
-#define I915_BO_ACTIVE_REF 0
-
 	/*
 	 * Is the object to be mapped as read-only to the GPU
 	 * Only honoured if hardware has relevant pte bit
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 5a89cbab3b2e..9e211aa044ea 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -975,8 +975,6 @@ static int gpu_write(struct i915_vma *vma,
 	if (err)
 		goto err_request;
 
-	i915_gem_object_set_active_reference(batch->obj);
-
 	i915_vma_lock(vma);
 	err = i915_gem_object_set_to_gtt_domain(vma->obj, false);
 	if (err == 0)
@@ -995,6 +993,7 @@ static int gpu_write(struct i915_vma *vma,
 err_batch:
 	i915_vma_unpin(batch);
 	i915_vma_close(batch);
+	i915_vma_put(batch);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 22fe4b6fd506..ae9586645150 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -365,7 +365,7 @@ static int igt_gem_coherency(void *arg)
 						}
 					}
 
-					__i915_gem_object_release_unless_active(obj);
+					i915_gem_object_put(obj);
 				}
 			}
 		}
@@ -377,7 +377,7 @@ static int igt_gem_coherency(void *arg)
 	return err;
 
 put_object:
-	__i915_gem_object_release_unless_active(obj);
+	i915_gem_object_put(obj);
 	goto unlock;
 }
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 73a448f0f3fe..f37a844189a2 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -318,14 +318,14 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 	if (err)
 		goto skip_request;
 
-	i915_gem_object_set_active_reference(batch->obj);
+	i915_request_add(rq);
+
 	i915_vma_unpin(batch);
 	i915_vma_close(batch);
+	i915_vma_put(batch);
 
 	i915_vma_unpin(vma);
 
-	i915_request_add(rq);
-
 	return 0;
 
 skip_request:
@@ -802,9 +802,9 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
 	if (err)
 		goto skip_request;
 
-	i915_gem_object_set_active_reference(batch->obj);
 	i915_vma_unpin(batch);
 	i915_vma_close(batch);
+	i915_vma_put(batch);
 
 	i915_vma_unpin(vma);
 
@@ -820,6 +820,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
 	i915_request_add(rq);
 err_batch:
 	i915_vma_unpin(batch);
+	i915_vma_put(batch);
 err_vma:
 	i915_vma_unpin(vma);
 
@@ -1365,9 +1366,9 @@ static int write_to_scratch(struct i915_gem_context *ctx,
 	if (err)
 		goto skip_request;
 
-	i915_gem_object_set_active_reference(obj);
 	i915_vma_unpin(vma);
 	i915_vma_close(vma);
+	i915_vma_put(vma);
 
 	i915_request_add(rq);
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 226abf516a24..6e21d83dddeb 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -354,8 +354,8 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
 
 	i915_request_add(rq);
 
-	__i915_gem_object_release_unless_active(obj);
 	i915_vma_unpin(vma);
+	i915_gem_object_put(obj); /* leave it only alive via its active ref */
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 25a1328d9d27..5eef7d3bf7e3 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -458,7 +458,7 @@ static void cleanup_status_page(struct intel_engine_cs *engine)
 		i915_vma_unpin(vma);
 
 	i915_gem_object_unpin_map(vma->obj);
-	__i915_gem_object_release_unless_active(vma->obj);
+	i915_gem_object_put(vma->obj);
 }
 
 static int pin_ggtt_status_page(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 8faa9bd9edc2..62ad585531b8 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1316,10 +1316,9 @@ intel_engine_create_ring(struct intel_engine_cs *engine,
 void intel_ring_free(struct kref *ref)
 {
 	struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
-	struct drm_i915_gem_object *obj = ring->vma->obj;
 
 	i915_vma_close(ring->vma);
-	__i915_gem_object_release_unless_active(obj);
+	i915_vma_put(ring->vma);
 
 	i915_timeline_put(ring->timeline);
 	kfree(ring);
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 45745e36b5f4..910f96231934 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -118,15 +118,8 @@ static int move_to_active(struct i915_vma *vma,
 	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, flags);
 	i915_vma_unlock(vma);
-	if (err)
-		return err;
-
-	if (!i915_gem_object_has_active_reference(vma->obj)) {
-		i915_gem_object_get(vma->obj);
-		i915_gem_object_set_active_reference(vma->obj);
-	}
 
-	return 0;
+	return err;
 }
 
 static struct i915_request *
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index d4ba5296b2c8..db40d4f805ea 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -135,9 +135,6 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 	}
 	intel_ring_advance(rq, cs);
 
-	i915_gem_object_get(result);
-	i915_gem_object_set_active_reference(result);
-
 	i915_request_add(rq);
 	i915_vma_unpin(vma);
 
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 9f6c119983d4..e881ff527bcb 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -583,7 +583,7 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 				i915_vma_unpin(bb->vma);
 				i915_vma_close(bb->vma);
 			}
-			__i915_gem_object_release_unless_active(bb->obj);
+			i915_gem_object_put(bb->obj);
 		}
 		list_del(&bb->list);
 		kfree(bb);
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index f3890b664e3f..56adfdcaed3e 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -55,7 +55,7 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
 		list_for_each_entry_safe(obj, next,
 					 &pool->cache_list[n],
 					 batch_pool_link)
-			__i915_gem_object_release_unless_active(obj);
+			i915_gem_object_put(obj);
 
 		INIT_LIST_HEAD(&pool->cache_list[n]);
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 706ed71468e8..4ee032072d4f 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -230,6 +230,6 @@ int i915_gem_render_state_emit(struct i915_request *rq)
 err_vma:
 	i915_vma_close(so.vma);
 err_obj:
-	__i915_gem_object_release_unless_active(so.obj);
+	i915_gem_object_put(so.obj);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 3620d39c0bc8..16d47f1f645a 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -115,10 +115,7 @@ static void __i915_vma_retire(struct i915_active *ref)
 	 */
 	obj_bump_mru(obj);
 
-	if (i915_gem_object_has_active_reference(obj)) {
-		i915_gem_object_clear_active_reference(obj);
-		i915_gem_object_put(obj);
-	}
+	i915_gem_object_put(obj); /* and drop the active reference */
 }
 
 static struct i915_vma *
@@ -443,7 +440,7 @@ void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags)
 	if (flags & I915_VMA_RELEASE_MAP)
 		i915_gem_object_unpin_map(obj);
 
-	__i915_gem_object_release_unless_active(obj);
+	i915_gem_object_put(obj);
 }
 
 bool i915_vma_misplaced(const struct i915_vma *vma,
@@ -940,12 +937,12 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	if (!vma->active.count)
-		obj->active_count++;
+	if (!vma->active.count && !obj->active_count++)
+		i915_gem_object_get(obj); /* once more for the active ref */
 
 	if (unlikely(i915_active_ref(&vma->active, rq->fence.context, rq))) {
-		if (!vma->active.count)
-			obj->active_count--;
+		if (!vma->active.count && !--obj->active_count)
+			i915_gem_object_put(obj);
 		return -ENOMEM;
 	}
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 5c9dbcaa3a33..950a2c55aa96 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -870,11 +870,6 @@ static int live_all_engines(void *arg)
 		GEM_BUG_ON(err);
 		request[id]->batch = batch;
 
-		if (!i915_gem_object_has_active_reference(batch->obj)) {
-			i915_gem_object_get(batch->obj);
-			i915_gem_object_set_active_reference(batch->obj);
-		}
-
 		i915_vma_lock(batch);
 		err = i915_vma_move_to_active(batch, request[id], 0);
 		i915_vma_unlock(batch);
@@ -997,9 +992,6 @@ static int live_sequential_engines(void *arg)
 		i915_vma_unlock(batch);
 		GEM_BUG_ON(err);
 
-		i915_gem_object_set_active_reference(batch->obj);
-		i915_vma_get(batch);
-
 		i915_request_get(request[id]);
 		i915_request_add(request[id]);
 
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c
index d845d1dc7ce7..02306aa83d2a 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.c
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c
@@ -79,15 +79,8 @@ static int move_to_active(struct i915_vma *vma,
 	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, flags);
 	i915_vma_unlock(vma);
-	if (err)
-		return err;
-
-	if (!i915_gem_object_has_active_reference(vma->obj)) {
-		i915_gem_object_get(vma->obj);
-		i915_gem_object_set_active_reference(vma->obj);
-	}
 
-	return 0;
+	return err;
 }
 
 struct i915_request *
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH 39/45] drm/i915: Move object close under its own lock
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (36 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 38/45] drm/i915: Drop the deferred active reference Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:19 ` [PATCH 40/45] drm/i915: Rename intel_context.active to .inflight Chris Wilson
                   ` (13 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

Use i915_gem_object_lock() to guard the LUT and active reference to
allow us to break free of struct_mutex for handling GEM_CLOSE.
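
A simplified userspace sketch of the per-object locking idea follows
(a pthread mutex and a hand-rolled list stand in for
i915_gem_object_lock() and obj->lut_list; none of the names below are
the real driver API): the close path only ever takes the lock of the
object whose lookup entries it is pruning.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct lut_handle {
	struct lut_handle *next;
	void *ctx;           /* owning context, stand-in for i915_gem_context */
	unsigned int handle;
};

struct object {
	pthread_mutex_t lock;        /* stand-in for i915_gem_object_lock() */
	struct lut_handle *lut_list; /* handle->vma lookups for this object */
};

/* Remove every lookup entry belonging to @ctx, under the object lock only. */
static void object_close_luts(struct object *obj, void *ctx)
{
	struct lut_handle **p, *lut;

	pthread_mutex_lock(&obj->lock);
	p = &obj->lut_list;
	while ((lut = *p)) {
		if (lut->ctx == ctx) {
			*p = lut->next;
			free(lut);
		} else {
			p = &lut->next;
		}
	}
	pthread_mutex_unlock(&obj->lock);
}

int main(void)
{
	struct object obj = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct lut_handle *lut = calloc(1, sizeof(*lut));
	void *ctx = &obj; /* any distinct pointer serves as a context id here */

	lut->ctx = ctx;
	lut->handle = 1;
	obj.lut_list = lut;

	object_close_luts(&obj, ctx);
	printf("lut_list now %s\n", obj.lut_list ? "non-empty" : "empty");
	return 0;
}

Because both the list and the lock live in the object itself, closing one
context's handles never has to serialise against unrelated work behind a
device-wide mutex.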

Testcase: igt/gem_close_race
Testcase: igt/gem_exec_parallel
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 76 ++++++++++---------
 .../gpu/drm/i915/gem/i915_gem_context_types.h | 12 +--
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 23 ++++--
 drivers/gpu/drm/i915/gem/i915_gem_object.c    | 38 ++++++----
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  1 -
 .../gpu/drm/i915/gem/selftests/mock_context.c |  1 -
 drivers/gpu/drm/i915/i915_drv.h               |  4 +-
 drivers/gpu/drm/i915/i915_gem.c               |  1 +
 drivers/gpu/drm/i915/i915_gem_gtt.c           |  1 +
 drivers/gpu/drm/i915/i915_timeline.c          | 13 ++--
 drivers/gpu/drm/i915/i915_vma.c               | 42 ++++++----
 drivers/gpu/drm/i915/i915_vma.h               | 17 ++---
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 +
 13 files changed, 132 insertions(+), 98 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index b61c63618b87..d4f13278d5b6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -95,24 +95,42 @@ void i915_lut_handle_free(struct i915_lut_handle *lut)
 
 static void lut_close(struct i915_gem_context *ctx)
 {
-	struct i915_lut_handle *lut, *ln;
 	struct radix_tree_iter iter;
 	void __rcu **slot;
 
-	list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) {
-		list_del(&lut->obj_link);
-		i915_lut_handle_free(lut);
-	}
-	INIT_LIST_HEAD(&ctx->handles_list);
+	lockdep_assert_held(&ctx->mutex);
 
 	rcu_read_lock();
 	radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
 		struct i915_vma *vma = rcu_dereference_raw(*slot);
-
-		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
-
-		vma->open_count--;
-		i915_vma_put(vma);
+		struct drm_i915_gem_object *obj = vma->obj;
+		struct i915_lut_handle *lut;
+		bool found = false;
+
+		rcu_read_unlock();
+		i915_gem_object_lock(obj);
+		list_for_each_entry(lut, &obj->lut_list, obj_link) {
+			if (lut->ctx != ctx)
+				continue;
+
+			if (lut->handle != iter.index)
+				continue;
+
+			list_del(&lut->obj_link);
+			i915_lut_handle_free(lut);
+			found = true;
+			break;
+		}
+		i915_gem_object_unlock(obj);
+		rcu_read_lock();
+
+		if (found) {
+			radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
+			if (atomic_dec_and_test(&vma->open_count) &&
+			    !i915_vma_is_ggtt(vma))
+				i915_vma_close(vma);
+			i915_gem_object_put(obj);
+		}
 	}
 	rcu_read_unlock();
 }
@@ -238,15 +256,9 @@ static void free_engines(struct i915_gem_engines *e)
 	__free_engines(e, e->num_engines);
 }
 
-static void free_engines_rcu(struct work_struct *wrk)
+static void free_engines_rcu(struct rcu_head *rcu)
 {
-	struct i915_gem_engines *e =
-		container_of(wrk, struct i915_gem_engines, rcu.work);
-	struct drm_i915_private *i915 = e->i915;
-
-	mutex_lock(&i915->drm.struct_mutex);
-	free_engines(e);
-	mutex_unlock(&i915->drm.struct_mutex);
+	free_engines(container_of(rcu, struct i915_gem_engines, rcu));
 }
 
 static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
@@ -259,7 +271,7 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
 	if (!e)
 		return ERR_PTR(-ENOMEM);
 
-	e->i915 = ctx->i915;
+	init_rcu_head(&e->rcu);
 	for_each_engine(engine, ctx->i915, id) {
 		struct intel_context *ce;
 
@@ -347,7 +359,10 @@ void i915_gem_context_release(struct kref *ref)
 
 static void context_close(struct i915_gem_context *ctx)
 {
+	mutex_lock(&ctx->mutex);
+
 	i915_gem_context_set_closed(ctx);
+	ctx->file_priv = ERR_PTR(-EBADF);
 
 	/*
 	 * This context will never again be assinged to HW, so we can
@@ -362,7 +377,7 @@ static void context_close(struct i915_gem_context *ctx)
 	 */
 	lut_close(ctx);
 
-	ctx->file_priv = ERR_PTR(-EBADF);
+	mutex_unlock(&ctx->mutex);
 	i915_gem_context_put(ctx);
 }
 
@@ -417,7 +432,6 @@ __create_context(struct drm_i915_private *dev_priv)
 	RCU_INIT_POINTER(ctx->engines, e);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
-	INIT_LIST_HEAD(&ctx->handles_list);
 	INIT_LIST_HEAD(&ctx->hw_id_link);
 
 	/* NB: Mark all slices as needing a remap so that when the context first
@@ -760,9 +774,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 	return 0;
 
 err_ctx:
-	mutex_lock(&i915->drm.struct_mutex);
 	context_close(ctx);
-	mutex_unlock(&i915->drm.struct_mutex);
 err:
 	idr_destroy(&file_priv->vm_idr);
 	idr_destroy(&file_priv->context_idr);
@@ -775,8 +787,6 @@ void i915_gem_context_close(struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 
-	lockdep_assert_held(&file_priv->dev_priv->drm.struct_mutex);
-
 	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
 	idr_destroy(&file_priv->context_idr);
 	mutex_destroy(&file_priv->context_idr_lock);
@@ -1084,7 +1094,9 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
 		goto unlock;
 
 	/* Teardown the existing obj:vma cache, it will have to be rebuilt. */
+	mutex_lock(&ctx->mutex);
 	lut_close(ctx);
+	mutex_unlock(&ctx->mutex);
 
 	old = __set_ppgtt(ctx, ppgtt);
 
@@ -1599,7 +1611,7 @@ set_engines(struct i915_gem_context *ctx,
 	if (!set.engines)
 		return -ENOMEM;
 
-	set.engines->i915 = ctx->i915;
+	init_rcu_head(&set.engines->rcu);
 	for (n = 0; n < num_engines; n++) {
 		struct i915_engine_class_instance ci;
 		struct intel_engine_cs *engine;
@@ -1653,8 +1665,7 @@ set_engines(struct i915_gem_context *ctx,
 	rcu_swap_protected(ctx->engines, set.engines, 1);
 	mutex_unlock(&ctx->engines_mutex);
 
-	INIT_RCU_WORK(&set.engines->rcu, free_engines_rcu);
-	queue_rcu_work(system_wq, &set.engines->rcu);
+	call_rcu(&set.engines->rcu, free_engines_rcu);
 
 	return 0;
 }
@@ -1852,7 +1863,7 @@ static int clone_engines(struct i915_gem_context *dst,
 	if (!clone)
 		goto err_unlock;
 
-	clone->i915 = dst->i915;
+	init_rcu_head(&clone->rcu);
 	for (n = 0; n < e->num_engines; n++) {
 		struct intel_engine_cs *engine;
 
@@ -2124,9 +2135,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	return 0;
 
 err_ctx:
-	mutex_lock(&dev->struct_mutex);
 	context_close(ext_data.ctx);
-	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
 
@@ -2151,10 +2160,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 	if (!ctx)
 		return -ENOENT;
 
-	mutex_lock(&dev->struct_mutex);
 	context_close(ctx);
-	mutex_unlock(&dev->struct_mutex);
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index fb965ded2508..3db7448b9732 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -30,8 +30,7 @@ struct i915_timeline;
 struct intel_ring;
 
 struct i915_gem_engines {
-	struct rcu_work rcu;
-	struct drm_i915_private *i915;
+	struct rcu_head rcu;
 	unsigned int num_engines;
 	struct intel_context *engines[];
 };
@@ -192,17 +191,12 @@ struct i915_gem_context {
 	/** remap_slice: Bitmask of cache lines that need remapping */
 	u8 remap_slice;
 
-	/** handles_vma: rbtree to look up our context specific obj/vma for
+	/**
+	 * handles_vma: rbtree to look up our context specific obj/vma for
 	 * the user handle. (user handles are per fd, but the binding is
 	 * per vm, which may be one per context or shared with the global GTT)
 	 */
 	struct radix_tree_root handles_vma;
-
-	/** handles_list: reverse list of all the rbtree entries in use for
-	 * this context, which allows us to free all the allocations on
-	 * context close.
-	 */
-	struct list_head handles_list;
 };
 
 #endif /* __I915_GEM_CONTEXT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 080f69358224..5133787480ea 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -802,9 +802,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	unsigned int i, batch;
 	int err;
 
-	if (unlikely(i915_gem_context_is_closed(eb->gem_context)))
-		return -ENOENT;
-
 	if (unlikely(i915_gem_context_is_banned(eb->gem_context)))
 		return -EIO;
 
@@ -813,6 +810,12 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 
 	batch = eb_batch_index(eb);
 
+	mutex_lock(&eb->gem_context->mutex);
+	if (unlikely(i915_gem_context_is_closed(eb->gem_context))) {
+		err = -ENOENT;
+		goto err_ctx;
+	}
+
 	for (i = 0; i < eb->buffer_count; i++) {
 		u32 handle = eb->exec[i].handle;
 		struct i915_lut_handle *lut;
@@ -847,12 +850,14 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 		}
 
 		/* transfer ref to ctx */
-		if (!vma->open_count++)
+		if (!atomic_fetch_inc(&vma->open_count))
 			i915_vma_reopen(vma);
-		list_add(&lut->obj_link, &obj->lut_list);
-		list_add(&lut->ctx_link, &eb->gem_context->handles_list);
-		lut->ctx = eb->gem_context;
 		lut->handle = handle;
+		lut->ctx = eb->gem_context;
+
+		i915_gem_object_lock(obj);
+		list_add(&lut->obj_link, &obj->lut_list);
+		i915_gem_object_unlock(obj);
 
 add_vma:
 		err = eb_add_vma(eb, i, batch, vma);
@@ -865,6 +870,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 			   eb_vma_misplaced(&eb->exec[i], vma, eb->flags[i]));
 	}
 
+	mutex_unlock(&eb->gem_context->mutex);
+
 	eb->args->flags |= __EXEC_VALIDATED;
 	return eb_reserve(eb);
 
@@ -872,6 +879,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	i915_gem_object_put(obj);
 err_vma:
 	eb->vma[i] = NULL;
+err_ctx:
+	mutex_unlock(&eb->gem_context->mutex);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index c178cf7614c6..29deae2eec93 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -127,39 +127,47 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 
 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
 {
-	struct drm_i915_private *i915 = to_i915(gem->dev);
 	struct drm_i915_gem_object *obj = to_intel_bo(gem);
 	struct drm_i915_file_private *fpriv = file->driver_priv;
 	struct i915_lut_handle *lut, *ln;
+	LIST_HEAD(close);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
+	i915_gem_object_lock(obj);
 	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
 		struct i915_gem_context *ctx = lut->ctx;
-		struct i915_vma *vma;
 
-		GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
 		if (ctx->file_priv != fpriv)
 			continue;
 
-		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
-		GEM_BUG_ON(vma->obj != obj);
+		i915_gem_context_get(ctx);
+		list_move(&lut->obj_link, &close);
+	}
+	i915_gem_object_unlock(obj);
+
+	list_for_each_entry_safe(lut, ln, &close, obj_link) {
+		struct i915_gem_context *ctx = lut->ctx;
+		struct i915_vma *vma;
 
-		/* We allow the process to have multiple handles to the same
+		/*
+		 * We allow the process to have multiple handles to the same
 		 * vma, in the same fd namespace, by virtue of flink/open.
 		 */
-		GEM_BUG_ON(!vma->open_count);
-		if (!--vma->open_count && !i915_vma_is_ggtt(vma))
-			i915_vma_close(vma);
 
-		list_del(&lut->obj_link);
-		list_del(&lut->ctx_link);
+		mutex_lock(&ctx->mutex);
+		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
+		if (vma) {
+			GEM_BUG_ON(vma->obj != obj);
+			GEM_BUG_ON(!atomic_read(&vma->open_count));
+			if (atomic_dec_and_test(&vma->open_count) &&
+			    !i915_vma_is_ggtt(vma))
+				i915_vma_close(vma);
+		}
+		mutex_unlock(&ctx->mutex);
 
+		i915_gem_context_put(lut->ctx);
 		i915_lut_handle_free(lut);
 		i915_gem_object_put(obj);
 	}
-
-	mutex_unlock(&i915->drm.struct_mutex);
 }
 
 static bool discard_backing_storage(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 133581339d81..4a5332662bf9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -24,7 +24,6 @@ struct drm_i915_gem_object;
  */
 struct i915_lut_handle {
 	struct list_head obj_link;
-	struct list_head ctx_link;
 	struct i915_gem_context *ctx;
 	u32 handle;
 };
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index d373d7238622..080c44509ba9 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -30,7 +30,6 @@ mock_context(struct drm_i915_private *i915,
 	RCU_INIT_POINTER(ctx->engines, e);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
-	INIT_LIST_HEAD(&ctx->handles_list);
 	INIT_LIST_HEAD(&ctx->hw_id_link);
 	mutex_init(&ctx->mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index dcfc8a54d63c..e328b8dd8f47 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2000,10 +2000,12 @@ struct drm_i915_private {
 		} timelines;
 
 		struct list_head active_rings;
-		struct list_head closed_vma;
 
 		struct intel_wakeref wakeref;
 
+		struct list_head closed_vma;
+		spinlock_t closed_lock; /* guards the list of closed_vma */
+
 		/**
 		 * Is the GPU currently considered idle, or busy executing
 		 * userspace requests? Whilst idle, we allow runtime power
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2a9e8ecf2926..90923fa6603d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1732,6 +1732,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
 
 	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
 	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
+	spin_lock_init(&dev_priv->gt.closed_lock);
 
 	i915_gem_init__mm(dev_priv);
 	i915_gem_init__pm(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 56be4b091bc7..f26eaa81e7fa 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1929,6 +1929,7 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
 	vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
 
 	INIT_LIST_HEAD(&vma->obj_link);
+	INIT_LIST_HEAD(&vma->closed_link);
 
 	mutex_lock(&vma->vm->mutex);
 	list_add(&vma->vm_link, &vma->vm->unbound_list);
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 5fbea0892f33..000e1a9b6750 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -61,7 +61,7 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
 
 	BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);
 
-	spin_lock(&gt->hwsp_lock);
+	spin_lock_irq(&gt->hwsp_lock);
 
 	/* hwsp_free_list only contains HWSP that have available cachelines */
 	hwsp = list_first_entry_or_null(&gt->hwsp_free_list,
@@ -69,7 +69,7 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
 	if (!hwsp) {
 		struct i915_vma *vma;
 
-		spin_unlock(&gt->hwsp_lock);
+		spin_unlock_irq(&gt->hwsp_lock);
 
 		hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
 		if (!hwsp)
@@ -86,7 +86,7 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
 		hwsp->free_bitmap = ~0ull;
 		hwsp->gt = gt;
 
-		spin_lock(&gt->hwsp_lock);
+		spin_lock_irq(&gt->hwsp_lock);
 		list_add(&hwsp->free_link, &gt->hwsp_free_list);
 	}
 
@@ -96,7 +96,7 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
 	if (!hwsp->free_bitmap)
 		list_del(&hwsp->free_link);
 
-	spin_unlock(&gt->hwsp_lock);
+	spin_unlock_irq(&gt->hwsp_lock);
 
 	GEM_BUG_ON(hwsp->vma->private != hwsp);
 	return hwsp->vma;
@@ -105,8 +105,9 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
 static void __idle_hwsp_free(struct i915_timeline_hwsp *hwsp, int cacheline)
 {
 	struct i915_gt_timelines *gt = hwsp->gt;
+	unsigned long flags;
 
-	spin_lock(&gt->hwsp_lock);
+	spin_lock_irqsave(&gt->hwsp_lock, flags);
 
 	/* As a cacheline becomes available, publish the HWSP on the freelist */
 	if (!hwsp->free_bitmap)
@@ -122,7 +123,7 @@ static void __idle_hwsp_free(struct i915_timeline_hwsp *hwsp, int cacheline)
 		kfree(hwsp);
 	}
 
-	spin_unlock(&gt->hwsp_lock);
+	spin_unlock_irqrestore(&gt->hwsp_lock, flags);
 }
 
 static void __idle_cacheline_free(struct i915_timeline_cacheline *cl)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 16d47f1f645a..392c2757c217 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -143,6 +143,8 @@ vma_create(struct drm_i915_gem_object *obj,
 	vma->size = obj->base.size;
 	vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
 
+	INIT_LIST_HEAD(&vma->closed_link);
+
 	if (view && view->type != I915_GGTT_VIEW_NORMAL) {
 		vma->ggtt_view = *view;
 		if (view->type == I915_GGTT_VIEW_PARTIAL) {
@@ -785,10 +787,10 @@ int __i915_vma_do_pin(struct i915_vma *vma,
 
 void i915_vma_close(struct i915_vma *vma)
 {
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	struct drm_i915_private *i915 = vma->vm->i915;
+	unsigned long flags;
 
 	GEM_BUG_ON(i915_vma_is_closed(vma));
-	vma->flags |= I915_VMA_CLOSED;
 
 	/*
 	 * We defer actually closing, unbinding and destroying the VMA until
@@ -802,17 +804,26 @@ void i915_vma_close(struct i915_vma *vma)
 	 * causing us to rebind the VMA once more. This ends up being a lot
 	 * of wasted work for the steady state.
 	 */
-	list_add_tail(&vma->closed_link, &vma->vm->i915->gt.closed_vma);
+	spin_lock_irqsave(&i915->gt.closed_lock, flags);
+	list_add(&vma->closed_link, &i915->gt.closed_vma);
+	spin_unlock_irqrestore(&i915->gt.closed_lock, flags);
 }
 
-void i915_vma_reopen(struct i915_vma *vma)
+static void __i915_vma_remove_closed(struct i915_vma *vma)
 {
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	struct drm_i915_private *i915 = vma->vm->i915;
 
-	if (vma->flags & I915_VMA_CLOSED) {
-		vma->flags &= ~I915_VMA_CLOSED;
-		list_del(&vma->closed_link);
-	}
+	if (!i915_vma_is_closed(vma))
+		return;
+
+	spin_lock_irq(&i915->gt.closed_lock);
+	list_del_init(&vma->closed_link);
+	spin_unlock_irq(&i915->gt.closed_lock);
+}
+
+void i915_vma_reopen(struct i915_vma *vma)
+{
+	__i915_vma_remove_closed(vma);
 }
 
 static void __i915_vma_destroy(struct i915_vma *vma)
@@ -846,8 +857,7 @@ void i915_vma_destroy(struct i915_vma *vma)
 
 	GEM_BUG_ON(i915_vma_is_pinned(vma));
 
-	if (i915_vma_is_closed(vma))
-		list_del(&vma->closed_link);
+	__i915_vma_remove_closed(vma);
 
 	WARN_ON(i915_vma_unbind(vma));
 	GEM_BUG_ON(i915_vma_is_active(vma));
@@ -859,12 +869,16 @@ void i915_vma_parked(struct drm_i915_private *i915)
 {
 	struct i915_vma *vma, *next;
 
+	spin_lock_irq(&i915->gt.closed_lock);
 	list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) {
-		GEM_BUG_ON(!i915_vma_is_closed(vma));
+		list_del_init(&vma->closed_link);
+		spin_unlock_irq(&i915->gt.closed_lock);
+
 		i915_vma_destroy(vma);
-	}
 
-	GEM_BUG_ON(!list_empty(&i915->gt.closed_vma));
+		spin_lock_irq(&i915->gt.closed_lock);
+	}
+	spin_unlock_irq(&i915->gt.closed_lock);
 }
 
 static void __i915_vma_iounmap(struct i915_vma *vma)
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 71ac7ee8620a..f0884cc8b2d3 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -69,7 +69,7 @@ struct i915_vma {
 	 * handles (but same file) for execbuf, i.e. the number of aliases
 	 * that exist in the ctx->handle_vmas LUT for this vma.
 	 */
-	unsigned int open_count;
+	atomic_t open_count;
 	unsigned long flags;
 	/**
 	 * How many users have pinned this object in GTT space.
@@ -104,10 +104,9 @@ struct i915_vma {
 
 #define I915_VMA_GGTT		BIT(11)
 #define I915_VMA_CAN_FENCE	BIT(12)
-#define I915_VMA_CLOSED		BIT(13)
-#define I915_VMA_USERFAULT_BIT	14
+#define I915_VMA_USERFAULT_BIT	13
 #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
-#define I915_VMA_GGTT_WRITE	BIT(15)
+#define I915_VMA_GGTT_WRITE	BIT(14)
 
 	struct i915_active active;
 	struct i915_active_request last_fence;
@@ -190,11 +189,6 @@ static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma)
 	return vma->flags & I915_VMA_CAN_FENCE;
 }
 
-static inline bool i915_vma_is_closed(const struct i915_vma *vma)
-{
-	return vma->flags & I915_VMA_CLOSED;
-}
-
 static inline bool i915_vma_set_userfault(struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
@@ -211,6 +205,11 @@ static inline bool i915_vma_has_userfault(const struct i915_vma *vma)
 	return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
 }
 
+static inline bool i915_vma_is_closed(const struct i915_vma *vma)
+{
+	return !list_empty(&vma->closed_link);
+}
+
 static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 576bb11b7858..ca8d73e6414e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -204,6 +204,7 @@ struct drm_i915_private *mock_gem_device(void)
 
 	INIT_LIST_HEAD(&i915->gt.active_rings);
 	INIT_LIST_HEAD(&i915->gt.closed_vma);
+	spin_lock_init(&i915->gt.closed_lock);
 
 	mutex_lock(&i915->drm.struct_mutex);
 
-- 
2.20.1


* [PATCH 40/45] drm/i915: Rename intel_context.active to .inflight
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (37 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 39/45] drm/i915: Move object close under its own lock Chris Wilson
@ 2019-04-25  9:19 ` Chris Wilson
  2019-04-25  9:20 ` [PATCH 41/45] drm/i915: Keep contexts pinned until after the next kernel context switch Chris Wilson
                   ` (12 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:19 UTC (permalink / raw)
  To: intel-gfx

Rename the field tracking the engine this HW context is currently
active upon (i.e. the engine we are flying upon) to disambiguate it from
the mixture of other 'active' terms (and to prevent a conflict in future
patches).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_context_types.h |  2 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 22 +++++++++----------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index d5a7dbd0daee..47f2970ab5b7 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -36,7 +36,7 @@ struct intel_context {
 
 	struct i915_gem_context *gem_context;
 	struct intel_engine_cs *engine;
-	struct intel_engine_cs *active;
+	struct intel_engine_cs *inflight;
 
 	struct list_head signal_link;
 	struct list_head signals;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 524de9eaa44c..b277c0750943 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -459,7 +459,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 		__i915_request_unsubmit(rq);
 		unwind_wa_tail(rq);
 
-		GEM_BUG_ON(rq->hw_context->active);
+		GEM_BUG_ON(rq->hw_context->inflight);
 
 		/*
 		 * Push the request back into the queue for later resubmission.
@@ -556,11 +556,11 @@ execlists_user_end(struct intel_engine_execlists *execlists)
 static inline void
 execlists_context_schedule_in(struct i915_request *rq)
 {
-	GEM_BUG_ON(rq->hw_context->active);
+	GEM_BUG_ON(rq->hw_context->inflight);
 
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
 	intel_engine_context_in(rq->engine);
-	rq->hw_context->active = rq->engine;
+	rq->hw_context->inflight = rq->engine;
 }
 
 static void kick_siblings(struct i915_request *rq)
@@ -575,7 +575,7 @@ static void kick_siblings(struct i915_request *rq)
 static inline void
 execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
 {
-	rq->hw_context->active = NULL;
+	rq->hw_context->inflight = NULL;
 	intel_engine_context_out(rq->engine);
 	execlists_context_status_change(rq, status);
 	trace_i915_request_out(rq);
@@ -816,7 +816,7 @@ static bool virtual_matches(const struct virtual_engine *ve,
 			    const struct i915_request *rq,
 			    const struct intel_engine_cs *engine)
 {
-	const struct intel_engine_cs *active;
+	const struct intel_engine_cs *inflight;
 
 	if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
 		return false;
@@ -830,8 +830,8 @@ static bool virtual_matches(const struct virtual_engine *ve,
 	 * we reuse the register offsets). This is a very small
 	 * hystersis on the greedy seelction algorithm.
 	 */
-	active = READ_ONCE(ve->context.active);
-	if (active && active != engine)
+	inflight = READ_ONCE(ve->context.inflight);
+	if (inflight && inflight != engine)
 		return false;
 
 	return true;
@@ -1003,7 +1003,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				u32 *regs = ve->context.lrc_reg_state;
 				unsigned int n;
 
-				GEM_BUG_ON(READ_ONCE(ve->context.active));
+				GEM_BUG_ON(READ_ONCE(ve->context.inflight));
 				virtual_update_register_offsets(regs, engine);
 
 				/*
@@ -1478,7 +1478,7 @@ static void execlists_context_unpin(struct intel_context *ce)
 	 * had the chance to run yet; let it run before we teardown the
 	 * reference it may use.
 	 */
-	engine = READ_ONCE(ce->active);
+	engine = READ_ONCE(ce->inflight);
 	if (unlikely(engine)) {
 		unsigned long flags;
 
@@ -1486,7 +1486,7 @@ static void execlists_context_unpin(struct intel_context *ce)
 		process_csb(engine);
 		spin_unlock_irqrestore(&engine->timeline.lock, flags);
 
-		GEM_BUG_ON(READ_ONCE(ce->active));
+		GEM_BUG_ON(READ_ONCE(ce->inflight));
 	}
 
 	i915_gem_context_unpin_hw_id(ce->gem_context);
@@ -3087,7 +3087,7 @@ static void virtual_context_destroy(struct kref *kref)
 	unsigned int n;
 
 	GEM_BUG_ON(ve->request);
-	GEM_BUG_ON(ve->context.active);
+	GEM_BUG_ON(ve->context.inflight);
 
 	for (n = 0; n < ve->num_siblings; n++) {
 		struct intel_engine_cs *sibling = ve->siblings[n];
-- 
2.20.1


* [PATCH 41/45] drm/i915: Keep contexts pinned until after the next kernel context switch
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (38 preceding siblings ...)
  2019-04-25  9:19 ` [PATCH 40/45] drm/i915: Rename intel_context.active to .inflight Chris Wilson
@ 2019-04-25  9:20 ` Chris Wilson
  2019-04-25  9:20 ` [PATCH 42/45] drm/i915: Stop retiring along engine Chris Wilson
                   ` (11 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:20 UTC (permalink / raw)
  To: intel-gfx

We need to keep the context image pinned in memory until after the GPU
has finished writing into it. Since it continues to write as we signal
the final breadcrumb, we need to keep it pinned until the following
request has completed. Currently we rely on knowing the order in which
requests execute on each engine; to remove that presumption, we instead
need to identify a request/context-switch that we know must occur after
our completion. Any request queued after the signal must imply a context
switch; for simplicity we use a fresh request from the kernel context.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 24 ++----
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |  1 -
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        | 17 ++++
 drivers/gpu/drm/i915/gt/intel_context.c       | 80 +++++++++++++++---
 drivers/gpu/drm/i915/gt/intel_context.h       |  3 +
 drivers/gpu/drm/i915/gt/intel_context_types.h |  6 +-
 drivers/gpu/drm/i915/gt/intel_engine.h        |  2 -
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 23 +-----
 drivers/gpu/drm/i915/gt/intel_engine_pm.c     |  2 +
 drivers/gpu/drm/i915/gt/intel_engine_types.h  | 13 +--
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 62 ++------------
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    | 44 +---------
 drivers/gpu/drm/i915/gt/mock_engine.c         | 11 +--
 drivers/gpu/drm/i915/i915_active.c            | 81 ++++++++++++++++++-
 drivers/gpu/drm/i915/i915_active.h            |  5 ++
 drivers/gpu/drm/i915/i915_active_types.h      |  3 +
 drivers/gpu/drm/i915/i915_gem.c               |  4 -
 drivers/gpu/drm/i915/i915_request.c           | 15 ----
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 -
 19 files changed, 213 insertions(+), 184 deletions(-)
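
For illustration, the core idea (defer the final unpin until a later
kernel-context request has completed, and hence a context switch away
from us has definitely happened) can be reduced to a small sketch. The
names below (struct context, context_retire and
kernel_context_request_retired) are hypothetical stand-ins, not the
i915_active machinery the diff actually uses:

#include <assert.h>
#include <stdbool.h>

struct context {
	int pin_count;		/* while > 0 the context image stays resident */
	bool barrier_pending;	/* an unpin is owed once a switch is seen */
};

/*
 * The context's own last request retired: do NOT unpin yet, the GPU may
 * keep writing the image until it has switched to another context.
 */
void context_retire(struct context *ce)
{
	ce->barrier_pending = true;
}

/*
 * A later request from the kernel context completed.  Any request
 * queued after ours implies a context switch, so the image is quiescent
 * and the deferred unpin is now safe.
 */
void kernel_context_request_retired(struct context *ce)
{
	if (ce->barrier_pending) {
		ce->barrier_pending = false;
		ce->pin_count--;
	}
}

int main(void)
{
	struct context ce = { .pin_count = 1, .barrier_pending = false };

	context_retire(&ce);			/* still pinned */
	assert(ce.pin_count == 1);

	kernel_context_request_retired(&ce);	/* switch confirmed */
	assert(ce.pin_count == 0);
	return 0;
}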

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index d4f13278d5b6..3d0a7af096f6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -676,17 +676,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
 	return 0;
 }
 
-void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-	for_each_engine(engine, dev_priv, id)
-		intel_engine_lost_context(engine);
-}
-
 void i915_gem_contexts_fini(struct drm_i915_private *i915)
 {
 	lockdep_assert_held(&i915->drm.struct_mutex);
@@ -1174,10 +1163,6 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 	if (ret)
 		goto out_add;
 
-	ret = gen8_emit_rpcs_config(rq, ce, sseu);
-	if (ret)
-		goto out_add;
-
 	/*
 	 * Guarantee context image and the timeline remains pinned until the
 	 * modifying request is retired by setting the ce activity tracker.
@@ -1185,9 +1170,12 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 	 * But we only need to take one pin on the account of it. Or in other
 	 * words transfer the pinned ce object to tracked active request.
 	 */
-	if (!i915_active_request_isset(&ce->active_tracker))
-		__intel_context_pin(ce);
-	__i915_active_request_set(&ce->active_tracker, rq);
+	GEM_BUG_ON(i915_active_is_idle(&ce->active));
+	ret = i915_active_ref(&ce->active, rq->fence.context, rq);
+	if (ret)
+		goto out_add;
+
+	ret = gen8_emit_rpcs_config(rq, ce, sseu);
 
 out_add:
 	i915_request_add(rq);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 630392c77e48..9691dd062f72 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -134,7 +134,6 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
 
 /* i915_gem_context.c */
 int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv);
-void i915_gem_contexts_lost(struct drm_i915_private *dev_priv);
 void i915_gem_contexts_fini(struct drm_i915_private *dev_priv);
 
 int i915_gem_context_open(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 7e3511773fc1..6e8f86de1ed9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -10,6 +10,22 @@
 #include "i915_drv.h"
 #include "i915_globals.h"
 
+static void call_idle_barriers(struct intel_engine_cs *engine)
+{
+	struct llist_node *node, *next;
+
+	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
+		struct i915_active_request *active =
+			container_of((struct list_head *)node,
+				     typeof(*active), link);
+
+		INIT_LIST_HEAD(&active->link);
+		RCU_INIT_POINTER(active->request, NULL);
+
+		active->retire(active, NULL);
+	}
+}
+
 static void i915_gem_park(struct drm_i915_private *i915)
 {
 	struct intel_engine_cs *engine;
@@ -33,6 +49,7 @@ static void i915_gem_park(struct drm_i915_private *i915)
 		}
 		tasklet_kill(&engine->execlists.tasklet);
 
+		call_idle_barriers(engine); /* cleanup after wedging */
 		i915_gem_batch_pool_fini(&engine->batch_pool);
 	}
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 7e2b18ddda19..0102f6bb62ec 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -61,7 +61,6 @@ int __intel_context_do_pin(struct intel_context *ce)
 
 		i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */
 
-		intel_context_get(ce);
 		smp_mb__before_atomic(); /* flush pin before it is visible */
 	}
 
@@ -89,20 +88,45 @@ void intel_context_unpin(struct intel_context *ce)
 		ce->ops->unpin(ce);
 
 		i915_gem_context_put(ce->gem_context);
-		intel_context_put(ce);
+		intel_context_inactive(ce);
 	}
 
 	mutex_unlock(&ce->pin_mutex);
 	intel_context_put(ce);
 }
 
-static void intel_context_retire(struct i915_active_request *active,
-				 struct i915_request *rq)
+static int __context_pin_state(struct i915_vma *vma, unsigned long flags)
 {
-	struct intel_context *ce =
-		container_of(active, typeof(*ce), active_tracker);
+	int err;
 
-	intel_context_unpin(ce);
+	err = i915_vma_pin(vma, 0, 0, flags | PIN_GLOBAL);
+	if (err)
+		return err;
+
+	/*
+	 * And mark it as a globally pinned object to let the shrinker know
+	 * it cannot reclaim the object until we release it.
+	 */
+	vma->obj->pin_global++;
+	vma->obj->mm.dirty = true;
+
+	return 0;
+}
+
+static void __context_unpin_state(struct i915_vma *vma)
+{
+	vma->obj->pin_global--;
+	__i915_vma_unpin(vma);
+}
+
+static void intel_context_retire(struct i915_active *active)
+{
+	struct intel_context *ce = container_of(active, typeof(*ce), active);
+
+	if (ce->state)
+		__context_unpin_state(ce->state);
+
+	intel_context_put(ce);
 }
 
 void
@@ -124,8 +148,46 @@ intel_context_init(struct intel_context *ce,
 
 	mutex_init(&ce->pin_mutex);
 
-	i915_active_request_init(&ce->active_tracker,
-				 NULL, intel_context_retire);
+	i915_active_init(ctx->i915, &ce->active, intel_context_retire);
+}
+
+int intel_context_active(struct intel_context *ce, unsigned long flags)
+{
+	int err;
+
+	if (!i915_active_acquire(&ce->active))
+		return 0;
+
+	intel_context_get(ce);
+
+	if (!ce->state)
+		return 0;
+
+	err = __context_pin_state(ce->state, flags);
+	if (err) {
+		i915_active_cancel(&ce->active);
+		intel_context_put(ce);
+		return err;
+	}
+
+	/* Preallocate tracking nodes */
+	if (!i915_gem_context_is_kernel(ce->gem_context)) {
+		err = i915_active_acquire_preallocate_barrier(&ce->active,
+							      ce->engine);
+		if (err) {
+			i915_active_release(&ce->active);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+void intel_context_inactive(struct intel_context *ce)
+{
+	/* Nodes preallocated in intel_context_active() */
+	i915_active_acquire_barrier(&ce->active);
+	i915_active_release(&ce->active);
 }
 
 static void i915_global_context_shrink(void)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 63392c88cd98..e71629f7c2e0 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -102,6 +102,9 @@ static inline void intel_context_exit(struct intel_context *ce)
 		ce->ops->exit(ce);
 }
 
+int intel_context_active(struct intel_context *ce, unsigned long flags);
+void intel_context_inactive(struct intel_context *ce);
+
 static inline struct intel_context *intel_context_get(struct intel_context *ce)
 {
 	kref_get(&ce->ref);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 47f2970ab5b7..92401d25764b 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -53,10 +53,10 @@ struct intel_context {
 	struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
 
 	/**
-	 * active_tracker: Active tracker for the external rq activity
-	 * on this intel_context object.
+	 * active: Active tracker for the rq activity (inc. external) on this
+	 * intel_context object.
 	 */
-	struct i915_active_request active_tracker;
+	struct i915_active active;
 
 	const struct intel_context_ops *ops;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index f5b0f27cecb6..89463e18cdcc 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -468,8 +468,6 @@ static inline void intel_engine_reset(struct intel_engine_cs *engine,
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 
-void intel_engine_lost_context(struct intel_engine_cs *engine);
-
 void intel_engines_reset_default_submission(struct drm_i915_private *i915);
 unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 5eef7d3bf7e3..62914e796248 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -542,6 +542,8 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
 {
 	int err;
 
+	init_llist_head(&engine->barrier_tasks);
+
 	err = init_status_page(engine);
 	if (err)
 		return err;
@@ -797,6 +799,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 	if (engine->preempt_context)
 		intel_context_unpin(engine->preempt_context);
 	intel_context_unpin(engine->kernel_context);
+	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
 
 	i915_timeline_fini(&engine->timeline);
 
@@ -1122,26 +1125,6 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
 		engine->set_default_submission(engine);
 }
 
-/**
- * intel_engine_lost_context: called when the GPU is reset into unknown state
- * @engine: the engine
- *
- * We have either reset the GPU or otherwise about to lose state tracking of
- * the current GPU logical state (e.g. suspend). On next use, it is therefore
- * imperative that we make no presumptions about the current state and load
- * from scratch.
- */
-void intel_engine_lost_context(struct intel_engine_cs *engine)
-{
-	struct intel_context *ce;
-
-	lockdep_assert_held(&engine->i915->drm.struct_mutex);
-
-	ce = fetch_and_zero(&engine->last_retired_context);
-	if (ce)
-		intel_context_unpin(ce);
-}
-
 bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
 {
 	switch (INTEL_GEN(engine->i915)) {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 3976aea3c1d1..2abc2c810e28 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -71,6 +71,8 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 
 	/* Check again on the next retirement. */
 	engine->wakeref_serial = engine->serial + 1;
+
+	i915_request_add_barriers(rq);
 	__i915_request_commit(rq);
 
 	return false;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index ae73c6596d08..bcede4ddc5ba 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -11,6 +11,7 @@
 #include <linux/irq_work.h>
 #include <linux/kref.h>
 #include <linux/list.h>
+#include <linux/llist.h>
 #include <linux/types.h>
 
 #include "i915_gem.h"
@@ -286,6 +287,7 @@ struct intel_engine_cs {
 	struct intel_ring *buffer;
 
 	struct i915_timeline timeline;
+	struct llist_head barrier_tasks;
 
 	struct intel_context *kernel_context; /* pinned */
 	struct intel_context *preempt_context; /* pinned; optional */
@@ -433,17 +435,6 @@ struct intel_engine_cs {
 
 	struct intel_engine_execlists execlists;
 
-	/* Contexts are pinned whilst they are active on the GPU. The last
-	 * context executed remains active whilst the GPU is idle - the
-	 * switch away and write to the context object only occurs on the
-	 * next execution.  Contexts are only unpinned on retirement of the
-	 * following request ensuring that we can always write to the object
-	 * on the context switch even after idling. Across suspend, we switch
-	 * to the kernel context and trash it as the save may not happen
-	 * before the hardware is powered down.
-	 */
-	struct intel_context *last_retired_context;
-
 	/* status_notifier: list of callbacks for context-switch changes */
 	struct atomic_notifier_head context_status_notifier;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index b277c0750943..07c7b6a5a596 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1441,60 +1441,11 @@ static void execlists_context_destroy(struct kref *kref)
 	intel_context_free(ce);
 }
 
-static int __context_pin(struct i915_vma *vma)
-{
-	unsigned int flags;
-	int err;
-
-	flags = PIN_GLOBAL | PIN_HIGH;
-	flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
-
-	err = i915_vma_pin(vma, 0, 0, flags);
-	if (err)
-		return err;
-
-	vma->obj->pin_global++;
-	vma->obj->mm.dirty = true;
-
-	return 0;
-}
-
-static void __context_unpin(struct i915_vma *vma)
-{
-	vma->obj->pin_global--;
-	__i915_vma_unpin(vma);
-}
-
 static void execlists_context_unpin(struct intel_context *ce)
 {
-	struct intel_engine_cs *engine;
-
-	/*
-	 * The tasklet may still be using a pointer to our state, via an
-	 * old request. However, since we know we only unpin the context
-	 * on retirement of the following request, we know that the last
-	 * request referencing us will have had a completion CS interrupt.
-	 * If we see that it is still active, it means that the tasklet hasn't
-	 * had the chance to run yet; let it run before we teardown the
-	 * reference it may use.
-	 */
-	engine = READ_ONCE(ce->inflight);
-	if (unlikely(engine)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&engine->timeline.lock, flags);
-		process_csb(engine);
-		spin_unlock_irqrestore(&engine->timeline.lock, flags);
-
-		GEM_BUG_ON(READ_ONCE(ce->inflight));
-	}
-
 	i915_gem_context_unpin_hw_id(ce->gem_context);
-
-	intel_ring_unpin(ce->ring);
-
 	i915_gem_object_unpin_map(ce->state->obj);
-	__context_unpin(ce->state);
+	intel_ring_unpin(ce->ring);
 }
 
 static void
@@ -1531,7 +1482,10 @@ __execlists_context_pin(struct intel_context *ce,
 		goto err;
 	GEM_BUG_ON(!ce->state);
 
-	ret = __context_pin(ce->state);
+	ret = intel_context_active(ce,
+				   engine->i915->ggtt.pin_bias |
+				   PIN_OFFSET_BIAS |
+				   PIN_HIGH);
 	if (ret)
 		goto err;
 
@@ -1540,7 +1494,7 @@ __execlists_context_pin(struct intel_context *ce,
 					I915_MAP_OVERRIDE);
 	if (IS_ERR(vaddr)) {
 		ret = PTR_ERR(vaddr);
-		goto unpin_vma;
+		goto unpin_active;
 	}
 
 	ret = intel_ring_pin(ce->ring);
@@ -1561,8 +1515,8 @@ __execlists_context_pin(struct intel_context *ce,
 	intel_ring_unpin(ce->ring);
 unpin_map:
 	i915_gem_object_unpin_map(ce->state->obj);
-unpin_vma:
-	__context_unpin(ce->state);
+unpin_active:
+	intel_context_inactive(ce);
 err:
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 62ad585531b8..b539463931c0 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1363,45 +1363,9 @@ static void __context_unpin_ppgtt(struct i915_gem_context *ctx)
 		gen6_ppgtt_unpin(ppgtt);
 }
 
-static int __context_pin(struct intel_context *ce)
-{
-	struct i915_vma *vma;
-	int err;
-
-	vma = ce->state;
-	if (!vma)
-		return 0;
-
-	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
-	if (err)
-		return err;
-
-	/*
-	 * And mark is as a globally pinned object to let the shrinker know
-	 * it cannot reclaim the object until we release it.
-	 */
-	vma->obj->pin_global++;
-	vma->obj->mm.dirty = true;
-
-	return 0;
-}
-
-static void __context_unpin(struct intel_context *ce)
-{
-	struct i915_vma *vma;
-
-	vma = ce->state;
-	if (!vma)
-		return;
-
-	vma->obj->pin_global--;
-	i915_vma_unpin(vma);
-}
-
 static void ring_context_unpin(struct intel_context *ce)
 {
 	__context_unpin_ppgtt(ce->gem_context);
-	__context_unpin(ce);
 }
 
 static struct i915_vma *
@@ -1491,18 +1455,18 @@ static int ring_context_pin(struct intel_context *ce)
 		ce->state = vma;
 	}
 
-	err = __context_pin(ce);
+	err = intel_context_active(ce, PIN_HIGH);
 	if (err)
 		return err;
 
 	err = __context_pin_ppgtt(ce->gem_context);
 	if (err)
-		goto err_unpin;
+		goto err_active;
 
 	return 0;
 
-err_unpin:
-	__context_unpin(ce);
+err_active:
+	intel_context_inactive(ce);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 6d7562769eb2..b7675ef18523 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -146,12 +146,18 @@ static void mock_context_destroy(struct kref *ref)
 
 static int mock_context_pin(struct intel_context *ce)
 {
+	int ret;
+
 	if (!ce->ring) {
 		ce->ring = mock_ring(ce->engine);
 		if (!ce->ring)
 			return -ENOMEM;
 	}
 
+	ret = intel_context_active(ce, PIN_HIGH);
+	if (ret)
+		return ret;
+
 	mock_timeline_pin(ce->ring->timeline);
 	return 0;
 }
@@ -328,14 +334,9 @@ void mock_engine_free(struct intel_engine_cs *engine)
 {
 	struct mock_engine *mock =
 		container_of(engine, typeof(*mock), base);
-	struct intel_context *ce;
 
 	GEM_BUG_ON(timer_pending(&mock->hw_delay));
 
-	ce = fetch_and_zero(&engine->last_retired_context);
-	if (ce)
-		intel_context_unpin(ce);
-
 	intel_context_unpin(engine->kernel_context);
 
 	intel_engine_fini_breadcrumbs(engine);
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 863ae12707ba..563299e50901 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -100,7 +100,7 @@ active_instance(struct i915_active *ref, u64 idx)
 		parent = *p;
 
 		node = rb_entry(parent, struct active_node, node);
-		if (node->timeline == idx)
+		if (node->timeline == idx && !IS_ERR(node->base.request))
 			goto replace;
 
 		if (node->timeline < idx)
@@ -157,6 +157,7 @@ void i915_active_init(struct drm_i915_private *i915,
 	ref->retire = retire;
 	ref->tree = RB_ROOT;
 	i915_active_request_init(&ref->last, NULL, last_retire);
+	init_llist_head(&ref->barriers);
 	ref->count = 0;
 }
 
@@ -263,6 +264,84 @@ void i915_active_fini(struct i915_active *ref)
 }
 #endif
 
+int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
+					    struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	unsigned long tmp;
+	int err = 0;
+
+	i915_active_acquire(ref);
+	for_each_engine_masked(engine, i915, engine->mask, tmp) {
+		struct intel_context *kctx = engine->kernel_context;
+		struct active_node *node;
+
+		node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
+		if (unlikely(!node)) {
+			err = -ENOMEM;
+			break;
+		}
+
+		i915_active_request_init(&node->base,
+					 (void *)engine, node_retire);
+		node->timeline = kctx->ring->timeline->fence_context;
+		node->ref = ref;
+		ref->count++;
+
+		llist_add((struct llist_node *)&node->base.link,
+			  &ref->barriers);
+	}
+	i915_active_release(ref);
+
+	return err;
+}
+
+void i915_active_acquire_barrier(struct i915_active *ref)
+{
+	struct llist_node *pos, *next;
+
+	i915_active_acquire(ref);
+
+	llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
+		struct intel_engine_cs *engine;
+		struct active_node *node;
+		struct rb_node **p, *parent;
+
+		node = container_of((struct list_head *)pos,
+				    typeof(*node), base.link);
+
+		engine = (void *)rcu_access_pointer(node->base.request);
+		RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
+
+		parent = NULL;
+		p = &ref->tree.rb_node;
+		while (*p) {
+			parent = *p;
+			if (rb_entry(parent,
+				     struct active_node,
+				     node)->timeline < node->timeline)
+				p = &parent->rb_right;
+			else
+				p = &parent->rb_left;
+		}
+		rb_link_node(&node->node, parent, p);
+		rb_insert_color(&node->node, &ref->tree);
+
+		llist_add((struct llist_node *)&node->base.link,
+			  &engine->barrier_tasks);
+	}
+	i915_active_release(ref);
+}
+
+void i915_request_add_barriers(struct i915_request *rq)
+{
+	struct intel_engine_cs *engine = rq->engine;
+	struct llist_node *node, *next;
+
+	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks))
+		list_add_tail((struct list_head *)node, &rq->active_list);
+}
+
 int i915_active_request_set(struct i915_active_request *active,
 			    struct i915_request *rq)
 {
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 7d758719ce39..d55d37673944 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -406,4 +406,9 @@ void i915_active_fini(struct i915_active *ref);
 static inline void i915_active_fini(struct i915_active *ref) { }
 #endif
 
+int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
+					    struct intel_engine_cs *engine);
+void i915_active_acquire_barrier(struct i915_active *ref);
+void i915_request_add_barriers(struct i915_request *rq);
+
 #endif /* _I915_ACTIVE_H_ */
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index b679253b53a5..c025991b9233 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -7,6 +7,7 @@
 #ifndef _I915_ACTIVE_TYPES_H_
 #define _I915_ACTIVE_TYPES_H_
 
+#include <linux/llist.h>
 #include <linux/rbtree.h>
 #include <linux/rcupdate.h>
 
@@ -31,6 +32,8 @@ struct i915_active {
 	unsigned int count;
 
 	void (*retire)(struct i915_active *ref);
+
+	struct llist_head barriers;
 };
 
 #endif /* _I915_ACTIVE_TYPES_H_ */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 90923fa6603d..ad944e12d8be 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1153,10 +1153,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 
 	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
 	intel_runtime_pm_put(i915, wakeref);
-
-	mutex_lock(&i915->drm.struct_mutex);
-	i915_gem_contexts_lost(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 }
 
 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index dc5bce2c0504..d69ea896ef87 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -213,18 +213,6 @@ static void __retire_engine_request(struct intel_engine_cs *engine,
 	spin_unlock(&rq->lock);
 
 	local_irq_enable();
-
-	/*
-	 * The backing object for the context is done after switching to the
-	 * *next* context. Therefore we cannot retire the previous context until
-	 * the next context has already started running. However, since we
-	 * cannot take the required locks at i915_request_submit() we
-	 * defer the unpinning of the active context to now, retirement of
-	 * the subsequent request.
-	 */
-	if (engine->last_retired_context)
-		intel_context_unpin(engine->last_retired_context);
-	engine->last_retired_context = rq->hw_context;
 }
 
 static void __retire_engine_upto(struct intel_engine_cs *engine,
@@ -753,9 +741,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 
 	rq->infix = rq->ring->emit; /* end of header; start of user payload */
 
-	/* Keep a second pin for the dual retirement along engine and ring */
-	__intel_context_pin(ce);
-
 	intel_context_mark_active(ce);
 	return rq;
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index ca8d73e6414e..23c02f33f96b 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -56,7 +56,6 @@ static void mock_device_release(struct drm_device *dev)
 
 	mutex_lock(&i915->drm.struct_mutex);
 	mock_device_flush(i915);
-	i915_gem_contexts_lost(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	drain_delayed_work(&i915->gem.retire_work);
-- 
2.20.1


* [PATCH 42/45] drm/i915: Stop retiring along engine
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (39 preceding siblings ...)
  2019-04-25  9:20 ` [PATCH 41/45] drm/i915: Keep contexts pinned until after the next kernel context switch Chris Wilson
@ 2019-04-25  9:20 ` Chris Wilson
  2019-04-25  9:20 ` [PATCH 43/45] drm/i915: Replace engine->timeline with a plain list Chris Wilson
                   ` (10 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:20 UTC (permalink / raw)
  To: intel-gfx

We no longer track the execution order along the engine and so no
longer need to enforce the ordering of retirement along the engine.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c | 128 +++++++++++-----------------
 1 file changed, 52 insertions(+), 76 deletions(-)
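
For illustration, the retirement pattern after this change looks roughly
like the sketch below (hypothetical simplified types; the real
i915_request_retire() does far more work): retire() itself refuses an
incomplete request, so callers simply walk the ring's list until it says
stop, with no engine-wide ordering left to honour:

#include <stdbool.h>
#include <stddef.h>

struct request {
	struct request *next;	/* ring->request_list order, oldest first */
	bool completed;
	bool retired;
};

bool request_retire(struct request *rq)
{
	if (!rq->completed)
		return false;	/* stop: everything after rq is younger */

	/* release the request's resources here */
	rq->retired = true;
	return true;
}

void ring_retire_requests(struct request *oldest)
{
	struct request *rq;

	for (rq = oldest; rq; rq = rq->next)
		if (!request_retire(rq))
			break;
}

int main(void)
{
	struct request r2 = { .next = NULL, .completed = false };
	struct request r1 = { .next = &r2, .completed = true };

	ring_retire_requests(&r1);	/* retires r1, stops at r2 */
	return r1.retired && !r2.retired ? 0 : 1;
}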

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index d69ea896ef87..89e4bd80e0bc 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -183,72 +183,23 @@ static void free_capture_list(struct i915_request *request)
 	}
 }
 
-static void __retire_engine_request(struct intel_engine_cs *engine,
-				    struct i915_request *rq)
-{
-	GEM_TRACE("%s(%s) fence %llx:%lld, current %d\n",
-		  __func__, engine->name,
-		  rq->fence.context, rq->fence.seqno,
-		  hwsp_seqno(rq));
-
-	GEM_BUG_ON(!i915_request_completed(rq));
-
-	local_irq_disable();
-
-	spin_lock(&engine->timeline.lock);
-	GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline.requests));
-	list_del_init(&rq->link);
-	spin_unlock(&engine->timeline.lock);
-
-	spin_lock(&rq->lock);
-	i915_request_mark_complete(rq);
-	if (!i915_request_signaled(rq))
-		dma_fence_signal_locked(&rq->fence);
-	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
-		i915_request_cancel_breadcrumb(rq);
-	if (rq->waitboost) {
-		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
-		atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
-	}
-	spin_unlock(&rq->lock);
-
-	local_irq_enable();
-}
-
-static void __retire_engine_upto(struct intel_engine_cs *engine,
-				 struct i915_request *rq)
-{
-	struct i915_request *tmp;
-
-	if (list_empty(&rq->link))
-		return;
-
-	do {
-		tmp = list_first_entry(&engine->timeline.requests,
-				       typeof(*tmp), link);
-
-		GEM_BUG_ON(tmp->engine != engine);
-		__retire_engine_request(engine, tmp);
-	} while (tmp != rq);
-}
-
-static void i915_request_retire(struct i915_request *request)
+static bool i915_request_retire(struct i915_request *rq)
 {
 	struct i915_active_request *active, *next;
 
-	GEM_TRACE("%s fence %llx:%lld, current %d\n",
-		  request->engine->name,
-		  request->fence.context, request->fence.seqno,
-		  hwsp_seqno(request));
+	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	if (!i915_request_completed(rq))
+		return false;
 
-	lockdep_assert_held(&request->i915->drm.struct_mutex);
-	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
-	GEM_BUG_ON(!i915_request_completed(request));
+	GEM_TRACE("%s fence %llx:%lld, current %d\n",
+		  rq->engine->name,
+		  rq->fence.context, rq->fence.seqno,
+		  hwsp_seqno(rq));
 
-	trace_i915_request_retire(request);
+	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
+	trace_i915_request_retire(rq);
 
-	advance_ring(request);
-	free_capture_list(request);
+	advance_ring(rq);
 
 	/*
 	 * Walk through the active list, calling retire on each. This allows
@@ -260,7 +211,7 @@ static void i915_request_retire(struct i915_request *request)
 	 * pass along the auxiliary information (to avoid dereferencing
 	 * the node after the callback).
 	 */
-	list_for_each_entry_safe(active, next, &request->active_list, link) {
+	list_for_each_entry_safe(active, next, &rq->active_list, link) {
 		/*
 		 * In microbenchmarks or focusing upon time inside the kernel,
 		 * we may spend an inordinate amount of time simply handling
@@ -276,18 +227,39 @@ static void i915_request_retire(struct i915_request *request)
 		INIT_LIST_HEAD(&active->link);
 		RCU_INIT_POINTER(active->request, NULL);
 
-		active->retire(active, request);
+		active->retire(active, rq);
+	}
+
+	local_irq_disable();
+
+	spin_lock(&rq->engine->timeline.lock);
+	list_del(&rq->link);
+	spin_unlock(&rq->engine->timeline.lock);
+
+	spin_lock(&rq->lock);
+	i915_request_mark_complete(rq);
+	if (!i915_request_signaled(rq))
+		dma_fence_signal_locked(&rq->fence);
+	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
+		i915_request_cancel_breadcrumb(rq);
+	if (rq->waitboost) {
+		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
+		atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
 	}
+	spin_unlock(&rq->lock);
+
+	local_irq_enable();
 
-	i915_request_remove_from_client(request);
+	intel_context_exit(rq->hw_context);
+	intel_context_unpin(rq->hw_context);
 
-	__retire_engine_upto(request->engine, request);
+	i915_request_remove_from_client(rq);
 
-	intel_context_exit(request->hw_context);
-	intel_context_unpin(request->hw_context);
+	free_capture_list(rq);
+	i915_sched_node_fini(&rq->sched);
+	i915_request_put(rq);
 
-	i915_sched_node_fini(&request->sched);
-	i915_request_put(request);
+	return true;
 }
 
 void i915_request_retire_upto(struct i915_request *rq)
@@ -309,9 +281,7 @@ void i915_request_retire_upto(struct i915_request *rq)
 	do {
 		tmp = list_first_entry(&ring->request_list,
 				       typeof(*tmp), ring_link);
-
-		i915_request_retire(tmp);
-	} while (tmp != rq);
+	} while (i915_request_retire(tmp) && tmp != rq);
 }
 
 static void irq_execute_cb(struct irq_work *wrk)
@@ -594,12 +564,9 @@ static void ring_retire_requests(struct intel_ring *ring)
 {
 	struct i915_request *rq, *rn;
 
-	list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link) {
-		if (!i915_request_completed(rq))
+	list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link)
+		if (!i915_request_retire(rq))
 			break;
-
-		i915_request_retire(rq);
-	}
 }
 
 static noinline struct i915_request *
@@ -614,6 +581,15 @@ request_alloc_slow(struct intel_context *ce, gfp_t gfp)
 	if (!gfpflags_allow_blocking(gfp))
 		goto out;
 
+	/* Move our oldest request to the slab-cache (if not in use!) */
+	rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link);
+	i915_request_retire(rq);
+
+	rq = kmem_cache_alloc(global.slab_requests,
+			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+	if (rq)
+		return rq;
+
 	/* Ratelimit ourselves to prevent oom from malicious clients */
 	rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link);
 	cond_synchronize_rcu(rq->rcustate);
-- 
2.20.1


* [PATCH 43/45] drm/i915: Replace engine->timeline with a plain list
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (40 preceding siblings ...)
  2019-04-25  9:20 ` [PATCH 42/45] drm/i915: Stop retiring along engine Chris Wilson
@ 2019-04-25  9:20 ` Chris Wilson
  2019-04-25  9:20 ` [PATCH 44/45] drm/i915/execlists: Preempt-to-busy Chris Wilson
                   ` (9 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:20 UTC (permalink / raw)
  To: intel-gfx

To continue the onslaught of removing the assumption of a global
execution ordering, another casualty is the engine->timeline. Without an
actual timeline to track, it is overkill and we can replace it with a
much less grand plain list. We still need a list of inflight requests,
simply so that we can find them for retiring, resetting, preemption,
etc.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
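(Not part of the patch: a rough userspace sketch of the shape of the
new per-engine inflight list. The toy_engine/toy_request names and the
pthread mutex standing in for engine->active.lock are made up for
illustration only.)

#include <pthread.h>
#include <stdio.h>

struct toy_request {
	unsigned int seqno;
	struct toy_request *prev, *next; /* link on engine->active.requests */
};

struct toy_engine {
	struct {
		pthread_mutex_t lock;    /* stand-in for engine->active.lock */
		struct toy_request head; /* circular list of inflight requests */
	} active;
};

static void engine_init_active(struct toy_engine *e)
{
	pthread_mutex_init(&e->active.lock, NULL);
	e->active.head.prev = &e->active.head;
	e->active.head.next = &e->active.head;
}

/* submit: append the request to the engine's inflight list, under the lock */
static void request_submit(struct toy_engine *e, struct toy_request *rq)
{
	pthread_mutex_lock(&e->active.lock);
	rq->prev = e->active.head.prev;
	rq->next = &e->active.head;
	rq->prev->next = rq;
	e->active.head.prev = rq;
	pthread_mutex_unlock(&e->active.lock);
}

/* retire: unlink the request once it has completed */
static void request_retire(struct toy_engine *e, struct toy_request *rq)
{
	pthread_mutex_lock(&e->active.lock);
	rq->prev->next = rq->next;
	rq->next->prev = rq->prev;
	pthread_mutex_unlock(&e->active.lock);
}

int main(void)
{
	struct toy_engine engine;
	struct toy_request rq = { .seqno = 1 };

	engine_init_active(&engine);
	request_submit(&engine, &rq);
	request_retire(&engine, &rq);
	printf("request %u submitted and retired\n", rq.seqno);
	return 0;
}

The real list uses struct list_head and a spinlock taken with irqs off;
the principle of submit-adds/retire-removes under one engine-local lock
is the same.
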
 drivers/gpu/drm/i915/gt/intel_engine.h        |  6 ++
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 62 +++++++-------
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  6 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 84 +++++++++----------
 drivers/gpu/drm/i915/gt/intel_reset.c         | 10 +--
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    | 15 ++--
 drivers/gpu/drm/i915/gt/mock_engine.c         | 18 ++--
 drivers/gpu/drm/i915/gt/selftest_lrc.c        |  4 +-
 drivers/gpu/drm/i915/i915_gpu_error.c         |  5 +-
 drivers/gpu/drm/i915/i915_request.c           | 43 +++-------
 drivers/gpu/drm/i915/i915_request.h           |  2 +-
 drivers/gpu/drm/i915/i915_scheduler.c         | 37 ++++----
 drivers/gpu/drm/i915/i915_timeline.c          |  1 -
 drivers/gpu/drm/i915/i915_timeline.h          | 19 -----
 drivers/gpu/drm/i915/i915_timeline_types.h    |  4 -
 drivers/gpu/drm/i915/intel_guc_submission.c   | 16 ++--
 .../gpu/drm/i915/selftests/mock_timeline.c    |  1 -
 17 files changed, 145 insertions(+), 188 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 89463e18cdcc..1654af266af5 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -577,4 +577,10 @@ intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine)
 	return intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK);
 }
 
+void intel_engine_init_active(struct intel_engine_cs *engine,
+			      unsigned int subclass);
+#define ENGINE_PHYSICAL	0
+#define ENGINE_MOCK	1
+#define ENGINE_VIRTUAL	2
+
 #endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 62914e796248..12110e62b3fb 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -548,14 +548,7 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
 	if (err)
 		return err;
 
-	err = i915_timeline_init(engine->i915,
-				 &engine->timeline,
-				 engine->status_page.vma);
-	if (err)
-		goto err_hwsp;
-
-	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
-
+	intel_engine_init_active(engine, ENGINE_PHYSICAL);
 	intel_engine_init_breadcrumbs(engine);
 	intel_engine_init_execlists(engine);
 	intel_engine_init_hangcheck(engine);
@@ -568,10 +561,6 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
 		intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);
 
 	return 0;
-
-err_hwsp:
-	cleanup_status_page(engine);
-	return err;
 }
 
 /**
@@ -724,6 +713,27 @@ static int pin_context(struct i915_gem_context *ctx,
 	return 0;
 }
 
+void
+intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
+{
+	INIT_LIST_HEAD(&engine->active.requests);
+
+	spin_lock_init(&engine->active.lock);
+	lockdep_set_subclass(&engine->active.lock, subclass);
+
+	/*
+	 * Due to an interesting quirk in lockdep's internal debug tracking,
+	 * after setting a subclass we must ensure the lock is used. Otherwise,
+	 * nr_unused_locks is incremented once too often.
+	 */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	local_irq_disable();
+	lock_map_acquire(&engine->active.lock.dep_map);
+	lock_map_release(&engine->active.lock.dep_map);
+	local_irq_enable();
+#endif
+}
+
 /**
  * intel_engines_init_common - initialize cengine state which might require hw access
  * @engine: Engine to initialize.
@@ -787,6 +797,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
  */
 void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 {
+	GEM_BUG_ON(!list_empty(&engine->active.requests));
+
 	cleanup_status_page(engine);
 
 	intel_engine_fini_breadcrumbs(engine);
@@ -801,8 +813,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 	intel_context_unpin(engine->kernel_context);
 	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
 
-	i915_timeline_fini(&engine->timeline);
-
 	intel_wa_list_free(&engine->ctx_wa_list);
 	intel_wa_list_free(&engine->wa_list);
 	intel_wa_list_free(&engine->whitelist);
@@ -1402,16 +1412,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 
 	drm_printf(m, "\tRequests:\n");
 
-	rq = list_first_entry(&engine->timeline.requests,
-			      struct i915_request, link);
-	if (&rq->link != &engine->timeline.requests)
-		print_request(m, rq, "\t\tfirst  ");
-
-	rq = list_last_entry(&engine->timeline.requests,
-			     struct i915_request, link);
-	if (&rq->link != &engine->timeline.requests)
-		print_request(m, rq, "\t\tlast   ");
-
 	rq = intel_engine_find_active_request(engine);
 	if (rq) {
 		print_request(m, rq, "\t\tactive ");
@@ -1492,7 +1492,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 	if (!intel_engine_supports_stats(engine))
 		return -ENODEV;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 	write_seqlock(&engine->stats.lock);
 
 	if (unlikely(engine->stats.enabled == ~0)) {
@@ -1518,7 +1518,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 
 unlock:
 	write_sequnlock(&engine->stats.lock);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 
 	return err;
 }
@@ -1603,22 +1603,22 @@ intel_engine_find_active_request(struct intel_engine_cs *engine)
 	 * At all other times, we must assume the GPU is still running, but
 	 * we only care about the snapshot of this moment.
 	 */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
-	list_for_each_entry(request, &engine->timeline.requests, link) {
+	spin_lock_irqsave(&engine->active.lock, flags);
+	list_for_each_entry(request, &engine->active.requests, sched.link) {
 		if (i915_request_completed(request))
 			continue;
 
 		if (!i915_request_started(request))
-			break;
+			continue;
 
 		/* More than one preemptible request may match! */
 		if (!match_ring(request))
-			break;
+			continue;
 
 		active = request;
 		break;
 	}
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 
 	return active;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index bcede4ddc5ba..f2aecc4450fb 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -286,7 +286,11 @@ struct intel_engine_cs {
 
 	struct intel_ring *buffer;
 
-	struct i915_timeline timeline;
+	struct {
+		spinlock_t lock;
+		struct list_head requests;
+	} active;
+
 	struct llist_head barrier_tasks;
 
 	struct intel_context *kernel_context; /* pinned */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 07c7b6a5a596..48086602a0db 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -310,8 +310,8 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 	 * Check against the first request in ELSP[1], it will, thanks to the
 	 * power of PI, be the highest priority of that context.
 	 */
-	if (!list_is_last(&rq->link, &engine->timeline.requests) &&
-	    rq_prio(list_next_entry(rq, link)) > last_prio)
+	if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
+	    rq_prio(list_next_entry(rq, sched.link)) > last_prio)
 		return true;
 
 	if (rb) {
@@ -446,11 +446,11 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	struct list_head *uninitialized_var(pl);
 	int prio = I915_PRIORITY_INVALID | ACTIVE_PRIORITY;
 
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	list_for_each_entry_safe_reverse(rq, rn,
-					 &engine->timeline.requests,
-					 link) {
+					 &engine->active.requests,
+					 sched.link) {
 		struct intel_engine_cs *owner;
 
 		if (i915_request_completed(rq))
@@ -477,9 +477,10 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 			}
 			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 
-			list_add(&rq->sched.link, pl);
+			list_move(&rq->sched.link, pl);
 			active = rq;
 		} else {
+			list_del_init(&rq->sched.link);
 			if (__i915_request_has_started(rq))
 				rq->sched.attr.priority |= ACTIVE_PRIORITY;
 
@@ -955,11 +956,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
 		struct i915_request *rq;
 
-		spin_lock(&ve->base.timeline.lock);
+		spin_lock(&ve->base.active.lock);
 
 		rq = ve->request;
 		if (unlikely(!rq)) { /* lost the race to a sibling */
-			spin_unlock(&ve->base.timeline.lock);
+			spin_unlock(&ve->base.active.lock);
 			rb_erase_cached(rb, &execlists->virtual);
 			RB_CLEAR_NODE(rb);
 			rb = rb_first_cached(&execlists->virtual);
@@ -972,13 +973,13 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 		if (rq_prio(rq) >= queue_prio(execlists)) {
 			if (!virtual_matches(ve, rq, engine)) {
-				spin_unlock(&ve->base.timeline.lock);
+				spin_unlock(&ve->base.active.lock);
 				rb = rb_next(rb);
 				continue;
 			}
 
 			if (last && !can_merge_rq(last, rq)) {
-				spin_unlock(&ve->base.timeline.lock);
+				spin_unlock(&ve->base.active.lock);
 				return; /* leave this rq for another engine */
 			}
 
@@ -1030,7 +1031,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			last = rq;
 		}
 
-		spin_unlock(&ve->base.timeline.lock);
+		spin_unlock(&ve->base.active.lock);
 		break;
 	}
 
@@ -1087,8 +1088,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				GEM_BUG_ON(port_isset(port));
 			}
 
-			list_del_init(&rq->sched.link);
-
 			__i915_request_submit(rq);
 			trace_i915_request_in(rq, port_index(port, execlists));
 
@@ -1189,7 +1188,7 @@ static void process_csb(struct intel_engine_cs *engine)
 	const u8 num_entries = execlists->csb_size;
 	u8 head, tail;
 
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	/*
 	 * Note that csb_write, csb_status may be either in HWSP or mmio.
@@ -1349,7 +1348,7 @@ static void process_csb(struct intel_engine_cs *engine)
 
 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 {
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	process_csb(engine);
 	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
@@ -1370,9 +1369,9 @@ static void execlists_submission_tasklet(unsigned long data)
 		  !!engine->i915->gt.awake,
 		  engine->execlists.active);
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 	__execlists_submission_tasklet(engine);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void queue_request(struct intel_engine_cs *engine,
@@ -1409,7 +1408,7 @@ static void execlists_submit_request(struct i915_request *request)
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	queue_request(engine, &request->sched, rq_prio(request));
 
@@ -1418,7 +1417,7 @@ static void execlists_submit_request(struct i915_request *request)
 
 	submit_queue(engine, rq_prio(request));
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void __execlists_context_fini(struct intel_context *ce)
@@ -2069,8 +2068,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
 	intel_engine_stop_cs(engine);
 
 	/* And flush any current direct submission. */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static bool lrc_regs_ok(const struct i915_request *rq)
@@ -2113,11 +2112,11 @@ static void reset_csb_pointers(struct intel_engine_execlists *execlists)
 
 static struct i915_request *active_request(struct i915_request *rq)
 {
-	const struct list_head * const list = &rq->engine->timeline.requests;
+	const struct list_head * const list = &rq->engine->active.requests;
 	const struct intel_context * const context = rq->hw_context;
 	struct i915_request *active = NULL;
 
-	list_for_each_entry_from_reverse(rq, list, link) {
+	list_for_each_entry_from_reverse(rq, list, sched.link) {
 		if (i915_request_completed(rq))
 			break;
 
@@ -2234,11 +2233,11 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
 
 	GEM_TRACE("%s\n", engine->name);
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	__execlists_reset(engine, stalled);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void nop_submission_tasklet(unsigned long data)
@@ -2269,12 +2268,12 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	 * submission's irq state, we also wish to remind ourselves that
 	 * it is irq state.)
 	 */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	__execlists_reset(engine, true);
 
 	/* Mark all executing requests as skipped. */
-	list_for_each_entry(rq, &engine->timeline.requests, link) {
+	list_for_each_entry(rq, &engine->active.requests, sched.link) {
 		if (!i915_request_signaled(rq))
 			dma_fence_set_error(&rq->fence, -EIO);
 
@@ -2305,7 +2304,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 		rb_erase_cached(rb, &execlists->virtual);
 		RB_CLEAR_NODE(rb);
 
-		spin_lock(&ve->base.timeline.lock);
+		spin_lock(&ve->base.active.lock);
 		if (ve->request) {
 			ve->request->engine = engine;
 			__i915_request_submit(ve->request);
@@ -2314,7 +2313,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 			ve->base.execlists.queue_priority_hint = INT_MIN;
 			ve->request = NULL;
 		}
-		spin_unlock(&ve->base.timeline.lock);
+		spin_unlock(&ve->base.active.lock);
 	}
 
 	/* Remaining _unready_ requests will be nop'ed when submitted */
@@ -2326,7 +2325,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
 	execlists->tasklet.func = nop_submission_tasklet;
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void execlists_reset_finish(struct intel_engine_cs *engine)
@@ -3050,12 +3049,12 @@ static void virtual_context_destroy(struct kref *kref)
 		if (RB_EMPTY_NODE(node))
 			continue;
 
-		spin_lock_irq(&sibling->timeline.lock);
+		spin_lock_irq(&sibling->active.lock);
 
 		if (!RB_EMPTY_NODE(node))
 			rb_erase_cached(node, &sibling->execlists.virtual);
 
-		spin_unlock_irq(&sibling->timeline.lock);
+		spin_unlock_irq(&sibling->active.lock);
 	}
 	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
 
@@ -3063,8 +3062,6 @@ static void virtual_context_destroy(struct kref *kref)
 		__execlists_context_fini(&ve->context);
 
 	kfree(ve->bonds);
-
-	i915_timeline_fini(&ve->base.timeline);
 	kfree(ve);
 }
 
@@ -3183,16 +3180,16 @@ static void virtual_submission_tasklet(unsigned long data)
 
 		if (unlikely(!(mask & sibling->mask))) {
 			if (!RB_EMPTY_NODE(&node->rb)) {
-				spin_lock(&sibling->timeline.lock);
+				spin_lock(&sibling->active.lock);
 				rb_erase_cached(&node->rb,
 						&sibling->execlists.virtual);
 				RB_CLEAR_NODE(&node->rb);
-				spin_unlock(&sibling->timeline.lock);
+				spin_unlock(&sibling->active.lock);
 			}
 			continue;
 		}
 
-		spin_lock(&sibling->timeline.lock);
+		spin_lock(&sibling->active.lock);
 
 		if (!RB_EMPTY_NODE(&node->rb)) {
 			/*
@@ -3236,7 +3233,7 @@ static void virtual_submission_tasklet(unsigned long data)
 			tasklet_hi_schedule(&sibling->execlists.tasklet);
 		}
 
-		spin_unlock(&sibling->timeline.lock);
+		spin_unlock(&sibling->active.lock);
 	}
 	local_irq_enable();
 }
@@ -3319,10 +3316,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
 
 	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
 
-	err = i915_timeline_init(ctx->i915, &ve->base.timeline, NULL);
-	if (err)
-		goto err_put;
-	i915_timeline_set_subclass(&ve->base.timeline, TIMELINE_VIRTUAL);
+	intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
 
 	intel_engine_init_execlists(&ve->base);
 
@@ -3487,11 +3481,11 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 	unsigned int count;
 	struct rb_node *rb;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	last = NULL;
 	count = 0;
-	list_for_each_entry(rq, &engine->timeline.requests, link) {
+	list_for_each_entry(rq, &engine->active.requests, sched.link) {
 		if (count++ < max - 1)
 			show_request(m, rq, "\t\tE ");
 		else
@@ -3554,7 +3548,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 		show_request(m, last, "\t\tV ");
 	}
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 void intel_lr_context_reset(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 59baa20a3299..a46283b0aa6c 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -47,12 +47,12 @@ static void engine_skip_context(struct i915_request *rq)
 	struct intel_engine_cs *engine = rq->engine;
 	struct i915_gem_context *hung_ctx = rq->gem_context;
 
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	if (!i915_request_is_active(rq))
 		return;
 
-	list_for_each_entry_continue(rq, &engine->timeline.requests, link)
+	list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
 		if (rq->gem_context == hung_ctx)
 			i915_request_skip(rq, -EIO);
 }
@@ -128,7 +128,7 @@ void i915_reset_request(struct i915_request *rq, bool guilty)
 		  rq->fence.seqno,
 		  yesno(guilty));
 
-	lockdep_assert_held(&rq->engine->timeline.lock);
+	lockdep_assert_held(&rq->engine->active.lock);
 	GEM_BUG_ON(i915_request_completed(rq));
 
 	if (guilty) {
@@ -783,10 +783,10 @@ static void nop_submit_request(struct i915_request *request)
 		  engine->name, request->fence.context, request->fence.seqno);
 	dma_fence_set_error(&request->fence, -EIO);
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 	__i915_request_submit(request);
 	i915_request_mark_complete(request);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 
 	intel_engine_queue_breadcrumbs(engine);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index b539463931c0..f53275bd577e 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -750,14 +750,13 @@ static void reset_prepare(struct intel_engine_cs *engine)
 
 static void reset_ring(struct intel_engine_cs *engine, bool stalled)
 {
-	struct i915_timeline *tl = &engine->timeline;
 	struct i915_request *pos, *rq;
 	unsigned long flags;
 	u32 head;
 
 	rq = NULL;
-	spin_lock_irqsave(&tl->lock, flags);
-	list_for_each_entry(pos, &tl->requests, link) {
+	spin_lock_irqsave(&engine->active.lock, flags);
+	list_for_each_entry(pos, &engine->active.requests, sched.link) {
 		if (!i915_request_completed(pos)) {
 			rq = pos;
 			break;
@@ -811,7 +810,7 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled)
 	}
 	engine->buffer->head = intel_ring_wrap(engine->buffer, head);
 
-	spin_unlock_irqrestore(&tl->lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void reset_finish(struct intel_engine_cs *engine)
@@ -883,10 +882,10 @@ static void cancel_requests(struct intel_engine_cs *engine)
 	struct i915_request *request;
 	unsigned long flags;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	/* Mark all submitted requests as skipped. */
-	list_for_each_entry(request, &engine->timeline.requests, link) {
+	list_for_each_entry(request, &engine->active.requests, sched.link) {
 		if (!i915_request_signaled(request))
 			dma_fence_set_error(&request->fence, -EIO);
 
@@ -895,7 +894,7 @@ static void cancel_requests(struct intel_engine_cs *engine)
 
 	/* Remaining _unready_ requests will be nop'ed when submitted */
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void i9xx_submit_request(struct i915_request *request)
@@ -1281,8 +1280,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine,
 
 	GEM_BUG_ON(!is_power_of_2(size));
 	GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
-	GEM_BUG_ON(timeline == &engine->timeline);
-	lockdep_assert_held(&engine->i915->drm.struct_mutex);
 
 	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
 	if (!ring)
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index b7675ef18523..00c666d3e652 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -229,17 +229,17 @@ static void mock_cancel_requests(struct intel_engine_cs *engine)
 	struct i915_request *request;
 	unsigned long flags;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	/* Mark all submitted requests as skipped. */
-	list_for_each_entry(request, &engine->timeline.requests, sched.link) {
+	list_for_each_entry(request, &engine->active.requests, sched.link) {
 		if (!i915_request_signaled(request))
 			dma_fence_set_error(&request->fence, -EIO);
 
 		i915_request_mark_complete(request);
 	}
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
@@ -285,28 +285,23 @@ int mock_engine_init(struct intel_engine_cs *engine)
 	struct drm_i915_private *i915 = engine->i915;
 	int err;
 
+	intel_engine_init_active(engine, ENGINE_MOCK);
 	intel_engine_init_breadcrumbs(engine);
 	intel_engine_init_execlists(engine);
 	intel_engine_init__pm(engine);
 
-	if (i915_timeline_init(i915, &engine->timeline, NULL))
-		goto err_breadcrumbs;
-	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
-
 	engine->kernel_context =
 		i915_gem_context_get_engine(i915->kernel_context, engine->id);
 	if (IS_ERR(engine->kernel_context))
-		goto err_timeline;
+		goto err_breadcrumbs;
 
 	err = intel_context_pin(engine->kernel_context);
 	intel_context_put(engine->kernel_context);
 	if (err)
-		goto err_timeline;
+		goto err_breadcrumbs;
 
 	return 0;
 
-err_timeline:
-	i915_timeline_fini(&engine->timeline);
 err_breadcrumbs:
 	intel_engine_fini_breadcrumbs(engine);
 	return -ENOMEM;
@@ -340,7 +335,6 @@ void mock_engine_free(struct intel_engine_cs *engine)
 	intel_context_unpin(engine->kernel_context);
 
 	intel_engine_fini_breadcrumbs(engine);
-	i915_timeline_fini(&engine->timeline);
 
 	kfree(engine);
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 9143c3eda2ec..9edb6662b903 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -642,13 +642,15 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
 	GEM_BUG_ON(i915_request_completed(rq));
 
 	i915_sw_fence_init(&rq->submit, dummy_notify);
-	i915_sw_fence_commit(&rq->submit);
+	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
 
 	return rq;
 }
 
 static void dummy_request_free(struct i915_request *dummy)
 {
+	i915_sw_fence_commit(&dummy->submit);
+
 	i915_request_mark_complete(dummy);
 	i915_sched_node_fini(&dummy->sched);
 	i915_sw_fence_fini(&dummy->submit);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f919cf1a8c41..a3a0e1537a0a 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1266,7 +1266,7 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 
 	count = 0;
 	request = first;
-	list_for_each_entry_from(request, &engine->timeline.requests, link)
+	list_for_each_entry_from(request, &engine->active.requests, sched.link)
 		count++;
 	if (!count)
 		return;
@@ -1279,7 +1279,8 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 
 	count = 0;
 	request = first;
-	list_for_each_entry_from(request, &engine->timeline.requests, link) {
+	list_for_each_entry_from(request,
+				 &engine->active.requests, sched.link) {
 		if (count >= ee->num_requests) {
 			/*
 			 * If the ring request list was changed in
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 89e4bd80e0bc..54f5cf7d857b 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -232,9 +232,9 @@ static bool i915_request_retire(struct i915_request *rq)
 
 	local_irq_disable();
 
-	spin_lock(&rq->engine->timeline.lock);
-	list_del(&rq->link);
-	spin_unlock(&rq->engine->timeline.lock);
+	spin_lock(&rq->engine->active.lock);
+	list_del(&rq->sched.link);
+	spin_unlock(&rq->engine->active.lock);
 
 	spin_lock(&rq->lock);
 	i915_request_mark_complete(rq);
@@ -254,6 +254,7 @@ static bool i915_request_retire(struct i915_request *rq)
 	intel_context_unpin(rq->hw_context);
 
 	i915_request_remove_from_client(rq);
+	list_del(&rq->link);
 
 	free_capture_list(rq);
 	i915_sched_node_fini(&rq->sched);
@@ -373,28 +374,17 @@ __i915_request_await_execution(struct i915_request *rq,
 	return 0;
 }
 
-static void move_to_timeline(struct i915_request *request,
-			     struct i915_timeline *timeline)
-{
-	GEM_BUG_ON(request->timeline == &request->engine->timeline);
-	lockdep_assert_held(&request->engine->timeline.lock);
-
-	spin_lock(&request->timeline->lock);
-	list_move_tail(&request->link, &timeline->requests);
-	spin_unlock(&request->timeline->lock);
-}
-
 void __i915_request_submit(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
 
-	GEM_TRACE("%s fence %llx:%lld -> current %d\n",
+	GEM_TRACE("%s fence %llx:%lld, current %d\n",
 		  engine->name,
 		  request->fence.context, request->fence.seqno,
 		  hwsp_seqno(request));
 
 	GEM_BUG_ON(!irqs_disabled());
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	if (i915_gem_context_is_banned(request->gem_context))
 		i915_request_skip(request, -EIO);
@@ -402,6 +392,8 @@ void __i915_request_submit(struct i915_request *request)
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
+	list_move_tail(&request->sched.link, &engine->active.requests);
+
 	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
 	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
 
@@ -417,9 +409,6 @@ void __i915_request_submit(struct i915_request *request)
 	engine->emit_fini_breadcrumb(request,
 				     request->ring->vaddr + request->postfix);
 
-	/* Transfer from per-context onto the global per-engine timeline */
-	move_to_timeline(request, &engine->timeline);
-
 	engine->serial++;
 
 	trace_i915_request_execute(request);
@@ -431,11 +420,11 @@ void i915_request_submit(struct i915_request *request)
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	__i915_request_submit(request);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 void __i915_request_unsubmit(struct i915_request *request)
@@ -448,7 +437,7 @@ void __i915_request_unsubmit(struct i915_request *request)
 		  hwsp_seqno(request));
 
 	GEM_BUG_ON(!irqs_disabled());
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	/*
 	 * Only unwind in reverse order, required so that the per-context list
@@ -475,9 +464,6 @@ void __i915_request_unsubmit(struct i915_request *request)
 
 	spin_unlock(&request->lock);
 
-	/* Transfer back from the global per-engine timeline to per-context */
-	move_to_timeline(request, request->timeline);
-
 	/*
 	 * We don't need to wake_up any waiters on request->execute, they
 	 * will get woken by any other event or us re-adding this request
@@ -493,11 +479,11 @@ void i915_request_unsubmit(struct i915_request *request)
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	__i915_request_unsubmit(request);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static int __i915_sw_fence_call
@@ -663,7 +649,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	rq->engine = ce->engine;
 	rq->ring = ce->ring;
 	rq->timeline = tl;
-	GEM_BUG_ON(rq->timeline == &ce->engine->timeline);
 	rq->hwsp_seqno = tl->hwsp_seqno;
 	rq->hwsp_cacheline = tl->hwsp_cacheline;
 	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
@@ -1087,9 +1072,7 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 							 0);
 	}
 
-	spin_lock_irq(&timeline->lock);
 	list_add_tail(&rq->link, &timeline->requests);
-	spin_unlock_irq(&timeline->lock);
 
 	/*
 	 * Make sure that no request gazumped us - if it was allocated after
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index c9f7d07991c8..edbbdfec24ab 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -217,7 +217,7 @@ struct i915_request {
 
 	bool waitboost;
 
-	/** engine->request_list entry for this request */
+	/** timeline->request entry for this request */
 	struct list_head link;
 
 	/** ring->request_list entry for this request */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index b58d9c23a876..946b5eb443bf 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -106,8 +106,6 @@ void i915_sched_node_fini(struct i915_sched_node *node)
 {
 	struct i915_dependency *dep, *tmp;
 
-	GEM_BUG_ON(!list_empty(&node->link));
-
 	spin_lock_irq(&schedule_lock);
 
 	/*
@@ -180,7 +178,7 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
 	bool first = true;
 	int idx, i;
 
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 	assert_priolists(execlists);
 
 	/* buckets sorted from highest [in slot 0] to lowest priority */
@@ -260,9 +258,9 @@ sched_lock_engine(const struct i915_sched_node *node,
 	 * check that the rq still belongs to the newly locked engine.
 	 */
 	while (locked != (engine = READ_ONCE(rq->engine))) {
-		spin_unlock(&locked->timeline.lock);
+		spin_unlock(&locked->active.lock);
 		memset(cache, 0, sizeof(*cache));
-		spin_lock(&engine->timeline.lock);
+		spin_lock(&engine->active.lock);
 		locked = engine;
 	}
 
@@ -365,7 +363,7 @@ static void __i915_schedule(struct i915_request *rq,
 
 	memset(&cache, 0, sizeof(cache));
 	engine = rq->engine;
-	spin_lock(&engine->timeline.lock);
+	spin_lock(&engine->active.lock);
 
 	/* Fifo and depth-first replacement ensure our deps execute before us */
 	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
@@ -374,7 +372,7 @@ static void __i915_schedule(struct i915_request *rq,
 		INIT_LIST_HEAD(&dep->dfs_link);
 
 		engine = sched_lock_engine(node, engine, &cache);
-		lockdep_assert_held(&engine->timeline.lock);
+		lockdep_assert_held(&engine->active.lock);
 
 		/* Recheck after acquiring the engine->timeline.lock */
 		if (prio <= node->attr.priority || node_signaled(node))
@@ -383,14 +381,8 @@ static void __i915_schedule(struct i915_request *rq,
 		GEM_BUG_ON(node_to_request(node)->engine != engine);
 
 		node->attr.priority = prio;
-		if (!list_empty(&node->link)) {
-			GEM_BUG_ON(intel_engine_is_virtual(engine));
-			if (!cache.priolist)
-				cache.priolist =
-					i915_sched_lookup_priolist(engine,
-								   prio);
-			list_move_tail(&node->link, cache.priolist);
-		} else {
+
+		if (!i915_sw_fence_done(&node_to_request(node)->submit)) {
 			/*
 			 * If the request is not in the priolist queue because
 			 * it is not yet runnable, then it doesn't contribute
@@ -399,8 +391,17 @@ static void __i915_schedule(struct i915_request *rq,
 			 * queue; but in that case we may still need to reorder
 			 * the inflight requests.
 			 */
-			if (!i915_sw_fence_done(&node_to_request(node)->submit))
-				continue;
+			continue;
+		}
+
+		if (!list_empty(&node->link) &&
+		    !i915_request_is_active(node_to_request(node))) {
+			GEM_BUG_ON(intel_engine_is_virtual(engine));
+			if (!cache.priolist)
+				cache.priolist =
+					i915_sched_lookup_priolist(engine,
+								   prio);
+			list_move_tail(&node->link, cache.priolist);
 		}
 
 		if (prio <= engine->execlists.queue_priority_hint)
@@ -419,7 +420,7 @@ static void __i915_schedule(struct i915_request *rq,
 		tasklet_hi_schedule(&engine->execlists.tasklet);
 	}
 
-	spin_unlock(&engine->timeline.lock);
+	spin_unlock(&engine->active.lock);
 }
 
 void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 000e1a9b6750..c311ce9c6f9d 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -251,7 +251,6 @@ int i915_timeline_init(struct drm_i915_private *i915,
 
 	timeline->fence_context = dma_fence_context_alloc(1);
 
-	spin_lock_init(&timeline->lock);
 	mutex_init(&timeline->mutex);
 
 	INIT_ACTIVE_REQUEST(&timeline->last_request);
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 27668a1a69a3..36e5e5a65155 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -36,25 +36,6 @@ int i915_timeline_init(struct drm_i915_private *i915,
 		       struct i915_vma *hwsp);
 void i915_timeline_fini(struct i915_timeline *tl);
 
-static inline void
-i915_timeline_set_subclass(struct i915_timeline *timeline,
-			   unsigned int subclass)
-{
-	lockdep_set_subclass(&timeline->lock, subclass);
-
-	/*
-	 * Due to an interesting quirk in lockdep's internal debug tracking,
-	 * after setting a subclass we must ensure the lock is used. Otherwise,
-	 * nr_unused_locks is incremented once too often.
-	 */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	local_irq_disable();
-	lock_map_acquire(&timeline->lock.dep_map);
-	lock_map_release(&timeline->lock.dep_map);
-	local_irq_enable();
-#endif
-}
-
 struct i915_timeline *
 i915_timeline_create(struct drm_i915_private *i915,
 		     struct i915_vma *global_hwsp);
diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h
index 1688705f4a2b..fce5cb4f1090 100644
--- a/drivers/gpu/drm/i915/i915_timeline_types.h
+++ b/drivers/gpu/drm/i915/i915_timeline_types.h
@@ -23,10 +23,6 @@ struct i915_timeline {
 	u64 fence_context;
 	u32 seqno;
 
-	spinlock_t lock;
-#define TIMELINE_CLIENT 0 /* default subclass */
-#define TIMELINE_ENGINE 1
-#define TIMELINE_VIRTUAL 2
 	struct mutex mutex; /* protects the flow of requests */
 
 	unsigned int pin_count;
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 16226b2ab608..5ad111066cd5 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -740,7 +740,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 	bool submit = false;
 	struct rb_node *rb;
 
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	if (port_isset(port)) {
 		if (intel_engine_has_preemption(engine)) {
@@ -821,7 +821,7 @@ static void guc_submission_tasklet(unsigned long data)
 	struct i915_request *rq;
 	unsigned long flags;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	rq = port_request(port);
 	while (rq && i915_request_completed(rq)) {
@@ -846,7 +846,7 @@ static void guc_submission_tasklet(unsigned long data)
 	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
 		guc_dequeue(engine);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void guc_reset_prepare(struct intel_engine_cs *engine)
@@ -883,7 +883,7 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled)
 	struct i915_request *rq;
 	unsigned long flags;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	execlists_cancel_port_requests(execlists);
 
@@ -899,7 +899,7 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled)
 	intel_lr_context_reset(engine, rq->hw_context, rq->head, stalled);
 
 out_unlock:
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void guc_cancel_requests(struct intel_engine_cs *engine)
@@ -925,13 +925,13 @@ static void guc_cancel_requests(struct intel_engine_cs *engine)
 	 * submission's irq state, we also wish to remind ourselves that
 	 * it is irq state.)
 	 */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	/* Cancel the requests on the HW and clear the ELSP tracker. */
 	execlists_cancel_port_requests(execlists);
 
 	/* Mark all executing requests as skipped. */
-	list_for_each_entry(rq, &engine->timeline.requests, link) {
+	list_for_each_entry(rq, &engine->active.requests, sched.link) {
 		if (!i915_request_signaled(rq))
 			dma_fence_set_error(&rq->fence, -EIO);
 
@@ -960,7 +960,7 @@ static void guc_cancel_requests(struct intel_engine_cs *engine)
 	execlists->queue = RB_ROOT_CACHED;
 	GEM_BUG_ON(port_isset(execlists->port));
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void guc_reset_finish(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index e084476469ef..65b52be23d42 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -13,7 +13,6 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 	timeline->i915 = NULL;
 	timeline->fence_context = context;
 
-	spin_lock_init(&timeline->lock);
 	mutex_init(&timeline->mutex);
 
 	INIT_ACTIVE_REQUEST(&timeline->last_request);
-- 
2.20.1


* [PATCH 44/45] drm/i915/execlists: Preempt-to-busy
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (41 preceding siblings ...)
  2019-04-25  9:20 ` [PATCH 43/45] drm/i915: Replace engine->timeline with a plain list Chris Wilson
@ 2019-04-25  9:20 ` Chris Wilson
  2019-04-25  9:20 ` [PATCH 45/45] drm/i915/execlists: Minimalistic timeslicing Chris Wilson
                   ` (8 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:20 UTC (permalink / raw)
  To: intel-gfx

When using a global seqno, we required a precise stop-the-world event to
handle preemption and unwind the global seqno counter. To accomplish
this, we would preempt to a special out-of-band context and wait for the
machine to report that it was idle. Given an idle machine, we could very
precisely see which requests had completed and which we needed to feed
back into the run queue.

However, now that we have scrapped the global seqno, we no longer need
to precisely unwind the global counter and only track requests by their
per-context seqno. This allows us to loosely unwind inflight requests
while scheduling a preemption, with the enormous caveat that the
requests we put back on the run queue are still _inflight_ (until the
preemption request is complete). This makes request tracking much
messier, as at any point we may then see a completed request that we
believe is not currently scheduled for execution. We also have to be
careful not to rewind RING_TAIL past RING_HEAD on preempting to the
running context, and for this we use a semaphore to prevent completion
of the request before continuing.

To accomplish this feat, we change how we track requests scheduled to
the HW. Instead of appending our requests onto a single list as we
submit, we track each submission to ELSP as its own block. Then upon
receiving the CS preemption event, we promote the pending block to the
inflight block (discarding what was previously being tracked). As normal
CS completion events arrive, we then remove stale entries from the
inflight tracker.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
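(Not part of the patch: a rough sketch of the pending[]/inflight[]
promotion described above. MAX_PORTS, the function names and the plain
int request handles are made up for illustration; the driver tracks
struct i915_request pointers under the engine lock.)

#include <stdio.h>
#include <string.h>

#define MAX_PORTS 2 /* illustrative; the real port count is per-platform */

struct toy_execlists {
	const int *active;           /* cursor into inflight[] */
	int inflight[MAX_PORTS + 1]; /* what the HW is running, 0-terminated */
	int pending[MAX_PORTS + 1];  /* last block written to ELSP */
};

/* Write a new block of requests to the (pretend) ELSP. */
static void submit_pending(struct toy_execlists *el, const int *rq, int count)
{
	memset(el->pending, 0, sizeof(el->pending));
	memcpy(el->pending, rq, count * sizeof(*rq));
}

/* CS event: HW switched to the new submission, promote pending[] to inflight[]. */
static void promote_pending(struct toy_execlists *el)
{
	memcpy(el->inflight, el->pending, sizeof(el->inflight));
	memset(el->pending, 0, sizeof(el->pending));
	el->active = el->inflight;
}

/* CS event: the oldest inflight context completed, step past it. */
static void complete_head(struct toy_execlists *el)
{
	el->active++;
}

int main(void)
{
	struct toy_execlists el = { .active = el.inflight };
	int batch[] = { 101, 102 }; /* two pretend request ids */

	submit_pending(&el, batch, 2);
	promote_pending(&el); /* HW acknowledged the new submission */
	complete_head(&el);   /* first context completed */
	printf("now executing request %d\n", *el.active);
	return 0;
}

On a completion event the active cursor simply steps forward over
inflight[] until it reaches the terminating NULL, which is what allows
stale, already-completed entries to be dropped without requiring a
precise stop-the-world point.
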
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |   2 +-
 drivers/gpu/drm/i915/gt/intel_context_types.h |   5 +
 drivers/gpu/drm/i915/gt/intel_engine.h        |  61 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  61 +-
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  52 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 682 ++++++++----------
 drivers/gpu/drm/i915/i915_gpu_error.c         |  19 +-
 drivers/gpu/drm/i915/i915_request.c           |   6 +
 drivers/gpu/drm/i915/i915_request.h           |   1 +
 drivers/gpu/drm/i915/i915_scheduler.c         |  13 +-
 drivers/gpu/drm/i915/i915_utils.h             |  13 +
 drivers/gpu/drm/i915/intel_guc_submission.c   | 174 ++---
 drivers/gpu/drm/i915/selftests/i915_request.c |   8 +-
 13 files changed, 480 insertions(+), 617 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 3d0a7af096f6..cf333007a867 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -630,7 +630,7 @@ static void init_contexts(struct drm_i915_private *i915)
 
 static bool needs_preempt_context(struct drm_i915_private *i915)
 {
-	return HAS_EXECLISTS(i915);
+	return USES_GUC_SUBMISSION(i915);
 }
 
 int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 92401d25764b..05e984864098 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -13,6 +13,7 @@
 #include <linux/types.h>
 
 #include "i915_active_types.h"
+#include "i915_utils.h"
 #include "intel_sseu.h"
 
 struct i915_gem_context;
@@ -37,6 +38,10 @@ struct intel_context {
 	struct i915_gem_context *gem_context;
 	struct intel_engine_cs *engine;
 	struct intel_engine_cs *inflight;
+#define intel_context_inflight(ce) ptr_mask_bits((ce)->inflight, 2)
+#define intel_context_inflight_count(ce)  ptr_unmask_bits((ce)->inflight, 2)
+#define intel_context_inflight_inc(ce) ptr_count_inc(&(ce)->inflight)
+#define intel_context_inflight_dec(ce) ptr_count_dec(&(ce)->inflight)
 
 	struct list_head signal_link;
 	struct list_head signals;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 1654af266af5..5d4e9480be04 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -124,71 +124,26 @@ static inline bool __execlists_need_preempt(int prio, int last)
 	return prio > max(I915_PRIORITY_NORMAL - 1, last);
 }
 
-static inline void
-execlists_set_active(struct intel_engine_execlists *execlists,
-		     unsigned int bit)
-{
-	__set_bit(bit, (unsigned long *)&execlists->active);
-}
-
-static inline bool
-execlists_set_active_once(struct intel_engine_execlists *execlists,
-			  unsigned int bit)
-{
-	return !__test_and_set_bit(bit, (unsigned long *)&execlists->active);
-}
-
-static inline void
-execlists_clear_active(struct intel_engine_execlists *execlists,
-		       unsigned int bit)
-{
-	__clear_bit(bit, (unsigned long *)&execlists->active);
-}
-
-static inline void
-execlists_clear_all_active(struct intel_engine_execlists *execlists)
+static inline unsigned int
+execlists_num_ports(const struct intel_engine_execlists * const execlists)
 {
-	execlists->active = 0;
+	return execlists->port_mask + 1;
 }
 
-static inline bool
-execlists_is_active(const struct intel_engine_execlists *execlists,
-		    unsigned int bit)
+static inline struct i915_request *
+execlists_active(const struct intel_engine_execlists *execlists)
 {
-	return test_bit(bit, (unsigned long *)&execlists->active);
+	GEM_BUG_ON(execlists->active - execlists->inflight >
+		   execlists_num_ports(execlists));
+	return READ_ONCE(*execlists->active);
 }
 
-void execlists_user_begin(struct intel_engine_execlists *execlists,
-			  const struct execlist_port *port);
-void execlists_user_end(struct intel_engine_execlists *execlists);
-
 void
 execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);
 
 struct i915_request *
 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
 
-static inline unsigned int
-execlists_num_ports(const struct intel_engine_execlists * const execlists)
-{
-	return execlists->port_mask + 1;
-}
-
-static inline struct execlist_port *
-execlists_port_complete(struct intel_engine_execlists * const execlists,
-			struct execlist_port * const port)
-{
-	const unsigned int m = execlists->port_mask;
-
-	GEM_BUG_ON(port_index(port, execlists) != 0);
-	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
-
-	memmove(port, port + 1, m * sizeof(struct execlist_port));
-	memset(port + m, 0, sizeof(struct execlist_port));
-
-	return port;
-}
-
 static inline u32
 intel_read_status_page(const struct intel_engine_cs *engine, int reg)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 12110e62b3fb..4bd5526c3f57 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -439,6 +439,10 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine)
 	GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
 	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
 
+	memset(execlists->pending, 0, sizeof(execlists->pending));
+	execlists->active =
+		memset(execlists->inflight, 0, sizeof(execlists->inflight));
+
 	execlists->queue_priority_hint = INT_MIN;
 	execlists->queue = RB_ROOT_CACHED;
 }
@@ -1075,7 +1079,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 		return true;
 
 	/* Waiting to drain ELSP? */
-	if (READ_ONCE(engine->execlists.active)) {
+	if (execlists_active(&engine->execlists)) {
 		struct tasklet_struct *t = &engine->execlists.tasklet;
 
 		local_bh_disable();
@@ -1090,7 +1094,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 		/* Otherwise flush the tasklet if it was on another cpu */
 		tasklet_unlock_wait(t);
 
-		if (READ_ONCE(engine->execlists.active))
+		if (execlists_active(&engine->execlists))
 			return false;
 	}
 
@@ -1284,6 +1288,7 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
 	}
 
 	if (HAS_EXECLISTS(dev_priv)) {
+		struct i915_request * const *port, *rq;
 		const u32 *hws =
 			&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
 		const u8 num_entries = execlists->csb_size;
@@ -1316,26 +1321,28 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
 		}
 
 		rcu_read_lock();
-		for (idx = 0; idx < execlists_num_ports(execlists); idx++) {
-			struct i915_request *rq;
-			unsigned int count;
-
-			rq = port_unpack(&execlists->port[idx], &count);
-			if (rq) {
-				char hdr[80];
-
-				snprintf(hdr, sizeof(hdr),
-					 "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
-					 idx, count,
-					 i915_ggtt_offset(rq->ring->vma),
-					 rq->timeline->hwsp_offset,
-					 hwsp_seqno(rq));
-				print_request(m, rq, hdr);
-			} else {
-				drm_printf(m, "\t\tELSP[%d] idle\n", idx);
-			}
+		for (port = execlists->active; (rq = *port); port++) {
+			char hdr[80];
+
+			snprintf(hdr, sizeof(hdr),
+				 "\t\tActive[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
+				 (int)(port - execlists->active),
+				 i915_ggtt_offset(rq->ring->vma),
+				 rq->timeline->hwsp_offset,
+				 hwsp_seqno(rq));
+			print_request(m, rq, hdr);
+		}
+		for (port = execlists->pending; (rq = *port); port++) {
+			char hdr[80];
+
+			snprintf(hdr, sizeof(hdr),
+				 "\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
+				 (int)(port - execlists->pending),
+				 i915_ggtt_offset(rq->ring->vma),
+				 rq->timeline->hwsp_offset,
+				 hwsp_seqno(rq));
+			print_request(m, rq, hdr);
 		}
-		drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active);
 		rcu_read_unlock();
 	} else if (INTEL_GEN(dev_priv) > 6) {
 		drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
@@ -1501,15 +1508,19 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 	}
 
 	if (engine->stats.enabled++ == 0) {
-		const struct execlist_port *port = execlists->port;
-		unsigned int num_ports = execlists_num_ports(execlists);
+		struct i915_request * const *port;
+		struct i915_request *rq;
 
 		engine->stats.enabled_at = ktime_get();
 
 		/* XXX submission method oblivious? */
-		while (num_ports-- && port_isset(port)) {
+		for (port = execlists->active; (rq = *port); port++)
 			engine->stats.active++;
-			port++;
+
+		for (port = execlists->pending; (rq = *port); port++) {
+			/* Exclude any contexts already counted in active */
+			if (intel_context_inflight_count(rq->hw_context) == 1)
+				engine->stats.active++;
 		}
 
 		if (engine->stats.active)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index f2aecc4450fb..30292e17b827 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -160,51 +160,10 @@ struct intel_engine_execlists {
 	 */
 	u32 __iomem *ctrl_reg;
 
-	/**
-	 * @port: execlist port states
-	 *
-	 * For each hardware ELSP (ExecList Submission Port) we keep
-	 * track of the last request and the number of times we submitted
-	 * that port to hw. We then count the number of times the hw reports
-	 * a context completion or preemption. As only one context can
-	 * be active on hw, we limit resubmission of context to port[0]. This
-	 * is called Lite Restore, of the context.
-	 */
-	struct execlist_port {
-		/**
-		 * @request_count: combined request and submission count
-		 */
-		struct i915_request *request_count;
-#define EXECLIST_COUNT_BITS 2
-#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
-#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
-#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
-#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
-#define port_set(p, packed) ((p)->request_count = (packed))
-#define port_isset(p) ((p)->request_count)
-#define port_index(p, execlists) ((p) - (execlists)->port)
-
-		/**
-		 * @context_id: context ID for port
-		 */
-		GEM_DEBUG_DECL(u32 context_id);
-
 #define EXECLIST_MAX_PORTS 2
-	} port[EXECLIST_MAX_PORTS];
-
-	/**
-	 * @active: is the HW active? We consider the HW as active after
-	 * submitting any context for execution and until we have seen the
-	 * last context completion event. After that, we do not expect any
-	 * more events until we submit, and so can park the HW.
-	 *
-	 * As we have a small number of different sources from which we feed
-	 * the HW, we track the state of each inside a single bitfield.
-	 */
-	unsigned int active;
-#define EXECLISTS_ACTIVE_USER 0
-#define EXECLISTS_ACTIVE_PREEMPT 1
-#define EXECLISTS_ACTIVE_HWACK 2
+	struct i915_request * const *active;
+	struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */];
+	struct i915_request *pending[EXECLIST_MAX_PORTS + 1];
 
 	/**
 	 * @port_mask: number of execlist ports - 1
@@ -245,11 +204,6 @@ struct intel_engine_execlists {
 	 */
 	u32 *csb_status;
 
-	/**
-	 * @preempt_complete_status: expected CSB upon completing preemption
-	 */
-	u32 preempt_complete_status;
-
 	/**
 	 * @csb_size: context status buffer FIFO size
 	 */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 48086602a0db..f3b36a7296e1 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -161,6 +161,8 @@
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
 	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
 
+#define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
+
 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
 #define WA_TAIL_DWORDS 2
@@ -222,6 +224,14 @@ static void execlists_init_reg_state(u32 *reg_state,
 				     struct intel_engine_cs *engine,
 				     struct intel_ring *ring);
 
+static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
+{
+	return (i915_ggtt_offset(engine->status_page.vma) +
+		I915_GEM_HWS_PREEMPT_ADDR);
+}
+
+#define ring_suspend(E) ((E)->status_page.addr[I915_GEM_HWS_PREEMPT])
+
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 {
 	return rb_entry(rb, struct i915_priolist, node);
@@ -283,12 +293,6 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 {
 	int last_prio;
 
-	if (!engine->preempt_context)
-		return false;
-
-	if (i915_request_completed(rq))
-		return false;
-
 	/*
 	 * Check if the current priority hint merits a preemption attempt.
 	 *
@@ -350,9 +354,6 @@ __maybe_unused static inline bool
 assert_priority_queue(const struct i915_request *prev,
 		      const struct i915_request *next)
 {
-	const struct intel_engine_execlists *execlists =
-		&prev->engine->execlists;
-
 	/*
 	 * Without preemption, the prev may refer to the still active element
 	 * which we refuse to let go.
@@ -360,7 +361,7 @@ assert_priority_queue(const struct i915_request *prev,
 	 * Even with preemption, there are times when we think it is better not
 	 * to preempt and leave an ostensibly lower priority request in flight.
 	 */
-	if (port_request(execlists->port) == prev)
+	if (i915_request_is_active(prev))
 		return true;
 
 	return rq_prio(prev) >= rq_prio(next);
@@ -454,13 +455,11 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 		struct intel_engine_cs *owner;
 
 		if (i915_request_completed(rq))
-			break;
+			continue; /* XXX */
 
 		__i915_request_unsubmit(rq);
 		unwind_wa_tail(rq);
 
-		GEM_BUG_ON(rq->hw_context->inflight);
-
 		/*
 		 * Push the request back into the queue for later resubmission.
 		 * If this request is not native to this physical engine (i.e.
@@ -510,6 +509,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	if ( ~prio & ACTIVE_PRIORITY &&
 	    active && __i915_request_has_started(active)) {
 		prio |= ACTIVE_PRIORITY;
+		GEM_BUG_ON(active->sched.attr.priority >= prio);
 		active->sched.attr.priority = prio;
 		list_move_tail(&active->sched.link,
 			       i915_sched_lookup_priolist(engine, prio));
@@ -541,32 +541,32 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
 				   status, rq);
 }
 
-inline void
-execlists_user_begin(struct intel_engine_execlists *execlists,
-		     const struct execlist_port *port)
+static inline struct i915_request *
+execlists_schedule_in(struct i915_request *rq, int idx)
 {
-	execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER);
-}
+	struct intel_context *ce = rq->hw_context;
+	int count;
 
-inline void
-execlists_user_end(struct intel_engine_execlists *execlists)
-{
-	execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
-}
+	trace_i915_request_in(rq, idx);
 
-static inline void
-execlists_context_schedule_in(struct i915_request *rq)
-{
-	GEM_BUG_ON(rq->hw_context->inflight);
+	count = intel_context_inflight_count(ce);
+	if (!count) {
+		intel_context_get(ce);
+		ce->inflight = rq->engine;
+
+		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+		intel_engine_context_in(ce->inflight);
+	}
+
+	intel_context_inflight_inc(ce);
+	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
 
-	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
-	intel_engine_context_in(rq->engine);
-	rq->hw_context->inflight = rq->engine;
+	return i915_request_get(rq);
 }
 
-static void kick_siblings(struct i915_request *rq)
+static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
 {
-	struct virtual_engine *ve = to_virtual_engine(rq->hw_context->engine);
+	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
 	struct i915_request *next = READ_ONCE(ve->request);
 
 	if (next && next->execution_mask & ~rq->execution_mask)
@@ -574,29 +574,42 @@ static void kick_siblings(struct i915_request *rq)
 }
 
 static inline void
-execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
+execlists_schedule_out(struct i915_request *rq)
 {
-	rq->hw_context->inflight = NULL;
-	intel_engine_context_out(rq->engine);
-	execlists_context_status_change(rq, status);
+	struct intel_context *ce = rq->hw_context;
+
+	GEM_BUG_ON(!intel_context_inflight_count(ce));
+
 	trace_i915_request_out(rq);
 
-	/*
-	 * If this is part of a virtual engine, its next request may have
-	 * been blocked waiting for access to the active context. We have
-	 * to kick all the siblings again in case we need to switch (e.g.
-	 * the next request is not runnable on this engine). Hopefully,
-	 * we will already have submitted the next request before the
-	 * tasklet runs and do not need to rebuild each virtual tree
-	 * and kick everyone again.
-	 */
-	if (rq->engine != rq->hw_context->engine)
-		kick_siblings(rq);
+	intel_context_inflight_dec(ce);
+	if (!intel_context_inflight_count(ce)) {
+		intel_engine_context_out(ce->inflight);
+		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+
+		ce->inflight = NULL;
+		intel_context_put(ce);
+
+		/*
+		 * If this is part of a virtual engine, its next request may
+		 * have been blocked waiting for access to the active context.
+		 * We have to kick all the siblings again in case we need to
+		 * switch (e.g. the next request is not runnable on this
+		 * engine). Hopefully, we will already have submitted the next
+		 * request before the tasklet runs and do not need to rebuild
+		 * each virtual tree and kick everyone again.
+		 */
+		if (rq->engine != ce->engine)
+			kick_siblings(rq, ce);
+	}
+
+	i915_request_put(rq);
 }
 
-static u64 execlists_update_context(struct i915_request *rq)
+static u64 execlists_update_context(const struct i915_request *rq)
 {
 	struct intel_context *ce = rq->hw_context;
+	u64 desc;
 
 	ce->lrc_reg_state[CTX_RING_TAIL + 1] =
 		intel_ring_set_tail(rq->ring, rq->tail);
@@ -617,7 +630,11 @@ static u64 execlists_update_context(struct i915_request *rq)
 	 * wmb).
 	 */
 	mb();
-	return ce->lrc_desc;
+
+	desc = ce->lrc_desc;
+	ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
+
+	return desc;
 }
 
 static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
@@ -631,12 +648,54 @@ static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc
 	}
 }
 
+static __maybe_unused void
+trace_ports(const struct intel_engine_execlists *execlists,
+	    const char *msg,
+	    struct i915_request * const *ports)
+{
+	const struct intel_engine_cs *engine =
+		container_of(execlists, typeof(*engine), execlists);
+
+	GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n",
+		  engine->name, msg,
+		  ports[0]->fence.context,
+		  ports[0]->fence.seqno,
+		  i915_request_completed(ports[0]) ? "!" :
+		  i915_request_started(ports[0]) ? "*" :
+		  "",
+		  ports[1] ? ports[1]->fence.context : 0,
+		  ports[1] ? ports[1]->fence.seqno : 0);
+}
+
+static __maybe_unused bool
+assert_pending_valid(const struct intel_engine_execlists *execlists,
+		     const char *msg)
+{
+	struct i915_request * const *port, *rq;
+	struct intel_context *ce = NULL;
+
+	trace_ports(execlists, msg, execlists->pending);
+
+	if (execlists->pending[execlists_num_ports(execlists)])
+		return false;
+
+	for (port = execlists->pending; (rq = *port); port++) {
+		if (ce == rq->hw_context)
+			return false;
+
+		ce = rq->hw_context;
+	}
+
+	return ce;
+}
+
 static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists *execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
 	unsigned int n;
 
+	GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
+
 	/*
 	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
 	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
@@ -654,38 +713,16 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 	 * of elsq entries, keep this in mind before changing the loop below.
 	 */
 	for (n = execlists_num_ports(execlists); n--; ) {
-		struct i915_request *rq;
-		unsigned int count;
-		u64 desc;
+		struct i915_request *rq = execlists->pending[n];
 
-		rq = port_unpack(&port[n], &count);
-		if (rq) {
-			GEM_BUG_ON(count > !n);
-			if (!count++)
-				execlists_context_schedule_in(rq);
-			port_set(&port[n], port_pack(rq, count));
-			desc = execlists_update_context(rq);
-			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
-
-			GEM_TRACE("%s in[%d]:  ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
-				  engine->name, n,
-				  port[n].context_id, count,
-				  rq->fence.context, rq->fence.seqno,
-				  hwsp_seqno(rq),
-				  rq_prio(rq));
-		} else {
-			GEM_BUG_ON(!n);
-			desc = 0;
-		}
-
-		write_desc(execlists, desc, n);
+		write_desc(execlists,
+			   rq ? execlists_update_context(rq) : 0,
+			   n);
 	}
 
 	/* we need to manually load the submit queue */
 	if (execlists->ctrl_reg)
 		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
-
-	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
 }
 
 static bool ctx_single_port_submission(const struct intel_context *ce)
@@ -709,6 +746,7 @@ static bool can_merge_ctx(const struct intel_context *prev,
 static bool can_merge_rq(const struct i915_request *prev,
 			 const struct i915_request *next)
 {
+	GEM_BUG_ON(prev == next);
 	GEM_BUG_ON(!assert_priority_queue(prev, next));
 
 	if (!can_merge_ctx(prev->hw_context, next->hw_context))
@@ -717,58 +755,6 @@ static bool can_merge_rq(const struct i915_request *prev,
 	return true;
 }
 
-static void port_assign(struct execlist_port *port, struct i915_request *rq)
-{
-	GEM_BUG_ON(rq == port_request(port));
-
-	if (port_isset(port))
-		i915_request_put(port_request(port));
-
-	port_set(port, port_pack(i915_request_get(rq), port_count(port)));
-}
-
-static void inject_preempt_context(struct intel_engine_cs *engine)
-{
-	struct intel_engine_execlists *execlists = &engine->execlists;
-	struct intel_context *ce = engine->preempt_context;
-	unsigned int n;
-
-	GEM_BUG_ON(execlists->preempt_complete_status !=
-		   upper_32_bits(ce->lrc_desc));
-
-	/*
-	 * Switch to our empty preempt context so
-	 * the state of the GPU is known (idle).
-	 */
-	GEM_TRACE("%s\n", engine->name);
-	for (n = execlists_num_ports(execlists); --n; )
-		write_desc(execlists, 0, n);
-
-	write_desc(execlists, ce->lrc_desc, n);
-
-	/* we need to manually load the submit queue */
-	if (execlists->ctrl_reg)
-		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
-
-	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
-	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
-
-	(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
-}
-
-static void complete_preempt_context(struct intel_engine_execlists *execlists)
-{
-	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
-
-	if (inject_preempt_hang(execlists))
-		return;
-
-	execlists_cancel_port_requests(execlists);
-	__unwind_incomplete_requests(container_of(execlists,
-						  struct intel_engine_cs,
-						  execlists));
-}
-
 static void virtual_update_register_offsets(u32 *regs,
 					    struct intel_engine_cs *engine)
 {
@@ -831,20 +817,30 @@ static bool virtual_matches(const struct virtual_engine *ve,
 	 * we reuse the register offsets). This is a very small
 	 * hysteresis on the greedy selection algorithm.
 	 */
-	inflight = READ_ONCE(ve->context.inflight);
+	inflight = intel_context_inflight(&ve->context);
 	if (inflight && inflight != engine)
 		return false;
 
 	return true;
 }
 
-static void execlists_dequeue(struct intel_engine_cs *engine)
+static struct i915_request *
+last_active(const struct intel_engine_execlists *execlists)
+{
+	struct i915_request * const *last = execlists->active;
+
+	while (*last && i915_request_completed(*last))
+		last++;
+
+	return *last;
+}
+
+static bool execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
-	const struct execlist_port * const last_port =
-		&execlists->port[execlists->port_mask];
-	struct i915_request *last = port_request(port);
+	struct i915_request **port = execlists->pending;
+	struct i915_request ** const last_port = port + execlists->port_mask;
+	struct i915_request *last;
 	struct rb_node *rb;
 	bool submit = false;
 
@@ -890,65 +886,72 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		break;
 	}
 
+	/*
+	 * If the queue is higher priority than the last
+	 * request in the currently active context, submit afresh.
+	 * We will resubmit again afterwards in case we need to split
+	 * the active context to interject the preemption request,
+	 * i.e. we will retrigger preemption following the ack in case
+	 * of trouble.
+	 */
+	last = last_active(execlists);
 	if (last) {
-		/*
-		 * Don't resubmit or switch until all outstanding
-		 * preemptions (lite-restore) are seen. Then we
-		 * know the next preemption status we see corresponds
-		 * to this ELSP update.
-		 */
-		GEM_BUG_ON(!execlists_is_active(execlists,
-						EXECLISTS_ACTIVE_USER));
-		GEM_BUG_ON(!port_count(&port[0]));
-
-		/*
-		 * If we write to ELSP a second time before the HW has had
-		 * a chance to respond to the previous write, we can confuse
-		 * the HW and hit "undefined behaviour". After writing to ELSP,
-		 * we must then wait until we see a context-switch event from
-		 * the HW to indicate that it has had a chance to respond.
-		 */
-		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
-			return;
-
 		if (need_preempt(engine, last, rb)) {
-			inject_preempt_context(engine);
-			return;
-		}
+			GEM_TRACE("%s: preempting last=%llx:%lld, prio=%d, hint=%d\n",
+				  engine->name,
+				  last->fence.context,
+				  last->fence.seqno,
+				  last->sched.attr.priority,
+				  execlists->queue_priority_hint);
+			/*
+			 * Don't let the RING_HEAD advance past the breadcrumb
+			 * as we unwind (and until we resubmit) so that we do
+			 * not accidentally tell it to go backwards.
+			 */
+			ring_suspend(engine) = 1;
 
-		/*
-		 * In theory, we could coalesce more requests onto
-		 * the second port (the first port is active, with
-		 * no preemptions pending). However, that means we
-		 * then have to deal with the possible lite-restore
-		 * of the second port (as we submit the ELSP, there
-		 * may be a context-switch) but also we may complete
-		 * the resubmission before the context-switch. Ergo,
-		 * coalescing onto the second port will cause a
-		 * preemption event, but we cannot predict whether
-		 * that will affect port[0] or port[1].
-		 *
-		 * If the second port is already active, we can wait
-		 * until the next context-switch before contemplating
-		 * new requests. The GPU will be busy and we should be
-		 * able to resubmit the new ELSP before it idles,
-		 * avoiding pipeline bubbles (momentary pauses where
-		 * the driver is unable to keep up the supply of new
-		 * work). However, we have to double check that the
-		 * priorities of the ports haven't been switch.
-		 */
-		if (port_count(&port[1]))
-			return;
+			/*
+			 * Note that we have not stopped the GPU at this point,
+			 * so we are unwinding the incomplete requests as they
+			 * remain inflight and so by the time we do complete
+			 * the preemption, some of the unwound requests may
+			 * complete!
+			 */
+			__unwind_incomplete_requests(engine);
 
-		/*
-		 * WaIdleLiteRestore:bdw,skl
-		 * Apply the wa NOOPs to prevent
-		 * ring:HEAD == rq:TAIL as we resubmit the
-		 * request. See gen8_emit_fini_breadcrumb() for
-		 * where we prepare the padding after the
-		 * end of the request.
-		 */
-		last->tail = last->wa_tail;
+			/*
+			 * If we need to return to the preempted context, we
+			 * need to skip the lite-restore and force it to
+			 * reload the RING_TAIL. Otherwise, the HW has a
+			 * tendency to ignore us rewinding the TAIL to the
+			 * end of an earlier request.
+			 */
+			last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+			last = NULL;
+		} else {
+			/*
+			 * Otherwise if we already have a request pending
+			 * for execution after the current one, we can
+			 * just wait until the next CS event before
+			 * queuing more. In either case we will force a
+			 * lite-restore preemption event, but if we wait
+			 * we hopefully coalesce several updates into a single
+			 * submission.
+			 */
+			if (!list_is_last(&last->sched.link,
+					  &engine->active.requests))
+				return false;
+
+			/*
+			 * WaIdleLiteRestore:bdw,skl
+			 * Apply the wa NOOPs to prevent
+			 * ring:HEAD == rq:TAIL as we resubmit the
+			 * request. See gen8_emit_fini_breadcrumb() for
+			 * where we prepare the padding after the
+			 * end of the request.
+			 */
+			last->tail = last->wa_tail;
+		}
 	}
 
 	while (rb) { /* XXX virtual is always taking precedence */
@@ -978,9 +981,24 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				continue;
 			}
 
+			if (i915_request_completed(rq)) {
+				ve->request = NULL;
+				ve->base.execlists.queue_priority_hint = INT_MIN;
+				rb_erase_cached(rb, &execlists->virtual);
+				RB_CLEAR_NODE(rb);
+
+				rq->engine = engine;
+				__i915_request_submit(rq);
+
+				spin_unlock(&ve->base.active.lock);
+
+				rb = rb_first_cached(&execlists->virtual);
+				continue;
+			}
+
 			if (last && !can_merge_rq(last, rq)) {
 				spin_unlock(&ve->base.active.lock);
-				return; /* leave this rq for another engine */
+				return false; /* leave this for another */
 			}
 
 			GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
@@ -1026,9 +1044,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			}
 
 			__i915_request_submit(rq);
-			trace_i915_request_in(rq, port_index(port, execlists));
-			submit = true;
-			last = rq;
+			if (!i915_request_completed(rq)) {
+				submit = true;
+				last = rq;
+			}
 		}
 
 		spin_unlock(&ve->base.active.lock);
@@ -1041,6 +1060,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		int i;
 
 		priolist_for_each_request_consume(rq, rn, p, i) {
+			if (i915_request_completed(rq))
+				goto skip;
+
 			/*
 			 * Can we combine this request with the current port?
 			 * It has to be the same context/ringbuffer and not
@@ -1080,19 +1102,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				    ctx_single_port_submission(rq->hw_context))
 					goto done;
 
-
-				if (submit)
-					port_assign(port, last);
+				*port = execlists_schedule_in(last, port - execlists->pending);
 				port++;
-
-				GEM_BUG_ON(port_isset(port));
 			}
 
-			__i915_request_submit(rq);
-			trace_i915_request_in(rq, port_index(port, execlists));
-
 			last = rq;
 			submit = true;
+skip:
+			__i915_request_submit(rq);
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
@@ -1117,54 +1134,32 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * interrupt for secondary ports).
 	 */
 	execlists->queue_priority_hint = queue_prio(execlists);
+	GEM_TRACE("%s: queue_priority_hint:%d, submit:%s\n",
+		  engine->name, execlists->queue_priority_hint,
+		  yesno(submit));
 
 	if (submit) {
-		port_assign(port, last);
+		*port = execlists_schedule_in(last, port - execlists->pending);
+		memset(port + 1, 0, (last_port - port) * sizeof(*port));
 		execlists_submit_ports(engine);
 	}
 
-	/* We must always keep the beast fed if we have work piled up */
-	GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
-		   !port_isset(execlists->port));
-
-	/* Re-evaluate the executing context setup after each preemptive kick */
-	if (last)
-		execlists_user_begin(execlists, execlists->port);
-
-	/* If the engine is now idle, so should be the flag; and vice versa. */
-	GEM_BUG_ON(execlists_is_active(&engine->execlists,
-				       EXECLISTS_ACTIVE_USER) ==
-		   !port_isset(engine->execlists.port));
+	return submit;
 }
 
 void
 execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
 {
-	struct execlist_port *port = execlists->port;
-	unsigned int num_ports = execlists_num_ports(execlists);
-
-	while (num_ports-- && port_isset(port)) {
-		struct i915_request *rq = port_request(port);
-
-		GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n",
-			  rq->engine->name,
-			  (unsigned int)(port - execlists->port),
-			  rq->fence.context, rq->fence.seqno,
-			  hwsp_seqno(rq));
-
-		GEM_BUG_ON(!execlists->active);
-		execlists_context_schedule_out(rq,
-					       i915_request_completed(rq) ?
-					       INTEL_CONTEXT_SCHEDULE_OUT :
-					       INTEL_CONTEXT_SCHEDULE_PREEMPTED);
+	struct i915_request * const *port, *rq;
 
-		i915_request_put(rq);
+	for (port = execlists->pending; (rq = *port); port++)
+		execlists_schedule_out(rq);
+	memset(execlists->pending, 0, sizeof(execlists->pending));
 
-		memset(port, 0, sizeof(*port));
-		port++;
-	}
-
-	execlists_clear_all_active(execlists);
+	for (port = execlists->active; (rq = *port); port++)
+		execlists_schedule_out(rq);
+	execlists->active =
+		memset(execlists->inflight, 0, sizeof(execlists->inflight));
 }
 
 static inline void
@@ -1183,7 +1178,6 @@ reset_in_progress(const struct intel_engine_execlists *execlists)
 static void process_csb(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
 	const u32 * const buf = execlists->csb_status;
 	const u8 num_entries = execlists->csb_size;
 	u8 head, tail;
@@ -1217,9 +1211,7 @@ static void process_csb(struct intel_engine_cs *engine)
 	rmb();
 
 	do {
-		struct i915_request *rq;
 		unsigned int status;
-		unsigned int count;
 
 		if (++head == num_entries)
 			head = 0;
@@ -1242,68 +1234,37 @@ static void process_csb(struct intel_engine_cs *engine)
 		 * status notifier.
 		 */
 
-		GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
+		GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x\n",
 			  engine->name, head,
-			  buf[2 * head + 0], buf[2 * head + 1],
-			  execlists->active);
+			  buf[2 * head + 0], buf[2 * head + 1]);
 
 		status = buf[2 * head];
-		if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-			      GEN8_CTX_STATUS_PREEMPTED))
-			execlists_set_active(execlists,
-					     EXECLISTS_ACTIVE_HWACK);
-		if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
-			execlists_clear_active(execlists,
-					       EXECLISTS_ACTIVE_HWACK);
-
-		if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-			continue;
-
-		/* We should never get a COMPLETED | IDLE_ACTIVE! */
-		GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
-
-		if (status & GEN8_CTX_STATUS_COMPLETE &&
-		    buf[2*head + 1] == execlists->preempt_complete_status) {
-			GEM_TRACE("%s preempt-idle\n", engine->name);
-			complete_preempt_context(execlists);
-			continue;
-		}
+		if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) {
+promote:
+			GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
+			execlists->active =
+				memcpy(execlists->inflight,
+				       execlists->pending,
+				       execlists_num_ports(execlists) *
+				       sizeof(*execlists->pending));
+			execlists->pending[0] = NULL;
 
-		if (status & GEN8_CTX_STATUS_PREEMPTED &&
-		    execlists_is_active(execlists,
-					EXECLISTS_ACTIVE_PREEMPT))
-			continue;
+			if (!inject_preempt_hang(execlists))
+				ring_suspend(engine) = 0;
+		} else if (status & GEN8_CTX_STATUS_PREEMPTED) {
+			struct i915_request * const *port = execlists->active;
 
-		GEM_BUG_ON(!execlists_is_active(execlists,
-						EXECLISTS_ACTIVE_USER));
+			trace_ports(execlists, "preempted", execlists->active);
 
-		rq = port_unpack(port, &count);
-		GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
-			  engine->name,
-			  port->context_id, count,
-			  rq ? rq->fence.context : 0,
-			  rq ? rq->fence.seqno : 0,
-			  rq ? hwsp_seqno(rq) : 0,
-			  rq ? rq_prio(rq) : 0);
+			while (*port)
+				execlists_schedule_out(*port++);
 
-		/* Check the context/desc id for this event matches */
-		GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
+			goto promote;
+		} else if (*execlists->active) {
+			struct i915_request *rq = *execlists->active++;
 
-		GEM_BUG_ON(count == 0);
-		if (--count == 0) {
-			/*
-			 * On the final event corresponding to the
-			 * submission of this context, we expect either
-			 * an element-switch event or a completion
-			 * event (and on completion, the active-idle
-			 * marker). No more preemptions, lite-restore
-			 * or otherwise.
-			 */
-			GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
-			GEM_BUG_ON(port_isset(&port[1]) &&
-				   !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
-			GEM_BUG_ON(!port_isset(&port[1]) &&
-				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+			trace_ports(execlists, "completed",
+				    execlists->active - 1);
 
 			/*
 			 * We rely on the hardware being strongly
@@ -1312,21 +1273,10 @@ static void process_csb(struct intel_engine_cs *engine)
 			 * user interrupt and CSB is processed.
 			 */
 			GEM_BUG_ON(!i915_request_completed(rq));
+			execlists_schedule_out(rq);
 
-			execlists_context_schedule_out(rq,
-						       INTEL_CONTEXT_SCHEDULE_OUT);
-			i915_request_put(rq);
-
-			GEM_TRACE("%s completed ctx=%d\n",
-				  engine->name, port->context_id);
-
-			port = execlists_port_complete(execlists, port);
-			if (port_isset(port))
-				execlists_user_begin(execlists, port);
-			else
-				execlists_user_end(execlists);
-		} else {
-			port_set(port, port_pack(rq, count));
+			GEM_BUG_ON(execlists->active - execlists->inflight >
+				   execlists_num_ports(execlists));
 		}
 	} while (head != tail);
 
@@ -1348,11 +1298,17 @@ static void process_csb(struct intel_engine_cs *engine)
 
 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 {
+	bool submit;
+
 	lockdep_assert_held(&engine->active.lock);
 
-	process_csb(engine);
-	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
-		execlists_dequeue(engine);
+	do {
+		process_csb(engine);
+
+		submit = false;
+		if (!engine->execlists.pending[0])
+			submit = execlists_dequeue(engine);
+	} while (submit);
 }
 
 /*
@@ -1364,11 +1320,6 @@ static void execlists_submission_tasklet(unsigned long data)
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	unsigned long flags;
 
-	GEM_TRACE("%s awake?=%d, active=%x\n",
-		  engine->name,
-		  !!engine->i915->gt.awake,
-		  engine->execlists.active);
-
 	spin_lock_irqsave(&engine->active.lock, flags);
 	__execlists_submission_tasklet(engine);
 	spin_unlock_irqrestore(&engine->active.lock, flags);
@@ -1394,12 +1345,16 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)
 		tasklet_hi_schedule(&execlists->tasklet);
 }
 
-static void submit_queue(struct intel_engine_cs *engine, int prio)
+static void submit_queue(struct intel_engine_cs *engine,
+			 const struct i915_request *rq)
 {
-	if (prio > engine->execlists.queue_priority_hint) {
-		engine->execlists.queue_priority_hint = prio;
-		__submit_queue_imm(engine);
-	}
+	struct intel_engine_execlists *execlists = &engine->execlists;
+
+	if (rq_prio(rq) <= execlists->queue_priority_hint)
+		return;
+
+	execlists->queue_priority_hint = rq_prio(rq);
+	__submit_queue_imm(engine);
 }
 
 static void execlists_submit_request(struct i915_request *request)
@@ -1415,7 +1370,7 @@ static void execlists_submit_request(struct i915_request *request)
 	GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 	GEM_BUG_ON(list_empty(&request->sched.link));
 
-	submit_queue(engine, rq_prio(request));
+	submit_queue(engine, request);
 
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
@@ -2072,27 +2027,13 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
-static bool lrc_regs_ok(const struct i915_request *rq)
-{
-	const struct intel_ring *ring = rq->ring;
-	const u32 *regs = rq->hw_context->lrc_reg_state;
-
-	/* Quick spot check for the common signs of context corruption */
-
-	if (regs[CTX_RING_BUFFER_CONTROL + 1] !=
-	    (RING_CTL_SIZE(ring->size) | RING_VALID))
-		return false;
-
-	if (regs[CTX_RING_BUFFER_START + 1] != i915_ggtt_offset(ring->vma))
-		return false;
-
-	return true;
-}
-
-static void reset_csb_pointers(struct intel_engine_execlists *execlists)
+static void reset_csb_pointers(struct intel_engine_cs *engine)
 {
+	struct intel_engine_execlists * const execlists = &engine->execlists;
 	const unsigned int reset_value = execlists->csb_size - 1;
 
+	ring_suspend(engine) = 0;
+
 	/*
 	 * After a reset, the HW starts writing into CSB entry [0]. We
 	 * therefore have to set our HEAD pointer back one entry so that
@@ -2139,18 +2080,19 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	process_csb(engine); /* drain preemption events */
 
 	/* Following the reset, we need to reload the CSB read/write pointers */
-	reset_csb_pointers(&engine->execlists);
+	reset_csb_pointers(engine);
 
 	/*
 	 * Save the currently executing context, even if we completed
 	 * its request, it was still running at the time of the
 	 * reset and will have been clobbered.
 	 */
-	if (!port_isset(execlists->port))
-		goto out_clear;
+	rq = execlists_active(execlists);
+	if (!rq)
+		return;
 
-	rq = port_request(execlists->port);
 	ce = rq->hw_context;
+	rq = active_request(rq);
 
 	/*
 	 * Catch up with any missed context-switch interrupts.
@@ -2163,9 +2105,12 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	 */
 	execlists_cancel_port_requests(execlists);
 
-	rq = active_request(rq);
-	if (!rq)
+	if (!rq) {
+		ce->ring->head = ce->ring->tail;
 		goto out_replay;
+	}
+
+	ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
 
 	/*
 	 * If this request hasn't started yet, e.g. it is waiting on a
@@ -2179,7 +2124,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	 * Otherwise, if we have not started yet, the request should replay
 	 * perfectly and we do not need to flag the result as being erroneous.
 	 */
-	if (!i915_request_started(rq) && lrc_regs_ok(rq))
+	if (!i915_request_started(rq))
 		goto out_replay;
 
 	/*
@@ -2194,7 +2139,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	 * image back to the expected values to skip over the guilty request.
 	 */
 	i915_reset_request(rq, stalled);
-	if (!stalled && lrc_regs_ok(rq))
+	if (!stalled)
 		goto out_replay;
 
 	/*
@@ -2214,17 +2159,13 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	execlists_init_reg_state(regs, ce, engine, ce->ring);
 
 out_replay:
-	/* Rerun the request; its payload has been neutered (if guilty). */
-	ce->ring->head =
-		rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail;
+	GEM_TRACE("%s replay {head:%04x, tail:%04x\n",
+		  engine->name, ce->ring->head, ce->ring->tail);
 	intel_ring_update_space(ce->ring);
 	__execlists_update_reg_state(ce, engine);
 
 	/* Push back any incomplete requests for replay after the reset. */
 	__unwind_incomplete_requests(engine);
-
-out_clear:
-	execlists_clear_all_active(execlists);
 }
 
 static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
@@ -2320,7 +2261,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 
 	execlists->queue_priority_hint = INT_MIN;
 	execlists->queue = RB_ROOT_CACHED;
-	GEM_BUG_ON(port_isset(execlists->port));
 
 	GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
 	execlists->tasklet.func = nop_submission_tasklet;
@@ -2538,6 +2478,19 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
 	return cs;
 }
 
+static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
+{
+	*cs++ = MI_SEMAPHORE_WAIT |
+		MI_SEMAPHORE_GLOBAL_GTT |
+		MI_SEMAPHORE_POLL |
+		MI_SEMAPHORE_SAD_EQ_SDD;
+	*cs++ = 0;
+	*cs++ = intel_hws_preempt_address(request->engine);
+	*cs++ = 0;
+
+	return cs;
+}
+
 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 {
 	cs = gen8_emit_ggtt_write(cs,
@@ -2552,7 +2505,9 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 
 
 	*cs++ = MI_USER_INTERRUPT;
+
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	cs = emit_preempt_busywait(request, cs);
 
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
@@ -2577,7 +2532,9 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 				      PIPE_CONTROL_STORE_DATA_INDEX);
 
 	*cs++ = MI_USER_INTERRUPT;
+
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	cs = emit_preempt_busywait(request, cs);
 
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
@@ -2626,8 +2583,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
 	if (!intel_vgpu_active(engine->i915))
 		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
-	if (engine->preempt_context &&
-	    HAS_LOGICAL_RING_PREEMPTION(engine->i915))
+	if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
 		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 }
 
@@ -2750,11 +2706,6 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
 			i915_mmio_reg_offset(RING_ELSP(base));
 	}
 
-	execlists->preempt_complete_status = ~0u;
-	if (engine->preempt_context)
-		execlists->preempt_complete_status =
-			upper_32_bits(engine->preempt_context->lrc_desc);
-
 	execlists->csb_status =
 		&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
 
@@ -2766,7 +2717,7 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
 	else
 		execlists->csb_size = GEN11_CSB_ENTRIES;
 
-	reset_csb_pointers(execlists);
+	reset_csb_pointers(engine);
 
 	return 0;
 }
@@ -2946,11 +2897,6 @@ populate_lr_context(struct intel_context *ce,
 	if (!engine->default_state)
 		regs[CTX_CONTEXT_CONTROL + 1] |=
 			_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
-	if (ce->gem_context == engine->i915->preempt_context &&
-	    INTEL_GEN(engine->i915) < 11)
-		regs[CTX_CONTEXT_CONTROL + 1] |=
-			_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-					   CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT);
 
 	ret = 0;
 err_unpin_ctx:
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a3a0e1537a0a..1020041b5f81 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1238,10 +1238,10 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
 	}
 }
 
-static void record_request(struct i915_request *request,
+static void record_request(const struct i915_request *request,
 			   struct drm_i915_error_request *erq)
 {
-	struct i915_gem_context *ctx = request->gem_context;
+	const struct i915_gem_context *ctx = request->gem_context;
 
 	erq->flags = request->fence.flags;
 	erq->context = request->fence.context;
@@ -1305,20 +1305,15 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 	ee->num_requests = count;
 }
 
-static void error_record_engine_execlists(struct intel_engine_cs *engine,
+static void error_record_engine_execlists(const struct intel_engine_cs *engine,
 					  struct drm_i915_error_engine *ee)
 {
 	const struct intel_engine_execlists * const execlists = &engine->execlists;
-	unsigned int n;
+	struct i915_request * const *port = execlists->active;
+	unsigned int n = 0;
 
-	for (n = 0; n < execlists_num_ports(execlists); n++) {
-		struct i915_request *rq = port_request(&execlists->port[n]);
-
-		if (!rq)
-			break;
-
-		record_request(rq, &ee->execlist[n]);
-	}
+	while (*port)
+		record_request(*port++, &ee->execlist[n++]);
 
 	ee->num_ports = n;
 }
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 54f5cf7d857b..755e9135cefe 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -232,6 +232,12 @@ static bool i915_request_retire(struct i915_request *rq)
 
 	local_irq_disable();
 
+	/*
+	 * We only loosely track inflight requests across preemption,
+	 * and so we may find ourselves attempting to retire a _completed_
+	 * request that we have removed from the HW and put back on a run
+	 * queue.
+	 */
 	spin_lock(&rq->engine->active.lock);
 	list_del(&rq->sched.link);
 	spin_unlock(&rq->engine->active.lock);
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index edbbdfec24ab..bebc1e9b4a5e 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -28,6 +28,7 @@
 #include <linux/dma-fence.h>
 #include <linux/lockdep.h>
 
+#include "gt/intel_context_types.h"
 #include "gt/intel_engine_types.h"
 
 #include "i915_gem.h"
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 946b5eb443bf..601aae909491 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -268,16 +268,11 @@ sched_lock_engine(const struct i915_sched_node *node,
 	return locked;
 }
 
-static bool inflight(const struct i915_request *rq,
-		     const struct intel_engine_cs *engine)
+static bool kick_tasklet(const struct intel_engine_cs *engine, int prio)
 {
-	const struct i915_request *active;
+	struct i915_request *inflight = *engine->execlists.active;
 
-	if (!i915_request_is_active(rq))
-		return false;
-
-	active = port_request(engine->execlists.port);
-	return active->hw_context == rq->hw_context;
+	return inflight && prio > inflight->sched.attr.priority;
 }
 
 static void __i915_schedule(struct i915_request *rq,
@@ -413,7 +408,7 @@ static void __i915_schedule(struct i915_request *rq,
 		 * If we are already the currently executing context, don't
 		 * bother evaluating if we should preempt ourselves.
 		 */
-		if (inflight(node_to_request(node), engine))
+		if (!kick_tasklet(engine, prio))
 			continue;
 
 		/* Defer (tasklet) submission until after all of our updates. */
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 1436fe2fb5f8..27ec164149c3 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -27,6 +27,7 @@
 
 #include <linux/kernel.h>
 #include <linux/overflow.h>
+#include <linux/workqueue.h>
 
 #undef WARN_ON
 /* Many gcc seem to no see through this and fall over :( */
@@ -128,6 +129,18 @@ __check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
 	((typeof(ptr))((unsigned long)(ptr) | __bits));			\
 })
 
+#define ptr_count_dec(p_ptr) do {					\
+	typeof(p_ptr) __p = (p_ptr);					\
+	unsigned long __v = (unsigned long)(*__p);			\
+	*__p = (typeof(*p_ptr))(--__v);					\
+} while (0)
+
+#define ptr_count_inc(p_ptr) do {					\
+	typeof(p_ptr) __p = (p_ptr);					\
+	unsigned long __v = (unsigned long)(*__p);			\
+	*__p = (typeof(*p_ptr))(++__v);					\
+} while (0)
+
 #define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT)
 #define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT)
 #define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT)
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 5ad111066cd5..5967f3b1e2a3 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -32,7 +32,11 @@
 #include "intel_guc_submission.h"
 #include "i915_drv.h"
 
-#define GUC_PREEMPT_FINISHED		0x1
+enum {
+	GUC_PREEMPT_NONE = 0,
+	GUC_PREEMPT_INPROGRESS,
+	GUC_PREEMPT_FINISHED,
+};
 #define GUC_PREEMPT_BREADCRUMB_DWORDS	0x8
 #define GUC_PREEMPT_BREADCRUMB_BYTES	\
 	(sizeof(u32) * GUC_PREEMPT_BREADCRUMB_DWORDS)
@@ -537,15 +541,11 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 	u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc);
 	u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
 
-	spin_lock(&client->wq_lock);
-
 	guc_wq_item_append(client, engine->guc_id, ctx_desc,
 			   ring_tail, rq->fence.seqno);
 	guc_ring_doorbell(client);
 
 	client->submissions[engine->id] += 1;
-
-	spin_unlock(&client->wq_lock);
 }
 
 /*
@@ -631,8 +631,9 @@ static void inject_preempt_context(struct work_struct *work)
 	data[6] = intel_guc_ggtt_offset(guc, guc->shared_data);
 
 	if (WARN_ON(intel_guc_send(guc, data, ARRAY_SIZE(data)))) {
-		execlists_clear_active(&engine->execlists,
-				       EXECLISTS_ACTIVE_PREEMPT);
+		intel_write_status_page(engine,
+					I915_GEM_HWS_PREEMPT,
+					GUC_PREEMPT_NONE);
 		tasklet_schedule(&engine->execlists.tasklet);
 	}
 
@@ -672,8 +673,6 @@ static void complete_preempt_context(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists *execlists = &engine->execlists;
 
-	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
-
 	if (inject_preempt_hang(execlists))
 		return;
 
@@ -681,88 +680,90 @@ static void complete_preempt_context(struct intel_engine_cs *engine)
 	execlists_unwind_incomplete_requests(execlists);
 
 	wait_for_guc_preempt_report(engine);
-	intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, 0);
+	intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, GUC_PREEMPT_NONE);
 }
 
-/**
- * guc_submit() - Submit commands through GuC
- * @engine: engine associated with the commands
- *
- * The only error here arises if the doorbell hardware isn't functioning
- * as expected, which really shouln't happen.
- */
-static void guc_submit(struct intel_engine_cs *engine)
+static void guc_submit(struct intel_engine_cs *engine,
+		       struct i915_request **out,
+		       struct i915_request **end)
 {
 	struct intel_guc *guc = &engine->i915->guc;
-	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
-	unsigned int n;
+	struct intel_guc_client *client = guc->execbuf_client;
 
-	for (n = 0; n < execlists_num_ports(execlists); n++) {
-		struct i915_request *rq;
-		unsigned int count;
+	spin_lock(&client->wq_lock);
 
-		rq = port_unpack(&port[n], &count);
-		if (rq && count == 0) {
-			port_set(&port[n], port_pack(rq, ++count));
+	do {
+		struct i915_request *rq = *out++;
 
-			flush_ggtt_writes(rq->ring->vma);
+		flush_ggtt_writes(rq->ring->vma);
+		guc_add_request(guc, rq);
+	} while (out != end);
 
-			guc_add_request(guc, rq);
-		}
-	}
+	spin_unlock(&client->wq_lock);
 }
 
-static void port_assign(struct execlist_port *port, struct i915_request *rq)
+static inline int rq_prio(const struct i915_request *rq)
 {
-	GEM_BUG_ON(port_isset(port));
-
-	port_set(port, i915_request_get(rq));
+	return rq->sched.attr.priority | __NO_PREEMPTION;
 }
 
-static inline int rq_prio(const struct i915_request *rq)
+static struct i915_request *schedule_in(struct i915_request *rq, int idx)
 {
-	return rq->sched.attr.priority;
+	trace_i915_request_in(rq, idx);
+
+	if (!rq->hw_context->inflight)
+		rq->hw_context->inflight = rq->engine;
+	intel_context_inflight_inc(rq->hw_context);
+
+	return i915_request_get(rq);
 }
 
-static inline int port_prio(const struct execlist_port *port)
+static void schedule_out(struct i915_request *rq)
 {
-	return rq_prio(port_request(port)) | __NO_PREEMPTION;
+	trace_i915_request_out(rq);
+
+	intel_context_inflight_dec(rq->hw_context);
+	if (!intel_context_inflight_count(rq->hw_context))
+		rq->hw_context->inflight = NULL;
+
+	i915_request_put(rq);
 }
 
-static bool __guc_dequeue(struct intel_engine_cs *engine)
+static void __guc_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
-	struct i915_request *last = NULL;
-	const struct execlist_port * const last_port =
-		&execlists->port[execlists->port_mask];
+	struct i915_request **first = execlists->inflight;
+	struct i915_request ** const last_port = first + execlists->port_mask;
+	struct i915_request *last = first[0];
+	struct i915_request **port;
 	bool submit = false;
 	struct rb_node *rb;
 
 	lockdep_assert_held(&engine->active.lock);
 
-	if (port_isset(port)) {
+	if (last) {
 		if (intel_engine_has_preemption(engine)) {
 			struct guc_preempt_work *preempt_work =
 				&engine->i915->guc.preempt_work[engine->id];
 			int prio = execlists->queue_priority_hint;
 
-			if (__execlists_need_preempt(prio, port_prio(port))) {
-				execlists_set_active(execlists,
-						     EXECLISTS_ACTIVE_PREEMPT);
+			if (__execlists_need_preempt(prio, rq_prio(last))) {
+				intel_write_status_page(engine,
+							I915_GEM_HWS_PREEMPT,
+							GUC_PREEMPT_INPROGRESS);
 				queue_work(engine->i915->guc.preempt_wq,
 					   &preempt_work->work);
-				return false;
+				return;
 			}
 		}
 
-		port++;
-		if (port_isset(port))
-			return false;
+		if (*++first)
+			return;
+
+		last = NULL;
 	}
-	GEM_BUG_ON(port_isset(port));
 
+	port = first;
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
@@ -773,18 +774,15 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 				if (port == last_port)
 					goto done;
 
-				if (submit)
-					port_assign(port, last);
+				*port = schedule_in(last,
+						    port - execlists->inflight);
 				port++;
 			}
 
 			list_del_init(&rq->sched.link);
-
 			__i915_request_submit(rq);
-			trace_i915_request_in(rq, port_index(port, execlists));
-
-			last = rq;
 			submit = true;
+			last = rq;
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
@@ -793,58 +791,41 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 done:
 	execlists->queue_priority_hint =
 		rb ? to_priolist(rb)->priority : INT_MIN;
-	if (submit)
-		port_assign(port, last);
-	if (last)
-		execlists_user_begin(execlists, execlists->port);
-
-	/* We must always keep the beast fed if we have work piled up */
-	GEM_BUG_ON(port_isset(execlists->port) &&
-		   !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
-	GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
-		   !port_isset(execlists->port));
-
-	return submit;
-}
-
-static void guc_dequeue(struct intel_engine_cs *engine)
-{
-	if (__guc_dequeue(engine))
-		guc_submit(engine);
+	if (submit) {
+		*port = schedule_in(last, port - execlists->inflight);
+		*++port = NULL;
+		guc_submit(engine, first, port);
+	}
+	execlists->active = execlists->inflight;
 }
 
 static void guc_submission_tasklet(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
-	struct i915_request *rq;
+	struct i915_request **port, *rq;
 	unsigned long flags;
 
 	spin_lock_irqsave(&engine->active.lock, flags);
 
-	rq = port_request(port);
-	while (rq && i915_request_completed(rq)) {
-		trace_i915_request_out(rq);
-		i915_request_put(rq);
+	for (port = execlists->inflight; (rq = *port); port++) {
+		if (!i915_request_completed(rq))
+			break;
 
-		port = execlists_port_complete(execlists, port);
-		if (port_isset(port)) {
-			execlists_user_begin(execlists, port);
-			rq = port_request(port);
-		} else {
-			execlists_user_end(execlists);
-			rq = NULL;
-		}
+		schedule_out(rq);
+	}
+	if (port != execlists->inflight) {
+		int idx = port - execlists->inflight;
+		int rem = ARRAY_SIZE(execlists->inflight) - idx;
+		memmove(execlists->inflight, port, rem * sizeof(*port));
 	}
 
-	if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
-	    intel_read_status_page(engine, I915_GEM_HWS_PREEMPT) ==
+	if (intel_read_status_page(engine, I915_GEM_HWS_PREEMPT) ==
 	    GUC_PREEMPT_FINISHED)
 		complete_preempt_context(engine);
 
-	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
-		guc_dequeue(engine);
+	if (!intel_read_status_page(engine, I915_GEM_HWS_PREEMPT))
+		__guc_dequeue(engine);
 
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
@@ -958,7 +939,6 @@ static void guc_cancel_requests(struct intel_engine_cs *engine)
 
 	execlists->queue_priority_hint = INT_MIN;
 	execlists->queue = RB_ROOT_CACHED;
-	GEM_BUG_ON(port_isset(execlists->port));
 
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
@@ -1421,7 +1401,7 @@ int intel_guc_submission_enable(struct intel_guc *guc)
 	 * and it is guaranteed that it will remove the work item from the
 	 * queue before our request is completed.
 	 */
-	BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.port) *
+	BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.inflight) *
 		     sizeof(struct guc_wq_item) *
 		     I915_NUM_ENGINES > GUC_WQ_SIZE);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 950a2c55aa96..311681b6819d 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -367,13 +367,15 @@ static int __igt_breadcrumbs_smoketest(void *arg)
 
 		if (!wait_event_timeout(wait->wait,
 					i915_sw_fence_done(wait),
-					HZ / 2)) {
+					5 * HZ)) {
 			struct i915_request *rq = requests[count - 1];
 
-			pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n",
-			       count,
+			pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
+			       atomic_read(&wait->pending), count,
 			       rq->fence.context, rq->fence.seqno,
 			       t->engine->name);
+			GEM_TRACE_DUMP();
+
 			i915_gem_set_wedged(t->engine->i915);
 			GEM_BUG_ON(!i915_request_completed(rq));
 			i915_sw_fence_wait(wait);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 74+ messages in thread

* [PATCH 45/45] drm/i915/execlists: Minimalistic timeslicing
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (42 preceding siblings ...)
  2019-04-25  9:20 ` [PATCH 44/45] drm/i915/execlists: Preempt-to-busy Chris Wilson
@ 2019-04-25  9:20 ` Chris Wilson
  2019-04-25 10:35 ` [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Tvrtko Ursulin
                   ` (7 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25  9:20 UTC (permalink / raw)
  To: intel-gfx

If we have multiple contexts of equal priority pending execution,
activate a timer to demote the currently executing context in favour of
the next in the queue when that timeslice expires. This enforces
fairness between contexts (so long as they allow preemption -- forced
preemption, in the future, will kick those who do not obey) and allows
us to avoid userspace blocking forward progress with e.g. unbounded
MI_SEMAPHORE_WAIT.

As a starting point, we use a single jiffy as our timeslice so that we
remain reasonably efficient with respect to frequent CPU wakeups.
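
As a purely illustrative aside (not part of the patch), the timeslice
check introduced below reduces to comparing the running request's
priority against the best of what is waiting behind it. The userspace
model that follows sketches that comparison under the simplifying
assumptions that priorities are plain integers and the __NO_PREEMPTION
bit is ignored; the name need_timeslice_model is invented here and does
not exist in the driver.

#include <stdbool.h>
#include <stdio.h>

/*
 * Toy model of the timeslice decision: the running request is only
 * demoted when a request of equal or higher priority is waiting,
 * either next in the engine's active list or in the priority queue.
 */
static bool need_timeslice_model(int cur_prio, int next_prio, int queue_prio)
{
	int hint = next_prio > queue_prio ? next_prio : queue_prio;

	return hint >= cur_prio;
}

int main(void)
{
	/* equal priority waiting: expire the timeslice and rotate */
	printf("equal waiter: %d\n", need_timeslice_model(0, 0, -100));
	/* only lower priority waiting: let the current request run on */
	printf("lower waiter: %d\n", need_timeslice_model(0, -1, -100));
	return 0;
}

When the check fires, the patch rewinds the incomplete requests and
re-queues the interrupted one at the tail of its priority level (see
defer_active() below), so contexts of equal priority round-robin once
per jiffy.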

Testcase: igt/gem_exec_scheduler/semaphore-resolve
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h |   6 +
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 111 +++++++++++++++++++
 drivers/gpu/drm/i915/gt/selftest_lrc.c       |   3 +
 drivers/gpu/drm/i915/i915_scheduler.c        |   1 +
 drivers/gpu/drm/i915/i915_scheduler_types.h  |   1 +
 5 files changed, 122 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 30292e17b827..ca28ffbcf43c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -12,6 +12,7 @@
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/llist.h>
+#include <linux/timer.h>
 #include <linux/types.h>
 
 #include "i915_gem.h"
@@ -137,6 +138,11 @@ struct intel_engine_execlists {
 	 */
 	struct tasklet_struct tasklet;
 
+	/**
+	 * @timer: kick the current context if its timeslice expires
+	 */
+	struct timer_list timer;
+
 	/**
 	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
 	 */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index f3b36a7296e1..e140c2a301a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -835,6 +835,82 @@ last_active(const struct intel_engine_execlists *execlists)
 	return *last;
 }
 
+static void
+defer_request(struct i915_request * const rq, struct list_head * const pl)
+{
+	struct i915_dependency *p;
+
+	/*
+	 * We want to move the interrupted request to the back of
+	 * the round-robin list (i.e. its priority level), but
+	 * in doing so, we must then move all requests that were in
+	 * flight and were waiting for the interrupted request to
+	 * be run after it again.
+	 */
+	list_move_tail(&rq->sched.link, pl);
+
+	list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+		struct i915_request *w =
+			container_of(p->waiter, typeof(*w), sched);
+
+		if (!i915_sw_fence_done(&w->submit))
+			continue;
+
+		/* Leave semaphores spinning on the other engines */
+		if (w->engine != rq->engine)
+			continue;
+
+		/* No waiter should start before the active request completed */
+		GEM_BUG_ON(i915_request_started(w));
+
+		GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
+		if (rq_prio(w) < rq_prio(rq))
+			continue;
+
+		/*
+		 * This should be very shallow as it is limited by the
+		 * number of requests that can fit in a ring (<64) and
+		 * the number of contexts that can be in flight on this
+		 * engine.
+		 */
+		defer_request(w, pl);
+	}
+}
+
+static void defer_active(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+
+	rq = __unwind_incomplete_requests(engine);
+	if (!rq)
+		return;
+
+	defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
+}
+
+static bool
+need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
+{
+	int hint;
+
+	if (list_is_last(&rq->sched.link, &engine->active.requests))
+		return false;
+
+	hint = max(rq_prio(list_next_entry(rq, sched.link)),
+		   queue_prio(&engine->execlists));
+	hint |= __NO_PREEMPTION;
+
+	return hint >= rq_prio(rq);
+}
+
+static bool
+enable_timeslice(struct intel_engine_cs *engine)
+{
+	struct i915_request *last = last_active(&engine->execlists);
+
+	return last && need_timeslice(engine, last);
+}
+
 static bool execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -928,6 +1004,27 @@ static bool execlists_dequeue(struct intel_engine_cs *engine)
 			 */
 			last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
 			last = NULL;
+		} else if (need_timeslice(engine, last) &&
+			   !timer_pending(&engine->execlists.timer)) {
+			GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n",
+				  engine->name,
+				  last->fence.context,
+				  last->fence.seqno,
+				  last->sched.attr.priority,
+				  execlists->queue_priority_hint);
+
+			ring_suspend(engine) = 1;
+			defer_active(engine);
+
+			/*
+			 * Unlike for preemption, if we rewind and continue
+			 * executing the same context as previously active,
+			 * the order of execution will remain the same and
+			 * the tail will only advance. We do not need to
+			 * force a full context restore, as a lite-restore
+			 * is sufficient to resample the monotonic TAIL.
+			 */
+			last = NULL;
 		} else {
 			/*
 			 * Otherwise if we already have a request pending
@@ -1249,6 +1346,9 @@ static void process_csb(struct intel_engine_cs *engine)
 				       sizeof(*execlists->pending));
 			execlists->pending[0] = NULL;
 
+			if (enable_timeslice(engine))
+				mod_timer(&execlists->timer, jiffies + 1);
+
 			if (!inject_preempt_hang(execlists))
 				ring_suspend(engine) = 0;
 		} else if (status & GEN8_CTX_STATUS_PREEMPTED) {
@@ -1325,6 +1425,15 @@ static void execlists_submission_tasklet(unsigned long data)
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
+static void execlists_submission_timer(struct timer_list *timer)
+{
+	struct intel_engine_cs *engine =
+		from_timer(engine, timer, execlists.timer);
+
+	/* Kick the tasklet for some interrupt coalescing and reset handling */
+	tasklet_hi_schedule(&engine->execlists.tasklet);
+}
+
 static void queue_request(struct intel_engine_cs *engine,
 			  struct i915_sched_node *node,
 			  int prio)
@@ -2563,6 +2672,7 @@ static int gen8_init_rcs_context(struct i915_request *rq)
 
 static void execlists_park(struct intel_engine_cs *engine)
 {
+	del_timer_sync(&engine->execlists.timer);
 	tasklet_kill(&engine->execlists.tasklet);
 }
 
@@ -2661,6 +2771,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
 
 	tasklet_init(&engine->execlists.tasklet,
 		     execlists_submission_tasklet, (unsigned long)engine);
+	timer_setup(&engine->execlists.timer, execlists_submission_timer, 0);
 
 	logical_ring_default_vfuncs(engine);
 	logical_ring_default_irqs(engine);
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 9edb6662b903..8a0c706e9126 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -396,6 +396,9 @@ static int live_late_preempt(void *arg)
 	if (!ctx_lo)
 		goto err_ctx_hi;
 
+	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
+	ctx_lo->sched.priority = I915_USER_PRIORITY(1);
+
 	for_each_engine(engine, i915, id) {
 		struct igt_live_test t;
 		struct i915_request *rq;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 601aae909491..7d0c14a5e687 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -71,6 +71,7 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 		list_add(&dep->wait_link, &signal->waiters_list);
 		list_add(&dep->signal_link, &node->signalers_list);
 		dep->signaler = signal;
+		dep->waiter = node;
 		dep->flags = flags;
 
 		/* Keep track of whether anyone on this chain has a semaphore */
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 166a457884b2..21fb9cd81fcb 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -62,6 +62,7 @@ struct i915_sched_node {
 
 struct i915_dependency {
 	struct i915_sched_node *signaler;
+	struct i915_sched_node *waiter;
 	struct list_head signal_link;
 	struct list_head wait_link;
 	struct list_head dfs_link;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 74+ messages in thread

* Re: [PATCH 14/45] drm/i915: Make engine_mask & num_engines static
  2019-04-25  9:19 ` [PATCH 14/45] drm/i915: Make engine_mask & num_engines static Chris Wilson
@ 2019-04-25  9:30   ` Jani Nikula
  2019-04-25 10:20   ` Tvrtko Ursulin
  1 sibling, 0 replies; 74+ messages in thread
From: Jani Nikula @ 2019-04-25  9:30 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On Thu, 25 Apr 2019, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Having removed the urge to modify the engine_mask at runtime, we can
> promote the num_engines from a runtime calculation to a static and push
> it into the device_info tables.

\o/

Acked-by: Jani Nikula <jani.nikula@intel.com>




>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Jani Nikula <jani.nikula@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  3 --
>  drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |  2 +-
>  drivers/gpu/drm/i915/gt/selftest_lrc.c        |  4 +-
>  drivers/gpu/drm/i915/i915_pci.c               | 45 +++++++++----------
>  drivers/gpu/drm/i915/intel_device_info.h      |  3 +-
>  .../gpu/drm/i915/selftests/i915_gem_context.c |  4 +-
>  drivers/gpu/drm/i915/selftests/i915_request.c |  2 +-
>  7 files changed, 27 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 862cf1040f88..b2cd6c6785be 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -383,9 +383,6 @@ int intel_engines_init_mmio(struct drm_i915_private *i915)
>  		goto cleanup;
>  	}
>  
> -	RUNTIME_INFO(i915)->num_engines =
> -		hweight32(INTEL_INFO(i915)->engine_mask);
> -
>  	i915_check_and_clear_faults(i915);
>  
>  	return 0;
> diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> index 7a4804c47466..2edeedb748ed 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> @@ -1568,7 +1568,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
>  	struct intel_engine_cs *engine = rq->engine;
>  	enum intel_engine_id id;
>  	const int num_engines =
> -		IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
> +		IS_HSW_GT1(i915) ? INTEL_INFO(i915)->num_engines - 1 : 0;
>  	bool force_restore = false;
>  	int len;
>  	u32 *cs;
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 84538f69185b..6f223f7facde 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -1185,7 +1185,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
>  
>  	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
>  		count, flags,
> -		RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext);
> +		INTEL_INFO(smoke->i915)->num_engines, smoke->ncontext);
>  	return 0;
>  }
>  
> @@ -1213,7 +1213,7 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
>  
>  	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
>  		count, flags,
> -		RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext);
> +		INTEL_INFO(smoke->i915)->num_engines, smoke->ncontext);
>  	return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index ffa2ee70a03d..431a4a2c20e1 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -35,6 +35,7 @@
>  
>  #define PLATFORM(x) .platform = (x)
>  #define GEN(x) .gen = (x), .gen_mask = BIT((x) - 1)
> +#define ENGINES(x) .engine_mask = (x), .num_engines = hweight8(x)
>  
>  #define I845_PIPE_OFFSETS \
>  	.pipe_offsets = { \
> @@ -145,6 +146,7 @@
>  
>  #define I830_FEATURES \
>  	GEN(2), \
> +	ENGINES(BIT(RCS0)), \
>  	.is_mobile = 1, \
>  	.num_pipes = 2, \
>  	.display.has_overlay = 1, \
> @@ -154,7 +156,6 @@
>  	.gpu_reset_clobbers_display = true, \
>  	.hws_needs_physical = 1, \
>  	.unfenced_needs_alignment = 1, \
> -	.engine_mask = BIT(RCS0), \
>  	.has_snoop = true, \
>  	.has_coherent_ggtt = false, \
>  	I9XX_PIPE_OFFSETS, \
> @@ -164,6 +165,7 @@
>  
>  #define I845_FEATURES \
>  	GEN(2), \
> +	ENGINES(BIT(RCS0)), \
>  	.num_pipes = 1, \
>  	.display.has_overlay = 1, \
>  	.display.overlay_needs_physical = 1, \
> @@ -171,7 +173,6 @@
>  	.gpu_reset_clobbers_display = true, \
>  	.hws_needs_physical = 1, \
>  	.unfenced_needs_alignment = 1, \
> -	.engine_mask = BIT(RCS0), \
>  	.has_snoop = true, \
>  	.has_coherent_ggtt = false, \
>  	I845_PIPE_OFFSETS, \
> @@ -202,10 +203,10 @@ static const struct intel_device_info intel_i865g_info = {
>  
>  #define GEN3_FEATURES \
>  	GEN(3), \
> +	ENGINES(BIT(RCS0)), \
>  	.num_pipes = 2, \
>  	.display.has_gmch = 1, \
>  	.gpu_reset_clobbers_display = true, \
> -	.engine_mask = BIT(RCS0), \
>  	.has_snoop = true, \
>  	.has_coherent_ggtt = true, \
>  	I9XX_PIPE_OFFSETS, \
> @@ -286,11 +287,11 @@ static const struct intel_device_info intel_pineview_m_info = {
>  
>  #define GEN4_FEATURES \
>  	GEN(4), \
> +	ENGINES(BIT(RCS0)), \
>  	.num_pipes = 2, \
>  	.display.has_hotplug = 1, \
>  	.display.has_gmch = 1, \
>  	.gpu_reset_clobbers_display = true, \
> -	.engine_mask = BIT(RCS0), \
>  	.has_snoop = true, \
>  	.has_coherent_ggtt = true, \
>  	I9XX_PIPE_OFFSETS, \
> @@ -320,25 +321,25 @@ static const struct intel_device_info intel_i965gm_info = {
>  static const struct intel_device_info intel_g45_info = {
>  	GEN4_FEATURES,
>  	PLATFORM(INTEL_G45),
> -	.engine_mask = BIT(RCS0) | BIT(VCS0),
> +	ENGINES(BIT(RCS0) | BIT(VCS0)),
>  	.gpu_reset_clobbers_display = false,
>  };
>  
>  static const struct intel_device_info intel_gm45_info = {
>  	GEN4_FEATURES,
>  	PLATFORM(INTEL_GM45),
> +	ENGINES(BIT(RCS0) | BIT(VCS0)),
>  	.is_mobile = 1,
>  	.display.has_fbc = 1,
>  	.display.supports_tv = 1,
> -	.engine_mask = BIT(RCS0) | BIT(VCS0),
>  	.gpu_reset_clobbers_display = false,
>  };
>  
>  #define GEN5_FEATURES \
>  	GEN(5), \
> +	ENGINES(BIT(RCS0) | BIT(VCS0)), \
>  	.num_pipes = 2, \
>  	.display.has_hotplug = 1, \
> -	.engine_mask = BIT(RCS0) | BIT(VCS0), \
>  	.has_snoop = true, \
>  	.has_coherent_ggtt = true, \
>  	/* ilk does support rc6, but we do not implement [power] contexts */ \
> @@ -362,10 +363,10 @@ static const struct intel_device_info intel_ironlake_m_info = {
>  
>  #define GEN6_FEATURES \
>  	GEN(6), \
> +	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0)), \
>  	.num_pipes = 2, \
>  	.display.has_hotplug = 1, \
>  	.display.has_fbc = 1, \
> -	.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \
>  	.has_coherent_ggtt = true, \
>  	.has_llc = 1, \
>  	.has_rc6 = 1, \
> @@ -410,10 +411,10 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = {
>  
>  #define GEN7_FEATURES  \
>  	GEN(7), \
> +	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0)), \
>  	.num_pipes = 3, \
>  	.display.has_hotplug = 1, \
>  	.display.has_fbc = 1, \
> -	.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \
>  	.has_coherent_ggtt = true, \
>  	.has_llc = 1, \
>  	.has_rc6 = 1, \
> @@ -468,6 +469,7 @@ static const struct intel_device_info intel_ivybridge_q_info = {
>  static const struct intel_device_info intel_valleyview_info = {
>  	PLATFORM(INTEL_VALLEYVIEW),
>  	GEN(7),
> +	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0)),
>  	.is_lp = 1,
>  	.num_pipes = 2,
>  	.has_runtime_pm = 1,
> @@ -479,7 +481,6 @@ static const struct intel_device_info intel_valleyview_info = {
>  	.ppgtt_size = 31,
>  	.has_snoop = true,
>  	.has_coherent_ggtt = false,
> -	.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0),
>  	.display_mmio_offset = VLV_DISPLAY_BASE,
>  	I9XX_PIPE_OFFSETS,
>  	I9XX_CURSOR_OFFSETS,
> @@ -489,7 +490,7 @@ static const struct intel_device_info intel_valleyview_info = {
>  
>  #define G75_FEATURES  \
>  	GEN7_FEATURES, \
> -	.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), \
> +	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0)), \
>  	.display.has_ddi = 1, \
>  	.has_fpga_dbg = 1, \
>  	.display.has_psr = 1, \
> @@ -553,18 +554,17 @@ static const struct intel_device_info intel_broadwell_rsvd_info = {
>  
>  static const struct intel_device_info intel_broadwell_gt3_info = {
>  	BDW_PLATFORM,
> +	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1)),
>  	.gt = 3,
> -	.engine_mask =
> -		BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1),
>  };
>  
>  static const struct intel_device_info intel_cherryview_info = {
>  	PLATFORM(INTEL_CHERRYVIEW),
>  	GEN(8),
> +	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0)),
>  	.num_pipes = 3,
>  	.display.has_hotplug = 1,
>  	.is_lp = 1,
> -	.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0),
>  	.has_64bit_reloc = 1,
>  	.has_runtime_pm = 1,
>  	.has_rc6 = 1,
> @@ -616,9 +616,7 @@ static const struct intel_device_info intel_skylake_gt2_info = {
>  
>  #define SKL_GT3_PLUS_PLATFORM \
>  	SKL_PLATFORM, \
> -	.engine_mask = \
> -		BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1)
> -
> +	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1))
>  
>  static const struct intel_device_info intel_skylake_gt3_info = {
>  	SKL_GT3_PLUS_PLATFORM,
> @@ -632,9 +630,9 @@ static const struct intel_device_info intel_skylake_gt4_info = {
>  
>  #define GEN9_LP_FEATURES \
>  	GEN(9), \
> +	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0)), \
>  	.is_lp = 1, \
>  	.display.has_hotplug = 1, \
> -	.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), \
>  	.num_pipes = 3, \
>  	.has_64bit_reloc = 1, \
>  	.display.has_ddi = 1, \
> @@ -689,9 +687,8 @@ static const struct intel_device_info intel_kabylake_gt2_info = {
>  
>  static const struct intel_device_info intel_kabylake_gt3_info = {
>  	KBL_PLATFORM,
> +	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1)),
>  	.gt = 3,
> -	.engine_mask =
> -		BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1),
>  };
>  
>  #define CFL_PLATFORM \
> @@ -710,9 +707,8 @@ static const struct intel_device_info intel_coffeelake_gt2_info = {
>  
>  static const struct intel_device_info intel_coffeelake_gt3_info = {
>  	CFL_PLATFORM,
> +	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1)),
>  	.gt = 3,
> -	.engine_mask =
> -		BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1),
>  };
>  
>  #define GEN10_FEATURES \
> @@ -754,15 +750,14 @@ static const struct intel_device_info intel_cannonlake_info = {
>  static const struct intel_device_info intel_icelake_11_info = {
>  	GEN11_FEATURES,
>  	PLATFORM(INTEL_ICELAKE),
> -	.engine_mask =
> -		BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2),
> +	ENGINES(BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2)),
>  };
>  
>  static const struct intel_device_info intel_elkhartlake_info = {
>  	GEN11_FEATURES,
>  	PLATFORM(INTEL_ELKHARTLAKE),
> +	ENGINES(BIT(RCS0) | BIT(BCS0) | BIT(VCS0)),
>  	.is_alpha_support = 1,
> -	.engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VCS0),
>  	.ppgtt_size = 36,
>  };
>  
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 5a2e17d6146b..0deda1efd37d 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -149,6 +149,7 @@ struct intel_device_info {
>  	u8 gen;
>  	u8 gt; /* GT number, 0 if undefined */
>  	intel_engine_mask_t engine_mask; /* Engines supported by the HW */
> +	u8 num_engines;
>  
>  	enum intel_platform platform;
>  
> @@ -202,8 +203,6 @@ struct intel_runtime_info {
>  	u8 num_sprites[I915_MAX_PIPES];
>  	u8 num_scalers[I915_MAX_PIPES];
>  
> -	u8 num_engines;
> -
>  	/* Slice/subslice/EU info */
>  	struct sseu_dev_info sseu;
>  
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index b62f005e4d50..43d014472ac6 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -1257,7 +1257,7 @@ static int igt_ctx_readonly(void *arg)
>  		}
>  	}
>  	pr_info("Submitted %lu dwords (across %u engines)\n",
> -		ndwords, RUNTIME_INFO(i915)->num_engines);
> +		ndwords, INTEL_INFO(i915)->num_engines);
>  
>  	dw = 0;
>  	idx = 0;
> @@ -1586,7 +1586,7 @@ static int igt_vm_isolation(void *arg)
>  		count += this;
>  	}
>  	pr_info("Checked %lu scratch offsets across %d engines\n",
> -		count, RUNTIME_INFO(i915)->num_engines);
> +		count, INTEL_INFO(i915)->num_engines);
>  
>  out_rpm:
>  	intel_runtime_pm_put(i915, wakeref);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
> index b60591531e4a..8fe9a43c99d9 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_request.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_request.c
> @@ -1211,7 +1211,7 @@ static int live_breadcrumbs_smoketest(void *arg)
>  		num_fences += atomic_long_read(&t[id].num_fences);
>  	}
>  	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
> -		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);
> +		num_waits, num_fences, INTEL_INFO(i915)->num_engines, ncpus);
>  
>  	mutex_lock(&i915->drm.struct_mutex);
>  	ret = igt_live_test_end(&live) ?: ret;

-- 
Jani Nikula, Intel Open Source Graphics Center
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 74+ messages in thread

* Re: [PATCH 14/45] drm/i915: Make engine_mask & num_engines static
  2019-04-25  9:19 ` [PATCH 14/45] drm/i915: Make engine_mask & num_engines static Chris Wilson
  2019-04-25  9:30   ` Jani Nikula
@ 2019-04-25 10:20   ` Tvrtko Ursulin
  2019-04-25 10:30     ` Chris Wilson
  1 sibling, 1 reply; 74+ messages in thread
From: Tvrtko Ursulin @ 2019-04-25 10:20 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Jani Nikula


On 25/04/2019 10:19, Chris Wilson wrote:
> Having removed the urge to modify the engine_mask at runtime, we can
> promote the num_engines from a runtime calculation to a static and push
> it into the device_info tables.

What about fused off engines (intel_device_info_init_mmio)?

I don't see the patch touching that, so it doesn't remove the runtime 
modification of engine_mask, and it also makes num_engines wrong in those 
cases.
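
Roughly, for illustration only (not the actual fuse probe code; the engine 
picked is arbitrary):

        /* probe discovers an engine is fused off */
        info->engine_mask &= ~BIT(VCS1);

        /* any num_engines taken from the static tables is now stale and
         * would have to be recomputed, e.g. */
        num_engines = hweight32(info->engine_mask);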

Regards,

Tvrtko

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Jani Nikula <jani.nikula@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  3 --
>   drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |  2 +-
>   drivers/gpu/drm/i915/gt/selftest_lrc.c        |  4 +-
>   drivers/gpu/drm/i915/i915_pci.c               | 45 +++++++++----------
>   drivers/gpu/drm/i915/intel_device_info.h      |  3 +-
>   .../gpu/drm/i915/selftests/i915_gem_context.c |  4 +-
>   drivers/gpu/drm/i915/selftests/i915_request.c |  2 +-
>   7 files changed, 27 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 862cf1040f88..b2cd6c6785be 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -383,9 +383,6 @@ int intel_engines_init_mmio(struct drm_i915_private *i915)
>   		goto cleanup;
>   	}
>   
> -	RUNTIME_INFO(i915)->num_engines =
> -		hweight32(INTEL_INFO(i915)->engine_mask);
> -
>   	i915_check_and_clear_faults(i915);
>   
>   	return 0;
> diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> index 7a4804c47466..2edeedb748ed 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> @@ -1568,7 +1568,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
>   	struct intel_engine_cs *engine = rq->engine;
>   	enum intel_engine_id id;
>   	const int num_engines =
> -		IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
> +		IS_HSW_GT1(i915) ? INTEL_INFO(i915)->num_engines - 1 : 0;
>   	bool force_restore = false;
>   	int len;
>   	u32 *cs;
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 84538f69185b..6f223f7facde 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -1185,7 +1185,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
>   
>   	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
>   		count, flags,
> -		RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext);
> +		INTEL_INFO(smoke->i915)->num_engines, smoke->ncontext);
>   	return 0;
>   }
>   
> @@ -1213,7 +1213,7 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
>   
>   	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
>   		count, flags,
> -		RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext);
> +		INTEL_INFO(smoke->i915)->num_engines, smoke->ncontext);
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index ffa2ee70a03d..431a4a2c20e1 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -35,6 +35,7 @@
>   
>   #define PLATFORM(x) .platform = (x)
>   #define GEN(x) .gen = (x), .gen_mask = BIT((x) - 1)
> +#define ENGINES(x) .engine_mask = (x), .num_engines = hweight8(x)
>   
>   #define I845_PIPE_OFFSETS \
>   	.pipe_offsets = { \
> @@ -145,6 +146,7 @@
>   
>   #define I830_FEATURES \
>   	GEN(2), \
> +	ENGINES(BIT(RCS0)), \
>   	.is_mobile = 1, \
>   	.num_pipes = 2, \
>   	.display.has_overlay = 1, \
> @@ -154,7 +156,6 @@
>   	.gpu_reset_clobbers_display = true, \
>   	.hws_needs_physical = 1, \
>   	.unfenced_needs_alignment = 1, \
> -	.engine_mask = BIT(RCS0), \
>   	.has_snoop = true, \
>   	.has_coherent_ggtt = false, \
>   	I9XX_PIPE_OFFSETS, \
> @@ -164,6 +165,7 @@
>   
>   #define I845_FEATURES \
>   	GEN(2), \
> +	ENGINES(BIT(RCS0)), \
>   	.num_pipes = 1, \
>   	.display.has_overlay = 1, \
>   	.display.overlay_needs_physical = 1, \
> @@ -171,7 +173,6 @@
>   	.gpu_reset_clobbers_display = true, \
>   	.hws_needs_physical = 1, \
>   	.unfenced_needs_alignment = 1, \
> -	.engine_mask = BIT(RCS0), \
>   	.has_snoop = true, \
>   	.has_coherent_ggtt = false, \
>   	I845_PIPE_OFFSETS, \
> @@ -202,10 +203,10 @@ static const struct intel_device_info intel_i865g_info = {
>   
>   #define GEN3_FEATURES \
>   	GEN(3), \
> +	ENGINES(BIT(RCS0)), \
>   	.num_pipes = 2, \
>   	.display.has_gmch = 1, \
>   	.gpu_reset_clobbers_display = true, \
> -	.engine_mask = BIT(RCS0), \
>   	.has_snoop = true, \
>   	.has_coherent_ggtt = true, \
>   	I9XX_PIPE_OFFSETS, \
> @@ -286,11 +287,11 @@ static const struct intel_device_info intel_pineview_m_info = {
>   
>   #define GEN4_FEATURES \
>   	GEN(4), \
> +	ENGINES(BIT(RCS0)), \
>   	.num_pipes = 2, \
>   	.display.has_hotplug = 1, \
>   	.display.has_gmch = 1, \
>   	.gpu_reset_clobbers_display = true, \
> -	.engine_mask = BIT(RCS0), \
>   	.has_snoop = true, \
>   	.has_coherent_ggtt = true, \
>   	I9XX_PIPE_OFFSETS, \
> @@ -320,25 +321,25 @@ static const struct intel_device_info intel_i965gm_info = {
>   static const struct intel_device_info intel_g45_info = {
>   	GEN4_FEATURES,
>   	PLATFORM(INTEL_G45),
> -	.engine_mask = BIT(RCS0) | BIT(VCS0),
> +	ENGINES(BIT(RCS0) | BIT(VCS0)),
>   	.gpu_reset_clobbers_display = false,
>   };
>   
>   static const struct intel_device_info intel_gm45_info = {
>   	GEN4_FEATURES,
>   	PLATFORM(INTEL_GM45),
> +	ENGINES(BIT(RCS0) | BIT(VCS0)),
>   	.is_mobile = 1,
>   	.display.has_fbc = 1,
>   	.display.supports_tv = 1,
> -	.engine_mask = BIT(RCS0) | BIT(VCS0),
>   	.gpu_reset_clobbers_display = false,
>   };
>   
>   #define GEN5_FEATURES \
>   	GEN(5), \
> +	ENGINES(BIT(RCS0) | BIT(VCS0)), \
>   	.num_pipes = 2, \
>   	.display.has_hotplug = 1, \
> -	.engine_mask = BIT(RCS0) | BIT(VCS0), \
>   	.has_snoop = true, \
>   	.has_coherent_ggtt = true, \
>   	/* ilk does support rc6, but we do not implement [power] contexts */ \
> @@ -362,10 +363,10 @@ static const struct intel_device_info intel_ironlake_m_info = {
>   
>   #define GEN6_FEATURES \
>   	GEN(6), \
> +	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0)), \
>   	.num_pipes = 2, \
>   	.display.has_hotplug = 1, \
>   	.display.has_fbc = 1, \
> -	.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \
>   	.has_coherent_ggtt = true, \
>   	.has_llc = 1, \
>   	.has_rc6 = 1, \
> @@ -410,10 +411,10 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = {
>   
>   #define GEN7_FEATURES  \
>   	GEN(7), \
> +	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0)), \
>   	.num_pipes = 3, \
>   	.display.has_hotplug = 1, \
>   	.display.has_fbc = 1, \
> -	.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \
>   	.has_coherent_ggtt = true, \
>   	.has_llc = 1, \
>   	.has_rc6 = 1, \
> @@ -468,6 +469,7 @@ static const struct intel_device_info intel_ivybridge_q_info = {
>   static const struct intel_device_info intel_valleyview_info = {
>   	PLATFORM(INTEL_VALLEYVIEW),
>   	GEN(7),
> +	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0)),
>   	.is_lp = 1,
>   	.num_pipes = 2,
>   	.has_runtime_pm = 1,
> @@ -479,7 +481,6 @@ static const struct intel_device_info intel_valleyview_info = {
>   	.ppgtt_size = 31,
>   	.has_snoop = true,
>   	.has_coherent_ggtt = false,
> -	.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0),
>   	.display_mmio_offset = VLV_DISPLAY_BASE,
>   	I9XX_PIPE_OFFSETS,
>   	I9XX_CURSOR_OFFSETS,
> @@ -489,7 +490,7 @@ static const struct intel_device_info intel_valleyview_info = {
>   
>   #define G75_FEATURES  \
>   	GEN7_FEATURES, \
> -	.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), \
> +	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0)), \
>   	.display.has_ddi = 1, \
>   	.has_fpga_dbg = 1, \
>   	.display.has_psr = 1, \
> @@ -553,18 +554,17 @@ static const struct intel_device_info intel_broadwell_rsvd_info = {
>   
>   static const struct intel_device_info intel_broadwell_gt3_info = {
>   	BDW_PLATFORM,
> +	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1)),
>   	.gt = 3,
> -	.engine_mask =
> -		BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1),
>   };
>   
>   static const struct intel_device_info intel_cherryview_info = {
>   	PLATFORM(INTEL_CHERRYVIEW),
>   	GEN(8),
> +	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0)),
>   	.num_pipes = 3,
>   	.display.has_hotplug = 1,
>   	.is_lp = 1,
> -	.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0),
>   	.has_64bit_reloc = 1,
>   	.has_runtime_pm = 1,
>   	.has_rc6 = 1,
> @@ -616,9 +616,7 @@ static const struct intel_device_info intel_skylake_gt2_info = {
>   
>   #define SKL_GT3_PLUS_PLATFORM \
>   	SKL_PLATFORM, \
> -	.engine_mask = \
> -		BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1)
> -
> +	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1))
>   
>   static const struct intel_device_info intel_skylake_gt3_info = {
>   	SKL_GT3_PLUS_PLATFORM,
> @@ -632,9 +630,9 @@ static const struct intel_device_info intel_skylake_gt4_info = {
>   
>   #define GEN9_LP_FEATURES \
>   	GEN(9), \
> +	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0)), \
>   	.is_lp = 1, \
>   	.display.has_hotplug = 1, \
> -	.engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), \
>   	.num_pipes = 3, \
>   	.has_64bit_reloc = 1, \
>   	.display.has_ddi = 1, \
> @@ -689,9 +687,8 @@ static const struct intel_device_info intel_kabylake_gt2_info = {
>   
>   static const struct intel_device_info intel_kabylake_gt3_info = {
>   	KBL_PLATFORM,
> +	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1)),
>   	.gt = 3,
> -	.engine_mask =
> -		BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1),
>   };
>   
>   #define CFL_PLATFORM \
> @@ -710,9 +707,8 @@ static const struct intel_device_info intel_coffeelake_gt2_info = {
>   
>   static const struct intel_device_info intel_coffeelake_gt3_info = {
>   	CFL_PLATFORM,
> +	ENGINES(BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1)),
>   	.gt = 3,
> -	.engine_mask =
> -		BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS1),
>   };
>   
>   #define GEN10_FEATURES \
> @@ -754,15 +750,14 @@ static const struct intel_device_info intel_cannonlake_info = {
>   static const struct intel_device_info intel_icelake_11_info = {
>   	GEN11_FEATURES,
>   	PLATFORM(INTEL_ICELAKE),
> -	.engine_mask =
> -		BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2),
> +	ENGINES(BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2)),
>   };
>   
>   static const struct intel_device_info intel_elkhartlake_info = {
>   	GEN11_FEATURES,
>   	PLATFORM(INTEL_ELKHARTLAKE),
> +	ENGINES(BIT(RCS0) | BIT(BCS0) | BIT(VCS0)),
>   	.is_alpha_support = 1,
> -	.engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VCS0),
>   	.ppgtt_size = 36,
>   };
>   
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 5a2e17d6146b..0deda1efd37d 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -149,6 +149,7 @@ struct intel_device_info {
>   	u8 gen;
>   	u8 gt; /* GT number, 0 if undefined */
>   	intel_engine_mask_t engine_mask; /* Engines supported by the HW */
> +	u8 num_engines;
>   
>   	enum intel_platform platform;
>   
> @@ -202,8 +203,6 @@ struct intel_runtime_info {
>   	u8 num_sprites[I915_MAX_PIPES];
>   	u8 num_scalers[I915_MAX_PIPES];
>   
> -	u8 num_engines;
> -
>   	/* Slice/subslice/EU info */
>   	struct sseu_dev_info sseu;
>   
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index b62f005e4d50..43d014472ac6 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -1257,7 +1257,7 @@ static int igt_ctx_readonly(void *arg)
>   		}
>   	}
>   	pr_info("Submitted %lu dwords (across %u engines)\n",
> -		ndwords, RUNTIME_INFO(i915)->num_engines);
> +		ndwords, INTEL_INFO(i915)->num_engines);
>   
>   	dw = 0;
>   	idx = 0;
> @@ -1586,7 +1586,7 @@ static int igt_vm_isolation(void *arg)
>   		count += this;
>   	}
>   	pr_info("Checked %lu scratch offsets across %d engines\n",
> -		count, RUNTIME_INFO(i915)->num_engines);
> +		count, INTEL_INFO(i915)->num_engines);
>   
>   out_rpm:
>   	intel_runtime_pm_put(i915, wakeref);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
> index b60591531e4a..8fe9a43c99d9 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_request.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_request.c
> @@ -1211,7 +1211,7 @@ static int live_breadcrumbs_smoketest(void *arg)
>   		num_fences += atomic_long_read(&t[id].num_fences);
>   	}
>   	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
> -		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);
> +		num_waits, num_fences, INTEL_INFO(i915)->num_engines, ncpus);
>   
>   	mutex_lock(&i915->drm.struct_mutex);
>   	ret = igt_live_test_end(&live) ?: ret;
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 74+ messages in thread

* Re: [PATCH 14/45] drm/i915: Make engine_mask & num_engines static
  2019-04-25 10:20   ` Tvrtko Ursulin
@ 2019-04-25 10:30     ` Chris Wilson
  2019-04-25 10:43       ` Tvrtko Ursulin
  0 siblings, 1 reply; 74+ messages in thread
From: Chris Wilson @ 2019-04-25 10:30 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: Jani Nikula

Quoting Tvrtko Ursulin (2019-04-25 11:20:47)
> 
> On 25/04/2019 10:19, Chris Wilson wrote:
> > Having removed the urge to modify the engine_mask at runtime, we can
> > promote the num_engines from a runtime calculation to a static and push
> > it into the device_info tables.
> 
> What about fused off engines (intel_device_info_init_mmio)?

Didn't notice. :((

Plan B was just to calculate num_engines at point of use, since it is
rare enough.
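
i.e. something like this at each call site (sketch only; it just mirrors
what the runtime computation does today):

	const unsigned int num_engines =
		hweight32(INTEL_INFO(i915)->engine_mask);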

Oh well, this pair of patches was just inspired by my touching the
cleanup code and is not important.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 74+ messages in thread

* Re: [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (43 preceding siblings ...)
  2019-04-25  9:20 ` [PATCH 45/45] drm/i915/execlists: Minimalistic timeslicing Chris Wilson
@ 2019-04-25 10:35 ` Tvrtko Ursulin
  2019-04-25 10:42   ` Chris Wilson
  2019-04-25 11:22 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/45] " Patchwork
                   ` (6 subsequent siblings)
  51 siblings, 1 reply; 74+ messages in thread
From: Tvrtko Ursulin @ 2019-04-25 10:35 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 25/04/2019 10:19, Chris Wilson wrote:
> Currently there is an underlying assumption that i915_request_unsubmit()
> is synchronous wrt the GPU -- that is the request is no longer in flight
> as we remove it. In the near future that may change, and this may upset
> our signaling as we can process an interrupt for that request while it
> is no longer in flight.
> 
> CPU0					CPU1
> intel_engine_breadcrumbs_irq
> (queue request completion)
> 					i915_request_cancel_signaling
> ...					...
> 					i915_request_enable_signaling
> dma_fence_signal
> 
> Hence in the time it took us to drop the lock to signal the request, a
> preemption event may have occurred and re-queued the request. In the
> process, that request would have seen I915_FENCE_FLAG_SIGNAL clear and
> so reused the rq->signal_link that was in use on CPU0, leading to bad
> pointer chasing in intel_engine_breadcrumbs_irq.
> 
> A related issue was that if someone started listening for a signal on a
> completed but no longer in-flight request, we missed the opportunity to
> immediately signal that request.
> 
> Furthermore, as intel_contexts may be immediately released during
> request retirement, in order to be entirely sure that
> intel_engine_breadcrumbs_irq may no longer dereference the intel_context
> (ce->signals and ce->signal_link), we must wait for irq spinlock.
> 
> In order to prevent the race, we use a bit in the fence.flags to signal
> the transfer onto the signal list inside intel_engine_breadcrumbs_irq.
> For simplicity, we use the DMA_FENCE_FLAG_SIGNALED_BIT as it then
> quickly signals to any outside observer that the fence is indeed signaled.
> 
> Fixes: 52c0fdb25c7c ("drm/i915: Replace global breadcrumbs with per-context interrupt tracking")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/dma-buf/dma-fence.c                 |  1 +
>   drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 58 +++++++++++++--------
>   drivers/gpu/drm/i915/i915_request.c         |  1 +
>   3 files changed, 39 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> index 3aa8733f832a..9bf06042619a 100644
> --- a/drivers/dma-buf/dma-fence.c
> +++ b/drivers/dma-buf/dma-fence.c
> @@ -29,6 +29,7 @@
>   
>   EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
>   EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
> +EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
>   
>   static DEFINE_SPINLOCK(dma_fence_stub_lock);
>   static struct dma_fence dma_fence_stub;
> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> index 3cbffd400b1b..4283224249d4 100644
> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> @@ -23,6 +23,7 @@
>    */
>   
>   #include <linux/kthread.h>
> +#include <trace/events/dma_fence.h>
>   #include <uapi/linux/sched/types.h>
>   
>   #include "i915_drv.h"
> @@ -83,6 +84,7 @@ static inline bool __request_completed(const struct i915_request *rq)
>   void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
>   {
>   	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +	const ktime_t timestamp = ktime_get();
>   	struct intel_context *ce, *cn;
>   	struct list_head *pos, *next;
>   	LIST_HEAD(signal);
> @@ -104,6 +106,11 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
>   
>   			GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL,
>   					     &rq->fence.flags));
> +			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
> +
> +			if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> +					     &rq->fence.flags))
> +				continue;
>   
>   			/*
>   			 * Queue for execution after dropping the signaling
> @@ -111,14 +118,6 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
>   			 * more signalers to the same context or engine.
>   			 */
>   			i915_request_get(rq);
> -
> -			/*
> -			 * We may race with direct invocation of
> -			 * dma_fence_signal(), e.g. i915_request_retire(),
> -			 * so we need to acquire our reference to the request
> -			 * before we cancel the breadcrumb.
> -			 */
> -			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
>   			list_add_tail(&rq->signal_link, &signal);
>   		}
>   
> @@ -140,8 +139,21 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
>   	list_for_each_safe(pos, next, &signal) {
>   		struct i915_request *rq =
>   			list_entry(pos, typeof(*rq), signal_link);
> +		struct dma_fence_cb *cur, *tmp;
> +
> +		trace_dma_fence_signaled(&rq->fence);
> +
> +		rq->fence.timestamp = timestamp;
> +		set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &rq->fence.flags);
> +
> +		spin_lock(&rq->lock);
> +		list_for_each_entry_safe(cur, tmp, &rq->fence.cb_list, node) {
> +			INIT_LIST_HEAD(&cur->node);
> +			cur->func(&rq->fence, cur);
> +		}
> +		INIT_LIST_HEAD(&rq->fence.cb_list);
> +		spin_unlock(&rq->lock);
>   
> -		dma_fence_signal(&rq->fence);

I posted some comments on this patch already. In essence it was a 
suggestion to not open-code-and-optimize dma_fence_signal, but split it 
into two low-level helpers and export from the parent location. Like 
__dma_fence_maybe/start_signal and __dma_fence_finish_signal.

bool __dma_fence_start_signal(...)
{
	return test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
				fence->flags);
}
EXPORT_SYMBOL(...)

void __dma_fence_finish_signal(...)
{
	trace_dma_fence_signaled...
	timestamp...
	callbacks...
}
EXPORT_SYMBOL(...)

This way we don't add coupling to low level implementation details in i915.
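
Fleshed out, roughly (just a sketch against the current dma-fence internals; 
final names and calling convention up for debate):

bool __dma_fence_start_signal(struct dma_fence *fence)
{
	/* false if the fence was already signaled by someone else */
	return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
}
EXPORT_SYMBOL(__dma_fence_start_signal);

void __dma_fence_finish_signal(struct dma_fence *fence, ktime_t timestamp)
{
	struct dma_fence_cb *cur, *tmp;

	trace_dma_fence_signaled(fence);

	fence->timestamp = timestamp;
	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);

	/* caller is expected to hold the fence lock */
	list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
		INIT_LIST_HEAD(&cur->node);
		cur->func(fence, cur);
	}
	INIT_LIST_HEAD(&fence->cb_list);
}
EXPORT_SYMBOL(__dma_fence_finish_signal);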

Regards,

Tvrtko

>   		i915_request_put(rq);
>   	}
>   }
> @@ -243,19 +255,17 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
>   
>   bool i915_request_enable_breadcrumb(struct i915_request *rq)
>   {
> -	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
> -
> -	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
> -
> -	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
> -		return true;
> +	lockdep_assert_held(&rq->lock);
> +	lockdep_assert_irqs_disabled();
>   
> -	spin_lock(&b->irq_lock);
> -	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags) &&
> -	    !__request_completed(rq)) {
> +	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
> +		struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
>   		struct intel_context *ce = rq->hw_context;
>   		struct list_head *pos;
>   
> +		spin_lock(&b->irq_lock);
> +		GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
> +
>   		__intel_breadcrumbs_arm_irq(b);
>   
>   		/*
> @@ -284,8 +294,8 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
>   			list_move_tail(&ce->signal_link, &b->signalers);
>   
>   		set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
> +		spin_unlock(&b->irq_lock);
>   	}
> -	spin_unlock(&b->irq_lock);
>   
>   	return !__request_completed(rq);
>   }
> @@ -294,9 +304,15 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
>   {
>   	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
>   
> -	if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
> -		return;
> +	lockdep_assert_held(&rq->lock);
> +	lockdep_assert_irqs_disabled();
>   
> +	/*
> +	 * We must wait for b->irq_lock so that we know the interrupt handler
> +	 * has released its reference to the intel_context and has completed
> +	 * the DMA_FENCE_FLAG_SIGNALED_BIT/I915_FENCE_FLAG_SIGNAL dance (if
> +	 * required).
> +	 */
>   	spin_lock(&b->irq_lock);
>   	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
>   		struct intel_context *ce = rq->hw_context;
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 5869c37a35e1..fa9037173f1d 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -417,6 +417,7 @@ void __i915_request_submit(struct i915_request *request)
>   	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
>   
>   	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
> +	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
>   	    !i915_request_enable_breadcrumb(request))
>   		intel_engine_queue_breadcrumbs(engine);
>   
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 74+ messages in thread

* Re: [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling
  2019-04-25 10:35 ` [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Tvrtko Ursulin
@ 2019-04-25 10:42   ` Chris Wilson
  2019-04-26 10:40     ` Tvrtko Ursulin
  0 siblings, 1 reply; 74+ messages in thread
From: Chris Wilson @ 2019-04-25 10:42 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-04-25 11:35:01)
> 
> On 25/04/2019 10:19, Chris Wilson wrote:
> > Currently there is an underlying assumption that i915_request_unsubmit()
> > is synchronous wrt the GPU -- that is the request is no longer in flight
> > as we remove it. In the near future that may change, and this may upset
> > our signaling as we can process an interrupt for that request while it
> > is no longer in flight.
> > 
> > CPU0                                  CPU1
> > intel_engine_breadcrumbs_irq
> > (queue request completion)
> >                                       i915_request_cancel_signaling
> > ...                                   ...
> >                                       i915_request_enable_signaling
> > dma_fence_signal
> > 
> > Hence in the time it took us to drop the lock to signal the request, a
> > preemption event may have occurred and re-queued the request. In the
> > process, that request would have seen I915_FENCE_FLAG_SIGNAL clear and
> > so reused the rq->signal_link that was in use on CPU0, leading to bad
> > pointer chasing in intel_engine_breadcrumbs_irq.
> > 
> > A related issue was that if someone started listening for a signal on a
> > completed but no longer in-flight request, we missed the opportunity to
> > immediately signal that request.
> > 
> > Furthermore, as intel_contexts may be immediately released during
> > request retirement, in order to be entirely sure that
> > intel_engine_breadcrumbs_irq may no longer dereference the intel_context
> > (ce->signals and ce->signal_link), we must wait for irq spinlock.
> > 
> > In order to prevent the race, we use a bit in the fence.flags to signal
> > the transfer onto the signal list inside intel_engine_breadcrumbs_irq.
> > For simplicity, we use the DMA_FENCE_FLAG_SIGNALED_BIT as it then
> > quickly signals to any outside observer that the fence is indeed signaled.
> > 
> > Fixes: 52c0fdb25c7c ("drm/i915: Replace global breadcrumbs with per-context interrupt tracking")
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/dma-buf/dma-fence.c                 |  1 +
> >   drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 58 +++++++++++++--------
> >   drivers/gpu/drm/i915/i915_request.c         |  1 +
> >   3 files changed, 39 insertions(+), 21 deletions(-)
> > 
> > diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> > index 3aa8733f832a..9bf06042619a 100644
> > --- a/drivers/dma-buf/dma-fence.c
> > +++ b/drivers/dma-buf/dma-fence.c
> > @@ -29,6 +29,7 @@
> >   
> >   EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
> >   EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
> > +EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
> >   
> >   static DEFINE_SPINLOCK(dma_fence_stub_lock);
> >   static struct dma_fence dma_fence_stub;
> > diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> > index 3cbffd400b1b..4283224249d4 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> > @@ -23,6 +23,7 @@
> >    */
> >   
> >   #include <linux/kthread.h>
> > +#include <trace/events/dma_fence.h>
> >   #include <uapi/linux/sched/types.h>
> >   
> >   #include "i915_drv.h"
> > @@ -83,6 +84,7 @@ static inline bool __request_completed(const struct i915_request *rq)
> >   void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
> >   {
> >       struct intel_breadcrumbs *b = &engine->breadcrumbs;
> > +     const ktime_t timestamp = ktime_get();
> >       struct intel_context *ce, *cn;
> >       struct list_head *pos, *next;
> >       LIST_HEAD(signal);
> > @@ -104,6 +106,11 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
> >   
> >                       GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL,
> >                                            &rq->fence.flags));
> > +                     clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
> > +
> > +                     if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> > +                                          &rq->fence.flags))
> > +                             continue;
> >   
> >                       /*
> >                        * Queue for execution after dropping the signaling
> > @@ -111,14 +118,6 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
> >                        * more signalers to the same context or engine.
> >                        */
> >                       i915_request_get(rq);
> > -
> > -                     /*
> > -                      * We may race with direct invocation of
> > -                      * dma_fence_signal(), e.g. i915_request_retire(),
> > -                      * so we need to acquire our reference to the request
> > -                      * before we cancel the breadcrumb.
> > -                      */
> > -                     clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
> >                       list_add_tail(&rq->signal_link, &signal);
> >               }
> >   
> > @@ -140,8 +139,21 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
> >       list_for_each_safe(pos, next, &signal) {
> >               struct i915_request *rq =
> >                       list_entry(pos, typeof(*rq), signal_link);
> > +             struct dma_fence_cb *cur, *tmp;
> > +
> > +             trace_dma_fence_signaled(&rq->fence);
> > +
> > +             rq->fence.timestamp = timestamp;
> > +             set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &rq->fence.flags);
> > +
> > +             spin_lock(&rq->lock);
> > +             list_for_each_entry_safe(cur, tmp, &rq->fence.cb_list, node) {
> > +                     INIT_LIST_HEAD(&cur->node);
> > +                     cur->func(&rq->fence, cur);
> > +             }
> > +             INIT_LIST_HEAD(&rq->fence.cb_list);
> > +             spin_unlock(&rq->lock);
> >   
> > -             dma_fence_signal(&rq->fence);
> 
> I posted some comments on this patch already. In essence it was a 
> suggestion to not open-code-and-optimize dma_fence_signal, but split it 
> into two low-level helpers and export from the parent location. Like 
> __dma_fence_maybe/start_signal and __dma_fence_finish_signal.
> 
> bool __dma_fence_start_signal(...)
> {
>         return test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
>                                 fence->flags);
> }
> EXPORT_SYMBOL(...)
> 
> void __dma_fence_finish_signal(...)
> {
>         trace_dma_fence_signaled...
>         timestamp...
>         callbacks...
> }
> EXPORT_SYMBOL(...)
> 
> This way we don't add coupling to low level implementation details in i915.

You mean midlayer implementation details :-p

The alternative I mentioned was that we use a redundant bit. But that
has duplicity of purpose.

The patch needs to be stable-friendly for the fixes, so I opted for
taking control.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 74+ messages in thread

* Re: [PATCH 14/45] drm/i915: Make engine_mask & num_engines static
  2019-04-25 10:30     ` Chris Wilson
@ 2019-04-25 10:43       ` Tvrtko Ursulin
  0 siblings, 0 replies; 74+ messages in thread
From: Tvrtko Ursulin @ 2019-04-25 10:43 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Jani Nikula


On 25/04/2019 11:30, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-04-25 11:20:47)
>>
>> On 25/04/2019 10:19, Chris Wilson wrote:
>>> Having removed the urge to modify the engine_mask at runtime, we can
>>> promote the num_engines from a runtime calculation to a static and push
>>> it into the device_info tables.
>>
>> What about fused off engines (intel_device_info_init_mmio)?
> 
> Didn't notice. :((
> 
> Plan B was just to calculate num_engines at point of use, since it is
> rare enough.

I'd just leave it in runtime info for now. Don't see a benefit of moving it.

If I can find my last series, which tried to eliminate mkwrite and reduce 
runtime info, there were some notes in the resulting threads about which 
fields have what problems associated with them. I only remember that the 
gen field was looking like easy pickings...

Anyways...

> Oh well, this pair of patches was just inspired by my touching the
> cleanup code and is not important.

.. please restrain the inspiration if it is ahead of media scalability, 
otherwise we will never be done with it! :))

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 74+ messages in thread

* Re: [PATCH 12/45] drm/i915: Move the engine->destroy() vfunc onto the engine
  2019-04-25  9:19 ` [PATCH 12/45] drm/i915: Move the engine->destroy() vfunc onto the engine Chris Wilson
@ 2019-04-25 11:01   ` Tvrtko Ursulin
  2019-04-25 11:24     ` Chris Wilson
  2019-04-25 11:29     ` [PATCH v2] " Chris Wilson
  0 siblings, 2 replies; 74+ messages in thread
From: Tvrtko Ursulin @ 2019-04-25 11:01 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 25/04/2019 10:19, Chris Wilson wrote:
> Make the engine responsible for cleaning itself up!

Why? (Just a hint to explain in the commit message.)

> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine.h       |  4 ++
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c    | 63 ++++++++++----------
>   drivers/gpu/drm/i915/gt/intel_engine_types.h |  2 +-
>   drivers/gpu/drm/i915/gt/intel_lrc.c          | 30 ++++------
>   drivers/gpu/drm/i915/gt/intel_ringbuffer.c   | 35 +++++------
>   drivers/gpu/drm/i915/i915_drv.h              |  6 --
>   drivers/gpu/drm/i915/i915_gem.c              | 19 +-----
>   7 files changed, 66 insertions(+), 93 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> index 3e53f53bc52b..f5b0f27cecb6 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -362,7 +362,11 @@ __intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
>   	return (head - tail - CACHELINE_BYTES) & (size - 1);
>   }
>   
> +int intel_engines_init_mmio(struct drm_i915_private *i915);
>   int intel_engines_setup(struct drm_i915_private *i915);
> +int intel_engines_init(struct drm_i915_private *i915);
> +void intel_engines_cleanup(struct drm_i915_private *i915);
> +
>   int intel_engine_init_common(struct intel_engine_cs *engine);
>   void intel_engine_cleanup_common(struct intel_engine_cs *engine);
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 4e30f3720eb7..65cdd0c4accc 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -303,6 +303,12 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
>   	engine->class = info->class;
>   	engine->instance = info->instance;
>   
> +	/*
> +	 * To be overridden by the backend on setup, however to facilitate
> +	 * cleanup on error during setup we always provide the destroy vfunc.
> +	 */
> +	engine->destroy = (typeof(engine->destroy))kfree;

Scheme overall is nice but has one fragile point in it.

I think we want a wrapper and then add some WARN_ON type safety if the 
engine has state allocated and the destroy vfunc accidentally still 
points to this one.

I suspect the easiest would be to set an engine flag once it has been 
initialized.
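
Something along these lines, perhaps (untested sketch; the setup-done flag 
is made up and would need adding):

static void intel_engine_free(struct intel_engine_cs *engine)
{
	/* the backend should have installed its own destroy() by now */
	WARN_ON(engine->flags & I915_ENGINE_SETUP_DONE);
	kfree(engine);
}

	/* in intel_engine_setup() */
	engine->destroy = intel_engine_free;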

> +
>   	engine->uabi_class = intel_engine_classes[info->class].uabi_class;
>   
>   	engine->context_size = __intel_engine_context_size(dev_priv,
> @@ -327,18 +333,31 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
>   	return 0;
>   }
>   
> +/**
> + * intel_engines_cleanup() - free the resources allocated for Command Streamers
> + * @dev_priv: i915 device private
> + */
> +void intel_engines_cleanup(struct drm_i915_private *i915)
> +{
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +
> +	for_each_engine(engine, i915, id) {
> +		engine->destroy(engine);
> +		i915->engine[id] = NULL;
> +	}
> +}
> +
>   /**
>    * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
>    * @dev_priv: i915 device private
>    *
>    * Return: non-zero if the initialization failed.
>    */
> -int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
> +int intel_engines_init_mmio(struct drm_i915_private *i915)
>   {
> -	struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
> -	const unsigned int engine_mask = INTEL_INFO(dev_priv)->engine_mask;
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> +	struct intel_device_info *device_info = mkwrite_device_info(i915);
> +	const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask;
>   	unsigned int mask = 0;
>   	unsigned int i;
>   	int err;
> @@ -351,10 +370,10 @@ int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
>   		return -ENODEV;
>   
>   	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
> -		if (!HAS_ENGINE(dev_priv, i))
> +		if (!HAS_ENGINE(i915, i))
>   			continue;
>   
> -		err = intel_engine_setup(dev_priv, i);
> +		err = intel_engine_setup(i915, i);

While nosing around I noticed there is mmio access in setup. :(

>   		if (err)
>   			goto cleanup;
>   
> @@ -370,20 +389,19 @@ int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
>   		device_info->engine_mask = mask;
>   
>   	/* We always presume we have at least RCS available for later probing */
> -	if (WARN_ON(!HAS_ENGINE(dev_priv, RCS0))) {
> +	if (WARN_ON(!HAS_ENGINE(i915, RCS0))) {
>   		err = -ENODEV;
>   		goto cleanup;
>   	}
>   
> -	RUNTIME_INFO(dev_priv)->num_engines = hweight32(mask);
> +	RUNTIME_INFO(i915)->num_engines = hweight32(mask);
>   
> -	i915_check_and_clear_faults(dev_priv);
> +	i915_check_and_clear_faults(i915);
>   
>   	return 0;
>   
>   cleanup:
> -	for_each_engine(engine, dev_priv, id)
> -		kfree(engine);
> +	intel_engines_cleanup(i915);
>   	return err;
>   }
>   
> @@ -397,7 +415,7 @@ int intel_engines_init(struct drm_i915_private *i915)
>   {
>   	int (*init)(struct intel_engine_cs *engine);
>   	struct intel_engine_cs *engine;
> -	enum intel_engine_id id, err_id;
> +	enum intel_engine_id id;
>   	int err;
>   
>   	if (HAS_EXECLISTS(i915))
> @@ -406,8 +424,6 @@ int intel_engines_init(struct drm_i915_private *i915)
>   		init = intel_ring_submission_init;
>   
>   	for_each_engine(engine, i915, id) {
> -		err_id = id;
> -
>   		err = init(engine);
>   		if (err)
>   			goto cleanup;
> @@ -416,14 +432,7 @@ int intel_engines_init(struct drm_i915_private *i915)
>   	return 0;
>   
>   cleanup:
> -	for_each_engine(engine, i915, id) {
> -		if (id >= err_id) {
> -			kfree(engine);
> -			i915->engine[id] = NULL;
> -		} else {
> -			i915->gt.cleanup_engine(engine);
> -		}
> -	}
> +	intel_engines_cleanup(i915);
>   	return err;
>   }
>   
> @@ -609,13 +618,7 @@ int intel_engines_setup(struct drm_i915_private *i915)
>   	return 0;
>   
>   cleanup:
> -	for_each_engine(engine, i915, id) {
> -		if (engine->cops)
> -			i915->gt.cleanup_engine(engine);
> -		else
> -			kfree(engine);
> -		i915->engine[id] = NULL;
> -	}
> +	intel_engines_cleanup(i915);
>   	return err;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index d972c339309c..9d64e33f8427 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -420,7 +420,7 @@ struct intel_engine_cs {
>   	 */
>   	void		(*cancel_requests)(struct intel_engine_cs *engine);
>   
> -	void		(*cleanup)(struct intel_engine_cs *engine);
> +	void		(*destroy)(struct intel_engine_cs *engine);
>   
>   	struct intel_engine_execlists execlists;
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index ea7794d368d1..2e74c792104e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2336,26 +2336,6 @@ static void execlists_park(struct intel_engine_cs *engine)
>   	tasklet_kill(&engine->execlists.tasklet);
>   }
>   
> -/**
> - * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
> - * @engine: Engine Command Streamer.
> - */
> -void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
> -{
> -	struct drm_i915_private *i915 = engine->i915;
> -
> -	if (engine->cleanup)
> -		engine->cleanup(engine);
> -
> -	intel_engine_cleanup_common(engine);
> -
> -	lrc_destroy_wa_ctx(engine);
> -
> -	engine->i915 = NULL;
> -	i915->engine[engine->id] = NULL;
> -	kfree(engine);
> -}
> -
>   void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
>   {
>   	engine->submit_request = execlists_submit_request;
> @@ -2378,10 +2358,20 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
>   		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
>   }
>   
> +static void execlists_destroy(struct intel_engine_cs *engine)
> +{
> +	intel_engine_cleanup_common(engine);
> +
> +	lrc_destroy_wa_ctx(engine);
> +	kfree(engine);
> +}
> +
>   static void
>   logical_ring_default_vfuncs(struct intel_engine_cs *engine)
>   {
>   	/* Default vfuncs which can be overriden by each engine. */
> +
> +	engine->destroy = execlists_destroy;
>   	engine->resume = execlists_resume;
>   
>   	engine->reset.prepare = execlists_reset_prepare;
> diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> index 5bcd3034a101..7a4804c47466 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> @@ -1520,25 +1520,6 @@ static const struct intel_context_ops ring_context_ops = {
>   	.destroy = ring_context_destroy,
>   };
>   
> -void intel_engine_cleanup(struct intel_engine_cs *engine)
> -{
> -	struct drm_i915_private *dev_priv = engine->i915;
> -
> -	WARN_ON(INTEL_GEN(dev_priv) > 2 &&
> -		(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
> -
> -	intel_ring_unpin(engine->buffer);
> -	intel_ring_put(engine->buffer);
> -
> -	if (engine->cleanup)
> -		engine->cleanup(engine);
> -
> -	intel_engine_cleanup_common(engine);
> -
> -	dev_priv->engine[engine->id] = NULL;
> -	kfree(engine);
> -}
> -
>   static int load_pd_dir(struct i915_request *rq,
>   		       const struct i915_hw_ppgtt *ppgtt)
>   {
> @@ -2130,6 +2111,20 @@ static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
>   	engine->submit_request = gen6_bsd_submit_request;
>   }
>   
> +static void ring_destroy(struct intel_engine_cs *engine)
> +{
> +	struct drm_i915_private *dev_priv = engine->i915;
> +
> +	WARN_ON(INTEL_GEN(dev_priv) > 2 &&
> +		(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
> +
> +	intel_ring_unpin(engine->buffer);
> +	intel_ring_put(engine->buffer);
> +
> +	intel_engine_cleanup_common(engine);
> +	kfree(engine);
> +}
> +
>   static void setup_irq(struct intel_engine_cs *engine)
>   {
>   	struct drm_i915_private *i915 = engine->i915;
> @@ -2158,6 +2153,8 @@ static void setup_common(struct intel_engine_cs *engine)
>   
>   	setup_irq(engine);
>   
> +	engine->destroy = ring_destroy;
> +
>   	engine->resume = xcs_resume;
>   	engine->reset.prepare = reset_prepare;
>   	engine->reset.reset = reset_ring;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5c77bf5b735b..5ca4df9a7428 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1995,8 +1995,6 @@ struct drm_i915_private {
>   
>   	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
>   	struct {
> -		void (*cleanup_engine)(struct intel_engine_cs *engine);
> -
>   		struct i915_gt_timelines {
>   			struct mutex mutex; /* protects list, tainted by GPU */
>   			struct list_head active_list;
> @@ -2722,9 +2720,6 @@ extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
>   extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
>   int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
>   
> -int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
> -int intel_engines_init(struct drm_i915_private *dev_priv);
> -
>   u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
>   
>   /* intel_hotplug.c */
> @@ -3132,7 +3127,6 @@ int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
>   int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv);
>   void i915_gem_init_swizzling(struct drm_i915_private *dev_priv);
>   void i915_gem_fini(struct drm_i915_private *dev_priv);
> -void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv);
>   int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
>   			   unsigned int flags, long timeout);
>   void i915_gem_suspend(struct drm_i915_private *dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 4c1793b1012e..2fcec1bfb038 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4476,11 +4476,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>   
>   	dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
>   
> -	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv))
> -		dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
> -	else
> -		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
> -
>   	i915_timelines_init(dev_priv);
>   
>   	ret = i915_gem_init_userptr(dev_priv);
> @@ -4601,7 +4596,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>   err_pm:
>   	if (ret != -EIO) {
>   		intel_cleanup_gt_powersave(dev_priv);
> -		i915_gem_cleanup_engines(dev_priv);
> +		intel_engines_cleanup(dev_priv);
>   	}
>   err_context:
>   	if (ret != -EIO)
> @@ -4661,7 +4656,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
>   	mutex_lock(&dev_priv->drm.struct_mutex);
>   	intel_uc_fini_hw(dev_priv);
>   	intel_uc_fini(dev_priv);
> -	i915_gem_cleanup_engines(dev_priv);
> +	intel_engines_cleanup(dev_priv);
>   	i915_gem_contexts_fini(dev_priv);
>   	i915_gem_fini_scratch(dev_priv);
>   	mutex_unlock(&dev_priv->drm.struct_mutex);
> @@ -4684,16 +4679,6 @@ void i915_gem_init_mmio(struct drm_i915_private *i915)
>   	i915_gem_sanitize(i915);
>   }
>   
> -void
> -i915_gem_cleanup_engines(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -
> -	for_each_engine(engine, dev_priv, id)
> -		dev_priv->gt.cleanup_engine(engine);
> -}
> -
>   void
>   i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
>   {
> 

Regards,

Tvrtko

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (44 preceding siblings ...)
  2019-04-25 10:35 ` [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Tvrtko Ursulin
@ 2019-04-25 11:22 ` Patchwork
  2019-04-25 11:39 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (5 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Patchwork @ 2019-04-25 11:22 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling
URL   : https://patchwork.freedesktop.org/series/59933/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
d3bdb17e90b7 drm/i915: Seal races between async GPU cancellation, retirement and signaling
4413100b422a drm/i915/gvt: Pin the per-engine GVT shadow contexts
2ee46a2f553e drm/i915: Export intel_context_instance()
6702f3f43ea2 drm/i915/selftests: Use the real kernel context for sseu isolation tests
4b16b7f00463 drm/i915/selftests: Pass around intel_context for sseu
01acbbacf9fe drm/i915: Pass intel_context to intel_context_pin_lock()
dd5199890d17 drm/i915: Split engine setup/init into two phases
1ecd5a4be8fc drm/i915: Switch back to an array of logical per-engine HW contexts
-:591: WARNING:LINE_SPACING: Missing a blank line after declarations
#591: FILE: drivers/gpu/drm/i915/i915_gem_context.h:214:
+		struct i915_gem_engines *e = rcu_dereference(ctx->engines);
+		if (likely(idx < e->num_engines && e->engines[idx]))

-:610: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'it' - possible side-effects?
#610: FILE: drivers/gpu/drm/i915/i915_gem_context.h:233:
+#define for_each_gem_engine(ce, engines, it) \
+	for (i915_gem_engines_iter_init(&(it), (engines)); \
+	     ((ce) = i915_gem_engines_iter_next(&(it)));)

total: 0 errors, 1 warnings, 1 checks, 965 lines checked
66ee78beaf16 drm/i915: Remove intel_context.active_link
a7aa0b935302 drm/i915: Move i915_request_alloc into selftests/
-:421: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#421: 
new file mode 100644

-:426: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#426: FILE: drivers/gpu/drm/i915/selftests/igt_gem_utils.c:1:
+/*

-:427: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#427: FILE: drivers/gpu/drm/i915/selftests/igt_gem_utils.c:2:
+ * SPDX-License-Identifier: MIT

-:466: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#466: FILE: drivers/gpu/drm/i915/selftests/igt_gem_utils.h:1:
+/*

-:467: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#467: FILE: drivers/gpu/drm/i915/selftests/igt_gem_utils.h:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 5 warnings, 0 checks, 408 lines checked
8e3b0d14da6e drm/i915/execlists: Flush the tasklet on parking
cc8c59145f86 drm/i915: Move the engine->destroy() vfunc onto the engine
b4def543830c drm/i915: Convert inconsistent static engine tables into an init error
4d41aff00f07 drm/i915: Make engine_mask & num_engines static
-:72: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'x' - possible side-effects?
#72: FILE: drivers/gpu/drm/i915/i915_pci.c:38:
+#define ENGINES(x) .engine_mask = (x), .num_engines = hweight8(x)

total: 0 errors, 0 warnings, 1 checks, 279 lines checked
eacb264b1b19 drm/i915: Restore control over ppgtt for context creation ABI
-:80: WARNING:LONG_LINE: line over 100 characters
#80: FILE: include/uapi/drm/i915_drm.h:420:
+#define DRM_IOCTL_I915_GEM_VM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control)

-:81: WARNING:LONG_LINE: line over 100 characters
#81: FILE: include/uapi/drm/i915_drm.h:421:
+#define DRM_IOCTL_I915_GEM_VM_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)

-:81: WARNING:SPACING: space prohibited between function name and open parenthesis '('
#81: FILE: include/uapi/drm/i915_drm.h:421:
+#define DRM_IOCTL_I915_GEM_VM_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)

-:81: ERROR:COMPLEX_MACRO: Macros with complex values should be enclosed in parentheses
#81: FILE: include/uapi/drm/i915_drm.h:421:
+#define DRM_IOCTL_I915_GEM_VM_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)

total: 1 errors, 3 warnings, 0 checks, 64 lines checked
8116cec3c578 drm/i915: Allow a context to define its set of engines
-:403: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'p' - possible side-effects?
#403: FILE: drivers/gpu/drm/i915/i915_utils.h:107:
+#define check_struct_size(p, member, n, sz) \
+	likely(__check_struct_size(sizeof(*(p)), \
+				   sizeof(*(p)->member) + __must_be_array((p)->member), \
+				   n, sz))

-:403: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'member' - possible side-effects?
#403: FILE: drivers/gpu/drm/i915/i915_utils.h:107:
+#define check_struct_size(p, member, n, sz) \
+	likely(__check_struct_size(sizeof(*(p)), \
+				   sizeof(*(p)->member) + __must_be_array((p)->member), \
+				   n, sz))

-:403: CHECK:MACRO_ARG_PRECEDENCE: Macro argument 'member' may be better as '(member)' to avoid precedence issues
#403: FILE: drivers/gpu/drm/i915/i915_utils.h:107:
+#define check_struct_size(p, member, n, sz) \
+	likely(__check_struct_size(sizeof(*(p)), \
+				   sizeof(*(p)->member) + __must_be_array((p)->member), \
+				   n, sz))

total: 0 errors, 0 warnings, 3 checks, 396 lines checked
2a6aed246ca7 drm/i915: Re-expose SINGLE_TIMELINE flags for context creation
c0bede879ff9 drm/i915: Allow userspace to clone contexts on creation
-:213: ERROR:BRACKET_SPACE: space prohibited before open square bracket '['
#213: FILE: drivers/gpu/drm/i915/i915_gem_context.c:1843:
+#define MAP(x, y) [ilog2(I915_CONTEXT_CLONE_##x)] = y

total: 1 errors, 0 warnings, 0 checks, 235 lines checked
65cb8bcd56e0 drm/i915: Load balancing across a virtual engine
-:260: ERROR:SPACING: space prohibited after that open parenthesis '('
#260: FILE: drivers/gpu/drm/i915/gt/intel_lrc.c:495:
+	if ( ~prio & ACTIVE_PRIORITY &&

total: 1 errors, 0 warnings, 0 checks, 1276 lines checked
7ab058d19497 drm/i915: Apply an execution_mask to the virtual_engine
f9a6aebb5f5e drm/i915: Extend execution fence to support a callback
ff6b2382417e drm/i915/execlists: Virtual engine bonding
39b04a462159 drm/i915: Allow specification of parallel execbuf
7e3b449981b1 drm/i915: Split GEM object type definition to its own header
-:25: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#25: 
new file mode 100644

-:59: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#59: FILE: drivers/gpu/drm/i915/gem/i915_gem_object_types.h:1:
+/*

-:60: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#60: FILE: drivers/gpu/drm/i915/gem/i915_gem_object_types.h:2:
+ * SPDX-License-Identifier: MIT

-:177: WARNING:LINE_SPACING: Missing a blank line after declarations
#177: FILE: drivers/gpu/drm/i915/gem/i915_gem_object_types.h:119:
+	struct list_head batch_pool_link;
+	I915_SELFTEST_DECLARE(struct list_head st_link);

-:436: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#436: FILE: drivers/gpu/drm/i915/i915_gem_object.h:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 5 warnings, 0 checks, 664 lines checked
504adb89c47b drm/i915: Pull GEM ioctls interface to its own file
-:12: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#12: 
new file mode 100644

-:17: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#17: FILE: drivers/gpu/drm/i915/gem/i915_gem_ioctls.h:1:
+/*

-:18: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#18: FILE: drivers/gpu/drm/i915/gem/i915_gem_ioctls.h:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 3 warnings, 0 checks, 152 lines checked
cfbd63a2cc15 drm/i915: Move object->pages API to i915_gem_object.[ch]
-:38: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#38: 
rename from drivers/gpu/drm/i915/i915_gem_object.c

total: 0 errors, 1 warnings, 0 checks, 339 lines checked
88ecbcf16a43 drm/i915: Move shmem object setup to its own file
-:413: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#413: 
new file mode 100644

-:418: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#418: FILE: drivers/gpu/drm/i915/gem/i915_gem_shmem.c:1:
+/*

-:419: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#419: FILE: drivers/gpu/drm/i915/gem/i915_gem_shmem.c:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 3 warnings, 0 checks, 2003 lines checked
27574b61465b drm/i915: Move phys objects to its own file
-:80: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#80: 
new file mode 100644

-:85: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#85: FILE: drivers/gpu/drm/i915/gem/i915_gem_pages.c:1:
+/*

-:86: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#86: FILE: drivers/gpu/drm/i915/gem/i915_gem_pages.c:2:
+ * SPDX-License-Identifier: MIT

-:185: WARNING:MEMORY_BARRIER: memory barrier without comment
#185: FILE: drivers/gpu/drm/i915/gem/i915_gem_pages.c:101:
+		smp_mb__before_atomic();

-:250: CHECK:MULTIPLE_ASSIGNMENTS: multiple assignments should be avoided
#250: FILE: drivers/gpu/drm/i915/gem/i915_gem_pages.c:166:
+	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;

-:376: WARNING:MEMORY_BARRIER: memory barrier without comment
#376: FILE: drivers/gpu/drm/i915/gem/i915_gem_pages.c:292:
+			smp_mb__before_atomic();

-:395: CHECK:MULTIPLE_ASSIGNMENTS: multiple assignments should be avoided
#395: FILE: drivers/gpu/drm/i915/gem/i915_gem_pages.c:311:
+		ptr = obj->mm.mapping = NULL;

-:601: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#601: FILE: drivers/gpu/drm/i915/gem/i915_gem_phys.c:1:
+/*

-:602: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#602: FILE: drivers/gpu/drm/i915/gem/i915_gem_phys.c:2:
+ * SPDX-License-Identifier: MIT

-:898: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#898: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c:1:
+/*

-:899: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#899: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 9 warnings, 2 checks, 1825 lines checked
ef17118e7363 drm/i915: Move mmap and friends to its own file
-:25: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#25: 
new file mode 100644

-:30: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#30: FILE: drivers/gpu/drm/i915/gem/i915_gem_mman.c:1:
+/*

-:31: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#31: FILE: drivers/gpu/drm/i915/gem/i915_gem_mman.c:2:
+ * SPDX-License-Identifier: MIT

-:590: WARNING:MEMORY_BARRIER: memory barrier without comment
#590: FILE: drivers/gpu/drm/i915/gem/i915_gem_object.c:400:
+		wmb();

-:633: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#633: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c:1:
+/*

-:634: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#634: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c:2:
+ * SPDX-License-Identifier: MIT

-:827: WARNING:LONG_LINE: line over 100 characters
#827: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c:195:
+			      (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);

-:946: WARNING:SPACING: space prohibited before semicolon
#946: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c:314:
+next_tiling: ;

total: 0 errors, 8 warnings, 0 checks, 2256 lines checked
7d08333f4f33 drm/i915: Move GEM domain management to its own file
-:25: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#25: 
new file mode 100644

-:30: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#30: FILE: drivers/gpu/drm/i915/gem/i915_gem_domain.c:1:
+/*

-:31: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#31: FILE: drivers/gpu/drm/i915/gem/i915_gem_domain.c:2:
+ * SPDX-License-Identifier: MIT

-:113: WARNING:MEMORY_BARRIER: memory barrier without comment
#113: FILE: drivers/gpu/drm/i915/gem/i915_gem_domain.c:84:
+		mb();

-:175: WARNING:MEMORY_BARRIER: memory barrier without comment
#175: FILE: drivers/gpu/drm/i915/gem/i915_gem_domain.c:146:
+		mb();

total: 0 errors, 5 warnings, 0 checks, 1871 lines checked
93ad355975cb drm/i915: Move more GEM objects under gem/
-:87: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#87: 
rename from drivers/gpu/drm/i915/i915_gem_clflush.c

-:114: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#114: FILE: drivers/gpu/drm/i915/gem/i915_gem_clflush.c:2:
+ * SPDX-License-Identifier: MIT

-:135: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#135: FILE: drivers/gpu/drm/i915/gem/i915_gem_clflush.h:1:
+/*

-:136: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#136: FILE: drivers/gpu/drm/i915/gem/i915_gem_clflush.h:2:
+ * SPDX-License-Identifier: MIT

-:187: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#187: FILE: drivers/gpu/drm/i915/gem/i915_gem_context.c:2:
+ * SPDX-License-Identifier: MIT

-:231: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#231: FILE: drivers/gpu/drm/i915/gem/i915_gem_context.h:2:
+ * SPDX-License-Identifier: MIT

-:262: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#262: FILE: drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c:2:
+ * SPDX-License-Identifier: MIT

-:338: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#338: FILE: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:2:
+ * SPDX-License-Identifier: MIT

-:394: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#394: FILE: drivers/gpu/drm/i915/gem/i915_gem_internal.c:2:
+ * SPDX-License-Identifier: MIT

-:495: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#495: FILE: drivers/gpu/drm/i915/gem/i915_gem_shrinker.c:2:
+ * SPDX-License-Identifier: MIT

-:544: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#544: FILE: drivers/gpu/drm/i915/gem/i915_gem_stolen.c:2:
+ * SPDX-License-Identifier: MIT

-:592: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#592: FILE: drivers/gpu/drm/i915/gem/i915_gem_tiling.c:2:
+ * SPDX-License-Identifier: MIT

-:640: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#640: FILE: drivers/gpu/drm/i915/gem/i915_gem_userptr.c:2:
+ * SPDX-License-Identifier: MIT

-:691: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#691: FILE: drivers/gpu/drm/i915/gem/i915_gemfs.c:2:
+ * SPDX-License-Identifier: MIT

-:714: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#714: FILE: drivers/gpu/drm/i915/gem/i915_gemfs.h:1:
+/*

-:715: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#715: FILE: drivers/gpu/drm/i915/gem/i915_gemfs.h:2:
+ * SPDX-License-Identifier: MIT

-:759: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#759: FILE: drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c:2:
+ * SPDX-License-Identifier: MIT

-:771: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#771: FILE: drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h:1:
+/*

-:772: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#772: FILE: drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h:2:
+ * SPDX-License-Identifier: MIT

-:827: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#827: FILE: drivers/gpu/drm/i915/gem/selftests/huge_pages.c:2:
+ * SPDX-License-Identifier: MIT

-:879: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#879: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c:2:
+ * SPDX-License-Identifier: MIT

-:922: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#922: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c:2:
+ * SPDX-License-Identifier: MIT

-:984: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#984: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:2:
+ * SPDX-License-Identifier: MIT

-:1040: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#1040: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c:2:
+ * SPDX-License-Identifier: MIT

-:1108: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#1108: FILE: drivers/gpu/drm/i915/gem/selftests/mock_context.c:2:
+ * SPDX-License-Identifier: MIT

-:1125: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#1125: FILE: drivers/gpu/drm/i915/gem/selftests/mock_context.h:1:
+/*

-:1126: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#1126: FILE: drivers/gpu/drm/i915/gem/selftests/mock_context.h:2:
+ * SPDX-License-Identifier: MIT

-:1178: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#1178: FILE: drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c:2:
+ * SPDX-License-Identifier: MIT

-:1190: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#1190: FILE: drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h:1:
+/*

-:1191: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#1191: FILE: drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h:2:
+ * SPDX-License-Identifier: MIT

-:1221: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#1221: FILE: drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h:1:
+/*

-:1222: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#1222: FILE: drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 32 warnings, 0 checks, 1404 lines checked
8d4d95a47223 drm/i915: Pull scatterlist utils out of i915_gem.h
-:372: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#372: 
new file mode 100644

-:377: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#377: FILE: drivers/gpu/drm/i915/i915_scatterlist.c:1:
+/*

-:378: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#378: FILE: drivers/gpu/drm/i915/i915_scatterlist.c:2:
+ * SPDX-License-Identifier: MIT

-:422: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#422: FILE: drivers/gpu/drm/i915/i915_scatterlist.h:1:
+/*

-:423: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#423: FILE: drivers/gpu/drm/i915/i915_scatterlist.h:2:
+ * SPDX-License-Identifier: MIT

-:452: CHECK:MULTIPLE_ASSIGNMENTS: multiple assignments should be avoided
#452: FILE: drivers/gpu/drm/i915/i915_scatterlist.h:31:
+		s.max = s.curr = s.sgp->offset;

-:497: CHECK:MACRO_ARG_REUSE: Macro argument reuse '__iter' - possible side-effects?
#497: FILE: drivers/gpu/drm/i915/i915_scatterlist.h:76:
+#define __for_each_sgt_dma(__dmap, __iter, __sgt, __step)		\
+	for ((__iter) = __sgt_iter((__sgt)->sgl, true);			\
+	     ((__dmap) = (__iter).dma + (__iter).curr);			\
+	     (((__iter).curr += (__step)) >= (__iter).max) ?		\
+	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0 : 0)

-:509: CHECK:MACRO_ARG_REUSE: Macro argument reuse '__iter' - possible side-effects?
#509: FILE: drivers/gpu/drm/i915/i915_s


* Re: [PATCH 12/45] drm/i915: Move the engine->destroy() vfunc onto the engine
  2019-04-25 11:01   ` Tvrtko Ursulin
@ 2019-04-25 11:24     ` Chris Wilson
  2019-04-25 11:29     ` [PATCH v2] " Chris Wilson
  1 sibling, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25 11:24 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-04-25 12:01:36)
> 
> On 25/04/2019 10:19, Chris Wilson wrote:
> > Make the engine responsible for cleaning itself up!
> 
> Why? (Just a hint to explain in the commit message.)

... so we can remove the vfunc stuck away inside the i915->gt that has
been annoying the reader for the last several years.

> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > index 4e30f3720eb7..65cdd0c4accc 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > @@ -303,6 +303,12 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
> >       engine->class = info->class;
> >       engine->instance = info->instance;
> >   
> > +     /*
> > +      * To be overridden by the backend on setup, however to facilitate
> > +      * cleanup on error during setup we always provide the destroy vfunc.
> > +      */
> > +     engine->destroy = (typeof(engine->destroy))kfree;
> 
> Scheme overall is nice but has one fragile point in it.
> 
> I think we want a wrapper and then add some WARN_ON type safety if the 
> engine has state allocated and the destroy vfunc accidentally still 
> points to this one.
> 
> I suspect easiest would be to set an engine flag once it has been 
> initialized.

Hmm, still fragile since you rely on the user? And if you don't rely on
the user, we just put a GEM_BUG_ON(engine->destroy == kfree) at the
point where we expect the backend to take control, i.e. after a
successful call to setup() in intel_engines_setup.

@@ -617,6 +617,9 @@ int intel_engines_setup(struct drm_i915_private *i915)
                if (err)
                        goto cleanup;

+               /* We expect the backend to take control over its state */
+               GEM_BUG_ON(engine->destroy == (typeof(engine->destroy))kfree);
+
                GEM_BUG_ON(!engine->cops);
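
To make that pattern concrete outside of the driver, a minimal userspace sketch
of the same idea might look like the following (fake_engine, backend_setup and
friends are invented names, not the i915 code): the allocator installs plain
free() as the default ->destroy so teardown is always safe, the backend
replaces it once it owns real state, and an assert checks that the handover
actually happened.

#include <assert.h>
#include <stdlib.h>

struct fake_engine {
	void (*destroy)(struct fake_engine *engine);
	void *backend_state;
};

/*
 * Mirrors the (typeof(engine->destroy))kfree trick from the patch; strictly a
 * type-punned call, but fine in practice and exactly the point under debate.
 */
#define DEFAULT_DESTROY ((void (*)(struct fake_engine *))free)

static void backend_destroy(struct fake_engine *engine)
{
	free(engine->backend_state);
	free(engine);
}

static int backend_setup(struct fake_engine *engine)
{
	engine->backend_state = malloc(64);
	if (!engine->backend_state)
		return -1;

	/* The backend now owns state, so it takes over teardown. */
	engine->destroy = backend_destroy;
	return 0;
}

static struct fake_engine *engine_alloc(void)
{
	struct fake_engine *engine = calloc(1, sizeof(*engine));

	if (engine)
		engine->destroy = DEFAULT_DESTROY; /* safe before setup */
	return engine;
}

int main(void)
{
	struct fake_engine *engine = engine_alloc();

	if (!engine)
		return 1;

	if (backend_setup(engine)) {
		engine->destroy(engine); /* still the plain-free default */
		return 1;
	}

	/* The equivalent of the GEM_BUG_ON: the backend must have taken over. */
	assert(engine->destroy != DEFAULT_DESTROY);

	engine->destroy(engine);
	return 0;
}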
> 
> > +
> >       engine->uabi_class = intel_engine_classes[info->class].uabi_class;
> >   
> >       engine->context_size = __intel_engine_context_size(dev_priv,
> > @@ -327,18 +333,31 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
> >       return 0;
> >   }
> >   
> > +/**
> > + * intel_engines_cleanup() - free the resources allocated for Command Streamers
> > + * @dev_priv: i915 device private
> > + */
> > +void intel_engines_cleanup(struct drm_i915_private *i915)
> > +{
> > +     struct intel_engine_cs *engine;
> > +     enum intel_engine_id id;
> > +
> > +     for_each_engine(engine, i915, id) {
> > +             engine->destroy(engine);
> > +             i915->engine[id] = NULL;
> > +     }
> > +}
> > +
> >   /**
> >    * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
> >    * @dev_priv: i915 device private
> >    *
> >    * Return: non-zero if the initialization failed.
> >    */
> > -int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
> > +int intel_engines_init_mmio(struct drm_i915_private *i915)
> >   {
> > -     struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
> > -     const unsigned int engine_mask = INTEL_INFO(dev_priv)->engine_mask;
> > -     struct intel_engine_cs *engine;
> > -     enum intel_engine_id id;
> > +     struct intel_device_info *device_info = mkwrite_device_info(i915);
> > +     const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask;
> >       unsigned int mask = 0;
> >       unsigned int i;
> >       int err;
> > @@ -351,10 +370,10 @@ int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
> >               return -ENODEV;
> >   
> >       for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
> > -             if (!HAS_ENGINE(dev_priv, i))
> > +             if (!HAS_ENGINE(i915, i))
> >                       continue;
> >   
> > -             err = intel_engine_setup(dev_priv, i);
> > +             err = intel_engine_setup(i915, i);
> 
> While nosing around I noticed there is mmio access in setup. :(

Yeah, we can take another step or two to split up the phases between sw
setup and HW setup.
-Chris
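
As a rough illustration of that split (the names below are invented and not
the eventual i915 interface), the idea is a software-only phase that touches
no registers, a separate hardware phase, and one cleanup path shared by every
failure point:

#include <stdio.h>

static int setup_sw(void) { return 0; }  /* allocate state, no MMIO        */
static int init_hw(void)  { return 0; }  /* program registers, may fail    */
static void cleanup(void) { puts("cleanup"); } /* undoes whatever was done */

static int engines_probe(void)
{
	int err;

	err = setup_sw();   /* phase 1: pure software setup */
	if (err)
		goto out;

	err = init_hw();    /* phase 2: hardware initialisation */
	if (err)
		goto out;

	return 0;

out:
	cleanup();          /* one teardown path for both phases */
	return err;
}

int main(void)
{
	return engines_probe() ? 1 : 0;
}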

* [PATCH v2] drm/i915: Move the engine->destroy() vfunc onto the engine
  2019-04-25 11:01   ` Tvrtko Ursulin
  2019-04-25 11:24     ` Chris Wilson
@ 2019-04-25 11:29     ` Chris Wilson
  2019-04-25 11:46       ` Tvrtko Ursulin
  1 sibling, 1 reply; 74+ messages in thread
From: Chris Wilson @ 2019-04-25 11:29 UTC (permalink / raw)
  To: intel-gfx

Make the engine responsible for cleaning itself up!

This removes the i915->gt.cleanup vfunc that has been annoying the
casual reader and myself for the last several years, and helps keep tidy
a future patch that adds more cleanup.

v2: Assert that engine->destroy is set after the backend starts
allocating its own state.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine.h       |  4 ++
 drivers/gpu/drm/i915/gt/intel_engine_cs.c    | 66 +++++++++++---------
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  2 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 29 +++------
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c   | 35 +++++------
 drivers/gpu/drm/i915/i915_drv.h              |  6 --
 drivers/gpu/drm/i915/i915_gem.c              | 19 +-----
 7 files changed, 68 insertions(+), 93 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 3e53f53bc52b..f5b0f27cecb6 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -362,7 +362,11 @@ __intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
 	return (head - tail - CACHELINE_BYTES) & (size - 1);
 }
 
+int intel_engines_init_mmio(struct drm_i915_private *i915);
 int intel_engines_setup(struct drm_i915_private *i915);
+int intel_engines_init(struct drm_i915_private *i915);
+void intel_engines_cleanup(struct drm_i915_private *i915);
+
 int intel_engine_init_common(struct intel_engine_cs *engine);
 void intel_engine_cleanup_common(struct intel_engine_cs *engine);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 4e30f3720eb7..5994528eac79 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -303,6 +303,12 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 	engine->class = info->class;
 	engine->instance = info->instance;
 
+	/*
+	 * To be overridden by the backend on setup. However to facilitate
+	 * cleanup on error during setup, we always provide the destroy vfunc.
+	 */
+	engine->destroy = (typeof(engine->destroy))kfree;
+
 	engine->uabi_class = intel_engine_classes[info->class].uabi_class;
 
 	engine->context_size = __intel_engine_context_size(dev_priv,
@@ -327,18 +333,31 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 	return 0;
 }
 
+/**
+ * intel_engines_cleanup() - free the resources allocated for Command Streamers
+ * @i915: the i915 device private
+ */
+void intel_engines_cleanup(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, i915, id) {
+		engine->destroy(engine);
+		i915->engine[id] = NULL;
+	}
+}
+
 /**
  * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
  * @dev_priv: i915 device private
  *
  * Return: non-zero if the initialization failed.
  */
-int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
+int intel_engines_init_mmio(struct drm_i915_private *i915)
 {
-	struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
-	const unsigned int engine_mask = INTEL_INFO(dev_priv)->engine_mask;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
+	struct intel_device_info *device_info = mkwrite_device_info(i915);
+	const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask;
 	unsigned int mask = 0;
 	unsigned int i;
 	int err;
@@ -351,10 +370,10 @@ int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
 		return -ENODEV;
 
 	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
-		if (!HAS_ENGINE(dev_priv, i))
+		if (!HAS_ENGINE(i915, i))
 			continue;
 
-		err = intel_engine_setup(dev_priv, i);
+		err = intel_engine_setup(i915, i);
 		if (err)
 			goto cleanup;
 
@@ -370,20 +389,19 @@ int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
 		device_info->engine_mask = mask;
 
 	/* We always presume we have at least RCS available for later probing */
-	if (WARN_ON(!HAS_ENGINE(dev_priv, RCS0))) {
+	if (WARN_ON(!HAS_ENGINE(i915, RCS0))) {
 		err = -ENODEV;
 		goto cleanup;
 	}
 
-	RUNTIME_INFO(dev_priv)->num_engines = hweight32(mask);
+	RUNTIME_INFO(i915)->num_engines = hweight32(mask);
 
-	i915_check_and_clear_faults(dev_priv);
+	i915_check_and_clear_faults(i915);
 
 	return 0;
 
 cleanup:
-	for_each_engine(engine, dev_priv, id)
-		kfree(engine);
+	intel_engines_cleanup(i915);
 	return err;
 }
 
@@ -397,7 +415,7 @@ int intel_engines_init(struct drm_i915_private *i915)
 {
 	int (*init)(struct intel_engine_cs *engine);
 	struct intel_engine_cs *engine;
-	enum intel_engine_id id, err_id;
+	enum intel_engine_id id;
 	int err;
 
 	if (HAS_EXECLISTS(i915))
@@ -406,8 +424,6 @@ int intel_engines_init(struct drm_i915_private *i915)
 		init = intel_ring_submission_init;
 
 	for_each_engine(engine, i915, id) {
-		err_id = id;
-
 		err = init(engine);
 		if (err)
 			goto cleanup;
@@ -416,14 +432,7 @@ int intel_engines_init(struct drm_i915_private *i915)
 	return 0;
 
 cleanup:
-	for_each_engine(engine, i915, id) {
-		if (id >= err_id) {
-			kfree(engine);
-			i915->engine[id] = NULL;
-		} else {
-			i915->gt.cleanup_engine(engine);
-		}
-	}
+	intel_engines_cleanup(i915);
 	return err;
 }
 
@@ -603,19 +612,16 @@ int intel_engines_setup(struct drm_i915_private *i915)
 		if (err)
 			goto cleanup;
 
+		/* We expect the backend to take control over its state */
+		GEM_BUG_ON(engine->destroy == (typeof(engine->destroy))kfree);
+
 		GEM_BUG_ON(!engine->cops);
 	}
 
 	return 0;
 
 cleanup:
-	for_each_engine(engine, i915, id) {
-		if (engine->cops)
-			i915->gt.cleanup_engine(engine);
-		else
-			kfree(engine);
-		i915->engine[id] = NULL;
-	}
+	intel_engines_cleanup(i915);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index d972c339309c..9d64e33f8427 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -420,7 +420,7 @@ struct intel_engine_cs {
 	 */
 	void		(*cancel_requests)(struct intel_engine_cs *engine);
 
-	void		(*cleanup)(struct intel_engine_cs *engine);
+	void		(*destroy)(struct intel_engine_cs *engine);
 
 	struct intel_engine_execlists execlists;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index ea7794d368d1..349fda6b10f8 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2336,26 +2336,6 @@ static void execlists_park(struct intel_engine_cs *engine)
 	tasklet_kill(&engine->execlists.tasklet);
 }
 
-/**
- * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
- * @engine: Engine Command Streamer.
- */
-void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *i915 = engine->i915;
-
-	if (engine->cleanup)
-		engine->cleanup(engine);
-
-	intel_engine_cleanup_common(engine);
-
-	lrc_destroy_wa_ctx(engine);
-
-	engine->i915 = NULL;
-	i915->engine[engine->id] = NULL;
-	kfree(engine);
-}
-
 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 {
 	engine->submit_request = execlists_submit_request;
@@ -2378,10 +2358,19 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 }
 
+static void execlists_destroy(struct intel_engine_cs *engine)
+{
+	intel_engine_cleanup_common(engine);
+	lrc_destroy_wa_ctx(engine);
+	kfree(engine);
+}
+
 static void
 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 {
 	/* Default vfuncs which can be overriden by each engine. */
+
+	engine->destroy = execlists_destroy;
 	engine->resume = execlists_resume;
 
 	engine->reset.prepare = execlists_reset_prepare;
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 5bcd3034a101..7a4804c47466 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1520,25 +1520,6 @@ static const struct intel_context_ops ring_context_ops = {
 	.destroy = ring_context_destroy,
 };
 
-void intel_engine_cleanup(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	WARN_ON(INTEL_GEN(dev_priv) > 2 &&
-		(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
-
-	intel_ring_unpin(engine->buffer);
-	intel_ring_put(engine->buffer);
-
-	if (engine->cleanup)
-		engine->cleanup(engine);
-
-	intel_engine_cleanup_common(engine);
-
-	dev_priv->engine[engine->id] = NULL;
-	kfree(engine);
-}
-
 static int load_pd_dir(struct i915_request *rq,
 		       const struct i915_hw_ppgtt *ppgtt)
 {
@@ -2130,6 +2111,20 @@ static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
 	engine->submit_request = gen6_bsd_submit_request;
 }
 
+static void ring_destroy(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+
+	WARN_ON(INTEL_GEN(dev_priv) > 2 &&
+		(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
+
+	intel_ring_unpin(engine->buffer);
+	intel_ring_put(engine->buffer);
+
+	intel_engine_cleanup_common(engine);
+	kfree(engine);
+}
+
 static void setup_irq(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *i915 = engine->i915;
@@ -2158,6 +2153,8 @@ static void setup_common(struct intel_engine_cs *engine)
 
 	setup_irq(engine);
 
+	engine->destroy = ring_destroy;
+
 	engine->resume = xcs_resume;
 	engine->reset.prepare = reset_prepare;
 	engine->reset.reset = reset_ring;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5c77bf5b735b..5ca4df9a7428 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1995,8 +1995,6 @@ struct drm_i915_private {
 
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
 	struct {
-		void (*cleanup_engine)(struct intel_engine_cs *engine);
-
 		struct i915_gt_timelines {
 			struct mutex mutex; /* protects list, tainted by GPU */
 			struct list_head active_list;
@@ -2722,9 +2720,6 @@ extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
 extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
 int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
 
-int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
-int intel_engines_init(struct drm_i915_private *dev_priv);
-
 u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
 
 /* intel_hotplug.c */
@@ -3132,7 +3127,6 @@ int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
 int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv);
 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv);
 void i915_gem_fini(struct drm_i915_private *dev_priv);
-void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv);
 int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
 			   unsigned int flags, long timeout);
 void i915_gem_suspend(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4c1793b1012e..2fcec1bfb038 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4476,11 +4476,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 
 	dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
 
-	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv))
-		dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
-	else
-		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
-
 	i915_timelines_init(dev_priv);
 
 	ret = i915_gem_init_userptr(dev_priv);
@@ -4601,7 +4596,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 err_pm:
 	if (ret != -EIO) {
 		intel_cleanup_gt_powersave(dev_priv);
-		i915_gem_cleanup_engines(dev_priv);
+		intel_engines_cleanup(dev_priv);
 	}
 err_context:
 	if (ret != -EIO)
@@ -4661,7 +4656,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
 	mutex_lock(&dev_priv->drm.struct_mutex);
 	intel_uc_fini_hw(dev_priv);
 	intel_uc_fini(dev_priv);
-	i915_gem_cleanup_engines(dev_priv);
+	intel_engines_cleanup(dev_priv);
 	i915_gem_contexts_fini(dev_priv);
 	i915_gem_fini_scratch(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
@@ -4684,16 +4679,6 @@ void i915_gem_init_mmio(struct drm_i915_private *i915)
 	i915_gem_sanitize(i915);
 }
 
-void
-i915_gem_cleanup_engines(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	for_each_engine(engine, dev_priv, id)
-		dev_priv->gt.cleanup_engine(engine);
-}
-
 void
 i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
 {
-- 
2.20.1


* ✗ Fi.CI.SPARSE: warning for series starting with [01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (45 preceding siblings ...)
  2019-04-25 11:22 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/45] " Patchwork
@ 2019-04-25 11:39 ` Patchwork
  2019-04-25 12:07 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (4 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Patchwork @ 2019-04-25 11:39 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling
URL   : https://patchwork.freedesktop.org/series/59933/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915: Seal races between async GPU cancellation, retirement and signaling
Okay!

Commit: drm/i915/gvt: Pin the per-engine GVT shadow contexts
Okay!

Commit: drm/i915: Export intel_context_instance()
-O:drivers/gpu/drm/i915/gt/intel_context.c:131:22: warning: context imbalance in 'intel_context_pin_lock' - wrong count at exit
+drivers/gpu/drm/i915/gt/intel_context.c:131:22: warning: context imbalance in 'intel_context_pin_lock' - wrong count at exit
+drivers/gpu/drm/i915/gt/intel_context.c:150:6: warning: context imbalance in 'intel_context_pin_unlock' - wrong count at exit

Commit: drm/i915/selftests: Use the real kernel context for sseu isolation tests
Okay!

Commit: drm/i915/selftests: Pass around intel_context for sseu
Okay!

Commit: drm/i915: Pass intel_context to intel_context_pin_lock()
-O:drivers/gpu/drm/i915/gt/intel_context.c:131:22: warning: context imbalance in 'intel_context_pin_lock' - wrong count at exit
-O:drivers/gpu/drm/i915/gt/intel_context.c:150:6: warning: context imbalance in 'intel_context_pin_unlock' - wrong count at exit

Commit: drm/i915: Split engine setup/init into two phases
Okay!

Commit: drm/i915: Switch back to an array of logical per-engine HW contexts
+./include/linux/overflow.h:285:13: error: incorrect type in conditional
+./include/linux/overflow.h:285:13: error: undefined identifier '__builtin_mul_overflow'
+./include/linux/overflow.h:285:13:    got void
+./include/linux/overflow.h:285:13: warning: call with no type!
+./include/linux/overflow.h:287:13: error: incorrect type in conditional
+./include/linux/overflow.h:287:13: error: undefined identifier '__builtin_add_overflow'
+./include/linux/overflow.h:287:13:    got void
+./include/linux/overflow.h:287:13: warning: call with no type!

Commit: drm/i915: Remove intel_context.active_link
Okay!

Commit: drm/i915: Move i915_request_alloc into selftests/
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915/execlists: Flush the tasklet on parking
Okay!

Commit: drm/i915: Move the engine->destroy() vfunc onto the engine
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3611:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3605:16: warning: expression using sizeof(void)

Commit: drm/i915: Convert inconsistent static engine tables into an init error
Okay!

Commit: drm/i915: Make engine_mask & num_engines static
Okay!

Commit: drm/i915: Restore control over ppgtt for context creation ABI
Okay!

Commit: drm/i915: Allow a context to define its set of engines
+drivers/gpu/drm/i915/i915_utils.h:84:13: error: incorrect type in conditional
+drivers/gpu/drm/i915/i915_utils.h:84:13: error: undefined identifier '__builtin_mul_overflow'
+drivers/gpu/drm/i915/i915_utils.h:84:13:    got void
+drivers/gpu/drm/i915/i915_utils.h:84:13: warning: call with no type!
+drivers/gpu/drm/i915/i915_utils.h:87:13: error: incorrect type in conditional
+drivers/gpu/drm/i915/i915_utils.h:87:13: error: undefined identifier '__builtin_add_overflow'
+drivers/gpu/drm/i915/i915_utils.h:87:13:    got void
+drivers/gpu/drm/i915/i915_utils.h:87:13: warning: call with no type!
+./include/linux/overflow.h:285:13: error: incorrect type in conditional
+./include/linux/overflow.h:285:13: error: not a function <noident>
+./include/linux/overflow.h:285:13:    got void
+./include/linux/overflow.h:287:13: error: incorrect type in conditional
+./include/linux/overflow.h:287:13: error: not a function <noident>
+./include/linux/overflow.h:287:13:    got void

Commit: drm/i915: Re-expose SINGLE_TIMELINE flags for context creation
Okay!

Commit: drm/i915: Allow userspace to clone contexts on creation
+drivers/gpu/drm/i915/i915_gem_context.c:1844:17: error: bad integer constant expression
+drivers/gpu/drm/i915/i915_gem_context.c:1845:17: error: bad integer constant expression
+drivers/gpu/drm/i915/i915_gem_context.c:1846:17: error: bad integer constant expression
+drivers/gpu/drm/i915/i915_gem_context.c:1847:17: error: bad integer constant expression
+drivers/gpu/drm/i915/i915_gem_context.c:1848:17: error: bad integer constant expression
+drivers/gpu/drm/i915/i915_gem_context.c:1849:17: error: bad integer constant expression
-drivers/gpu/drm/i915/i915_utils.h:84:13: warning: call with no type!
-drivers/gpu/drm/i915/i915_utils.h:87:13: warning: call with no type!
-drivers/gpu/drm/i915/selftests/i915_gem_context.c:1266:25: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/i915_gem_context.c:1266:25: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/i915_gem_context.c:454:16: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/i915_gem_context.c:571:33: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/i915_gem_context.c:571:33: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/i915_gem_context.c:693:33: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/i915_gem_context.c:693:33: warning: expression using sizeof(void)
+./include/linux/overflow.h:285:13: error: incorrect type in conditional
+./include/linux/overflow.h:285:13: error: not a function <noident>
-./include/linux/overflow.h:285:13: warning: call with no type!
+./include/linux/overflow.h:285:13:    got void
+./include/linux/overflow.h:287:13: error: incorrect type in conditional
+./include/linux/overflow.h:287:13: error: not a function <noident>
-./include/linux/overflow.h:287:13: warning: call with no type!
+./include/linux/overflow.h:287:13:    got void
-./include/linux/slab.h:666:13: warning: call with no type!

Commit: drm/i915: Load balancing across a virtual engine
+./include/linux/overflow.h:285:13: error: incorrect type in conditional
+./include/linux/overflow.h:285:13: error: undefined identifier '__builtin_mul_overflow'
+./include/linux/overflow.h:285:13:    got void
+./include/linux/overflow.h:285:13: warning: call with no type!
+./include/linux/overflow.h:287:13: error: incorrect type in conditional
+./include/linux/overflow.h:287:13: error: undefined identifier '__builtin_add_overflow'
+./include/linux/overflow.h:287:13:    got void
+./include/linux/overflow.h:287:13: warning: call with no type!
+./include/linux/slab.h:666:13: error: not a function <noident>

Commit: drm/i915: Apply an execution_mask to the virtual_engine
Okay!

Commit: drm/i915: Extend execution fence to support a callback
Okay!

Commit: drm/i915/execlists: Virtual engine bonding
Okay!

Commit: drm/i915: Allow specification of parallel execbuf
Okay!

Commit: drm/i915: Split GEM object type definition to its own header
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3605:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3606:16: warning: expression using sizeof(void)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Pull GEM ioctls interface to its own file
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3606:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3568:16: warning: expression using sizeof(void)

Commit: drm/i915: Move object->pages API to i915_gem_object.[ch]
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3568:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3437:16: warning: expression using sizeof(void)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Move shmem object setup to its own file
+drivers/gpu/drm/i915/gem/i915_gem_shmem.c:482:36: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:4913:36: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3437:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3427:16: warning: expression using sizeof(void)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Move phys objects to its own file
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3427:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3425:16: warning: expression using sizeof(void)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Move mmap and friends to its own file
+drivers/gpu/drm/i915/gem/i915_gem_mman.c:191:17: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_mman.c:191:17: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_mman.c:191:17: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_mman.c:191:17: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_mman.c:306:32: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_mman.c:306:32: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:1502:17: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:1502:17: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:1502:17: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:1502:17: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:1617:32: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:1617:32: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3425:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3424:16: warning: expression using sizeof(void)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Move GEM domain management to its own file
+drivers/gpu/drm/i915/gem/i915_gem_domain.c:447:34: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_domain.c:447:34: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_gem.c:889:39: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_gem.c:889:39: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:1951:34: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:1951:34: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem.c:889:39: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem.c:889:39: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3424:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3398:16: warning: expression using sizeof(void)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Move more GEM objects under gem/
+drivers/gpu/drm/i915/gem/i915_gem_context.c:2017:17: error: bad integer constant expression
+drivers/gpu/drm/i915/gem/i915_gem_context.c:2018:17: error: bad integer constant expression
+drivers/gpu/drm/i915/gem/i915_gem_context.c:2019:17: error: bad integer constant expression
+drivers/gpu/drm/i915/gem/i915_gem_context.c:2020:17: error: bad integer constant expression
+drivers/gpu/drm/i915/gem/i915_gem_context.c:2021:17: error: bad integer constant expression
+drivers/gpu/drm/i915/gem/i915_gem_context.c:2022:17: error: bad integer constant expression
+drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:1496:25: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_internal.c:55:39: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_internal.c:57:37: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_internal.c:57:37: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_internal.c:85:29: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_internal.c:85:29: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_shrinker.c:339:25: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/selftests/huge_pages.c:202:36: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/selftests/huge_pages.c:202:36: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/i915_gem_context.c:2017:17: error: bad integer constant expression
-drivers/gpu/drm/i915/gem/i915_gem_context.c:2018:17: error: bad integer constant expression
-drivers/gpu/drm/i915/gem/i915_gem_context.c:2019:17: error: bad integer constant expression
-drivers/gpu/drm/i915/gem/i915_gem_context.c:2020:17: error: bad integer constant expression
-drivers/gpu/drm/i915/gem/i915_gem_context.c:2021:17: error: bad integer constant expression
-drivers/gpu/drm/i915/gem/i915_gem_context.c:2022:17: error: bad integer constant expression
-drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:1496:25: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/i915_gem_internal.c:55:39: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/i915_gem_internal.c:57:37: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/i915_gem_internal.c:57:37: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/i915_gem_internal.c:85:29: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/i915_gem_internal.c:85:29: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/i915_gem_shrinker.c:339:25: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/selftests/huge_pages.c:202:36: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/selftests/huge_pages.c:202:36: warning: expression using sizeof(void)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Pull scatterlist utils out of i915_gem.h
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3398:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3288:16: warning: expression using sizeof(void)

Commit: drm/i915: Move GEM object domain management from struct_mutex to local
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 74+ messages in thread

* Re: [PATCH v2] drm/i915: Move the engine->destroy() vfunc onto the engine
  2019-04-25 11:29     ` [PATCH v2] " Chris Wilson
@ 2019-04-25 11:46       ` Tvrtko Ursulin
  0 siblings, 0 replies; 74+ messages in thread
From: Tvrtko Ursulin @ 2019-04-25 11:46 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 25/04/2019 12:29, Chris Wilson wrote:
> Make the engine responsible for cleaning itself up!
> 
> This removes the i915->gt.cleanup vfunc that has been annoying the
> casual reader and myself for the last several years, and helps keep
> tidy a future patch that adds more cleanup.
> 
> v2: Assert that engine->destroy is set after the backend starts
> allocating its own state.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine.h       |  4 ++
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c    | 66 +++++++++++---------
>   drivers/gpu/drm/i915/gt/intel_engine_types.h |  2 +-
>   drivers/gpu/drm/i915/gt/intel_lrc.c          | 29 +++------
>   drivers/gpu/drm/i915/gt/intel_ringbuffer.c   | 35 +++++------
>   drivers/gpu/drm/i915/i915_drv.h              |  6 --
>   drivers/gpu/drm/i915/i915_gem.c              | 19 +-----
>   7 files changed, 68 insertions(+), 93 deletions(-)
> 
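The pattern being introduced is worth spelling out: a safe default destructor (plain kfree) is installed as soon as the engine struct is allocated, each backend replaces it once it owns additional state, and a single cleanup loop can then serve every error path regardless of how far setup progressed. A minimal, self-contained sketch of that idea, using hypothetical engine/engines_cleanup names rather than the real i915 types, might look like this:

#include <stdlib.h>

struct engine {
	void (*destroy)(struct engine *e);
	void *backend_state;	/* only allocated by the backend during setup */
};

/* Default installed at allocation: freeing the bare struct is always safe. */
static void engine_destroy_default(struct engine *e)
{
	free(e);
}

/* Installed by the backend once it owns more than the bare struct. */
static void engine_destroy_backend(struct engine *e)
{
	free(e->backend_state);
	free(e);
}

static struct engine *engine_alloc(void)
{
	struct engine *e = calloc(1, sizeof(*e));

	if (e)
		e->destroy = engine_destroy_default;
	return e;
}

static int engine_setup(struct engine *e)
{
	e->backend_state = malloc(64);
	if (!e->backend_state)
		return -1;	/* the default destroy is still the right one */

	e->destroy = engine_destroy_backend;
	return 0;
}

/* One cleanup loop serves every error path, however far setup got. */
static void engines_cleanup(struct engine **engines, unsigned int count)
{
	for (unsigned int i = 0; i < count; i++) {
		if (!engines[i])
			continue;
		engines[i]->destroy(engines[i]);
		engines[i] = NULL;
	}
}

The (typeof(engine->destroy))kfree cast in the patch below plays the same role as engine_destroy_default here: it makes freeing a half-constructed engine legal before any backend-specific teardown exists.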
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> index 3e53f53bc52b..f5b0f27cecb6 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -362,7 +362,11 @@ __intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
>   	return (head - tail - CACHELINE_BYTES) & (size - 1);
>   }
>   
> +int intel_engines_init_mmio(struct drm_i915_private *i915);
>   int intel_engines_setup(struct drm_i915_private *i915);
> +int intel_engines_init(struct drm_i915_private *i915);
> +void intel_engines_cleanup(struct drm_i915_private *i915);
> +
>   int intel_engine_init_common(struct intel_engine_cs *engine);
>   void intel_engine_cleanup_common(struct intel_engine_cs *engine);
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 4e30f3720eb7..5994528eac79 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -303,6 +303,12 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
>   	engine->class = info->class;
>   	engine->instance = info->instance;
>   
> +	/*
> +	 * To be overridden by the backend on setup. However to facilitate
> +	 * cleanup on error during setup, we always provide the destroy vfunc.
> +	 */
> +	engine->destroy = (typeof(engine->destroy))kfree;
> +
>   	engine->uabi_class = intel_engine_classes[info->class].uabi_class;
>   
>   	engine->context_size = __intel_engine_context_size(dev_priv,
> @@ -327,18 +333,31 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
>   	return 0;
>   }
>   
> +/**
> + * intel_engines_cleanup() - free the resources allocated for Command Streamers
> + * @i915: the i915 device private
> + */
> +void intel_engines_cleanup(struct drm_i915_private *i915)
> +{
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +
> +	for_each_engine(engine, i915, id) {
> +		engine->destroy(engine);
> +		i915->engine[id] = NULL;
> +	}
> +}
> +
>   /**
>    * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
>    * @dev_priv: i915 device private
>    *
>    * Return: non-zero if the initialization failed.
>    */
> -int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
> +int intel_engines_init_mmio(struct drm_i915_private *i915)
>   {
> -	struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
> -	const unsigned int engine_mask = INTEL_INFO(dev_priv)->engine_mask;
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> +	struct intel_device_info *device_info = mkwrite_device_info(i915);
> +	const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask;
>   	unsigned int mask = 0;
>   	unsigned int i;
>   	int err;
> @@ -351,10 +370,10 @@ int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
>   		return -ENODEV;
>   
>   	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
> -		if (!HAS_ENGINE(dev_priv, i))
> +		if (!HAS_ENGINE(i915, i))
>   			continue;
>   
> -		err = intel_engine_setup(dev_priv, i);
> +		err = intel_engine_setup(i915, i);
>   		if (err)
>   			goto cleanup;
>   
> @@ -370,20 +389,19 @@ int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
>   		device_info->engine_mask = mask;
>   
>   	/* We always presume we have at least RCS available for later probing */
> -	if (WARN_ON(!HAS_ENGINE(dev_priv, RCS0))) {
> +	if (WARN_ON(!HAS_ENGINE(i915, RCS0))) {
>   		err = -ENODEV;
>   		goto cleanup;
>   	}
>   
> -	RUNTIME_INFO(dev_priv)->num_engines = hweight32(mask);
> +	RUNTIME_INFO(i915)->num_engines = hweight32(mask);
>   
> -	i915_check_and_clear_faults(dev_priv);
> +	i915_check_and_clear_faults(i915);
>   
>   	return 0;
>   
>   cleanup:
> -	for_each_engine(engine, dev_priv, id)
> -		kfree(engine);
> +	intel_engines_cleanup(i915);
>   	return err;
>   }
>   
> @@ -397,7 +415,7 @@ int intel_engines_init(struct drm_i915_private *i915)
>   {
>   	int (*init)(struct intel_engine_cs *engine);
>   	struct intel_engine_cs *engine;
> -	enum intel_engine_id id, err_id;
> +	enum intel_engine_id id;
>   	int err;
>   
>   	if (HAS_EXECLISTS(i915))
> @@ -406,8 +424,6 @@ int intel_engines_init(struct drm_i915_private *i915)
>   		init = intel_ring_submission_init;
>   
>   	for_each_engine(engine, i915, id) {
> -		err_id = id;
> -
>   		err = init(engine);
>   		if (err)
>   			goto cleanup;
> @@ -416,14 +432,7 @@ int intel_engines_init(struct drm_i915_private *i915)
>   	return 0;
>   
>   cleanup:
> -	for_each_engine(engine, i915, id) {
> -		if (id >= err_id) {
> -			kfree(engine);
> -			i915->engine[id] = NULL;
> -		} else {
> -			i915->gt.cleanup_engine(engine);
> -		}
> -	}
> +	intel_engines_cleanup(i915);
>   	return err;
>   }
>   
> @@ -603,19 +612,16 @@ int intel_engines_setup(struct drm_i915_private *i915)
>   		if (err)
>   			goto cleanup;
>   
> +		/* We expect the backend to take control over its state */
> +		GEM_BUG_ON(engine->destroy == (typeof(engine->destroy))kfree);
> +
>   		GEM_BUG_ON(!engine->cops);
>   	}
>   
>   	return 0;
>   
>   cleanup:
> -	for_each_engine(engine, i915, id) {
> -		if (engine->cops)
> -			i915->gt.cleanup_engine(engine);
> -		else
> -			kfree(engine);
> -		i915->engine[id] = NULL;
> -	}
> +	intel_engines_cleanup(i915);
>   	return err;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index d972c339309c..9d64e33f8427 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -420,7 +420,7 @@ struct intel_engine_cs {
>   	 */
>   	void		(*cancel_requests)(struct intel_engine_cs *engine);
>   
> -	void		(*cleanup)(struct intel_engine_cs *engine);
> +	void		(*destroy)(struct intel_engine_cs *engine);
>   
>   	struct intel_engine_execlists execlists;
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index ea7794d368d1..349fda6b10f8 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2336,26 +2336,6 @@ static void execlists_park(struct intel_engine_cs *engine)
>   	tasklet_kill(&engine->execlists.tasklet);
>   }
>   
> -/**
> - * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
> - * @engine: Engine Command Streamer.
> - */
> -void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
> -{
> -	struct drm_i915_private *i915 = engine->i915;
> -
> -	if (engine->cleanup)
> -		engine->cleanup(engine);
> -
> -	intel_engine_cleanup_common(engine);
> -
> -	lrc_destroy_wa_ctx(engine);
> -
> -	engine->i915 = NULL;
> -	i915->engine[engine->id] = NULL;
> -	kfree(engine);
> -}
> -
>   void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
>   {
>   	engine->submit_request = execlists_submit_request;
> @@ -2378,10 +2358,19 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
>   		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
>   }
>   
> +static void execlists_destroy(struct intel_engine_cs *engine)
> +{
> +	intel_engine_cleanup_common(engine);
> +	lrc_destroy_wa_ctx(engine);
> +	kfree(engine);
> +}
> +
>   static void
>   logical_ring_default_vfuncs(struct intel_engine_cs *engine)
>   {
>   	/* Default vfuncs which can be overriden by each engine. */
> +
> +	engine->destroy = execlists_destroy;
>   	engine->resume = execlists_resume;
>   
>   	engine->reset.prepare = execlists_reset_prepare;
> diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> index 5bcd3034a101..7a4804c47466 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> @@ -1520,25 +1520,6 @@ static const struct intel_context_ops ring_context_ops = {
>   	.destroy = ring_context_destroy,
>   };
>   
> -void intel_engine_cleanup(struct intel_engine_cs *engine)
> -{
> -	struct drm_i915_private *dev_priv = engine->i915;
> -
> -	WARN_ON(INTEL_GEN(dev_priv) > 2 &&
> -		(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
> -
> -	intel_ring_unpin(engine->buffer);
> -	intel_ring_put(engine->buffer);
> -
> -	if (engine->cleanup)
> -		engine->cleanup(engine);
> -
> -	intel_engine_cleanup_common(engine);
> -
> -	dev_priv->engine[engine->id] = NULL;
> -	kfree(engine);
> -}
> -
>   static int load_pd_dir(struct i915_request *rq,
>   		       const struct i915_hw_ppgtt *ppgtt)
>   {
> @@ -2130,6 +2111,20 @@ static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
>   	engine->submit_request = gen6_bsd_submit_request;
>   }
>   
> +static void ring_destroy(struct intel_engine_cs *engine)
> +{
> +	struct drm_i915_private *dev_priv = engine->i915;
> +
> +	WARN_ON(INTEL_GEN(dev_priv) > 2 &&
> +		(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
> +
> +	intel_ring_unpin(engine->buffer);
> +	intel_ring_put(engine->buffer);
> +
> +	intel_engine_cleanup_common(engine);
> +	kfree(engine);
> +}
> +
>   static void setup_irq(struct intel_engine_cs *engine)
>   {
>   	struct drm_i915_private *i915 = engine->i915;
> @@ -2158,6 +2153,8 @@ static void setup_common(struct intel_engine_cs *engine)
>   
>   	setup_irq(engine);
>   
> +	engine->destroy = ring_destroy;
> +
>   	engine->resume = xcs_resume;
>   	engine->reset.prepare = reset_prepare;
>   	engine->reset.reset = reset_ring;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5c77bf5b735b..5ca4df9a7428 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1995,8 +1995,6 @@ struct drm_i915_private {
>   
>   	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
>   	struct {
> -		void (*cleanup_engine)(struct intel_engine_cs *engine);
> -
>   		struct i915_gt_timelines {
>   			struct mutex mutex; /* protects list, tainted by GPU */
>   			struct list_head active_list;
> @@ -2722,9 +2720,6 @@ extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
>   extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
>   int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
>   
> -int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
> -int intel_engines_init(struct drm_i915_private *dev_priv);
> -
>   u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
>   
>   /* intel_hotplug.c */
> @@ -3132,7 +3127,6 @@ int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
>   int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv);
>   void i915_gem_init_swizzling(struct drm_i915_private *dev_priv);
>   void i915_gem_fini(struct drm_i915_private *dev_priv);
> -void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv);
>   int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
>   			   unsigned int flags, long timeout);
>   void i915_gem_suspend(struct drm_i915_private *dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 4c1793b1012e..2fcec1bfb038 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4476,11 +4476,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>   
>   	dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
>   
> -	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv))
> -		dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
> -	else
> -		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
> -
>   	i915_timelines_init(dev_priv);
>   
>   	ret = i915_gem_init_userptr(dev_priv);
> @@ -4601,7 +4596,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>   err_pm:
>   	if (ret != -EIO) {
>   		intel_cleanup_gt_powersave(dev_priv);
> -		i915_gem_cleanup_engines(dev_priv);
> +		intel_engines_cleanup(dev_priv);
>   	}
>   err_context:
>   	if (ret != -EIO)
> @@ -4661,7 +4656,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
>   	mutex_lock(&dev_priv->drm.struct_mutex);
>   	intel_uc_fini_hw(dev_priv);
>   	intel_uc_fini(dev_priv);
> -	i915_gem_cleanup_engines(dev_priv);
> +	intel_engines_cleanup(dev_priv);
>   	i915_gem_contexts_fini(dev_priv);
>   	i915_gem_fini_scratch(dev_priv);
>   	mutex_unlock(&dev_priv->drm.struct_mutex);
> @@ -4684,16 +4679,6 @@ void i915_gem_init_mmio(struct drm_i915_private *i915)
>   	i915_gem_sanitize(i915);
>   }
>   
> -void
> -i915_gem_cleanup_engines(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -
> -	for_each_engine(engine, dev_priv, id)
> -		dev_priv->gt.cleanup_engine(engine);
> -}
> -
>   void
>   i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
>   {
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 74+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (46 preceding siblings ...)
  2019-04-25 11:39 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2019-04-25 12:07 ` Patchwork
  2019-04-25 12:09 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling (rev2) Patchwork
                   ` (3 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Patchwork @ 2019-04-25 12:07 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling
URL   : https://patchwork.freedesktop.org/series/59933/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5999 -> Patchwork_12871
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/59933/revisions/1/mbox/

New tests
---------

  New tests have been introduced between CI_DRM_5999 and Patchwork_12871:

### New IGT tests (1) ###

  * igt@i915_selftest@live_mman:
    - Statuses : 28 pass(s)
    - Exec time: [5.56, 57.56] s

  

Known issues
------------

  Here are the changes found in Patchwork_12871 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_pm_rpm@module-reload:
    - fi-skl-6770hq:      [PASS][1] -> [FAIL][2] ([fdo#108511])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12871/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html

  
#### Possible fixes ####

  * igt@i915_selftest@live_contexts:
    - fi-bdw-gvtdvm:      [DMESG-FAIL][3] ([fdo#110235 ]) -> [PASS][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/fi-bdw-gvtdvm/igt@i915_selftest@live_contexts.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12871/fi-bdw-gvtdvm/igt@i915_selftest@live_contexts.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#108511]: https://bugs.freedesktop.org/show_bug.cgi?id=108511
  [fdo#110235 ]: https://bugs.freedesktop.org/show_bug.cgi?id=110235 


Participating hosts (41 -> 35)
------------------------------

  Additional (6): fi-bsw-n3050 fi-kbl-guc fi-icl-u3 fi-icl-y fi-blb-e6850 fi-byt-n2820 
  Missing    (12): fi-ilk-m540 fi-byt-j1900 fi-hsw-peppy fi-byt-squawks fi-bsw-cyan fi-apl-guc fi-ctg-p8600 fi-gdg-551 fi-skl-iommu fi-pnv-d510 fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_5999 -> Patchwork_12871

  CI_DRM_5999: bde6790dd103ba3c4c127bbc061b90ea3f406762 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4964: ffe44144a2285788ae07768a9240b27ea8182d5c @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12871: bb546ba28c2edd13fb76145784a0e257667f702c @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

bb546ba28c2e drm/i915/execlists: Minimalistic timeslicing
164b9bf4bf1b drm/i915/execlists: Preempt-to-busy
e4e17a4bfe57 drm/i915: Replace engine->timeline with a plain list
9599c71ad5cf drm/i915: Stop retiring along engine
bd3f0893f59c drm/i915: Keep contexts pinned until after the next kernel context switch
dce4c5818ef5 drm/i915: Rename intel_context.active to .inflight
fb34df3074bd drm/i915: Move object close under its own lock
d0d0842f8bb2 drm/i915: Drop the deferred active reference
c10b2ae59cb3 drm/i915: Move GEM client throttling to its own file
89dfa3abb4b1 drm/i915: Move GEM object busy checking to its own file
7f6e77da37c6 drm/i915: Move GEM object waiting to its own file
8d9354672a12 drm/i915: Move GEM object domain management from struct_mutex to local
8d4d95a47223 drm/i915: Pull scatterlist utils out of i915_gem.h
93ad355975cb drm/i915: Move more GEM objects under gem/
7d08333f4f33 drm/i915: Move GEM domain management to its own file
ef17118e7363 drm/i915: Move mmap and friends to its own file
27574b61465b drm/i915: Move phys objects to its own file
88ecbcf16a43 drm/i915: Move shmem object setup to its own file
cfbd63a2cc15 drm/i915: Move object->pages API to i915_gem_object.[ch]
504adb89c47b drm/i915: Pull GEM ioctls interface to its own file
7e3b449981b1 drm/i915: Split GEM object type definition to its own header
39b04a462159 drm/i915: Allow specification of parallel execbuf
ff6b2382417e drm/i915/execlists: Virtual engine bonding
f9a6aebb5f5e drm/i915: Extend execution fence to support a callback
7ab058d19497 drm/i915: Apply an execution_mask to the virtual_engine
65cb8bcd56e0 drm/i915: Load balancing across a virtual engine
c0bede879ff9 drm/i915: Allow userspace to clone contexts on creation
2a6aed246ca7 drm/i915: Re-expose SINGLE_TIMELINE flags for context creation
8116cec3c578 drm/i915: Allow a context to define its set of engines
eacb264b1b19 drm/i915: Restore control over ppgtt for context creation ABI
4d41aff00f07 drm/i915: Make engine_mask & num_engines static
b4def543830c drm/i915: Convert inconsistent static engine tables into an init error
cc8c59145f86 drm/i915: Move the engine->destroy() vfunc onto the engine
8e3b0d14da6e drm/i915/execlists: Flush the tasklet on parking
a7aa0b935302 drm/i915: Move i915_request_alloc into selftests/
66ee78beaf16 drm/i915: Remove intel_context.active_link
1ecd5a4be8fc drm/i915: Switch back to an array of logical per-engine HW contexts
dd5199890d17 drm/i915: Split engine setup/init into two phases
01acbbacf9fe drm/i915: Pass intel_context to intel_context_pin_lock()
4b16b7f00463 drm/i915/selftests: Pass around intel_context for sseu
6702f3f43ea2 drm/i915/selftests: Use the real kernel context for sseu isolation tests
2ee46a2f553e drm/i915: Export intel_context_instance()
4413100b422a drm/i915/gvt: Pin the per-engine GVT shadow contexts
d3bdb17e90b7 drm/i915: Seal races between async GPU cancellation, retirement and signaling

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12871/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 74+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling (rev2)
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (47 preceding siblings ...)
  2019-04-25 12:07 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2019-04-25 12:09 ` Patchwork
  2019-04-25 12:25 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (2 subsequent siblings)
  51 siblings, 0 replies; 74+ messages in thread
From: Patchwork @ 2019-04-25 12:09 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling (rev2)
URL   : https://patchwork.freedesktop.org/series/59933/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
d5f35de3a4aa drm/i915: Seal races between async GPU cancellation, retirement and signaling
5861ea2d3814 drm/i915/gvt: Pin the per-engine GVT shadow contexts
03c73e53b187 drm/i915: Export intel_context_instance()
9b953b2cad13 drm/i915/selftests: Use the real kernel context for sseu isolation tests
d9d3f9889620 drm/i915/selftests: Pass around intel_context for sseu
cdfbf2fbb191 drm/i915: Pass intel_context to intel_context_pin_lock()
071a62f7b83a drm/i915: Split engine setup/init into two phases
ab7626910592 drm/i915: Switch back to an array of logical per-engine HW contexts
-:591: WARNING:LINE_SPACING: Missing a blank line after declarations
#591: FILE: drivers/gpu/drm/i915/i915_gem_context.h:214:
+		struct i915_gem_engines *e = rcu_dereference(ctx->engines);
+		if (likely(idx < e->num_engines && e->engines[idx]))

-:610: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'it' - possible side-effects?
#610: FILE: drivers/gpu/drm/i915/i915_gem_context.h:233:
+#define for_each_gem_engine(ce, engines, it) \
+	for (i915_gem_engines_iter_init(&(it), (engines)); \
+	     ((ce) = i915_gem_engines_iter_next(&(it)));)

total: 0 errors, 1 warnings, 1 checks, 965 lines checked
eed6f45b1618 drm/i915: Remove intel_context.active_link
f3aceb963930 drm/i915: Move i915_request_alloc into selftests/
-:421: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#421: 
new file mode 100644

-:426: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#426: FILE: drivers/gpu/drm/i915/selftests/igt_gem_utils.c:1:
+/*

-:427: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#427: FILE: drivers/gpu/drm/i915/selftests/igt_gem_utils.c:2:
+ * SPDX-License-Identifier: MIT

-:466: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#466: FILE: drivers/gpu/drm/i915/selftests/igt_gem_utils.h:1:
+/*

-:467: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#467: FILE: drivers/gpu/drm/i915/selftests/igt_gem_utils.h:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 5 warnings, 0 checks, 408 lines checked
016950b57bf5 drm/i915/execlists: Flush the tasklet on parking
8a37ac514d57 drm/i915: Move the engine->destroy() vfunc onto the engine
82fefd1a02ef drm/i915: Convert inconsistent static engine tables into an init error
2f165b9338cf drm/i915: Make engine_mask & num_engines static
-:72: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'x' - possible side-effects?
#72: FILE: drivers/gpu/drm/i915/i915_pci.c:38:
+#define ENGINES(x) .engine_mask = (x), .num_engines = hweight8(x)

total: 0 errors, 0 warnings, 1 checks, 279 lines checked
ab1288834bb0 drm/i915: Restore control over ppgtt for context creation ABI
-:80: WARNING:LONG_LINE: line over 100 characters
#80: FILE: include/uapi/drm/i915_drm.h:420:
+#define DRM_IOCTL_I915_GEM_VM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control)

-:81: WARNING:LONG_LINE: line over 100 characters
#81: FILE: include/uapi/drm/i915_drm.h:421:
+#define DRM_IOCTL_I915_GEM_VM_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)

-:81: WARNING:SPACING: space prohibited between function name and open parenthesis '('
#81: FILE: include/uapi/drm/i915_drm.h:421:
+#define DRM_IOCTL_I915_GEM_VM_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)

-:81: ERROR:COMPLEX_MACRO: Macros with complex values should be enclosed in parentheses
#81: FILE: include/uapi/drm/i915_drm.h:421:
+#define DRM_IOCTL_I915_GEM_VM_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)

total: 1 errors, 3 warnings, 0 checks, 64 lines checked
b473bbe31d59 drm/i915: Allow a context to define its set of engines
-:403: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'p' - possible side-effects?
#403: FILE: drivers/gpu/drm/i915/i915_utils.h:107:
+#define check_struct_size(p, member, n, sz) \
+	likely(__check_struct_size(sizeof(*(p)), \
+				   sizeof(*(p)->member) + __must_be_array((p)->member), \
+				   n, sz))

-:403: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'member' - possible side-effects?
#403: FILE: drivers/gpu/drm/i915/i915_utils.h:107:
+#define check_struct_size(p, member, n, sz) \
+	likely(__check_struct_size(sizeof(*(p)), \
+				   sizeof(*(p)->member) + __must_be_array((p)->member), \
+				   n, sz))

-:403: CHECK:MACRO_ARG_PRECEDENCE: Macro argument 'member' may be better as '(member)' to avoid precedence issues
#403: FILE: drivers/gpu/drm/i915/i915_utils.h:107:
+#define check_struct_size(p, member, n, sz) \
+	likely(__check_struct_size(sizeof(*(p)), \
+				   sizeof(*(p)->member) + __must_be_array((p)->member), \
+				   n, sz))

total: 0 errors, 0 warnings, 3 checks, 396 lines checked
5c7f4ce704b1 drm/i915: Re-expose SINGLE_TIMELINE flags for context creation
c01464b0b711 drm/i915: Allow userspace to clone contexts on creation
-:213: ERROR:BRACKET_SPACE: space prohibited before open square bracket '['
#213: FILE: drivers/gpu/drm/i915/i915_gem_context.c:1843:
+#define MAP(x, y) [ilog2(I915_CONTEXT_CLONE_##x)] = y

total: 1 errors, 0 warnings, 0 checks, 235 lines checked
98d6ac93376e drm/i915: Load balancing across a virtual engine
-:260: ERROR:SPACING: space prohibited after that open parenthesis '('
#260: FILE: drivers/gpu/drm/i915/gt/intel_lrc.c:495:
+	if ( ~prio & ACTIVE_PRIORITY &&

total: 1 errors, 0 warnings, 0 checks, 1276 lines checked
7341983aa331 drm/i915: Apply an execution_mask to the virtual_engine
60b05b785259 drm/i915: Extend execution fence to support a callback
3959f70876c9 drm/i915/execlists: Virtual engine bonding
8876d041268e drm/i915: Allow specification of parallel execbuf
e227c4d30f7f drm/i915: Split GEM object type definition to its own header
-:25: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#25: 
new file mode 100644

-:59: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#59: FILE: drivers/gpu/drm/i915/gem/i915_gem_object_types.h:1:
+/*

-:60: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#60: FILE: drivers/gpu/drm/i915/gem/i915_gem_object_types.h:2:
+ * SPDX-License-Identifier: MIT

-:177: WARNING:LINE_SPACING: Missing a blank line after declarations
#177: FILE: drivers/gpu/drm/i915/gem/i915_gem_object_types.h:119:
+	struct list_head batch_pool_link;
+	I915_SELFTEST_DECLARE(struct list_head st_link);

-:436: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#436: FILE: drivers/gpu/drm/i915/i915_gem_object.h:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 5 warnings, 0 checks, 664 lines checked
236de6708974 drm/i915: Pull GEM ioctls interface to its own file
-:12: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#12: 
new file mode 100644

-:17: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#17: FILE: drivers/gpu/drm/i915/gem/i915_gem_ioctls.h:1:
+/*

-:18: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#18: FILE: drivers/gpu/drm/i915/gem/i915_gem_ioctls.h:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 3 warnings, 0 checks, 152 lines checked
df1d268673d8 drm/i915: Move object->pages API to i915_gem_object.[ch]
-:38: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#38: 
rename from drivers/gpu/drm/i915/i915_gem_object.c

total: 0 errors, 1 warnings, 0 checks, 339 lines checked
bdca8a1a1d45 drm/i915: Move shmem object setup to its own file
-:413: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#413: 
new file mode 100644

-:418: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#418: FILE: drivers/gpu/drm/i915/gem/i915_gem_shmem.c:1:
+/*

-:419: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#419: FILE: drivers/gpu/drm/i915/gem/i915_gem_shmem.c:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 3 warnings, 0 checks, 2003 lines checked
6ec8b7a69a75 drm/i915: Move phys objects to its own file
-:80: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#80: 
new file mode 100644

-:85: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#85: FILE: drivers/gpu/drm/i915/gem/i915_gem_pages.c:1:
+/*

-:86: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#86: FILE: drivers/gpu/drm/i915/gem/i915_gem_pages.c:2:
+ * SPDX-License-Identifier: MIT

-:185: WARNING:MEMORY_BARRIER: memory barrier without comment
#185: FILE: drivers/gpu/drm/i915/gem/i915_gem_pages.c:101:
+		smp_mb__before_atomic();

-:250: CHECK:MULTIPLE_ASSIGNMENTS: multiple assignments should be avoided
#250: FILE: drivers/gpu/drm/i915/gem/i915_gem_pages.c:166:
+	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;

-:376: WARNING:MEMORY_BARRIER: memory barrier without comment
#376: FILE: drivers/gpu/drm/i915/gem/i915_gem_pages.c:292:
+			smp_mb__before_atomic();

-:395: CHECK:MULTIPLE_ASSIGNMENTS: multiple assignments should be avoided
#395: FILE: drivers/gpu/drm/i915/gem/i915_gem_pages.c:311:
+		ptr = obj->mm.mapping = NULL;

-:601: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#601: FILE: drivers/gpu/drm/i915/gem/i915_gem_phys.c:1:
+/*

-:602: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#602: FILE: drivers/gpu/drm/i915/gem/i915_gem_phys.c:2:
+ * SPDX-License-Identifier: MIT

-:898: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#898: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c:1:
+/*

-:899: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#899: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 9 warnings, 2 checks, 1825 lines checked
572207c2d7d5 drm/i915: Move mmap and friends to its own file
-:25: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#25: 
new file mode 100644

-:30: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#30: FILE: drivers/gpu/drm/i915/gem/i915_gem_mman.c:1:
+/*

-:31: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#31: FILE: drivers/gpu/drm/i915/gem/i915_gem_mman.c:2:
+ * SPDX-License-Identifier: MIT

-:590: WARNING:MEMORY_BARRIER: memory barrier without comment
#590: FILE: drivers/gpu/drm/i915/gem/i915_gem_object.c:400:
+		wmb();

-:633: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#633: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c:1:
+/*

-:634: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#634: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c:2:
+ * SPDX-License-Identifier: MIT

-:827: WARNING:LONG_LINE: line over 100 characters
#827: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c:195:
+			      (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);

-:946: WARNING:SPACING: space prohibited before semicolon
#946: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c:314:
+next_tiling: ;

total: 0 errors, 8 warnings, 0 checks, 2256 lines checked
05d08fd5b328 drm/i915: Move GEM domain management to its own file
-:25: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#25: 
new file mode 100644

-:30: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#30: FILE: drivers/gpu/drm/i915/gem/i915_gem_domain.c:1:
+/*

-:31: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#31: FILE: drivers/gpu/drm/i915/gem/i915_gem_domain.c:2:
+ * SPDX-License-Identifier: MIT

-:113: WARNING:MEMORY_BARRIER: memory barrier without comment
#113: FILE: drivers/gpu/drm/i915/gem/i915_gem_domain.c:84:
+		mb();

-:175: WARNING:MEMORY_BARRIER: memory barrier without comment
#175: FILE: drivers/gpu/drm/i915/gem/i915_gem_domain.c:146:
+		mb();

total: 0 errors, 5 warnings, 0 checks, 1871 lines checked
8803cd7965eb drm/i915: Move more GEM objects under gem/
-:87: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#87: 
rename from drivers/gpu/drm/i915/i915_gem_clflush.c

-:114: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#114: FILE: drivers/gpu/drm/i915/gem/i915_gem_clflush.c:2:
+ * SPDX-License-Identifier: MIT

-:135: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#135: FILE: drivers/gpu/drm/i915/gem/i915_gem_clflush.h:1:
+/*

-:136: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#136: FILE: drivers/gpu/drm/i915/gem/i915_gem_clflush.h:2:
+ * SPDX-License-Identifier: MIT

-:187: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#187: FILE: drivers/gpu/drm/i915/gem/i915_gem_context.c:2:
+ * SPDX-License-Identifier: MIT

-:231: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#231: FILE: drivers/gpu/drm/i915/gem/i915_gem_context.h:2:
+ * SPDX-License-Identifier: MIT

-:262: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#262: FILE: drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c:2:
+ * SPDX-License-Identifier: MIT

-:338: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#338: FILE: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:2:
+ * SPDX-License-Identifier: MIT

-:394: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#394: FILE: drivers/gpu/drm/i915/gem/i915_gem_internal.c:2:
+ * SPDX-License-Identifier: MIT

-:495: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#495: FILE: drivers/gpu/drm/i915/gem/i915_gem_shrinker.c:2:
+ * SPDX-License-Identifier: MIT

-:544: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#544: FILE: drivers/gpu/drm/i915/gem/i915_gem_stolen.c:2:
+ * SPDX-License-Identifier: MIT

-:592: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#592: FILE: drivers/gpu/drm/i915/gem/i915_gem_tiling.c:2:
+ * SPDX-License-Identifier: MIT

-:640: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#640: FILE: drivers/gpu/drm/i915/gem/i915_gem_userptr.c:2:
+ * SPDX-License-Identifier: MIT

-:691: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#691: FILE: drivers/gpu/drm/i915/gem/i915_gemfs.c:2:
+ * SPDX-License-Identifier: MIT

-:714: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#714: FILE: drivers/gpu/drm/i915/gem/i915_gemfs.h:1:
+/*

-:715: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#715: FILE: drivers/gpu/drm/i915/gem/i915_gemfs.h:2:
+ * SPDX-License-Identifier: MIT

-:759: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#759: FILE: drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c:2:
+ * SPDX-License-Identifier: MIT

-:771: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#771: FILE: drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h:1:
+/*

-:772: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#772: FILE: drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h:2:
+ * SPDX-License-Identifier: MIT

-:827: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#827: FILE: drivers/gpu/drm/i915/gem/selftests/huge_pages.c:2:
+ * SPDX-License-Identifier: MIT

-:879: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#879: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c:2:
+ * SPDX-License-Identifier: MIT

-:922: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#922: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c:2:
+ * SPDX-License-Identifier: MIT

-:984: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#984: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:2:
+ * SPDX-License-Identifier: MIT

-:1040: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#1040: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c:2:
+ * SPDX-License-Identifier: MIT

-:1108: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#1108: FILE: drivers/gpu/drm/i915/gem/selftests/mock_context.c:2:
+ * SPDX-License-Identifier: MIT

-:1125: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#1125: FILE: drivers/gpu/drm/i915/gem/selftests/mock_context.h:1:
+/*

-:1126: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#1126: FILE: drivers/gpu/drm/i915/gem/selftests/mock_context.h:2:
+ * SPDX-License-Identifier: MIT

-:1178: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#1178: FILE: drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c:2:
+ * SPDX-License-Identifier: MIT

-:1190: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#1190: FILE: drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h:1:
+/*

-:1191: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#1191: FILE: drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h:2:
+ * SPDX-License-Identifier: MIT

-:1221: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#1221: FILE: drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h:1:
+/*

-:1222: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#1222: FILE: drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 32 warnings, 0 checks, 1404 lines checked
1ff0c2bc3c2d drm/i915: Pull scatterlist utils out of i915_gem.h
-:372: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#372: 
new file mode 100644

-:377: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#377: FILE: drivers/gpu/drm/i915/i915_scatterlist.c:1:
+/*

-:378: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#378: FILE: drivers/gpu/drm/i915/i915_scatterlist.c:2:
+ * SPDX-License-Identifier: MIT

-:422: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#422: FILE: drivers/gpu/drm/i915/i915_scatterlist.h:1:
+/*

-:423: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#423: FILE: drivers/gpu/drm/i915/i915_scatterlist.h:2:
+ * SPDX-License-Identifier: MIT

-:452: CHECK:MULTIPLE_ASSIGNMENTS: multiple assignments should be avoided
#452: FILE: drivers/gpu/drm/i915/i915_scatterlist.h:31:
+		s.max = s.curr = s.sgp->offset;

-:497: CHECK:MACRO_ARG_REUSE: Macro argument reuse '__iter' - possible side-effects?
#497: FILE: drivers/gpu/drm/i915/i915_scatterlist.h:76:
+#define __for_each_sgt_dma(__dmap, __iter, __sgt, __step)		\
+	for ((__iter) = __sgt_iter((__sgt)->sgl, true);			\
+	     ((__dmap) = (__iter).dma + (__iter).curr);			\
+	     (((__iter).curr += (__step)) >= (__iter).max) ?		\
+	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0 : 0)

-:509: CHECK:MACRO_ARG_REUSE: Macro argument reuse '__iter' - possible side-effects?
#509: FILE: drivers/gpu/drm/i915/i915_s

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 74+ messages in thread

* Re: [PATCH 19/45] drm/i915: Load balancing across a virtual engine
  2019-04-25  9:19 ` [PATCH 19/45] drm/i915: Load balancing across a virtual engine Chris Wilson
@ 2019-04-25 12:14   ` Tvrtko Ursulin
  2019-04-25 12:23     ` Chris Wilson
  2019-04-29 13:43   ` Tvrtko Ursulin
  1 sibling, 1 reply; 74+ messages in thread
From: Tvrtko Ursulin @ 2019-04-25 12:14 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 25/04/2019 10:19, Chris Wilson wrote:
> Having allowed the user to define the set of engines that they want to
> use, we go one step further and allow them to bind those engines into
> a single virtual instance. Submitting a batch to the virtual engine
> will then forward it to any one of the set so as to best distribute
> load.  The virtual engine has a single timeline across all
> engines (it operates as a single queue), so it is not able to concurrently
> run batches across multiple engines by itself; that is left up to the user
> to submit multiple concurrent batches to multiple queues. Multiple users
> will be load balanced across the system.
> 
> The mechanism used for load balancing in this patch is a late greedy
> balancer. When a request is ready for execution, it is added to each
> engine's queue, and when an engine is ready for its next request it
> claims it from the virtual engine. The first engine to do so wins, i.e.
> the request is executed at the earliest opportunity (idle moment) in the
> system.
> 
> As not all HW is created equal, the user is still able to skip the
> virtual engine and execute the batch on a specific engine, all within the
> same queue. It will then be executed in order on the correct engine,
> with execution on other virtual engines being moved away due to the load
> detection.
> 
> A couple of areas for potential improvement left!
> 
> - The virtual engine always takes priority over equal-priority tasks.
> Mostly broken up by applying FQ_CODEL rules for prioritising new clients,
> and hopefully the virtual and real engines are not then congested (i.e.
> all work is via virtual engines, or all work is to the real engine).
> 
> - We require the breadcrumb irq around every virtual engine request. For
> normal engines, we eliminate the need for the slow round trip via
> interrupt by using the submit fence and queueing in order. For virtual
> engines, we have to allow any job to transfer to a new ring, and cannot
> coalesce the submissions, so require the completion fence instead,
> forcing the persistent use of interrupts.
> 
> - We only drip feed single requests through each virtual engine and onto
> the physical engines, even if there was enough work to fill all ELSP,
> leaving small stalls with an idle CS event at the end of every request.
> Could we be greedy and fill both slots? Being lazy is virtuous for load
> distribution on less-than-full workloads though.
> 
> Other areas of improvement are more general, such as reducing lock
> contention, reducing dispatch overhead, looking at direct submission
> rather than bouncing around tasklets etc.
> 
> sseu: Lift the restriction to allow sseu to be reconfigured on virtual
> engines composed of RENDER_CLASS (rcs).
> 
> v2: macroize check_user_mbz()
> v3: Cancel virtual engines on wedging
> v4: Commence commenting
> v5: Replace 64b sibling_mask with a list of class:instance
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_breadcrumbs.c  |   6 +-
>   drivers/gpu/drm/i915/gt/intel_engine_types.h |   8 +
>   drivers/gpu/drm/i915/gt/intel_lrc.c          | 656 ++++++++++++++++++-
>   drivers/gpu/drm/i915/gt/intel_lrc.h          |   9 +
>   drivers/gpu/drm/i915/gt/selftest_lrc.c       | 180 +++++
>   drivers/gpu/drm/i915/i915_gem.h              |   5 +
>   drivers/gpu/drm/i915/i915_gem_context.c      | 118 +++-
>   drivers/gpu/drm/i915/i915_scheduler.c        |  18 +-
>   drivers/gpu/drm/i915/i915_timeline_types.h   |   1 +
>   include/uapi/drm/i915_drm.h                  |  39 ++
>   10 files changed, 1012 insertions(+), 28 deletions(-)
> 
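The late greedy balancing described in the commit message comes down to a small claim step: the virtual engine exposes at most one ready request, every idle sibling tries to take it when it next looks for work, and the first sibling to succeed executes it while the others fall back to their own queues. A rough sketch of just that step, with hypothetical names and none of the real locking or rbtree bookkeeping, might be:

#include <stdatomic.h>
#include <stddef.h>

struct request;

struct virtual_engine {
	/* At most one request is in flight through the virtual engine. */
	_Atomic(struct request *) pending;
};

/* Called by each idle physical sibling when it looks for its next request. */
static struct request *claim_from_virtual(struct virtual_engine *ve)
{
	struct request *rq = atomic_load(&ve->pending);

	/*
	 * Late and greedy: whichever sibling swaps the request out first
	 * wins; everyone else observes NULL and returns to its own queue.
	 */
	while (rq &&
	       !atomic_compare_exchange_weak(&ve->pending, &rq, NULL))
		;

	return rq;
}

In the patch itself the equivalent claim happens under the execlists locks via the per-sibling ve_node rbtrees, but the "first idle engine to dequeue wins" behaviour sketched here is the essence of the distribution policy.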
> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> index 4283224249d4..35eef02b3f09 100644
> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> @@ -290,8 +290,12 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
>   				break;
>   		}
>   		list_add(&rq->signal_link, pos);
> -		if (pos == &ce->signals) /* catch transitions from empty list */
> +		if (pos == &ce->signals) { /* catch transitions from empty */
>   			list_move_tail(&ce->signal_link, &b->signalers);
> +		} else if (ce->engine != rq->engine) { /* virtualised */
> +			list_move_tail(&ce->signal_link, &b->signalers);
> +			intel_engine_queue_breadcrumbs(rq->engine);
> +		}
>   
>   		set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
>   		spin_unlock(&b->irq_lock);
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 9d64e33f8427..b07d3685e2cb 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -227,6 +227,7 @@ struct intel_engine_execlists {
>   	 * @queue: queue of requests, in priority lists
>   	 */
>   	struct rb_root_cached queue;
> +	struct rb_root_cached virtual;
>   
>   	/**
>   	 * @csb_write: control register for Context Switch buffer
> @@ -445,6 +446,7 @@ struct intel_engine_cs {
>   #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
>   #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
>   #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
> +#define I915_ENGINE_IS_VIRTUAL       BIT(5)
>   	unsigned int flags;
>   
>   	/*
> @@ -534,6 +536,12 @@ intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
>   	return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
>   }
>   
> +static inline bool
> +intel_engine_is_virtual(const struct intel_engine_cs *engine)
> +{
> +	return engine->flags & I915_ENGINE_IS_VIRTUAL;
> +}
> +
>   #define instdone_slice_mask(dev_priv__) \
>   	(IS_GEN(dev_priv__, 7) ? \
>   	 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 2e74c792104e..e8faf3417f9c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -136,6 +136,7 @@
>   #include "i915_drv.h"
>   #include "i915_gem_render_state.h"
>   #include "i915_vgpu.h"
> +#include "intel_engine_pm.h"
>   #include "intel_lrc_reg.h"
>   #include "intel_mocs.h"
>   #include "intel_reset.h"
> @@ -165,6 +166,41 @@
>   
>   #define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT | I915_PRIORITY_NOSEMAPHORE)
>   
> +struct virtual_engine {
> +	struct intel_engine_cs base;
> +	struct intel_context context;
> +
> +	/*
> +	 * We allow only a single request through the virtual engine at a time
> +	 * (each request in the timeline waits for the completion fence of
> +	 * the previous before being submitted). By restricting ourselves to
> +	 * only submitting a single request, each request is placed on to a
> +	 * physical to maximise load spreading (by virtue of the late greedy
> +	 * scheduling -- each real engine takes the next available request
> +	 * upon idling).
> +	 */
> +	struct i915_request *request;
> +
> +	/*
> +	 * We keep a rbtree of available virtual engines inside each physical
> +	 * engine, sorted by priority. Here we preallocate the nodes we need
> +	 * for the virtual engine, indexed by physical_engine->id.
> +	 */
> +	struct ve_node {
> +		struct rb_node rb;
> +		int prio;
> +	} nodes[I915_NUM_ENGINES];
> +
> +	/* And finally, which physical engines this virtual engine maps onto. */
> +	unsigned int num_siblings;
> +	struct intel_engine_cs *siblings[0];
> +};
> +
> +static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
> +{
> +	return container_of(engine, struct virtual_engine, base);
> +}
> +
>   static int execlists_context_deferred_alloc(struct intel_context *ce,
>   					    struct intel_engine_cs *engine);
>   static void execlists_init_reg_state(u32 *reg_state,
> @@ -228,7 +264,8 @@ static int queue_prio(const struct intel_engine_execlists *execlists)
>   }
>   
>   static inline bool need_preempt(const struct intel_engine_cs *engine,
> -				const struct i915_request *rq)
> +				const struct i915_request *rq,
> +				struct rb_node *rb)
>   {
>   	int last_prio;
>   
> @@ -263,6 +300,25 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
>   	    rq_prio(list_next_entry(rq, link)) > last_prio)
>   		return true;
>   
> +	if (rb) {
> +		struct virtual_engine *ve =
> +			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> +		bool preempt = false;
> +
> +		if (engine == ve->siblings[0]) { /* only preempt one sibling */
> +			struct i915_request *next;
> +
> +			rcu_read_lock();
> +			next = READ_ONCE(ve->request);
> +			if (next)
> +				preempt = rq_prio(next) > last_prio;
> +			rcu_read_unlock();
> +		}
> +
> +		if (preempt)
> +			return preempt;
> +	}
> +
>   	/*
>   	 * If the inflight context did not trigger the preemption, then maybe
>   	 * it was the set of queued requests? Pick the highest priority in
> @@ -381,6 +437,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   	list_for_each_entry_safe_reverse(rq, rn,
>   					 &engine->timeline.requests,
>   					 link) {
> +		struct intel_engine_cs *owner;
> +
>   		if (i915_request_completed(rq))
>   			break;
>   
> @@ -389,16 +447,32 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   
>   		GEM_BUG_ON(rq->hw_context->active);
>   
> -		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
> -		if (rq_prio(rq) != prio) {
> -			prio = rq_prio(rq);
> -			pl = i915_sched_lookup_priolist(engine, prio);
> -		}
> -		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
> +		/*
> +		 * Push the request back into the queue for later resubmission.
> +		 * If this request is not native to this physical engine (i.e.
> +		 * it came from a virtual source), push it back onto the virtual
> +		 * engine so that it can be moved across onto another physical
> +		 * engine as load dictates.
> +		 */
> +		owner = rq->hw_context->engine;
> +		if (likely(owner == engine)) {
> +			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
> +			if (rq_prio(rq) != prio) {
> +				prio = rq_prio(rq);
> +				pl = i915_sched_lookup_priolist(engine, prio);
> +			}
> +			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
>   
> -		list_add(&rq->sched.link, pl);
> +			list_add(&rq->sched.link, pl);
> +			active = rq;
> +		} else {
> +			if (__i915_request_has_started(rq))
> +				rq->sched.attr.priority |= ACTIVE_PRIORITY;
>   
> -		active = rq;
> +			rq->engine = owner;
> +			owner->submit_request(rq);
> +			active = NULL;
> +		}
>   	}
>   
>   	/*
> @@ -418,7 +492,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   	 * in the priority queue, but they will not gain immediate access to
>   	 * the GPU.
>   	 */
> -	if (~prio & ACTIVE_PRIORITY && __i915_request_has_started(active)) {
> +	if ( ~prio & ACTIVE_PRIORITY &&
> +	    active && __i915_request_has_started(active)) {
>   		prio |= ACTIVE_PRIORITY;
>   		active->sched.attr.priority = prio;
>   		list_move_tail(&active->sched.link,
> @@ -658,6 +733,72 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
>   						  execlists));
>   }
>   
> +static void virtual_update_register_offsets(u32 *regs,
> +					    struct intel_engine_cs *engine)
> +{
> +	u32 base = engine->mmio_base;
> +
> +	regs[CTX_CONTEXT_CONTROL] =
> +		i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base));
> +	regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
> +	regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base));
> +	regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base));
> +	regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base));
> +
> +	regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
> +	regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
> +	regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
> +	regs[CTX_SECOND_BB_HEAD_U] =
> +		i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
> +	regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
> +	regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
> +
> +	regs[CTX_CTX_TIMESTAMP] =
> +		i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
> +	regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3));
> +	regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3));
> +	regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2));
> +	regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 2));
> +	regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 1));
> +	regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 1));
> +	regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
> +	regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
> +
> +	if (engine->class == RENDER_CLASS) {
> +		regs[CTX_RCS_INDIRECT_CTX] =
> +			i915_mmio_reg_offset(RING_INDIRECT_CTX(base));
> +		regs[CTX_RCS_INDIRECT_CTX_OFFSET] =
> +			i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base));
> +		regs[CTX_BB_PER_CTX_PTR] =
> +			i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));
> +
> +		regs[CTX_R_PWR_CLK_STATE] =
> +			i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
> +	}
> +}
> +
> +static bool virtual_matches(const struct virtual_engine *ve,
> +			    const struct i915_request *rq,
> +			    const struct intel_engine_cs *engine)
> +{
> +	const struct intel_engine_cs *active;
> +
> +	/*
> +	 * We track when the HW has completed saving the context image
> +	 * (i.e. when we have seen the final CS event switching out of
> +	 * the context) and must not overwrite the context image before
> +	 * then. This restricts us to only using the active engine
> +	 * while the previous virtualized request is inflight (so
> +	 * we reuse the register offsets). This is a very small
> +	 * hysteresis on the greedy selection algorithm.
> +	 */
> +	active = READ_ONCE(ve->context.active);
> +	if (active && active != engine)
> +		return false;
> +
> +	return true;
> +}
> +
>   static void execlists_dequeue(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
> @@ -690,6 +831,26 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	 * and context switches) submission.
>   	 */
>   
> +	for (rb = rb_first_cached(&execlists->virtual); rb; ) {
> +		struct virtual_engine *ve =
> +			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> +		struct i915_request *rq = READ_ONCE(ve->request);
> +
> +		if (!rq) { /* lazily cleanup after another engine handled rq */
> +			rb_erase_cached(rb, &execlists->virtual);
> +			RB_CLEAR_NODE(rb);
> +			rb = rb_first_cached(&execlists->virtual);
> +			continue;
> +		}
> +
> +		if (!virtual_matches(ve, rq, engine)) {
> +			rb = rb_next(rb);
> +			continue;
> +		}
> +
> +		break;
> +	}
> +
>   	if (last) {
>   		/*
>   		 * Don't resubmit or switch until all outstanding
> @@ -711,7 +872,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
>   			return;
>   
> -		if (need_preempt(engine, last)) {
> +		if (need_preempt(engine, last, rb)) {
>   			inject_preempt_context(engine);
>   			return;
>   		}
> @@ -751,6 +912,89 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		last->tail = last->wa_tail;
>   	}
>   
> +	while (rb) { /* XXX virtual is always taking precedence */
> +		struct virtual_engine *ve =
> +			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> +		struct i915_request *rq;
> +
> +		spin_lock(&ve->base.timeline.lock);
> +
> +		rq = ve->request;
> +		if (unlikely(!rq)) { /* lost the race to a sibling */
> +			spin_unlock(&ve->base.timeline.lock);
> +			rb_erase_cached(rb, &execlists->virtual);
> +			RB_CLEAR_NODE(rb);
> +			rb = rb_first_cached(&execlists->virtual);
> +			continue;
> +		}
> +
> +		GEM_BUG_ON(rq != ve->request);
> +		GEM_BUG_ON(rq->engine != &ve->base);
> +		GEM_BUG_ON(rq->hw_context != &ve->context);
> +
> +		if (rq_prio(rq) >= queue_prio(execlists)) {
> +			if (!virtual_matches(ve, rq, engine)) {
> +				spin_unlock(&ve->base.timeline.lock);
> +				rb = rb_next(rb);
> +				continue;
> +			}
> +
> +			if (last && !can_merge_rq(last, rq)) {
> +				spin_unlock(&ve->base.timeline.lock);
> +				return; /* leave this rq for another engine */
> +			}
> +
> +			GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
> +				  engine->name,
> +				  rq->fence.context,
> +				  rq->fence.seqno,
> +				  i915_request_completed(rq) ? "!" :
> +				  i915_request_started(rq) ? "*" :
> +				  "",
> +				  yesno(engine != ve->siblings[0]));
> +
> +			ve->request = NULL;
> +			ve->base.execlists.queue_priority_hint = INT_MIN;
> +			rb_erase_cached(rb, &execlists->virtual);
> +			RB_CLEAR_NODE(rb);
> +
> +			rq->engine = engine;
> +
> +			if (engine != ve->siblings[0]) {
> +				u32 *regs = ve->context.lrc_reg_state;
> +				unsigned int n;
> +
> +				GEM_BUG_ON(READ_ONCE(ve->context.active));
> +				virtual_update_register_offsets(regs, engine);
> +
> +				/*
> +				 * Move the bound engine to the top of the list
> +				 * for future execution. We then kick this
> +				 * tasklet first before checking others, so that
> +				 * we preferentially reuse this set of bound
> +				 * registers.
> +				 */
> +				for (n = 1; n < ve->num_siblings; n++) {
> +					if (ve->siblings[n] == engine) {
> +						swap(ve->siblings[n],
> +						     ve->siblings[0]);
> +						break;
> +					}
> +				}
> +
> +				GEM_BUG_ON(ve->siblings[0] != engine);
> +			}
> +
> +			__i915_request_submit(rq);
> +			trace_i915_request_in(rq, port_index(port, execlists));
> +			submit = true;
> +			last = rq;
> +		}
> +
> +		spin_unlock(&ve->base.timeline.lock);
> +		break;
> +	}
> +
>   	while ((rb = rb_first_cached(&execlists->queue))) {
>   		struct i915_priolist *p = to_priolist(rb);
>   		struct i915_request *rq, *rn;
> @@ -1874,6 +2118,25 @@ static void reset_csb_pointers(struct intel_engine_execlists *execlists)
>   			       &execlists->csb_status[reset_value]);
>   }
>   
> +static struct i915_request *active_request(struct i915_request *rq)
> +{
> +	const struct list_head * const list = &rq->engine->timeline.requests;
> +	const struct intel_context * const context = rq->hw_context;
> +	struct i915_request *active = NULL;
> +
> +	list_for_each_entry_from_reverse(rq, list, link) {
> +		if (i915_request_completed(rq))
> +			break;
> +
> +		if (rq->hw_context != context)
> +			break;
> +
> +		active = rq;
> +	}
> +
> +	return active;
> +}
> +
>   static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
> @@ -1894,7 +2157,8 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
>   	if (!port_isset(execlists->port))
>   		goto out_clear;
>   
> -	ce = port_request(execlists->port)->hw_context;
> +	rq = port_request(execlists->port);
> +	ce = rq->hw_context;
>   
>   	/*
>   	 * Catch up with any missed context-switch interrupts.
> @@ -1907,16 +2171,10 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
>   	 */
>   	execlists_cancel_port_requests(execlists);
>   
> -	/* Push back any incomplete requests for replay after the reset. */
> -	rq = __unwind_incomplete_requests(engine);
> +	rq = active_request(rq);
>   	if (!rq)
>   		goto out_replay;
>   
> -	if (rq->hw_context != ce) { /* caught just before a CS event */
> -		rq = NULL;
> -		goto out_replay;
> -	}
> -
>   	/*
>   	 * If this request hasn't started yet, e.g. it is waiting on a
>   	 * semaphore, we need to avoid skipping the request or else we
> @@ -1963,13 +2221,16 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
>   	}
>   	execlists_init_reg_state(regs, ce, engine, ce->ring);
>   
> -	/* Rerun the request; its payload has been neutered (if guilty). */
>   out_replay:
> +	/* Rerun the request; its payload has been neutered (if guilty). */
>   	ce->ring->head =
>   		rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail;
>   	intel_ring_update_space(ce->ring);
>   	__execlists_update_reg_state(ce, engine);
>   
> +	/* Push back any incomplete requests for replay after the reset. */
> +	__unwind_incomplete_requests(engine);
> +
>   out_clear:
>   	execlists_clear_all_active(execlists);
>   }
> @@ -2043,6 +2304,26 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>   		i915_priolist_free(p);
>   	}
>   
> +	/* Cancel all attached virtual engines */
> +	while ((rb = rb_first_cached(&execlists->virtual))) {
> +		struct virtual_engine *ve =
> +			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> +
> +		rb_erase_cached(rb, &execlists->virtual);
> +		RB_CLEAR_NODE(rb);
> +
> +		spin_lock(&ve->base.timeline.lock);
> +		if (ve->request) {
> +			ve->request->engine = engine;
> +			__i915_request_submit(ve->request);
> +			dma_fence_set_error(&ve->request->fence, -EIO);
> +			i915_request_mark_complete(ve->request);
> +			ve->base.execlists.queue_priority_hint = INT_MIN;
> +			ve->request = NULL;
> +		}
> +		spin_unlock(&ve->base.timeline.lock);
> +	}
> +
>   	/* Remaining _unready_ requests will be nop'ed when submitted */
>   
>   	execlists->queue_priority_hint = INT_MIN;
> @@ -2760,6 +3041,318 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
>   	return ret;
>   }
>   
> +static void virtual_context_destroy(struct kref *kref)
> +{
> +	struct virtual_engine *ve =
> +		container_of(kref, typeof(*ve), context.ref);
> +	unsigned int n;
> +
> +	GEM_BUG_ON(ve->request);
> +	GEM_BUG_ON(ve->context.active);
> +
> +	for (n = 0; n < ve->num_siblings; n++) {
> +		struct intel_engine_cs *sibling = ve->siblings[n];
> +		struct rb_node *node = &ve->nodes[sibling->id].rb;
> +
> +		if (RB_EMPTY_NODE(node))
> +			continue;
> +
> +		spin_lock_irq(&sibling->timeline.lock);
> +
> +		if (!RB_EMPTY_NODE(node))
> +			rb_erase_cached(node, &sibling->execlists.virtual);
> +
> +		spin_unlock_irq(&sibling->timeline.lock);
> +	}
> +	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
> +
> +	if (ve->context.state)
> +		__execlists_context_fini(&ve->context);
> +
> +	i915_timeline_fini(&ve->base.timeline);
> +	kfree(ve);
> +}
> +
> +static void virtual_engine_initial_hint(struct virtual_engine *ve)
> +{
> +	int swp;
> +
> +	/*
> +	 * Pick a random sibling on starting to help spread the load around.
> +	 *
> +	 * New contexts are typically created with exactly the same order
> +	 * of siblings, and often started in batches. Due to the way we iterate
> +	 * the array of siblings when submitting requests, sibling[0] is
> +	 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
> +	 * randomised across the system, we also help spread the load by the
> +	 * first engine we inspect being different each time.
> +	 *
> +	 * NB This does not force us to execute on this engine, it will just
> +	 * typically be the first we inspect for submission.
> +	 */
> +	swp = prandom_u32_max(ve->num_siblings);
> +	if (!swp)
> +		return;
> +
> +	swap(ve->siblings[swp], ve->siblings[0]);
> +	virtual_update_register_offsets(ve->context.lrc_reg_state,
> +					ve->siblings[0]);
> +}
> +
> +static int virtual_context_pin(struct intel_context *ce)
> +{
> +	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
> +	int err;
> +
> +	/* Note: we must use a real engine class for setting up reg state */
> +	err = __execlists_context_pin(ce, ve->siblings[0]);
> +	if (err)
> +		return err;
> +
> +	virtual_engine_initial_hint(ve);
> +	return 0;
> +}
> +
> +static void virtual_context_enter(struct intel_context *ce)
> +{
> +	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
> +	unsigned int n;
> +
> +	for (n = 0; n < ve->num_siblings; n++)
> +		intel_engine_pm_get(ve->siblings[n]);
> +}
> +
> +static void virtual_context_exit(struct intel_context *ce)
> +{
> +	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
> +	unsigned int n;
> +
> +	for (n = 0; n < ve->num_siblings; n++)
> +		intel_engine_pm_put(ve->siblings[n]);
> +}
> +
> +static const struct intel_context_ops virtual_context_ops = {
> +	.pin = virtual_context_pin,
> +	.unpin = execlists_context_unpin,
> +
> +	.enter = virtual_context_enter,
> +	.exit = virtual_context_exit,
> +
> +	.destroy = virtual_context_destroy,
> +};
> +
> +static void virtual_submission_tasklet(unsigned long data)
> +{
> +	struct virtual_engine * const ve = (struct virtual_engine *)data;
> +	const int prio = ve->base.execlists.queue_priority_hint;
> +	unsigned int n;
> +
> +	local_irq_disable();
> +	for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
> +		struct intel_engine_cs *sibling = ve->siblings[n];
> +		struct ve_node * const node = &ve->nodes[sibling->id];
> +		struct rb_node **parent, *rb;
> +		bool first;
> +
> +		spin_lock(&sibling->timeline.lock);
> +
> +		if (!RB_EMPTY_NODE(&node->rb)) {
> +			/*
> +			 * Cheat and avoid rebalancing the tree if we can
> +			 * reuse this node in situ.
> +			 */
> +			first = rb_first_cached(&sibling->execlists.virtual) ==
> +				&node->rb;
> +			if (prio == node->prio || (prio > node->prio && first))
> +				goto submit_engine;
> +
> +			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
> +		}
> +
> +		rb = NULL;
> +		first = true;
> +		parent = &sibling->execlists.virtual.rb_root.rb_node;
> +		while (*parent) {
> +			struct ve_node *other;
> +
> +			rb = *parent;
> +			other = rb_entry(rb, typeof(*other), rb);
> +			if (prio > other->prio) {
> +				parent = &rb->rb_left;
> +			} else {
> +				parent = &rb->rb_right;
> +				first = false;
> +			}
> +		}
> +
> +		rb_link_node(&node->rb, rb, parent);
> +		rb_insert_color_cached(&node->rb,
> +				       &sibling->execlists.virtual,
> +				       first);
> +
> +submit_engine:
> +		GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
> +		node->prio = prio;
> +		if (first && prio > sibling->execlists.queue_priority_hint) {
> +			sibling->execlists.queue_priority_hint = prio;
> +			tasklet_hi_schedule(&sibling->execlists.tasklet);
> +		}
> +
> +		spin_unlock(&sibling->timeline.lock);
> +	}
> +	local_irq_enable();
> +}
> +
> +static void virtual_submit_request(struct i915_request *rq)
> +{
> +	struct virtual_engine *ve = to_virtual_engine(rq->engine);
> +
> +	GEM_TRACE("%s: rq=%llx:%lld\n",
> +		  ve->base.name,
> +		  rq->fence.context,
> +		  rq->fence.seqno);
> +
> +	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
> +
> +	GEM_BUG_ON(ve->request);
> +	ve->base.execlists.queue_priority_hint = rq_prio(rq);
> +	WRITE_ONCE(ve->request, rq);
> +
> +	tasklet_schedule(&ve->base.execlists.tasklet);
> +}
> +
> +struct intel_context *
> +intel_execlists_create_virtual(struct i915_gem_context *ctx,
> +			       struct intel_engine_cs **siblings,
> +			       unsigned int count)
> +{
> +	struct virtual_engine *ve;
> +	unsigned int n;
> +	int err;
> +
> +	if (count == 0)
> +		return ERR_PTR(-EINVAL);
> +
> +	if (count == 1)
> +		return intel_context_create(ctx, siblings[0]);
> +
> +	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
> +	if (!ve)
> +		return ERR_PTR(-ENOMEM);
> +
> +	ve->base.i915 = ctx->i915;
> +	ve->base.id = -1;
> +	ve->base.class = OTHER_CLASS;
> +	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
> +	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
> +	ve->base.flags = I915_ENGINE_IS_VIRTUAL;
> +
> +	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
> +
> +	err = i915_timeline_init(ctx->i915, &ve->base.timeline, NULL);
> +	if (err)
> +		goto err_put;
> +	i915_timeline_set_subclass(&ve->base.timeline, TIMELINE_VIRTUAL);
> +
> +	intel_engine_init_execlists(&ve->base);
> +
> +	ve->base.cops = &virtual_context_ops;
> +	ve->base.request_alloc = execlists_request_alloc;
> +
> +	ve->base.schedule = i915_schedule;
> +	ve->base.submit_request = virtual_submit_request;
> +
> +	ve->base.execlists.queue_priority_hint = INT_MIN;
> +	tasklet_init(&ve->base.execlists.tasklet,
> +		     virtual_submission_tasklet,
> +		     (unsigned long)ve);
> +
> +	intel_context_init(&ve->context, ctx, &ve->base);
> +
> +	for (n = 0; n < count; n++) {
> +		struct intel_engine_cs *sibling = siblings[n];
> +
> +		GEM_BUG_ON(!is_power_of_2(sibling->mask));
> +		if (sibling->mask & ve->base.mask) {
> +			DRM_DEBUG("duplicate %s entry in load balancer\n",
> +				  sibling->name);
> +			err = -EINVAL;
> +			goto err_put;
> +		}
> +
> +		/*
> +		 * The virtual engine implementation is tightly coupled to
> +		 * the execlists backend -- we push requests directly
> +		 * into a tree inside each physical engine. We could support
> +		 * layering if we handle cloning of the requests and
> +		 * submitting a copy into each backend.
> +		 */
> +		if (sibling->execlists.tasklet.func !=
> +		    execlists_submission_tasklet) {
> +			err = -ENODEV;
> +			goto err_put;
> +		}
> +
> +		GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
> +		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
> +
> +		ve->siblings[ve->num_siblings++] = sibling;
> +		ve->base.mask |= sibling->mask;
> +
> +		/*
> +		 * All physical engines must be compatible for their emission
> +		 * functions (as we build the instructions during request
> +		 * construction and do not alter them before submission
> +		 * on the physical engine). We use the engine class as a guide
> +		 * here, although that could be refined.
> +		 */
> +		if (ve->base.class != OTHER_CLASS) {
> +			if (ve->base.class != sibling->class) {
> +				DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
> +					  sibling->class, ve->base.class);
> +				err = -EINVAL;
> +				goto err_put;
> +			}
> +			continue;
> +		}
> +
> +		ve->base.class = sibling->class;
> +		ve->base.uabi_class = sibling->uabi_class;
> +		snprintf(ve->base.name, sizeof(ve->base.name),
> +			 "v%dx%d", ve->base.class, count);
> +		ve->base.context_size = sibling->context_size;
> +
> +		ve->base.emit_bb_start = sibling->emit_bb_start;
> +		ve->base.emit_flush = sibling->emit_flush;
> +		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
> +		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
> +		ve->base.emit_fini_breadcrumb_dw =
> +			sibling->emit_fini_breadcrumb_dw;
> +	}
> +
> +	return &ve->context;
> +
> +err_put:
> +	intel_context_put(&ve->context);
> +	return ERR_PTR(err);
> +}
> +
> +struct intel_context *
> +intel_execlists_clone_virtual(struct i915_gem_context *ctx,
> +			      struct intel_engine_cs *src)
> +{
> +	struct virtual_engine *se = to_virtual_engine(src);
> +	struct intel_context *dst;
> +
> +	dst = intel_execlists_create_virtual(ctx,
> +					     se->siblings,
> +					     se->num_siblings);
> +	if (IS_ERR(dst))
> +		return dst;
> +
> +	return dst;
> +}
> +
>   void intel_execlists_show_requests(struct intel_engine_cs *engine,
>   				   struct drm_printer *m,
>   				   void (*show_request)(struct drm_printer *m,
> @@ -2817,6 +3410,29 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
>   		show_request(m, last, "\t\tQ ");
>   	}
>   
> +	last = NULL;
> +	count = 0;
> +	for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
> +		struct virtual_engine *ve =
> +			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> +		struct i915_request *rq = READ_ONCE(ve->request);
> +
> +		if (rq) {
> +			if (count++ < max - 1)
> +				show_request(m, rq, "\t\tV ");
> +			else
> +				last = rq;
> +		}
> +	}
> +	if (last) {
> +		if (count > max) {
> +			drm_printf(m,
> +				   "\t\t...skipping %d virtual requests...\n",
> +				   count - max);
> +		}
> +		show_request(m, last, "\t\tV ");
> +	}
> +
>   	spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
> index a0dc907a7249..5530606052e5 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
> @@ -114,4 +114,13 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
>   							const char *prefix),
>   				   unsigned int max);
>   
> +struct intel_context *
> +intel_execlists_create_virtual(struct i915_gem_context *ctx,
> +			       struct intel_engine_cs **siblings,
> +			       unsigned int count);
> +
> +struct intel_context *
> +intel_execlists_clone_virtual(struct i915_gem_context *ctx,
> +			      struct intel_engine_cs *src);
> +
>   #endif /* _INTEL_LRC_H_ */
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 6f223f7facde..558399e5c7bb 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -1301,6 +1301,185 @@ static int live_preempt_smoke(void *arg)
>   	return err;
>   }
>   
> +static int nop_virtual_engine(struct drm_i915_private *i915,
> +			      struct intel_engine_cs **siblings,
> +			      unsigned int nsibling,
> +			      unsigned int nctx,
> +			      unsigned int flags)
> +#define CHAIN BIT(0)
> +{
> +	IGT_TIMEOUT(end_time);
> +	struct i915_request *request[16];
> +	struct i915_gem_context *ctx[16];
> +	struct intel_context *ve[16];
> +	unsigned long n, prime, nc;
> +	struct igt_live_test t;
> +	ktime_t times[2] = {};
> +	int err;
> +
> +	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx));
> +
> +	for (n = 0; n < nctx; n++) {
> +		ctx[n] = kernel_context(i915);
> +		if (!ctx[n]) {
> +			err = -ENOMEM;
> +			nctx = n;
> +			goto out;
> +		}
> +
> +		ve[n] = intel_execlists_create_virtual(ctx[n],
> +						       siblings, nsibling);
> +		if (IS_ERR(ve[n])) {
> +			kernel_context_close(ctx[n]);
> +			err = PTR_ERR(ve[n]);
> +			nctx = n;
> +			goto out;
> +		}
> +
> +		err = intel_context_pin(ve[n]);
> +		if (err) {
> +			intel_context_put(ve[n]);
> +			kernel_context_close(ctx[n]);
> +			nctx = n;
> +			goto out;
> +		}
> +	}
> +
> +	err = igt_live_test_begin(&t, i915, __func__, ve[0]->engine->name);
> +	if (err)
> +		goto out;
> +
> +	for_each_prime_number_from(prime, 1, 8192) {
> +		times[1] = ktime_get_raw();
> +
> +		if (flags & CHAIN) {
> +			for (nc = 0; nc < nctx; nc++) {
> +				for (n = 0; n < prime; n++) {
> +					request[nc] =
> +						i915_request_create(ve[nc]);
> +					if (IS_ERR(request[nc])) {
> +						err = PTR_ERR(request[nc]);
> +						goto out;
> +					}
> +
> +					i915_request_add(request[nc]);
> +				}
> +			}
> +		} else {
> +			for (n = 0; n < prime; n++) {
> +				for (nc = 0; nc < nctx; nc++) {
> +					request[nc] =
> +						i915_request_create(ve[nc]);
> +					if (IS_ERR(request[nc])) {
> +						err = PTR_ERR(request[nc]);
> +						goto out;
> +					}
> +
> +					i915_request_add(request[nc]);
> +				}
> +			}
> +		}
> +
> +		for (nc = 0; nc < nctx; nc++) {
> +			if (i915_request_wait(request[nc],
> +					      I915_WAIT_LOCKED,
> +					      HZ / 10) < 0) {
> +				pr_err("%s(%s): wait for %llx:%lld timed out\n",
> +				       __func__, ve[0]->engine->name,
> +				       request[nc]->fence.context,
> +				       request[nc]->fence.seqno);
> +
> +				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
> +					  __func__, ve[0]->engine->name,
> +					  request[nc]->fence.context,
> +					  request[nc]->fence.seqno);
> +				GEM_TRACE_DUMP();
> +				i915_gem_set_wedged(i915);
> +				break;
> +			}
> +		}
> +
> +		times[1] = ktime_sub(ktime_get_raw(), times[1]);
> +		if (prime == 1)
> +			times[0] = times[1];
> +
> +		if (__igt_timeout(end_time, NULL))
> +			break;
> +	}
> +
> +	err = igt_live_test_end(&t);
> +	if (err)
> +		goto out;
> +
> +	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
> +		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
> +		prime, div64_u64(ktime_to_ns(times[1]), prime));
> +
> +out:
> +	if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +		err = -EIO;
> +
> +	for (nc = 0; nc < nctx; nc++) {
> +		intel_context_unpin(ve[nc]);
> +		intel_context_put(ve[nc]);
> +		kernel_context_close(ctx[nc]);
> +	}
> +	return err;
> +}
> +
> +static int live_virtual_engine(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	unsigned int class, inst;
> +	int err = -ENODEV;
> +
> +	if (USES_GUC_SUBMISSION(i915))
> +		return 0;
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +
> +	for_each_engine(engine, i915, id) {
> +		err = nop_virtual_engine(i915, &engine, 1, 1, 0);
> +		if (err) {
> +			pr_err("Failed to wrap engine %s: err=%d\n",
> +			       engine->name, err);
> +			goto out_unlock;
> +		}
> +	}
> +
> +	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
> +		int nsibling, n;
> +
> +		nsibling = 0;
> +		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
> +			if (!i915->engine_class[class][inst])
> +				break;

Continue instead of break, I think, to be able to use vcs0 + vcs2 on Icelake.
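
Something like this, perhaps (untested sketch):

		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
			/*
			 * Skip over holes so that an absent vcs1 does not
			 * hide vcs2 behind it.
			 */
			if (!i915->engine_class[class][inst])
				continue;

			siblings[nsibling++] = i915->engine_class[class][inst];
		}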

> +
> +			siblings[nsibling++] = i915->engine_class[class][inst];
> +		}
> +		if (nsibling < 2)
> +			continue;

Could test a single sibling as well, no?
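
E.g. (just a thought, untested) relaxing the guard to

		if (!nsibling)
			continue;

would also cover nsibling == 1, which exercises the count == 1 fallback
in intel_execlists_create_virtual() (i.e. plain intel_context_create()
on the sole sibling).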

> +
> +		for (n = 1; n <= nsibling + 1; n++) {
> +			err = nop_virtual_engine(i915, siblings, nsibling,
> +						 n, 0);
> +			if (err)
> +				goto out_unlock;
> +		}
> +
> +		err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN);
> +		if (err)
> +			goto out_unlock;
> +	}
> +
> +out_unlock:
> +	mutex_unlock(&i915->drm.struct_mutex);
> +	return err;
> +}
> +
>   int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   {
>   	static const struct i915_subtest tests[] = {
> @@ -1313,6 +1492,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   		SUBTEST(live_chain_preempt),
>   		SUBTEST(live_preempt_hang),
>   		SUBTEST(live_preempt_smoke),
> +		SUBTEST(live_virtual_engine),
>   	};
>   
>   	if (!HAS_EXECLISTS(i915))
> diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
> index 67f8a4a807a0..fe82d3571072 100644
> --- a/drivers/gpu/drm/i915/i915_gem.h
> +++ b/drivers/gpu/drm/i915/i915_gem.h
> @@ -91,4 +91,9 @@ static inline bool __tasklet_enable(struct tasklet_struct *t)
>   	return atomic_dec_and_test(&t->count);
>   }
>   
> +static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
> +{
> +	return test_bit(TASKLET_STATE_SCHED, &t->state);
> +}
> +
>   #endif /* __I915_GEM_H__ */
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index ba7582d955d1..57b09f624bb4 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -86,6 +86,7 @@
>    */
>   
>   #include <linux/log2.h>
> +#include <linux/nospec.h>
>   
>   #include <drm/i915_drm.h>
>   
> @@ -1209,7 +1210,6 @@ __intel_context_reconfigure_sseu(struct intel_context *ce,
>   	int ret;
>   
>   	GEM_BUG_ON(INTEL_GEN(ce->gem_context->i915) < 8);
> -	GEM_BUG_ON(ce->engine->id != RCS0);
>   
>   	ret = intel_context_lock_pinned(ce);
>   	if (ret)
> @@ -1397,7 +1397,102 @@ struct set_engines {
>   	struct i915_gem_engines *engines;
>   };
>   
> +static int
> +set_engines__load_balance(struct i915_user_extension __user *base, void *data)
> +{
> +	struct i915_context_engines_load_balance __user *ext =
> +		container_of_user(base, typeof(*ext), base);
> +	const struct set_engines *set = data;
> +	struct intel_engine_cs *stack[16];
> +	struct intel_engine_cs **siblings;
> +	struct intel_context *ce;
> +	u16 num_siblings, idx;
> +	unsigned int n;
> +	int err;
> +
> +	if (!HAS_EXECLISTS(set->ctx->i915))
> +		return -ENODEV;
> +
> +	if (USES_GUC_SUBMISSION(set->ctx->i915))
> +		return -ENODEV; /* not implemented yet */
> +
> +	if (get_user(idx, &ext->engine_index))
> +		return -EFAULT;
> +
> +	if (idx >= set->engines->num_engines) {
> +		DRM_DEBUG("Invalid placement value, %d >= %d\n",
> +			  idx, set->engines->num_engines);
> +		return -EINVAL;
> +	}
> +
> +	idx = array_index_nospec(idx, set->engines->num_engines);
> +	if (set->engines->engines[idx]) {
> +		DRM_DEBUG("Invalid placement[%d], already occupied\n", idx);
> +		return -EEXIST;
> +	}
> +
> +	if (get_user(num_siblings, &ext->num_siblings))
> +		return -EFAULT;
> +
> +	err = check_user_mbz(&ext->flags);
> +	if (err)
> +		return err;
> +
> +	for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
> +		err = check_user_mbz(&ext->mbz64[n]);
> +		if (err)
> +			return err;
> +	}
> +
> +	siblings = stack;
> +	if (num_siblings > ARRAY_SIZE(stack)) {
> +		siblings = kmalloc_array(num_siblings,
> +					 sizeof(*siblings),
> +					 GFP_KERNEL);
> +		if (!siblings)
> +			return -ENOMEM;
> +	}
> +
> +	for (n = 0; n < num_siblings; n++) {
> +		struct i915_engine_class_instance ci;
> +
> +		if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) {
> +			err = -EFAULT;
> +			goto out_siblings;
> +		}
> +
> +		siblings[n] = intel_engine_lookup_user(set->ctx->i915,
> +						       ci.engine_class,
> +						       ci.engine_instance);
> +		if (!siblings[n]) {
> +			DRM_DEBUG("Invalid sibling[%d]: { class:%d, inst:%d }\n",
> +				  n, ci.engine_class, ci.engine_instance);
> +			err = -EINVAL;
> +			goto out_siblings;
> +		}
> +	}
> +
> +	ce = intel_execlists_create_virtual(set->ctx, siblings, n);
> +	if (IS_ERR(ce)) {
> +		err = PTR_ERR(ce);
> +		goto out_siblings;
> +	}
> +
> +	if (cmpxchg(&set->engines->engines[idx], NULL, ce)) {
> +		intel_context_put(ce);
> +		err = -EEXIST;
> +		goto out_siblings;
> +	}
> +
> +out_siblings:
> +	if (siblings != stack)
> +		kfree(siblings);
> +
> +	return err;
> +}
> +
>   static const i915_user_extension_fn set_engines__extensions[] = {
> +	[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
>   };
>   
>   static int
> @@ -1696,14 +1791,29 @@ static int clone_engines(struct i915_gem_context *dst,
>   
>   	clone->i915 = dst->i915;
>   	for (n = 0; n < e->num_engines; n++) {
> +		struct intel_engine_cs *engine;
> +
>   		if (!e->engines[n]) {
>   			clone->engines[n] = NULL;
>   			continue;
>   		}
> +		engine = e->engines[n]->engine;
>   
> -		clone->engines[n] =
> -			intel_context_create(dst, e->engines[n]->engine);
> -		if (!clone->engines[n]) {
> +		/*
> +		 * Virtual engines are singletons; they can only exist
> +		 * inside a single context, because they embed their
> +		 * HW context... As each virtual context implies a single
> +		 * timeline (each engine can only dequeue a single request
> +		 * at any time), it would be surprising for two contexts
> +		 * to use the same engine. So let's create a copy of
> +		 * the virtual engine instead.
> +		 */
> +		if (intel_engine_is_virtual(engine))
> +			clone->engines[n] =
> +				intel_execlists_clone_virtual(dst, engine);
> +		else
> +			clone->engines[n] = intel_context_create(dst, engine);
> +		if (IS_ERR_OR_NULL(clone->engines[n])) {
>   			__free_engines(clone, n);
>   			goto err_unlock;
>   		}
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 39bc4f54e272..b58d9c23a876 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -248,17 +248,26 @@ sched_lock_engine(const struct i915_sched_node *node,
>   		  struct intel_engine_cs *locked,
>   		  struct sched_cache *cache)
>   {
> -	struct intel_engine_cs *engine = node_to_request(node)->engine;
> +	const struct i915_request *rq = node_to_request(node);
> +	struct intel_engine_cs *engine;
>   
>   	GEM_BUG_ON(!locked);
>   
> -	if (engine != locked) {
> +	/*
> +	 * Virtual engines complicate acquiring the engine timeline lock,
> +	 * as their rq->engine pointer is not stable until under that
> +	 * engine lock. The simple ploy we use is to take the lock then
> +	 * check that the rq still belongs to the newly locked engine.
> +	 */
> +	while (locked != (engine = READ_ONCE(rq->engine))) {
>   		spin_unlock(&locked->timeline.lock);
>   		memset(cache, 0, sizeof(*cache));
>   		spin_lock(&engine->timeline.lock);
> +		locked = engine;
>   	}
>   
> -	return engine;
> +	GEM_BUG_ON(locked != engine);
> +	return locked;
>   }
>   
>   static bool inflight(const struct i915_request *rq,
> @@ -371,8 +380,11 @@ static void __i915_schedule(struct i915_request *rq,
>   		if (prio <= node->attr.priority || node_signaled(node))
>   			continue;
>   
> +		GEM_BUG_ON(node_to_request(node)->engine != engine);
> +
>   		node->attr.priority = prio;
>   		if (!list_empty(&node->link)) {
> +			GEM_BUG_ON(intel_engine_is_virtual(engine));
>   			if (!cache.priolist)
>   				cache.priolist =
>   					i915_sched_lookup_priolist(engine,
> diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h
> index 5256a0b5c5f7..1688705f4a2b 100644
> --- a/drivers/gpu/drm/i915/i915_timeline_types.h
> +++ b/drivers/gpu/drm/i915/i915_timeline_types.h
> @@ -26,6 +26,7 @@ struct i915_timeline {
>   	spinlock_t lock;
>   #define TIMELINE_CLIENT 0 /* default subclass */
>   #define TIMELINE_ENGINE 1
> +#define TIMELINE_VIRTUAL 2
>   	struct mutex mutex; /* protects the flow of requests */
>   
>   	unsigned int pin_count;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 7694113362d4..ff2ababc0984 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -137,6 +137,7 @@ struct i915_engine_class_instance {
>   	__u16 engine_class; /* see enum drm_i915_gem_engine_class */
>   	__u16 engine_instance;
>   #define I915_ENGINE_CLASS_INVALID_NONE -1
> +#define I915_ENGINE_CLASS_INVALID_VIRTUAL -2
>   };
>   
>   /**
> @@ -1607,8 +1608,46 @@ struct drm_i915_gem_context_param_sseu {
>   	__u32 rsvd;
>   };
>   
> +/*
> + * i915_context_engines_load_balance:
> + *
> + * Enable load balancing across this set of engines.
> + *
> + * Into the I915_EXEC_DEFAULT slot [0], a virtual engine is created that when
> + * used will proxy the execbuffer request onto one of the set of engines
> + * in such a way as to distribute the load evenly across the set.
> + *
> + * The set of engines must be compatible (e.g. the same HW class) as they
> + * will share the same logical GPU context and ring.
> + *
> + * To intermix rendering with the virtual engine and direct rendering onto
> + * the backing engines (bypassing the load balancing proxy), the context must
> + * be defined to use a single timeline for all engines.
> + */
> +struct i915_context_engines_load_balance {
> +	struct i915_user_extension base;
> +
> +	__u16 engine_index;
> +	__u16 num_siblings;
> +	__u32 flags; /* all undefined flags must be zero */
> +
> +	__u64 mbz64[1]; /* reserved for future use; must be zero */

Why an array of one? That was a serious question.

> +
> +	struct i915_engine_class_instance engines[0];
> +} __attribute__((packed));
> +
> +#define I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(name__, N__) struct { \
> +	struct i915_user_extension base; \
> +	__u16 engine_index; \
> +	__u16 num_siblings; \
> +	__u32 flags; \
> +	__u64 mbz64[1]; \
> +	struct i915_engine_class_instance engines[N__]; \
> +} __attribute__((packed)) name__
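
Might also be worth sketching the userspace side in the kerneldoc?
Rough and untested, and it assumes the I915_CONTEXT_PARAM_ENGINES uapi
from earlier in the series, an open drm fd plus an already created
context ctx_id, and that an { INVALID, INVALID_NONE } entry leaves the
placement slot empty for the extension to claim (otherwise
set_engines__load_balance() returns -EEXIST):

	I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balance, 2) = {
		.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE,
		.engine_index = 0,	/* create the virtual engine in slot 0 */
		.num_siblings = 2,
		.engines = {
			{ I915_ENGINE_CLASS_VIDEO, 0 },
			{ I915_ENGINE_CLASS_VIDEO, 1 },
		},
	};
	struct {
		__u64 extensions;
		struct i915_engine_class_instance engines[1];
	} __attribute__((packed)) engine_map = {
		.extensions = (__u64)(uintptr_t)&balance,
		/* slot 0 left unset so the extension can fill it */
		.engines = {
			{ I915_ENGINE_CLASS_INVALID,
			  I915_ENGINE_CLASS_INVALID_NONE },
		},
	};
	struct drm_i915_gem_context_param p = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_ENGINES,
		.size = sizeof(engine_map),
		.value = (__u64)(uintptr_t)&engine_map,
	};

	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);

Execbuf into the I915_EXEC_DEFAULT slot [0] then goes through the
balancer, as per the comment above.
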
> +
>   struct i915_context_param_engines {
>   	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
> +#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
>   	struct i915_engine_class_instance engines[0];
>   } __attribute__((packed));
>   
> 

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 74+ messages in thread

* Re: [PATCH 19/45] drm/i915: Load balancing across a virtual engine
  2019-04-25 12:14   ` Tvrtko Ursulin
@ 2019-04-25 12:23     ` Chris Wilson
  0 siblings, 0 replies; 74+ messages in thread
From: Chris Wilson @ 2019-04-25 12:23 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-04-25 13:14:00)
> 
> On 25/04/2019 10:19, Chris Wilson wrote:
> > Having allowed the user to define a set of engines that they will want
> > to only use, we go one step further and allow them to bind those engines
> > into a single virtual instance. Submitting a batch to the virtual engine
> > will then forward it to any one of the set in a manner as best to
> > distribute load.  The virtual engine has a single timeline across all
> > engines (it operates as a single queue), so it is not able to concurrently
> > run batches across multiple engines by itself; that is left up to the user
> > to submit multiple concurrent batches to multiple queues. Multiple users
> > will be load balanced across the system.
> > 
> > The mechanism used for load balancing in this patch is a late greedy
> > balancer. When a request is ready for execution, it is added to each
> > engine's queue, and when an engine is ready for its next request it
> > claims it from the virtual engine. The first engine to do so, wins, i.e.
> > the request is executed at the earliest opportunity (idle moment) in the
> > system.
> > 
> > As not all HW is created equal, the user is still able to skip the
> > virtual engine and execute the batch on a specific engine, all within the
> > same queue. It will then be executed in order on the correct engine,
> > with execution on other virtual engines being moved away due to the load
> > detection.
> > 
> > A couple of areas for potential improvement left!
> > 
> > - The virtual engine always take priority over equal-priority tasks.
> > Mostly broken up by applying FQ_CODEL rules for prioritising new clients,
> > and hopefully the virtual and real engines are not then congested (i.e.
> > all work is via virtual engines, or all work is to the real engine).
> > 
> > - We require the breadcrumb irq around every virtual engine request. For
> > normal engines, we eliminate the need for the slow round trip via
> > interrupt by using the submit fence and queueing in order. For virtual
> > engines, we have to allow any job to transfer to a new ring, and cannot
> > coalesce the submissions, so require the completion fence instead,
> > forcing the persistent use of interrupts.
> > 
> > - We only drip feed single requests through each virtual engine and onto
> > the physical engines, even if there was enough work to fill all ELSP,
> > leaving small stalls with an idle CS event at the end of every request.
> > Could we be greedy and fill both slots? Being lazy is virtuous for load
> > distribution on less-than-full workloads though.
> > 
> > Other areas of improvement are more general, such as reducing lock
> > contention, reducing dispatch overhead, looking at direct submission
> > rather than bouncing around tasklets etc.
> > 
> > sseu: Lift the restriction to allow sseu to be reconfigured on virtual
> > engines composed of RENDER_CLASS (rcs).
> > 
> > v2: macroize check_user_mbz()
> > v3: Cancel virtual engines on wedging
> > v4: Commence commenting
> > v5: Replace 64b sibling_mask with a list of class:instance
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/gt/intel_breadcrumbs.c  |   6 +-
> >   drivers/gpu/drm/i915/gt/intel_engine_types.h |   8 +
> >   drivers/gpu/drm/i915/gt/intel_lrc.c          | 656 ++++++++++++++++++-
> >   drivers/gpu/drm/i915/gt/intel_lrc.h          |   9 +
> >   drivers/gpu/drm/i915/gt/selftest_lrc.c       | 180 +++++
> >   drivers/gpu/drm/i915/i915_gem.h              |   5 +
> >   drivers/gpu/drm/i915/i915_gem_context.c      | 118 +++-
> >   drivers/gpu/drm/i915/i915_scheduler.c        |  18 +-
> >   drivers/gpu/drm/i915/i915_timeline_types.h   |   1 +
> >   include/uapi/drm/i915_drm.h                  |  39 ++
> >   10 files changed, 1012 insertions(+), 28 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> > index 4283224249d4..35eef02b3f09 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> > @@ -290,8 +290,12 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
> >                               break;
> >               }
> >               list_add(&rq->signal_link, pos);
> > -             if (pos == &ce->signals) /* catch transitions from empty list */
> > +             if (pos == &ce->signals) { /* catch transitions from empty */
> >                       list_move_tail(&ce->signal_link, &b->signalers);
> > +             } else if (ce->engine != rq->engine) { /* virtualised */
> > +                     list_move_tail(&ce->signal_link, &b->signalers);
> > +                     intel_engine_queue_breadcrumbs(rq->engine);
> > +             }
> >   
> >               set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
> >               spin_unlock(&b->irq_lock);
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > index 9d64e33f8427..b07d3685e2cb 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > @@ -227,6 +227,7 @@ struct intel_engine_execlists {
> >        * @queue: queue of requests, in priority lists
> >        */
> >       struct rb_root_cached queue;
> > +     struct rb_root_cached virtual;
> >   
> >       /**
> >        * @csb_write: control register for Context Switch buffer
> > @@ -445,6 +446,7 @@ struct intel_engine_cs {
> >   #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
> >   #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
> >   #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
> > +#define I915_ENGINE_IS_VIRTUAL       BIT(5)
> >       unsigned int flags;
> >   
> >       /*
> > @@ -534,6 +536,12 @@ intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
> >       return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
> >   }
> >   
> > +static inline bool
> > +intel_engine_is_virtual(const struct intel_engine_cs *engine)
> > +{
> > +     return engine->flags & I915_ENGINE_IS_VIRTUAL;
> > +}
> > +
> >   #define instdone_slice_mask(dev_priv__) \
> >       (IS_GEN(dev_priv__, 7) ? \
> >        1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index 2e74c792104e..e8faf3417f9c 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -136,6 +136,7 @@
> >   #include "i915_drv.h"
> >   #include "i915_gem_render_state.h"
> >   #include "i915_vgpu.h"
> > +#include "intel_engine_pm.h"
> >   #include "intel_lrc_reg.h"
> >   #include "intel_mocs.h"
> >   #include "intel_reset.h"
> > @@ -165,6 +166,41 @@
> >   
> >   #define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT | I915_PRIORITY_NOSEMAPHORE)
> >   
> > +struct virtual_engine {
> > +     struct intel_engine_cs base;
> > +     struct intel_context context;
> > +
> > +     /*
> > +      * We allow only a single request through the virtual engine at a time
> > +      * (each request in the timeline waits for the completion fence of
> > +      * the previous before being submitted). By restricting ourselves to
> > +      * only submitting a single request, each request is placed on to a
> > +      * physical engine to maximise load spreading (by virtue of the late greedy
> > +      * scheduling -- each real engine takes the next available request
> > +      * upon idling).
> > +      */
> > +     struct i915_request *request;
> > +
> > +     /*
> > +      * We keep a rbtree of available virtual engines inside each physical
> > +      * engine, sorted by priority. Here we preallocate the nodes we need
> > +      * for the virtual engine, indexed by physical_engine->id.
> > +      */
> > +     struct ve_node {
> > +             struct rb_node rb;
> > +             int prio;
> > +     } nodes[I915_NUM_ENGINES];
> > +
> > +     /* And finally, which physical engines this virtual engine maps onto. */
> > +     unsigned int num_siblings;
> > +     struct intel_engine_cs *siblings[0];
> > +};
> > +
> > +static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
> > +{
> > +     return container_of(engine, struct virtual_engine, base);
> > +}
> > +
> >   static int execlists_context_deferred_alloc(struct intel_context *ce,
> >                                           struct intel_engine_cs *engine);
> >   static void execlists_init_reg_state(u32 *reg_state,
> > @@ -228,7 +264,8 @@ static int queue_prio(const struct intel_engine_execlists *execlists)
> >   }
> >   
> >   static inline bool need_preempt(const struct intel_engine_cs *engine,
> > -                             const struct i915_request *rq)
> > +                             const struct i915_request *rq,
> > +                             struct rb_node *rb)
> >   {
> >       int last_prio;
> >   
> > @@ -263,6 +300,25 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
> >           rq_prio(list_next_entry(rq, link)) > last_prio)
> >               return true;
> >   
> > +     if (rb) {
> > +             struct virtual_engine *ve =
> > +                     rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> > +             bool preempt = false;
> > +
> > +             if (engine == ve->siblings[0]) { /* only preempt one sibling */
> > +                     struct i915_request *next;
> > +
> > +                     rcu_read_lock();
> > +                     next = READ_ONCE(ve->request);
> > +                     if (next)
> > +                             preempt = rq_prio(next) > last_prio;
> > +                     rcu_read_unlock();
> > +             }
> > +
> > +             if (preempt)
> > +                     return preempt;
> > +     }
> > +
> >       /*
> >        * If the inflight context did not trigger the preemption, then maybe
> >        * it was the set of queued requests? Pick the highest priority in
> > @@ -381,6 +437,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
> >       list_for_each_entry_safe_reverse(rq, rn,
> >                                        &engine->timeline.requests,
> >                                        link) {
> > +             struct intel_engine_cs *owner;
> > +
> >               if (i915_request_completed(rq))
> >                       break;
> >   
> > @@ -389,16 +447,32 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
> >   
> >               GEM_BUG_ON(rq->hw_context->active);
> >   
> > -             GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
> > -             if (rq_prio(rq) != prio) {
> > -                     prio = rq_prio(rq);
> > -                     pl = i915_sched_lookup_priolist(engine, prio);
> > -             }
> > -             GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
> > +             /*
> > +              * Push the request back into the queue for later resubmission.
> > +              * If this request is not native to this physical engine (i.e.
> > +              * it came from a virtual source), push it back onto the virtual
> > +              * engine so that it can be moved across onto another physical
> > +              * engine as load dictates.
> > +              */
> > +             owner = rq->hw_context->engine;
> > +             if (likely(owner == engine)) {
> > +                     GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
> > +                     if (rq_prio(rq) != prio) {
> > +                             prio = rq_prio(rq);
> > +                             pl = i915_sched_lookup_priolist(engine, prio);
> > +                     }
> > +                     GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
> >   
> > -             list_add(&rq->sched.link, pl);
> > +                     list_add(&rq->sched.link, pl);
> > +                     active = rq;
> > +             } else {
> > +                     if (__i915_request_has_started(rq))
> > +                             rq->sched.attr.priority |= ACTIVE_PRIORITY;
> >   
> > -             active = rq;
> > +                     rq->engine = owner;
> > +                     owner->submit_request(rq);
> > +                     active = NULL;
> > +             }
> >       }
> >   
> >       /*
> > @@ -418,7 +492,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
> >        * in the priority queue, but they will not gain immediate access to
> >        * the GPU.
> >        */
> > -     if (~prio & ACTIVE_PRIORITY && __i915_request_has_started(active)) {
> > +     if ( ~prio & ACTIVE_PRIORITY &&
> > +         active && __i915_request_has_started(active)) {
> >               prio |= ACTIVE_PRIORITY;
> >               active->sched.attr.priority = prio;
> >               list_move_tail(&active->sched.link,
> > @@ -658,6 +733,72 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
> >                                                 execlists));
> >   }
> >   
> > +static void virtual_update_register_offsets(u32 *regs,
> > +                                         struct intel_engine_cs *engine)
> > +{
> > +     u32 base = engine->mmio_base;
> > +
> > +     regs[CTX_CONTEXT_CONTROL] =
> > +             i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base));
> > +     regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
> > +     regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base));
> > +     regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base));
> > +     regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base));
> > +
> > +     regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
> > +     regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
> > +     regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
> > +     regs[CTX_SECOND_BB_HEAD_U] =
> > +             i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
> > +     regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
> > +     regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
> > +
> > +     regs[CTX_CTX_TIMESTAMP] =
> > +             i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
> > +     regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3));
> > +     regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3));
> > +     regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2));
> > +     regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 2));
> > +     regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 1));
> > +     regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 1));
> > +     regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
> > +     regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
> > +
> > +     if (engine->class == RENDER_CLASS) {
> > +             regs[CTX_RCS_INDIRECT_CTX] =
> > +                     i915_mmio_reg_offset(RING_INDIRECT_CTX(base));
> > +             regs[CTX_RCS_INDIRECT_CTX_OFFSET] =
> > +                     i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base));
> > +             regs[CTX_BB_PER_CTX_PTR] =
> > +                     i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));
> > +
> > +             regs[CTX_R_PWR_CLK_STATE] =
> > +                     i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
> > +     }
> > +}
> > +
> > +static bool virtual_matches(const struct virtual_engine *ve,
> > +                         const struct i915_request *rq,
> > +                         const struct intel_engine_cs *engine)
> > +{
> > +     const struct intel_engine_cs *active;
> > +
> > +     /*
> > +      * We track when the HW has completed saving the context image
> > +      * (i.e. when we have seen the final CS event switching out of
> > +      * the context) and must not overwrite the context image before
> > +      * then. This restricts us to only using the active engine
> > +      * while the previous virtualized request is inflight (so
> > +      * we reuse the register offsets). This is a very small
> > +      * hysteresis on the greedy selection algorithm.
> > +      */
> > +     active = READ_ONCE(ve->context.active);
> > +     if (active && active != engine)
> > +             return false;
> > +
> > +     return true;
> > +}
> > +
> >   static void execlists_dequeue(struct intel_engine_cs *engine)
> >   {
> >       struct intel_engine_execlists * const execlists = &engine->execlists;
> > @@ -690,6 +831,26 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >        * and context switches) submission.
> >        */
> >   
> > +     for (rb = rb_first_cached(&execlists->virtual); rb; ) {
> > +             struct virtual_engine *ve =
> > +                     rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> > +             struct i915_request *rq = READ_ONCE(ve->request);
> > +
> > +             if (!rq) { /* lazily cleanup after another engine handled rq */
> > +                     rb_erase_cached(rb, &execlists->virtual);
> > +                     RB_CLEAR_NODE(rb);
> > +                     rb = rb_first_cached(&execlists->virtual);
> > +                     continue;
> > +             }
> > +
> > +             if (!virtual_matches(ve, rq, engine)) {
> > +                     rb = rb_next(rb);
> > +                     continue;
> > +             }
> > +
> > +             break;
> > +     }
> > +
> >       if (last) {
> >               /*
> >                * Don't resubmit or switch until all outstanding
> > @@ -711,7 +872,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >               if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
> >                       return;
> >   
> > -             if (need_preempt(engine, last)) {
> > +             if (need_preempt(engine, last, rb)) {
> >                       inject_preempt_context(engine);
> >                       return;
> >               }
> > @@ -751,6 +912,89 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >               last->tail = last->wa_tail;
> >       }
> >   
> > +     while (rb) { /* XXX virtual is always taking precedence */
> > +             struct virtual_engine *ve =
> > +                     rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> > +             struct i915_request *rq;
> > +
> > +             spin_lock(&ve->base.timeline.lock);
> > +
> > +             rq = ve->request;
> > +             if (unlikely(!rq)) { /* lost the race to a sibling */
> > +                     spin_unlock(&ve->base.timeline.lock);
> > +                     rb_erase_cached(rb, &execlists->virtual);
> > +                     RB_CLEAR_NODE(rb);
> > +                     rb = rb_first_cached(&execlists->virtual);
> > +                     continue;
> > +             }
> > +
> > +             GEM_BUG_ON(rq != ve->request);
> > +             GEM_BUG_ON(rq->engine != &ve->base);
> > +             GEM_BUG_ON(rq->hw_context != &ve->context);
> > +
> > +             if (rq_prio(rq) >= queue_prio(execlists)) {
> > +                     if (!virtual_matches(ve, rq, engine)) {
> > +                             spin_unlock(&ve->base.timeline.lock);
> > +                             rb = rb_next(rb);
> > +                             continue;
> > +                     }
> > +
> > +                     if (last && !can_merge_rq(last, rq)) {
> > +                             spin_unlock(&ve->base.timeline.lock);
> > +                             return; /* leave this rq for another engine */
> > +                     }
> > +
> > +                     GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
> > +                               engine->name,
> > +                               rq->fence.context,
> > +                               rq->fence.seqno,
> > +                               i915_request_completed(rq) ? "!" :
> > +                               i915_request_started(rq) ? "*" :
> > +                               "",
> > +                               yesno(engine != ve->siblings[0]));
> > +
> > +                     ve->request = NULL;
> > +                     ve->base.execlists.queue_priority_hint = INT_MIN;
> > +                     rb_erase_cached(rb, &execlists->virtual);
> > +                     RB_CLEAR_NODE(rb);
> > +
> > +                     rq->engine = engine;
> > +
> > +                     if (engine != ve->siblings[0]) {
> > +                             u32 *regs = ve->context.lrc_reg_state;
> > +                             unsigned int n;
> > +
> > +                             GEM_BUG_ON(READ_ONCE(ve->context.active));
> > +                             virtual_update_register_offsets(regs, engine);
> > +
> > +                             /*
> > +                              * Move the bound engine to the top of the list
> > +                              * for future execution. We then kick this
> > +                              * tasklet first before checking others, so that
> > +                              * we preferentially reuse this set of bound
> > +                              * registers.
> > +                              */
> > +                             for (n = 1; n < ve->num_siblings; n++) {
> > +                                     if (ve->siblings[n] == engine) {
> > +                                             swap(ve->siblings[n],
> > +                                                  ve->siblings[0]);
> > +                                             break;
> > +                                     }
> > +                             }
> > +
> > +                             GEM_BUG_ON(ve->siblings[0] != engine);
> > +                     }
> > +
> > +                     __i915_request_submit(rq);
> > +                     trace_i915_request_in(rq, port_index(port, execlists));
> > +                     submit = true;
> > +                     last = rq;
> > +             }
> > +
> > +             spin_unlock(&ve->base.timeline.lock);
> > +             break;
> > +     }
> > +
> >       while ((rb = rb_first_cached(&execlists->queue))) {
> >               struct i915_priolist *p = to_priolist(rb);
> >               struct i915_request *rq, *rn;
> > @@ -1874,6 +2118,25 @@ static void reset_csb_pointers(struct intel_engine_execlists *execlists)
> >                              &execlists->csb_status[reset_value]);
> >   }
> >   
> > +static struct i915_request *active_request(struct i915_request *rq)
> > +{
> > +     const struct list_head * const list = &rq->engine->timeline.requests;
> > +     const struct intel_context * const context = rq->hw_context;
> > +     struct i915_request *active = NULL;
> > +
> > +     list_for_each_entry_from_reverse(rq, list, link) {
> > +             if (i915_request_completed(rq))
> > +                     break;
> > +
> > +             if (rq->hw_context != context)
> > +                     break;
> > +
> > +             active = rq;
> > +     }
> > +
> > +     return active;
> > +}
> > +
> >   static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
> >   {
> >       struct intel_engine_execlists * const execlists = &engine->execlists;
> > @@ -1894,7 +2157,8 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
> >       if (!port_isset(execlists->port))
> >               goto out_clear;
> >   
> > -     ce = port_request(execlists->port)->hw_context;
> > +     rq = port_request(execlists->port);
> > +     ce = rq->hw_context;
> >   
> >       /*
> >        * Catch up with any missed context-switch interrupts.
> > @@ -1907,16 +2171,10 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
> >        */
> >       execlists_cancel_port_requests(execlists);
> >   
> > -     /* Push back any incomplete requests for replay after the reset. */
> > -     rq = __unwind_incomplete_requests(engine);
> > +     rq = active_request(rq);
> >       if (!rq)
> >               goto out_replay;
> >   
> > -     if (rq->hw_context != ce) { /* caught just before a CS event */
> > -             rq = NULL;
> > -             goto out_replay;
> > -     }
> > -
> >       /*
> >        * If this request hasn't started yet, e.g. it is waiting on a
> >        * semaphore, we need to avoid skipping the request or else we
> > @@ -1963,13 +2221,16 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
> >       }
> >       execlists_init_reg_state(regs, ce, engine, ce->ring);
> >   
> > -     /* Rerun the request; its payload has been neutered (if guilty). */
> >   out_replay:
> > +     /* Rerun the request; its payload has been neutered (if guilty). */
> >       ce->ring->head =
> >               rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail;
> >       intel_ring_update_space(ce->ring);
> >       __execlists_update_reg_state(ce, engine);
> >   
> > +     /* Push back any incomplete requests for replay after the reset. */
> > +     __unwind_incomplete_requests(engine);
> > +
> >   out_clear:
> >       execlists_clear_all_active(execlists);
> >   }
> > @@ -2043,6 +2304,26 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
> >               i915_priolist_free(p);
> >       }
> >   
> > +     /* Cancel all attached virtual engines */
> > +     while ((rb = rb_first_cached(&execlists->virtual))) {
> > +             struct virtual_engine *ve =
> > +                     rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> > +
> > +             rb_erase_cached(rb, &execlists->virtual);
> > +             RB_CLEAR_NODE(rb);
> > +
> > +             spin_lock(&ve->base.timeline.lock);
> > +             if (ve->request) {
> > +                     ve->request->engine = engine;
> > +                     __i915_request_submit(ve->request);
> > +                     dma_fence_set_error(&ve->request->fence, -EIO);
> > +                     i915_request_mark_complete(ve->request);
> > +                     ve->base.execlists.queue_priority_hint = INT_MIN;
> > +                     ve->request = NULL;
> > +             }
> > +             spin_unlock(&ve->base.timeline.lock);
> > +     }
> > +
> >       /* Remaining _unready_ requests will be nop'ed when submitted */
> >   
> >       execlists->queue_priority_hint = INT_MIN;
> > @@ -2760,6 +3041,318 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
> >       return ret;
> >   }
> >   
> > +static void virtual_context_destroy(struct kref *kref)
> > +{
> > +     struct virtual_engine *ve =
> > +             container_of(kref, typeof(*ve), context.ref);
> > +     unsigned int n;
> > +
> > +     GEM_BUG_ON(ve->request);
> > +     GEM_BUG_ON(ve->context.active);
> > +
> > +     for (n = 0; n < ve->num_siblings; n++) {
> > +             struct intel_engine_cs *sibling = ve->siblings[n];
> > +             struct rb_node *node = &ve->nodes[sibling->id].rb;
> > +
> > +             if (RB_EMPTY_NODE(node))
> > +                     continue;
> > +
> > +             spin_lock_irq(&sibling->timeline.lock);
> > +
> > +             if (!RB_EMPTY_NODE(node))
> > +                     rb_erase_cached(node, &sibling->execlists.virtual);
> > +
> > +             spin_unlock_irq(&sibling->timeline.lock);
> > +     }
> > +     GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
> > +
> > +     if (ve->context.state)
> > +             __execlists_context_fini(&ve->context);
> > +
> > +     i915_timeline_fini(&ve->base.timeline);
> > +     kfree(ve);
> > +}
> > +
> > +static void virtual_engine_initial_hint(struct virtual_engine *ve)
> > +{
> > +     int swp;
> > +
> > +     /*
> > +      * Pick a random sibling on starting to help spread the load around.
> > +      *
> > +      * New contexts are typically created with exactly the same order
> > +      * of siblings, and often started in batches. Due to the way we iterate
> > +      * the array of siblings when submitting requests, sibling[0] is
> > +      * prioritised for dequeuing. If we make sure that sibling[0] is fairly
> > +      * randomised across the system, we also help spread the load by the
> > +      * first engine we inspect being different each time.
> > +      *
> > +      * NB This does not force us to execute on this engine; it will just
> > +      * typically be the first we inspect for submission.
> > +      */
> > +     swp = prandom_u32_max(ve->num_siblings);
> > +     if (!swp)
> > +             return;
> > +
> > +     swap(ve->siblings[swp], ve->siblings[0]);
> > +     virtual_update_register_offsets(ve->context.lrc_reg_state,
> > +                                     ve->siblings[0]);
> > +}
> > +
> > +static int virtual_context_pin(struct intel_context *ce)
> > +{
> > +     struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
> > +     int err;
> > +
> > +     /* Note: we must use a real engine class for setting up reg state */
> > +     err = __execlists_context_pin(ce, ve->siblings[0]);
> > +     if (err)
> > +             return err;
> > +
> > +     virtual_engine_initial_hint(ve);
> > +     return 0;
> > +}
> > +
> > +static void virtual_context_enter(struct intel_context *ce)
> > +{
> > +     struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
> > +     unsigned int n;
> > +
> > +     for (n = 0; n < ve->num_siblings; n++)
> > +             intel_engine_pm_get(ve->siblings[n]);
> > +}
> > +
> > +static void virtual_context_exit(struct intel_context *ce)
> > +{
> > +     struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
> > +     unsigned int n;
> > +
> > +     for (n = 0; n < ve->num_siblings; n++)
> > +             intel_engine_pm_put(ve->siblings[n]);
> > +}
> > +
> > +static const struct intel_context_ops virtual_context_ops = {
> > +     .pin = virtual_context_pin,
> > +     .unpin = execlists_context_unpin,
> > +
> > +     .enter = virtual_context_enter,
> > +     .exit = virtual_context_exit,
> > +
> > +     .destroy = virtual_context_destroy,
> > +};
> > +
> > +static void virtual_submission_tasklet(unsigned long data)
> > +{
> > +     struct virtual_engine * const ve = (struct virtual_engine *)data;
> > +     const int prio = ve->base.execlists.queue_priority_hint;
> > +     unsigned int n;
> > +
> > +     local_irq_disable();
> > +     for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
> > +             struct intel_engine_cs *sibling = ve->siblings[n];
> > +             struct ve_node * const node = &ve->nodes[sibling->id];
> > +             struct rb_node **parent, *rb;
> > +             bool first;
> > +
> > +             spin_lock(&sibling->timeline.lock);
> > +
> > +             if (!RB_EMPTY_NODE(&node->rb)) {
> > +                     /*
> > +                      * Cheat and avoid rebalancing the tree if we can
> > +                      * reuse this node in situ.
> > +                      */
> > +                     first = rb_first_cached(&sibling->execlists.virtual) ==
> > +                             &node->rb;
> > +                     if (prio == node->prio || (prio > node->prio && first))
> > +                             goto submit_engine;
> > +
> > +                     rb_erase_cached(&node->rb, &sibling->execlists.virtual);
> > +             }
> > +
> > +             rb = NULL;
> > +             first = true;
> > +             parent = &sibling->execlists.virtual.rb_root.rb_node;
> > +             while (*parent) {
> > +                     struct ve_node *other;
> > +
> > +                     rb = *parent;
> > +                     other = rb_entry(rb, typeof(*other), rb);
> > +                     if (prio > other->prio) {
> > +                             parent = &rb->rb_left;
> > +                     } else {
> > +                             parent = &rb->rb_right;
> > +                             first = false;
> > +                     }
> > +             }
> > +
> > +             rb_link_node(&node->rb, rb, parent);
> > +             rb_insert_color_cached(&node->rb,
> > +                                    &sibling->execlists.virtual,
> > +                                    first);
> > +
> > +submit_engine:
> > +             GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
> > +             node->prio = prio;
> > +             if (first && prio > sibling->execlists.queue_priority_hint) {
> > +                     sibling->execlists.queue_priority_hint = prio;
> > +                     tasklet_hi_schedule(&sibling->execlists.tasklet);
> > +             }
> > +
> > +             spin_unlock(&sibling->timeline.lock);
> > +     }
> > +     local_irq_enable();
> > +}
> > +
> > +static void virtual_submit_request(struct i915_request *rq)
> > +{
> > +     struct virtual_engine *ve = to_virtual_engine(rq->engine);
> > +
> > +     GEM_TRACE("%s: rq=%llx:%lld\n",
> > +               ve->base.name,
> > +               rq->fence.context,
> > +               rq->fence.seqno);
> > +
> > +     GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
> > +
> > +     GEM_BUG_ON(ve->request);
> > +     ve->base.execlists.queue_priority_hint = rq_prio(rq);
> > +     WRITE_ONCE(ve->request, rq);
> > +
> > +     tasklet_schedule(&ve->base.execlists.tasklet);
> > +}
> > +
> > +struct intel_context *
> > +intel_execlists_create_virtual(struct i915_gem_context *ctx,
> > +                            struct intel_engine_cs **siblings,
> > +                            unsigned int count)
> > +{
> > +     struct virtual_engine *ve;
> > +     unsigned int n;
> > +     int err;
> > +
> > +     if (count == 0)
> > +             return ERR_PTR(-EINVAL);
> > +
> > +     if (count == 1)
> > +             return intel_context_create(ctx, siblings[0]);
> > +
> > +     ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
> > +     if (!ve)
> > +             return ERR_PTR(-ENOMEM);
> > +
> > +     ve->base.i915 = ctx->i915;
> > +     ve->base.id = -1;
> > +     ve->base.class = OTHER_CLASS;
> > +     ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
> > +     ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
> > +     ve->base.flags = I915_ENGINE_IS_VIRTUAL;
> > +
> > +     snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
> > +
> > +     err = i915_timeline_init(ctx->i915, &ve->base.timeline, NULL);
> > +     if (err)
> > +             goto err_put;
> > +     i915_timeline_set_subclass(&ve->base.timeline, TIMELINE_VIRTUAL);
> > +
> > +     intel_engine_init_execlists(&ve->base);
> > +
> > +     ve->base.cops = &virtual_context_ops;
> > +     ve->base.request_alloc = execlists_request_alloc;
> > +
> > +     ve->base.schedule = i915_schedule;
> > +     ve->base.submit_request = virtual_submit_request;
> > +
> > +     ve->base.execlists.queue_priority_hint = INT_MIN;
> > +     tasklet_init(&ve->base.execlists.tasklet,
> > +                  virtual_submission_tasklet,
> > +                  (unsigned long)ve);
> > +
> > +     intel_context_init(&ve->context, ctx, &ve->base);
> > +
> > +     for (n = 0; n < count; n++) {
> > +             struct intel_engine_cs *sibling = siblings[n];
> > +
> > +             GEM_BUG_ON(!is_power_of_2(sibling->mask));
> > +             if (sibling->mask & ve->base.mask) {
> > +                     DRM_DEBUG("duplicate %s entry in load balancer\n",
> > +                               sibling->name);
> > +                     err = -EINVAL;
> > +                     goto err_put;
> > +             }
> > +
> > +             /*
> > +              * The virtual engine implementation is tightly coupled to
> > +              * the execlists backend -- we push requests directly
> > +              * into a tree inside each physical engine. We could support
> > +              * layering if we handle cloning of the requests and
> > +              * submitting a copy into each backend.
> > +              */
> > +             if (sibling->execlists.tasklet.func !=
> > +                 execlists_submission_tasklet) {
> > +                     err = -ENODEV;
> > +                     goto err_put;
> > +             }
> > +
> > +             GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
> > +             RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
> > +
> > +             ve->siblings[ve->num_siblings++] = sibling;
> > +             ve->base.mask |= sibling->mask;
> > +
> > +             /*
> > +              * All physical engines must be compatible for their emission
> > +              * functions (as we build the instructions during request
> > +              * construction and do not alter them before submission
> > +              * on the physical engine). We use the engine class as a guide
> > +              * here, although that could be refined.
> > +              */
> > +             if (ve->base.class != OTHER_CLASS) {
> > +                     if (ve->base.class != sibling->class) {
> > +                             DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
> > +                                       sibling->class, ve->base.class);
> > +                             err = -EINVAL;
> > +                             goto err_put;
> > +                     }
> > +                     continue;
> > +             }
> > +
> > +             ve->base.class = sibling->class;
> > +             ve->base.uabi_class = sibling->uabi_class;
> > +             snprintf(ve->base.name, sizeof(ve->base.name),
> > +                      "v%dx%d", ve->base.class, count);
> > +             ve->base.context_size = sibling->context_size;
> > +
> > +             ve->base.emit_bb_start = sibling->emit_bb_start;
> > +             ve->base.emit_flush = sibling->emit_flush;
> > +             ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
> > +             ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
> > +             ve->base.emit_fini_breadcrumb_dw =
> > +                     sibling->emit_fini_breadcrumb_dw;
> > +     }
> > +
> > +     return &ve->context;
> > +
> > +err_put:
> > +     intel_context_put(&ve->context);
> > +     return ERR_PTR(err);
> > +}
> > +
> > +struct intel_context *
> > +intel_execlists_clone_virtual(struct i915_gem_context *ctx,
> > +                           struct intel_engine_cs *src)
> > +{
> > +     struct virtual_engine *se = to_virtual_engine(src);
> > +     struct intel_context *dst;
> > +
> > +     dst = intel_execlists_create_virtual(ctx,
> > +                                          se->siblings,
> > +                                          se->num_siblings);
> > +     if (IS_ERR(dst))
> > +             return dst;
> > +
> > +     return dst;
> > +}
> > +
> >   void intel_execlists_show_requests(struct intel_engine_cs *engine,
> >                                  struct drm_printer *m,
> >                                  void (*show_request)(struct drm_printer *m,
> > @@ -2817,6 +3410,29 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
> >               show_request(m, last, "\t\tQ ");
> >       }
> >   
> > +     last = NULL;
> > +     count = 0;
> > +     for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
> > +             struct virtual_engine *ve =
> > +                     rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> > +             struct i915_request *rq = READ_ONCE(ve->request);
> > +
> > +             if (rq) {
> > +                     if (count++ < max - 1)
> > +                             show_request(m, rq, "\t\tV ");
> > +                     else
> > +                             last = rq;
> > +             }
> > +     }
> > +     if (last) {
> > +             if (count > max) {
> > +                     drm_printf(m,
> > +                                "\t\t...skipping %d virtual requests...\n",
> > +                                count - max);
> > +             }
> > +             show_request(m, last, "\t\tV ");
> > +     }
> > +
> >       spin_unlock_irqrestore(&engine->timeline.lock, flags);
> >   }
> >   
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
> > index a0dc907a7249..5530606052e5 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
> > @@ -114,4 +114,13 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
> >                                                       const char *prefix),
> >                                  unsigned int max);
> >   
> > +struct intel_context *
> > +intel_execlists_create_virtual(struct i915_gem_context *ctx,
> > +                            struct intel_engine_cs **siblings,
> > +                            unsigned int count);
> > +
> > +struct intel_context *
> > +intel_execlists_clone_virtual(struct i915_gem_context *ctx,
> > +                           struct intel_engine_cs *src);
> > +
> >   #endif /* _INTEL_LRC_H_ */
> > diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> > index 6f223f7facde..558399e5c7bb 100644
> > --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> > @@ -1301,6 +1301,185 @@ static int live_preempt_smoke(void *arg)
> >       return err;
> >   }
> >   
> > +static int nop_virtual_engine(struct drm_i915_private *i915,
> > +                           struct intel_engine_cs **siblings,
> > +                           unsigned int nsibling,
> > +                           unsigned int nctx,
> > +                           unsigned int flags)
> > +#define CHAIN BIT(0)
> > +{
> > +     IGT_TIMEOUT(end_time);
> > +     struct i915_request *request[16];
> > +     struct i915_gem_context *ctx[16];
> > +     struct intel_context *ve[16];
> > +     unsigned long n, prime, nc;
> > +     struct igt_live_test t;
> > +     ktime_t times[2] = {};
> > +     int err;
> > +
> > +     GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx));
> > +
> > +     for (n = 0; n < nctx; n++) {
> > +             ctx[n] = kernel_context(i915);
> > +             if (!ctx[n]) {
> > +                     err = -ENOMEM;
> > +                     nctx = n;
> > +                     goto out;
> > +             }
> > +
> > +             ve[n] = intel_execlists_create_virtual(ctx[n],
> > +                                                    siblings, nsibling);
> > +             if (IS_ERR(ve[n])) {
> > +                     kernel_context_close(ctx[n]);
> > +                     err = PTR_ERR(ve[n]);
> > +                     nctx = n;
> > +                     goto out;
> > +             }
> > +
> > +             err = intel_context_pin(ve[n]);
> > +             if (err) {
> > +                     intel_context_put(ve[n]);
> > +                     kernel_context_close(ctx[n]);
> > +                     nctx = n;
> > +                     goto out;
> > +             }
> > +     }
> > +
> > +     err = igt_live_test_begin(&t, i915, __func__, ve[0]->engine->name);
> > +     if (err)
> > +             goto out;
> > +
> > +     for_each_prime_number_from(prime, 1, 8192) {
> > +             times[1] = ktime_get_raw();
> > +
> > +             if (flags & CHAIN) {
> > +                     for (nc = 0; nc < nctx; nc++) {
> > +                             for (n = 0; n < prime; n++) {
> > +                                     request[nc] =
> > +                                             i915_request_create(ve[nc]);
> > +                                     if (IS_ERR(request[nc])) {
> > +                                             err = PTR_ERR(request[nc]);
> > +                                             goto out;
> > +                                     }
> > +
> > +                                     i915_request_add(request[nc]);
> > +                             }
> > +                     }
> > +             } else {
> > +                     for (n = 0; n < prime; n++) {
> > +                             for (nc = 0; nc < nctx; nc++) {
> > +                                     request[nc] =
> > +                                             i915_request_create(ve[nc]);
> > +                                     if (IS_ERR(request[nc])) {
> > +                                             err = PTR_ERR(request[nc]);
> > +                                             goto out;
> > +                                     }
> > +
> > +                                     i915_request_add(request[nc]);
> > +                             }
> > +                     }
> > +             }
> > +
> > +             for (nc = 0; nc < nctx; nc++) {
> > +                     if (i915_request_wait(request[nc],
> > +                                           I915_WAIT_LOCKED,
> > +                                           HZ / 10) < 0) {
> > +                             pr_err("%s(%s): wait for %llx:%lld timed out\n",
> > +                                    __func__, ve[0]->engine->name,
> > +                                    request[nc]->fence.context,
> > +                                    request[nc]->fence.seqno);
> > +
> > +                             GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
> > +                                       __func__, ve[0]->engine->name,
> > +                                       request[nc]->fence.context,
> > +                                       request[nc]->fence.seqno);
> > +                             GEM_TRACE_DUMP();
> > +                             i915_gem_set_wedged(i915);
> > +                             break;
> > +                     }
> > +             }
> > +
> > +             times[1] = ktime_sub(ktime_get_raw(), times[1]);
> > +             if (prime == 1)
> > +                     times[0] = times[1];
> > +
> > +             if (__igt_timeout(end_time, NULL))
> > +                     break;
> > +     }
> > +
> > +     err = igt_live_test_end(&t);
> > +     if (err)
> > +             goto out;
> > +
> > +     pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
> > +             nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
> > +             prime, div64_u64(ktime_to_ns(times[1]), prime));
> > +
> > +out:
> > +     if (igt_flush_test(i915, I915_WAIT_LOCKED))
> > +             err = -EIO;
> > +
> > +     for (nc = 0; nc < nctx; nc++) {
> > +             intel_context_unpin(ve[nc]);
> > +             intel_context_put(ve[nc]);
> > +             kernel_context_close(ctx[nc]);
> > +     }
> > +     return err;
> > +}
> > +
> > +static int live_virtual_engine(void *arg)
> > +{
> > +     struct drm_i915_private *i915 = arg;
> > +     struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
> > +     struct intel_engine_cs *engine;
> > +     enum intel_engine_id id;
> > +     unsigned int class, inst;
> > +     int err = -ENODEV;
> > +
> > +     if (USES_GUC_SUBMISSION(i915))
> > +             return 0;
> > +
> > +     mutex_lock(&i915->drm.struct_mutex);
> > +
> > +     for_each_engine(engine, i915, id) {
> > +             err = nop_virtual_engine(i915, &engine, 1, 1, 0);
> > +             if (err) {
> > +                     pr_err("Failed to wrap engine %s: err=%d\n",
> > +                            engine->name, err);
> > +                     goto out_unlock;
> > +             }
> > +     }
> > +
> > +     for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
> > +             int nsibling, n;
> > +
> > +             nsibling = 0;
> > +             for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
> > +                     if (!i915->engine_class[class][inst])
> > +                             break;
> 
> I think this should continue rather than break, to be able to use
> vcs0 + vcs2 on Icelake.
> 
> > +
> > +                     siblings[nsibling++] = i915->engine_class[class][inst];
> > +             }
> > +             if (nsibling < 2)
> > +                     continue;
> 
> Could we test a single sibling as well, no?
> 
> > +
> > +             for (n = 1; n <= nsibling + 1; n++) {
> > +                     err = nop_virtual_engine(i915, siblings, nsibling,
> > +                                              n, 0);
> > +                     if (err)
> > +                             goto out_unlock;
> > +             }
> > +
> > +             err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN);
> > +             if (err)
> > +                     goto out_unlock;
> > +     }
> > +
> > +out_unlock:
> > +     mutex_unlock(&i915->drm.struct_mutex);
> > +     return err;
> > +}
> > +
> >   int intel_execlists_live_selftests(struct drm_i915_private *i915)
> >   {
> >       static const struct i915_subtest tests[] = {
> > @@ -1313,6 +1492,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
> >               SUBTEST(live_chain_preempt),
> >               SUBTEST(live_preempt_hang),
> >               SUBTEST(live_preempt_smoke),
> > +             SUBTEST(live_virtual_engine),
> >       };
> >   
> >       if (!HAS_EXECLISTS(i915))
> > diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
> > index 67f8a4a807a0..fe82d3571072 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.h
> > +++ b/drivers/gpu/drm/i915/i915_gem.h
> > @@ -91,4 +91,9 @@ static inline bool __tasklet_enable(struct tasklet_struct *t)
> >       return atomic_dec_and_test(&t->count);
> >   }
> >   
> > +static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
> > +{
> > +     return test_bit(TASKLET_STATE_SCHED, &t->state);
> > +}
> > +
> >   #endif /* __I915_GEM_H__ */
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> > index ba7582d955d1..57b09f624bb4 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > @@ -86,6 +86,7 @@
> >    */
> >   
> >   #include <linux/log2.h>
> > +#include <linux/nospec.h>
> >   
> >   #include <drm/i915_drm.h>
> >   
> > @@ -1209,7 +1210,6 @@ __intel_context_reconfigure_sseu(struct intel_context *ce,
> >       int ret;
> >   
> >       GEM_BUG_ON(INTEL_GEN(ce->gem_context->i915) < 8);
> > -     GEM_BUG_ON(ce->engine->id != RCS0);
> >   
> >       ret = intel_context_lock_pinned(ce);
> >       if (ret)
> > @@ -1397,7 +1397,102 @@ struct set_engines {
> >       struct i915_gem_engines *engines;
> >   };
> >   
> > +static int
> > +set_engines__load_balance(struct i915_user_extension __user *base, void *data)
> > +{
> > +     struct i915_context_engines_load_balance __user *ext =
> > +             container_of_user(base, typeof(*ext), base);
> > +     const struct set_engines *set = data;
> > +     struct intel_engine_cs *stack[16];
> > +     struct intel_engine_cs **siblings;
> > +     struct intel_context *ce;
> > +     u16 num_siblings, idx;
> > +     unsigned int n;
> > +     int err;
> > +
> > +     if (!HAS_EXECLISTS(set->ctx->i915))
> > +             return -ENODEV;
> > +
> > +     if (USES_GUC_SUBMISSION(set->ctx->i915))
> > +             return -ENODEV; /* not implemented yet */
> > +
> > +     if (get_user(idx, &ext->engine_index))
> > +             return -EFAULT;
> > +
> > +     if (idx >= set->engines->num_engines) {
> > +             DRM_DEBUG("Invalid placement value, %d >= %d\n",
> > +                       idx, set->engines->num_engines);
> > +             return -EINVAL;
> > +     }
> > +
> > +     idx = array_index_nospec(idx, set->engines->num_engines);
> > +     if (set->engines->engines[idx]) {
> > +             DRM_DEBUG("Invalid placement[%d], already occupied\n", idx);
> > +             return -EEXIST;
> > +     }
> > +
> > +     if (get_user(num_siblings, &ext->num_siblings))
> > +             return -EFAULT;
> > +
> > +     err = check_user_mbz(&ext->flags);
> > +     if (err)
> > +             return err;
> > +
> > +     for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
> > +             err = check_user_mbz(&ext->mbz64[n]);
> > +             if (err)
> > +                     return err;
> > +     }
> > +
> > +     siblings = stack;
> > +     if (num_siblings > ARRAY_SIZE(stack)) {
> > +             siblings = kmalloc_array(num_siblings,
> > +                                      sizeof(*siblings),
> > +                                      GFP_KERNEL);
> > +             if (!siblings)
> > +                     return -ENOMEM;
> > +     }
> > +
> > +     for (n = 0; n < num_siblings; n++) {
> > +             struct i915_engine_class_instance ci;
> > +
> > +             if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) {
> > +                     err = -EFAULT;
> > +                     goto out_siblings;
> > +             }
> > +
> > +             siblings[n] = intel_engine_lookup_user(set->ctx->i915,
> > +                                                    ci.engine_class,
> > +                                                    ci.engine_instance);
> > +             if (!siblings[n]) {
> > +                     DRM_DEBUG("Invalid sibling[%d]: { class:%d, inst:%d }\n",
> > +                               n, ci.engine_class, ci.engine_instance);
> > +                     err = -EINVAL;
> > +                     goto out_siblings;
> > +             }
> > +     }
> > +
> > +     ce = intel_execlists_create_virtual(set->ctx, siblings, n);
> > +     if (IS_ERR(ce)) {
> > +             err = PTR_ERR(ce);
> > +             goto out_siblings;
> > +     }
> > +
> > +     if (cmpxchg(&set->engines->engines[idx], NULL, ce)) {
> > +             intel_context_put(ce);
> > +             err = -EEXIST;
> > +             goto out_siblings;
> > +     }
> > +
> > +out_siblings:
> > +     if (siblings != stack)
> > +             kfree(siblings);
> > +
> > +     return err;
> > +}
> > +
> >   static const i915_user_extension_fn set_engines__extensions[] = {
> > +     [I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
> >   };
> >   
> >   static int
> > @@ -1696,14 +1791,29 @@ static int clone_engines(struct i915_gem_context *dst,
> >   
> >       clone->i915 = dst->i915;
> >       for (n = 0; n < e->num_engines; n++) {
> > +             struct intel_engine_cs *engine;
> > +
> >               if (!e->engines[n]) {
> >                       clone->engines[n] = NULL;
> >                       continue;
> >               }
> > +             engine = e->engines[n]->engine;
> >   
> > -             clone->engines[n] =
> > -                     intel_context_create(dst, e->engines[n]->engine);
> > -             if (!clone->engines[n]) {
> > +             /*
> > +              * Virtual engines are singletons; they can only exist
> > +              * inside a single context, because they embed their
> > +              * HW context... As each virtual context implies a single
> > +              * timeline (each engine can only dequeue a single request
> > +              * at any time), it would be surprising for two contexts
> > +              * to use the same engine. So let's create a copy of
> > +              * the virtual engine instead.
> > +              */
> > +             if (intel_engine_is_virtual(engine))
> > +                     clone->engines[n] =
> > +                             intel_execlists_clone_virtual(dst, engine);
> > +             else
> > +                     clone->engines[n] = intel_context_create(dst, engine);
> > +             if (IS_ERR_OR_NULL(clone->engines[n])) {
> >                       __free_engines(clone, n);
> >                       goto err_unlock;
> >               }
> > diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> > index 39bc4f54e272..b58d9c23a876 100644
> > --- a/drivers/gpu/drm/i915/i915_scheduler.c
> > +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> > @@ -248,17 +248,26 @@ sched_lock_engine(const struct i915_sched_node *node,
> >                 struct intel_engine_cs *locked,
> >                 struct sched_cache *cache)
> >   {
> > -     struct intel_engine_cs *engine = node_to_request(node)->engine;
> > +     const struct i915_request *rq = node_to_request(node);
> > +     struct intel_engine_cs *engine;
> >   
> >       GEM_BUG_ON(!locked);
> >   
> > -     if (engine != locked) {
> > +     /*
> > +      * Virtual engines complicate acquiring the engine timeline lock,
> > +      * as their rq->engine pointer is not stable until under that
> > +      * engine lock. The simple ploy we use is to take the lock then
> > +      * check that the rq still belongs to the newly locked engine.
> > +      */
> > +     while (locked != (engine = READ_ONCE(rq->engine))) {
> >               spin_unlock(&locked->timeline.lock);
> >               memset(cache, 0, sizeof(*cache));
> >               spin_lock(&engine->timeline.lock);
> > +             locked = engine;
> >       }
> >   
> > -     return engine;
> > +     GEM_BUG_ON(locked != engine);
> > +     return locked;
> >   }
> >   
> >   static bool inflight(const struct i915_request *rq,
> > @@ -371,8 +380,11 @@ static void __i915_schedule(struct i915_request *rq,
> >               if (prio <= node->attr.priority || node_signaled(node))
> >                       continue;
> >   
> > +             GEM_BUG_ON(node_to_request(node)->engine != engine);
> > +
> >               node->attr.priority = prio;
> >               if (!list_empty(&node->link)) {
> > +                     GEM_BUG_ON(intel_engine_is_virtual(engine));
> >                       if (!cache.priolist)
> >                               cache.priolist =
> >                                       i915_sched_lookup_priolist(engine,
> > diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h
> > index 5256a0b5c5f7..1688705f4a2b 100644
> > --- a/drivers/gpu/drm/i915/i915_timeline_types.h
> > +++ b/drivers/gpu/drm/i915/i915_timeline_types.h
> > @@ -26,6 +26,7 @@ struct i915_timeline {
> >       spinlock_t lock;
> >   #define TIMELINE_CLIENT 0 /* default subclass */
> >   #define TIMELINE_ENGINE 1
> > +#define TIMELINE_VIRTUAL 2
> >       struct mutex mutex; /* protects the flow of requests */
> >   
> >       unsigned int pin_count;
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index 7694113362d4..ff2ababc0984 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -137,6 +137,7 @@ struct i915_engine_class_instance {
> >       __u16 engine_class; /* see enum drm_i915_gem_engine_class */
> >       __u16 engine_instance;
> >   #define I915_ENGINE_CLASS_INVALID_NONE -1
> > +#define I915_ENGINE_CLASS_INVALID_VIRTUAL -2
> >   };
> >   
> >   /**
> > @@ -1607,8 +1608,46 @@ struct drm_i915_gem_context_param_sseu {
> >       __u32 rsvd;
> >   };
> >   
> > +/*
> > + * i915_context_engines_load_balance:
> > + *
> > + * Enable load balancing across this set of engines.
> > + *
> > + * Into the I915_EXEC_DEFAULT slot [0], a virtual engine is created that,
> > + * when used, will proxy the execbuffer request onto one of the set of
> > + * engines in such a way as to distribute the load evenly across the set.
> > + *
> > + * The set of engines must be compatible (e.g. the same HW class) as they
> > + * will share the same logical GPU context and ring.
> > + *
> > + * To intermix rendering with the virtual engine and direct rendering onto
> > + * the backing engines (bypassing the load balancing proxy), the context must
> > + * be defined to use a single timeline for all engines.
> > + */
> > +struct i915_context_engines_load_balance {
> > +     struct i915_user_extension base;
> > +
> > +     __u16 engine_index;
> > +     __u16 num_siblings;
> > +     __u32 flags; /* all undefined flags must be zero */
> > +
> > +     __u64 mbz64[1]; /* reserved for future use; must be zero */
> 
> The earlier question about why this is an array of one was a serious one.

Sorry, I haven't actually pulled in the review comments from last time
for this portion yet. The topic for today was the 10 patches in the
previous series. This series only exists because it is ahead of the
preempt-to-busy + timeslicing work, and I wanted to start getting some
feedback on those two.

(Optimistically hoping for Matthew to clear a few more of the GEM
patches in the meantime.)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
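
As a rough illustration (not part of the patch itself), userspace might
drive the proposed load-balance extension along the following lines.
This sketch assumes the extension ends in a trailing engines[] array of
struct i915_engine_class_instance (the quoted hunk is truncated at
mbz64[]), that the engine map is installed through the
I915_CONTEXT_PARAM_ENGINES setparam introduced earlier in this series,
and that a slot left as {I915_ENGINE_CLASS_INVALID,
I915_ENGINE_CLASS_INVALID_NONE} is the one the extension fills in. The
local wrapper structs and the set_vcs_balancer() helper are
illustrative only.

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/*
 * Sketch only: the local structs mirror the proposed uapi layout
 * (trailing engines[] array assumed). Balances slot [0] of a context
 * across vcs0 and vcs1.
 */
static int set_vcs_balancer(int drm_fd, uint32_t ctx_id)
{
    struct {
        struct i915_user_extension base;
        uint16_t engine_index;
        uint16_t num_siblings;
        uint32_t flags;
        uint64_t mbz64[1];
        struct i915_engine_class_instance engines[2];
    } balance = {};
    struct {
        uint64_t extensions;
        struct i915_engine_class_instance engines[1];
    } set_engines = {};
    struct drm_i915_gem_context_param arg = {
        .ctx_id = ctx_id,
        .param = I915_CONTEXT_PARAM_ENGINES,
        .size = sizeof(set_engines),
        .value = (uint64_t)(uintptr_t)&set_engines,
    };

    /* Create the virtual engine in slot [0] of the engine map. */
    balance.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
    balance.engine_index = 0;
    balance.num_siblings = 2;
    balance.engines[0].engine_class = I915_ENGINE_CLASS_VIDEO;
    balance.engines[0].engine_instance = 0;
    balance.engines[1].engine_class = I915_ENGINE_CLASS_VIDEO;
    balance.engines[1].engine_instance = 1;

    /* Leave slot [0] invalid so the extension can occupy it. */
    set_engines.extensions = (uint64_t)(uintptr_t)&balance;
    set_engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
    set_engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;

    return ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
}

Execbuffer calls that target slot [0] of such a context would then be
proxied onto whichever of vcs0/vcs1 the balancer picks, per the
I915_EXEC_DEFAULT wording in the uapi comment above.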

^ permalink raw reply	[flat|nested] 74+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling (rev2)
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (48 preceding siblings ...)
  2019-04-25 12:09 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling (rev2) Patchwork
@ 2019-04-25 12:25 ` Patchwork
  2019-04-25 12:56 ` ✓ Fi.CI.BAT: success " Patchwork
  2019-04-25 21:52 ` ✗ Fi.CI.IGT: failure " Patchwork
  51 siblings, 0 replies; 74+ messages in thread
From: Patchwork @ 2019-04-25 12:25 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling (rev2)
URL   : https://patchwork.freedesktop.org/series/59933/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915: Seal races between async GPU cancellation, retirement and signaling
Okay!

Commit: drm/i915/gvt: Pin the per-engine GVT shadow contexts
Okay!

Commit: drm/i915: Export intel_context_instance()
-O:drivers/gpu/drm/i915/gt/intel_context.c:131:22: warning: context imbalance in 'intel_context_pin_lock' - wrong count at exit
+drivers/gpu/drm/i915/gt/intel_context.c:131:22: warning: context imbalance in 'intel_context_pin_lock' - wrong count at exit
+drivers/gpu/drm/i915/gt/intel_context.c:150:6: warning: context imbalance in 'intel_context_pin_unlock' - wrong count at exit

Commit: drm/i915/selftests: Use the real kernel context for sseu isolation tests
Okay!

Commit: drm/i915/selftests: Pass around intel_context for sseu
Okay!

Commit: drm/i915: Pass intel_context to intel_context_pin_lock()
-O:drivers/gpu/drm/i915/gt/intel_context.c:131:22: warning: context imbalance in 'intel_context_pin_lock' - wrong count at exit
-O:drivers/gpu/drm/i915/gt/intel_context.c:150:6: warning: context imbalance in 'intel_context_pin_unlock' - wrong count at exit

Commit: drm/i915: Split engine setup/init into two phases
Okay!

Commit: drm/i915: Switch back to an array of logical per-engine HW contexts
+./include/linux/overflow.h:285:13: error: incorrect type in conditional
+./include/linux/overflow.h:285:13: error: undefined identifier '__builtin_mul_overflow'
+./include/linux/overflow.h:285:13:    got void
+./include/linux/overflow.h:285:13: warning: call with no type!
+./include/linux/overflow.h:287:13: error: incorrect type in conditional
+./include/linux/overflow.h:287:13: error: undefined identifier '__builtin_add_overflow'
+./include/linux/overflow.h:287:13:    got void
+./include/linux/overflow.h:287:13: warning: call with no type!

Commit: drm/i915: Remove intel_context.active_link
Okay!

Commit: drm/i915: Move i915_request_alloc into selftests/
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915/execlists: Flush the tasklet on parking
Okay!

Commit: drm/i915: Move the engine->destroy() vfunc onto the engine
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3611:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3605:16: warning: expression using sizeof(void)

Commit: drm/i915: Convert inconsistent static engine tables into an init error
Okay!

Commit: drm/i915: Make engine_mask & num_engines static
Okay!

Commit: drm/i915: Restore control over ppgtt for context creation ABI
Okay!

Commit: drm/i915: Allow a context to define its set of engines
+drivers/gpu/drm/i915/i915_utils.h:84:13: error: incorrect type in conditional
+drivers/gpu/drm/i915/i915_utils.h:84:13: error: undefined identifier '__builtin_mul_overflow'
+drivers/gpu/drm/i915/i915_utils.h:84:13:    got void
+drivers/gpu/drm/i915/i915_utils.h:84:13: warning: call with no type!
+drivers/gpu/drm/i915/i915_utils.h:87:13: error: incorrect type in conditional
+drivers/gpu/drm/i915/i915_utils.h:87:13: error: undefined identifier '__builtin_add_overflow'
+drivers/gpu/drm/i915/i915_utils.h:87:13:    got void
+drivers/gpu/drm/i915/i915_utils.h:87:13: warning: call with no type!
+./include/linux/overflow.h:285:13: error: incorrect type in conditional
+./include/linux/overflow.h:285:13: error: not a function <noident>
+./include/linux/overflow.h:285:13:    got void
+./include/linux/overflow.h:287:13: error: incorrect type in conditional
+./include/linux/overflow.h:287:13: error: not a function <noident>
+./include/linux/overflow.h:287:13:    got void

Commit: drm/i915: Re-expose SINGLE_TIMELINE flags for context creation
Okay!

Commit: drm/i915: Allow userspace to clone contexts on creation
+drivers/gpu/drm/i915/i915_gem_context.c:1844:17: error: bad integer constant expression
+drivers/gpu/drm/i915/i915_gem_context.c:1845:17: error: bad integer constant expression
+drivers/gpu/drm/i915/i915_gem_context.c:1846:17: error: bad integer constant expression
+drivers/gpu/drm/i915/i915_gem_context.c:1847:17: error: bad integer constant expression
+drivers/gpu/drm/i915/i915_gem_context.c:1848:17: error: bad integer constant expression
+drivers/gpu/drm/i915/i915_gem_context.c:1849:17: error: bad integer constant expression
-drivers/gpu/drm/i915/i915_utils.h:84:13: warning: call with no type!
-drivers/gpu/drm/i915/i915_utils.h:87:13: warning: call with no type!
-drivers/gpu/drm/i915/selftests/i915_gem_context.c:1266:25: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/i915_gem_context.c:1266:25: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/i915_gem_context.c:454:16: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/i915_gem_context.c:571:33: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/i915_gem_context.c:571:33: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/i915_gem_context.c:693:33: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/i915_gem_context.c:693:33: warning: expression using sizeof(void)
+./include/linux/overflow.h:285:13: error: incorrect type in conditional
+./include/linux/overflow.h:285:13: error: not a function <noident>
-./include/linux/overflow.h:285:13: warning: call with no type!
+./include/linux/overflow.h:285:13:    got void
+./include/linux/overflow.h:287:13: error: incorrect type in conditional
+./include/linux/overflow.h:287:13: error: not a function <noident>
-./include/linux/overflow.h:287:13: warning: call with no type!
+./include/linux/overflow.h:287:13:    got void
-./include/linux/slab.h:666:13: warning: call with no type!

Commit: drm/i915: Load balancing across a virtual engine
+./include/linux/overflow.h:285:13: error: incorrect type in conditional
+./include/linux/overflow.h:285:13: error: undefined identifier '__builtin_mul_overflow'
+./include/linux/overflow.h:285:13:    got void
+./include/linux/overflow.h:285:13: warning: call with no type!
+./include/linux/overflow.h:287:13: error: incorrect type in conditional
+./include/linux/overflow.h:287:13: error: undefined identifier '__builtin_add_overflow'
+./include/linux/overflow.h:287:13:    got void
+./include/linux/overflow.h:287:13: warning: call with no type!
+./include/linux/slab.h:666:13: error: not a function <noident>

Commit: drm/i915: Apply an execution_mask to the virtual_engine
Okay!

Commit: drm/i915: Extend execution fence to support a callback
Okay!

Commit: drm/i915/execlists: Virtual engine bonding
Okay!

Commit: drm/i915: Allow specification of parallel execbuf
Okay!

Commit: drm/i915: Split GEM object type definition to its own header
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3605:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3606:16: warning: expression using sizeof(void)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Pull GEM ioctls interface to its own file
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3606:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3568:16: warning: expression using sizeof(void)

Commit: drm/i915: Move object->pages API to i915_gem_object.[ch]
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3568:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3437:16: warning: expression using sizeof(void)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Move shmem object setup to its own file
+drivers/gpu/drm/i915/gem/i915_gem_shmem.c:482:36: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:4913:36: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3437:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3427:16: warning: expression using sizeof(void)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Move phys objects to its own file
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3427:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3425:16: warning: expression using sizeof(void)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Move mmap and friends to its own file
+drivers/gpu/drm/i915/gem/i915_gem_mman.c:191:17: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_mman.c:191:17: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_mman.c:191:17: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_mman.c:191:17: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_mman.c:306:32: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_mman.c:306:32: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:1502:17: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:1502:17: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:1502:17: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:1502:17: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:1617:32: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:1617:32: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3425:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3424:16: warning: expression using sizeof(void)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Move GEM domain management to its own file
+drivers/gpu/drm/i915/gem/i915_gem_domain.c:447:34: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_domain.c:447:34: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_gem.c:889:39: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_gem.c:889:39: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:1951:34: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:1951:34: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem.c:889:39: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem.c:889:39: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3424:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3398:16: warning: expression using sizeof(void)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Move more GEM objects under gem/
+drivers/gpu/drm/i915/gem/i915_gem_context.c:2017:17: error: bad integer constant expression
+drivers/gpu/drm/i915/gem/i915_gem_context.c:2018:17: error: bad integer constant expression
+drivers/gpu/drm/i915/gem/i915_gem_context.c:2019:17: error: bad integer constant expression
+drivers/gpu/drm/i915/gem/i915_gem_context.c:2020:17: error: bad integer constant expression
+drivers/gpu/drm/i915/gem/i915_gem_context.c:2021:17: error: bad integer constant expression
+drivers/gpu/drm/i915/gem/i915_gem_context.c:2022:17: error: bad integer constant expression
+drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:1496:25: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_internal.c:55:39: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_internal.c:57:37: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_internal.c:57:37: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_internal.c:85:29: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_internal.c:85:29: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_shrinker.c:339:25: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/selftests/huge_pages.c:202:36: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/selftests/huge_pages.c:202:36: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/i915_gem_context.c:2017:17: error: bad integer constant expression
-drivers/gpu/drm/i915/gem/i915_gem_context.c:2018:17: error: bad integer constant expression
-drivers/gpu/drm/i915/gem/i915_gem_context.c:2019:17: error: bad integer constant expression
-drivers/gpu/drm/i915/gem/i915_gem_context.c:2020:17: error: bad integer constant expression
-drivers/gpu/drm/i915/gem/i915_gem_context.c:2021:17: error: bad integer constant expression
-drivers/gpu/drm/i915/gem/i915_gem_context.c:2022:17: error: bad integer constant expression
-drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:1496:25: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/i915_gem_internal.c:55:39: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/i915_gem_internal.c:57:37: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/i915_gem_internal.c:57:37: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/i915_gem_internal.c:85:29: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/i915_gem_internal.c:85:29: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/i915_gem_shrinker.c:339:25: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/selftests/huge_pages.c:202:36: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/selftests/huge_pages.c:202:36: warning: expression using sizeof(void)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Pull scatterlist utils out of i915_gem.h
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3398:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3288:16: warning: expression using sizeof(void)

Commit: drm/i915: Move GEM object domain management from struct_mutex to local
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 74+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling (rev2)
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (49 preceding siblings ...)
  2019-04-25 12:25 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2019-04-25 12:56 ` Patchwork
  2019-04-25 21:52 ` ✗ Fi.CI.IGT: failure " Patchwork
  51 siblings, 0 replies; 74+ messages in thread
From: Patchwork @ 2019-04-25 12:56 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling (rev2)
URL   : https://patchwork.freedesktop.org/series/59933/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5999 -> Patchwork_12873
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/59933/revisions/2/mbox/

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_12873:

### IGT changes ###

#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@gem_exec_basic@gtt-vebox:
    - {fi-cml-u}:         NOTRUN -> [SKIP][1] +72 similar issues
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/fi-cml-u/igt@gem_exec_basic@gtt-vebox.html

  * igt@kms_chamelium@common-hpd-after-suspend:
    - {fi-icl-u2}:        [PASS][2] -> [FAIL][3]
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/fi-icl-u2/igt@kms_chamelium@common-hpd-after-suspend.html
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/fi-icl-u2/igt@kms_chamelium@common-hpd-after-suspend.html

  * igt@kms_chamelium@dp-edid-read:
    - {fi-icl-u2}:        [SKIP][4] ([fdo#109316]) -> [FAIL][5] +2 similar issues
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/fi-icl-u2/igt@kms_chamelium@dp-edid-read.html
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/fi-icl-u2/igt@kms_chamelium@dp-edid-read.html

  
New tests
---------

  New tests have been introduced between CI_DRM_5999 and Patchwork_12873:

### New IGT tests (1) ###

  * igt@i915_selftest@live_mman:
    - Statuses : 32 pass(s)
    - Exec time: [5.67, 57.40] s

  

Known issues
------------

  Here are the changes found in Patchwork_12873 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    - fi-apl-guc:         [PASS][6] -> [INCOMPLETE][7] ([fdo#103927] / [fdo#108743])
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/fi-apl-guc/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/fi-apl-guc/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html

  
#### Possible fixes ####

  * igt@i915_selftest@live_contexts:
    - fi-bdw-gvtdvm:      [DMESG-FAIL][8] ([fdo#110235]) -> [PASS][9]
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/fi-bdw-gvtdvm/igt@i915_selftest@live_contexts.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/fi-bdw-gvtdvm/igt@i915_selftest@live_contexts.html

  * igt@i915_selftest@live_hangcheck:
    - fi-skl-iommu:       [INCOMPLETE][10] ([fdo#108602] / [fdo#108744]) -> [PASS][11]
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/fi-skl-iommu/igt@i915_selftest@live_hangcheck.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/fi-skl-iommu/igt@i915_selftest@live_hangcheck.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#108602]: https://bugs.freedesktop.org/show_bug.cgi?id=108602
  [fdo#108743]: https://bugs.freedesktop.org/show_bug.cgi?id=108743
  [fdo#108744]: https://bugs.freedesktop.org/show_bug.cgi?id=108744
  [fdo#109316]: https://bugs.freedesktop.org/show_bug.cgi?id=109316
  [fdo#110235]: https://bugs.freedesktop.org/show_bug.cgi?id=110235


Participating hosts (41 -> 39)
------------------------------

  Additional (6): fi-bsw-n3050 fi-kbl-guc fi-icl-y fi-blb-e6850 fi-bsw-kefka fi-cml-u 
  Missing    (8): fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-kbl-x1275 fi-gdg-551 fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_5999 -> Patchwork_12873

  CI_DRM_5999: bde6790dd103ba3c4c127bbc061b90ea3f406762 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4964: ffe44144a2285788ae07768a9240b27ea8182d5c @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12873: 6929e616e34affaa9a4bede207e9299f829946db @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

6929e616e34a drm/i915/execlists: Minimalistic timeslicing
101fe6bd3ea6 drm/i915/execlists: Preempt-to-busy
f53efba5827a drm/i915: Replace engine->timeline with a plain list
d1fea1fe9b8f drm/i915: Stop retiring along engine
739b15ab58c4 drm/i915: Keep contexts pinned until after the next kernel context switch
9a9265d75803 drm/i915: Rename intel_context.active to .inflight
2681bf330a73 drm/i915: Move object close under its own lock
ae17ba67db17 drm/i915: Drop the deferred active reference
be441e888707 drm/i915: Move GEM client throttling to its own file
6867c699b8af drm/i915: Move GEM object busy checking to its own file
9a54a1d6a267 drm/i915: Move GEM object waiting to its own file
050777b9f9bc drm/i915: Move GEM object domain management from struct_mutex to local
1ff0c2bc3c2d drm/i915: Pull scatterlist utils out of i915_gem.h
8803cd7965eb drm/i915: Move more GEM objects under gem/
05d08fd5b328 drm/i915: Move GEM domain management to its own file
572207c2d7d5 drm/i915: Move mmap and friends to its own file
6ec8b7a69a75 drm/i915: Move phys objects to its own file
bdca8a1a1d45 drm/i915: Move shmem object setup to its own file
df1d268673d8 drm/i915: Move object->pages API to i915_gem_object.[ch]
236de6708974 drm/i915: Pull GEM ioctls interface to its own file
e227c4d30f7f drm/i915: Split GEM object type definition to its own header
8876d041268e drm/i915: Allow specification of parallel execbuf
3959f70876c9 drm/i915/execlists: Virtual engine bonding
60b05b785259 drm/i915: Extend execution fence to support a callback
7341983aa331 drm/i915: Apply an execution_mask to the virtual_engine
98d6ac93376e drm/i915: Load balancing across a virtual engine
c01464b0b711 drm/i915: Allow userspace to clone contexts on creation
5c7f4ce704b1 drm/i915: Re-expose SINGLE_TIMELINE flags for context creation
b473bbe31d59 drm/i915: Allow a context to define its set of engines
ab1288834bb0 drm/i915: Restore control over ppgtt for context creation ABI
2f165b9338cf drm/i915: Make engine_mask & num_engines static
82fefd1a02ef drm/i915: Convert inconsistent static engine tables into an init error
8a37ac514d57 drm/i915: Move the engine->destroy() vfunc onto the engine
016950b57bf5 drm/i915/execlists: Flush the tasklet on parking
f3aceb963930 drm/i915: Move i915_request_alloc into selftests/
eed6f45b1618 drm/i915: Remove intel_context.active_link
ab7626910592 drm/i915: Switch back to an array of logical per-engine HW contexts
071a62f7b83a drm/i915: Split engine setup/init into two phases
cdfbf2fbb191 drm/i915: Pass intel_context to intel_context_pin_lock()
d9d3f9889620 drm/i915/selftests: Pass around intel_context for sseu
9b953b2cad13 drm/i915/selftests: Use the real kernel context for sseu isolation tests
03c73e53b187 drm/i915: Export intel_context_instance()
5861ea2d3814 drm/i915/gvt: Pin the per-engine GVT shadow contexts
d5f35de3a4aa drm/i915: Seal races between async GPU cancellation, retirement and signaling

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 74+ messages in thread

* ✗ Fi.CI.IGT: failure for series starting with [01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling (rev2)
  2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (50 preceding siblings ...)
  2019-04-25 12:56 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2019-04-25 21:52 ` Patchwork
  51 siblings, 0 replies; 74+ messages in thread
From: Patchwork @ 2019-04-25 21:52 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling (rev2)
URL   : https://patchwork.freedesktop.org/series/59933/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_5999_full -> Patchwork_12873_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_12873_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_12873_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_12873_full:

### IGT changes ###

#### Possible regressions ####

  * igt@gem_ppgtt@blt-vs-render-ctxn:
    - shard-iclb:         [PASS][1] -> [FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-iclb8/igt@gem_ppgtt@blt-vs-render-ctxn.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-iclb2/igt@gem_ppgtt@blt-vs-render-ctxn.html

  * igt@i915_selftest@mock_requests:
    - shard-skl:          [PASS][3] -> [INCOMPLETE][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-skl3/igt@i915_selftest@mock_requests.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-skl1/igt@i915_selftest@mock_requests.html

  
New tests
---------

  New tests have been introduced between CI_DRM_5999_full and Patchwork_12873_full:

### New IGT tests (2) ###

  * igt@i915_selftest@live_mman:
    - Statuses : 5 pass(s)
    - Exec time: [6.32, 61.55] s

  * igt@i915_selftest@mock_phys:
    - Statuses : 5 pass(s)
    - Exec time: [0.16, 0.37] s

  

Known issues
------------

  Here are the changes found in Patchwork_12873_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_tiled_swapping@non-threaded:
    - shard-glk:          [PASS][5] -> [DMESG-WARN][6] ([fdo#108686])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-glk9/igt@gem_tiled_swapping@non-threaded.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-glk3/igt@gem_tiled_swapping@non-threaded.html
    - shard-hsw:          [PASS][7] -> [INCOMPLETE][8] ([fdo#103540])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-hsw2/igt@gem_tiled_swapping@non-threaded.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-hsw1/igt@gem_tiled_swapping@non-threaded.html

  * igt@kms_color@pipe-a-ctm-green-to-red:
    - shard-skl:          [PASS][9] -> [FAIL][10] ([fdo#107201])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-skl5/igt@kms_color@pipe-a-ctm-green-to-red.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-skl9/igt@kms_color@pipe-a-ctm-green-to-red.html

  * igt@kms_draw_crc@draw-method-xrgb8888-mmap-gtt-ytiled:
    - shard-glk:          [PASS][11] -> [FAIL][12] ([fdo#103184] / [fdo#108145])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-glk1/igt@kms_draw_crc@draw-method-xrgb8888-mmap-gtt-ytiled.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-glk6/igt@kms_draw_crc@draw-method-xrgb8888-mmap-gtt-ytiled.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-cur-indfb-draw-render:
    - shard-iclb:         [PASS][13] -> [FAIL][14] ([fdo#103167]) +3 similar issues
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-iclb5/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-cur-indfb-draw-render.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-iclb2/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-cur-indfb-draw-render.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-rte:
    - shard-iclb:         [PASS][15] -> [FAIL][16] ([fdo#103167] / [fdo#110378])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-iclb4/igt@kms_frontbuffer_tracking@fbcpsr-1p-rte.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-iclb4/igt@kms_frontbuffer_tracking@fbcpsr-1p-rte.html

  * igt@kms_plane@pixel-format-pipe-a-planes-source-clamping:
    - shard-glk:          [PASS][17] -> [SKIP][18] ([fdo#109271])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-glk9/igt@kms_plane@pixel-format-pipe-a-planes-source-clamping.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-glk2/igt@kms_plane@pixel-format-pipe-a-planes-source-clamping.html

  * igt@kms_plane_scaling@pipe-b-scaler-with-clipping-clamping:
    - shard-glk:          [PASS][19] -> [SKIP][20] ([fdo#109271] / [fdo#109278])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-glk9/igt@kms_plane_scaling@pipe-b-scaler-with-clipping-clamping.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-glk3/igt@kms_plane_scaling@pipe-b-scaler-with-clipping-clamping.html

  * igt@kms_psr@psr2_primary_mmap_gtt:
    - shard-iclb:         [PASS][21] -> [SKIP][22] ([fdo#109441]) +4 similar issues
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-iclb2/igt@kms_psr@psr2_primary_mmap_gtt.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-iclb4/igt@kms_psr@psr2_primary_mmap_gtt.html

  * igt@kms_setmode@basic:
    - shard-apl:          [PASS][23] -> [FAIL][24] ([fdo#99912])
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-apl6/igt@kms_setmode@basic.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-apl8/igt@kms_setmode@basic.html

  * igt@kms_vblank@pipe-c-ts-continuation-suspend:
    - shard-apl:          [PASS][25] -> [DMESG-WARN][26] ([fdo#108566]) +3 similar issues
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-apl4/igt@kms_vblank@pipe-c-ts-continuation-suspend.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-apl3/igt@kms_vblank@pipe-c-ts-continuation-suspend.html
    - shard-kbl:          [PASS][27] -> [DMESG-WARN][28] ([fdo#108566]) +4 similar issues
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-kbl5/igt@kms_vblank@pipe-c-ts-continuation-suspend.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-kbl5/igt@kms_vblank@pipe-c-ts-continuation-suspend.html

  * igt@perf_pmu@rc6-runtime-pm-long:
    - shard-iclb:         [PASS][29] -> [FAIL][30] ([fdo#105010])
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-iclb1/igt@perf_pmu@rc6-runtime-pm-long.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-iclb4/igt@perf_pmu@rc6-runtime-pm-long.html

  
#### Possible fixes ####

  * igt@gem_eio@in-flight-suspend:
    - shard-apl:          [DMESG-WARN][31] ([fdo#108566]) -> [PASS][32] +2 similar issues
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-apl8/igt@gem_eio@in-flight-suspend.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-apl5/igt@gem_eio@in-flight-suspend.html

  * igt@gem_softpin@noreloc-s3:
    - shard-skl:          [INCOMPLETE][33] ([fdo#104108] / [fdo#107773]) -> [PASS][34]
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-skl2/igt@gem_softpin@noreloc-s3.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-skl3/igt@gem_softpin@noreloc-s3.html

  * igt@i915_pm_rpm@i2c:
    - shard-iclb:         [DMESG-WARN][35] ([fdo#109982]) -> [PASS][36]
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-iclb2/igt@i915_pm_rpm@i2c.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-iclb4/igt@i915_pm_rpm@i2c.html

  * igt@kms_cursor_crc@cursor-size-change:
    - shard-apl:          [FAIL][37] ([fdo#103232]) -> [PASS][38]
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-apl8/igt@kms_cursor_crc@cursor-size-change.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-apl6/igt@kms_cursor_crc@cursor-size-change.html

  * igt@kms_cursor_legacy@2x-long-nonblocking-modeset-vs-cursor-atomic:
    - shard-glk:          [FAIL][39] ([fdo#106509] / [fdo#107409]) -> [PASS][40]
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-glk5/igt@kms_cursor_legacy@2x-long-nonblocking-modeset-vs-cursor-atomic.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-glk4/igt@kms_cursor_legacy@2x-long-nonblocking-modeset-vs-cursor-atomic.html

  * igt@kms_frontbuffer_tracking@fbc-suspend:
    - shard-kbl:          [DMESG-WARN][41] ([fdo#108566]) -> [PASS][42] +6 similar issues
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-kbl6/igt@kms_frontbuffer_tracking@fbc-suspend.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-kbl2/igt@kms_frontbuffer_tracking@fbc-suspend.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-pwrite:
    - shard-iclb:         [FAIL][43] ([fdo#103167]) -> [PASS][44] +8 similar issues
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-iclb2/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-pwrite.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-iclb5/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-pwrite.html

  * igt@kms_plane_scaling@pipe-a-scaler-with-clipping-clamping:
    - shard-glk:          [SKIP][45] ([fdo#109271] / [fdo#109278]) -> [PASS][46] +1 similar issue
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-glk8/igt@kms_plane_scaling@pipe-a-scaler-with-clipping-clamping.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-glk9/igt@kms_plane_scaling@pipe-a-scaler-with-clipping-clamping.html

  * igt@kms_psr@psr2_cursor_plane_onoff:
    - shard-iclb:         [SKIP][47] ([fdo#109441]) -> [PASS][48] +2 similar issues
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-iclb8/igt@kms_psr@psr2_cursor_plane_onoff.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-iclb2/igt@kms_psr@psr2_cursor_plane_onoff.html

  * igt@kms_sysfs_edid_timing:
    - shard-iclb:         [FAIL][49] ([fdo#100047]) -> [PASS][50]
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-iclb3/igt@kms_sysfs_edid_timing.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-iclb5/igt@kms_sysfs_edid_timing.html

  
#### Warnings ####

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-indfb-msflip-blt:
    - shard-skl:          [FAIL][51] ([fdo#108040]) -> [FAIL][52] ([fdo#103167])
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_5999/shard-skl10/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-indfb-msflip-blt.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/shard-skl2/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-indfb-msflip-blt.html

  
  [fdo#100047]: https://bugs.freedesktop.org/show_bug.cgi?id=100047
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103184]: https://bugs.freedesktop.org/show_bug.cgi?id=103184
  [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
  [fdo#103540]: https://bugs.freedesktop.org/show_bug.cgi?id=103540
  [fdo#104108]: https://bugs.freedesktop.org/show_bug.cgi?id=104108
  [fdo#105010]: https://bugs.freedesktop.org/show_bug.cgi?id=105010
  [fdo#106509]: https://bugs.freedesktop.org/show_bug.cgi?id=106509
  [fdo#107201]: https://bugs.freedesktop.org/show_bug.cgi?id=107201
  [fdo#107409]: https://bugs.freedesktop.org/show_bug.cgi?id=107409
  [fdo#107773]: https://bugs.freedesktop.org/show_bug.cgi?id=107773
  [fdo#108040]: https://bugs.freedesktop.org/show_bug.cgi?id=108040
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#108566]: https://bugs.freedesktop.org/show_bug.cgi?id=108566
  [fdo#108686]: https://bugs.freedesktop.org/show_bug.cgi?id=108686
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#109982]: https://bugs.freedesktop.org/show_bug.cgi?id=109982
  [fdo#110378]: https://bugs.freedesktop.org/show_bug.cgi?id=110378
  [fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912


Participating hosts (10 -> 10)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * Linux: CI_DRM_5999 -> Patchwork_12873

  CI_DRM_5999: bde6790dd103ba3c4c127bbc061b90ea3f406762 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4964: ffe44144a2285788ae07768a9240b27ea8182d5c @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12873: 6929e616e34affaa9a4bede207e9299f829946db @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12873/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 74+ messages in thread

* Re: [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling
  2019-04-25 10:42   ` Chris Wilson
@ 2019-04-26 10:40     ` Tvrtko Ursulin
  0 siblings, 0 replies; 74+ messages in thread
From: Tvrtko Ursulin @ 2019-04-26 10:40 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 25/04/2019 11:42, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-04-25 11:35:01)
>>
>> On 25/04/2019 10:19, Chris Wilson wrote:
>>> Currently there is an underlying assumption that i915_request_unsubmit()
>>> is synchronous wrt the GPU -- that is the request is no longer in flight
>>> as we remove it. In the near future that may change, and this may upset
>>> our signaling as we can process an interrupt for that request while it
>>> is no longer in flight.
>>>
>>> CPU0                                  CPU1
>>> intel_engine_breadcrumbs_irq
>>> (queue request completion)
>>>                                        i915_request_cancel_signaling
>>> ...                                   ...
>>>                                        i915_request_enable_signaling
>>> dma_fence_signal
>>>
>>> Hence in the time it took us to drop the lock to signal the request, a
>>> preemption event may have occurred and re-queued the request. In the
>>> process, that request would have seen I915_FENCE_FLAG_SIGNAL clear and
>>> so reused the rq->signal_link that was in use on CPU0, leading to bad
>>> pointer chasing in intel_engine_breadcrumbs_irq.
>>>
>>> A related issue was that if someone started listening for a signal on a
>>> completed but no longer in-flight request, we missed the opportunity to
>>> immediately signal that request.
>>>
>>> Furthermore, as intel_contexts may be immediately released during
>>> request retirement, in order to be entirely sure that
>>> intel_engine_breadcrumbs_irq may no longer dereference the intel_context
>>> (ce->signals and ce->signal_link), we must wait for irq spinlock.
>>>
>>> In order to prevent the race, we use a bit in the fence.flags to signal
>>> the transfer onto the signal list inside intel_engine_breadcrumbs_irq.
>>> For simplicity, we use the DMA_FENCE_FLAG_SIGNALED_BIT as it then
>>> quickly signals to any outside observer that the fence is indeed signaled.
>>>
>>> Fixes: 52c0fdb25c7c ("drm/i915: Replace global breadcrumbs with per-context interrupt tracking")
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>    drivers/dma-buf/dma-fence.c                 |  1 +
>>>    drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 58 +++++++++++++--------
>>>    drivers/gpu/drm/i915/i915_request.c         |  1 +
>>>    3 files changed, 39 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
>>> index 3aa8733f832a..9bf06042619a 100644
>>> --- a/drivers/dma-buf/dma-fence.c
>>> +++ b/drivers/dma-buf/dma-fence.c
>>> @@ -29,6 +29,7 @@
>>>    
>>>    EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
>>>    EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
>>> +EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
>>>    
>>>    static DEFINE_SPINLOCK(dma_fence_stub_lock);
>>>    static struct dma_fence dma_fence_stub;
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
>>> index 3cbffd400b1b..4283224249d4 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
>>> @@ -23,6 +23,7 @@
>>>     */
>>>    
>>>    #include <linux/kthread.h>
>>> +#include <trace/events/dma_fence.h>
>>>    #include <uapi/linux/sched/types.h>
>>>    
>>>    #include "i915_drv.h"
>>> @@ -83,6 +84,7 @@ static inline bool __request_completed(const struct i915_request *rq)
>>>    void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
>>>    {
>>>        struct intel_breadcrumbs *b = &engine->breadcrumbs;
>>> +     const ktime_t timestamp = ktime_get();
>>>        struct intel_context *ce, *cn;
>>>        struct list_head *pos, *next;
>>>        LIST_HEAD(signal);
>>> @@ -104,6 +106,11 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
>>>    
>>>                        GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL,
>>>                                             &rq->fence.flags));
>>> +                     clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
>>> +
>>> +                     if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
>>> +                                          &rq->fence.flags))
>>> +                             continue;
>>>    
>>>                        /*
>>>                         * Queue for execution after dropping the signaling
>>> @@ -111,14 +118,6 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
>>>                         * more signalers to the same context or engine.
>>>                         */
>>>                        i915_request_get(rq);
>>> -
>>> -                     /*
>>> -                      * We may race with direct invocation of
>>> -                      * dma_fence_signal(), e.g. i915_request_retire(),
>>> -                      * so we need to acquire our reference to the request
>>> -                      * before we cancel the breadcrumb.
>>> -                      */
>>> -                     clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
>>>                        list_add_tail(&rq->signal_link, &signal);
>>>                }
>>>    
>>> @@ -140,8 +139,21 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
>>>        list_for_each_safe(pos, next, &signal) {
>>>                struct i915_request *rq =
>>>                        list_entry(pos, typeof(*rq), signal_link);
>>> +             struct dma_fence_cb *cur, *tmp;
>>> +
>>> +             trace_dma_fence_signaled(&rq->fence);
>>> +
>>> +             rq->fence.timestamp = timestamp;
>>> +             set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &rq->fence.flags);
>>> +
>>> +             spin_lock(&rq->lock);
>>> +             list_for_each_entry_safe(cur, tmp, &rq->fence.cb_list, node) {
>>> +                     INIT_LIST_HEAD(&cur->node);
>>> +                     cur->func(&rq->fence, cur);
>>> +             }
>>> +             INIT_LIST_HEAD(&rq->fence.cb_list);
>>> +             spin_unlock(&rq->lock);
>>>    
>>> -             dma_fence_signal(&rq->fence);
>>
>> I posted some comments on this patch already. In essence, the suggestion
>> was not to open-code and optimize dma_fence_signal, but to split it into
>> two low-level helpers and export them from their parent location, e.g.
>> __dma_fence_maybe/start_signal and __dma_fence_finish_signal.
>>
>> bool __dma_fence_start_signal(...)
>> {
>>          return test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
>>                                  fence->flags);
>>                                  &fence->flags);
>> EXPORT_SYMBOL(...)
>>
>> void __dma_fence_finish_signal(...)
>> {
>>          trace_dma_fence_signaled...
>>          timestamp...
>>          callbacks...
>> }
>> EXPORT_SYMBOL(...)
>>
>> This way we don't add coupling to low-level implementation details in i915.
> 
> You mean midlayer implementation details :-p
> 
> The alternative I mentioned was that we use a redundant bit, but that
> duplicates its purpose.
> 
> The patch needs to be stable-friendly for the fixes, so I opted for
> taking control.

I might buy it if you put a big fat comment that this is open-coded 
dma_fence_signal and follow up with a patch to move the code back into the 
midlayer after the stable-marked patch. ;)
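
For reference, a rough and untested sketch of the split I have in mind
(the names and exact signatures are placeholders taken from the suggestion
above, not an existing API; the finish helper simply mirrors what the
patch open-codes in intel_engine_breadcrumbs_irq):

bool __dma_fence_start_signal(struct dma_fence *fence)
{
	/* Claim the fence for signaling; false if it was already claimed. */
	return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
}
EXPORT_SYMBOL(__dma_fence_start_signal);

void __dma_fence_finish_signal(struct dma_fence *fence, ktime_t timestamp)
{
	struct dma_fence_cb *cur, *tmp;

	/* Callbacks run under fence->lock, as in dma_fence_signal(). */
	lockdep_assert_held(fence->lock);

	trace_dma_fence_signaled(fence);

	fence->timestamp = timestamp;
	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);

	list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
		INIT_LIST_HEAD(&cur->node);
		cur->func(fence, cur);
	}
	INIT_LIST_HEAD(&fence->cb_list);
}
EXPORT_SYMBOL(__dma_fence_finish_signal);

i915 would then use the start helper where the patch currently does
test_and_set_bit() on DMA_FENCE_FLAG_SIGNALED_BIT, and call the finish
helper under rq->lock instead of open-coding the timestamp and callback
loop.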

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 74+ messages in thread

* Re: [PATCH 24/45] drm/i915: Split GEM object type definition to its own header
  2019-04-25  9:19 ` [PATCH 24/45] drm/i915: Split GEM object type definition to its own header Chris Wilson
@ 2019-04-26 12:12   ` Jani Nikula
  2019-04-29  9:36     ` Joonas Lahtinen
  0 siblings, 1 reply; 74+ messages in thread
From: Jani Nikula @ 2019-04-26 12:12 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Matthew Auld

On Thu, 25 Apr 2019, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> For convenience in avoiding inline spaghetti, keep the type definition
> as a separate header.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Matthew Auld <matthew.auld@intel.com>
> ---
>  drivers/gpu/drm/i915/Makefile                 |   1 +
>  drivers/gpu/drm/i915/gem/Makefile             |   1 +
>  drivers/gpu/drm/i915/gem/Makefile.header-test |  16 +
>  .../gpu/drm/i915/gem/i915_gem_object_types.h  | 285 +++++++++++++++++
>  drivers/gpu/drm/i915/gt/intel_engine_types.h  |   1 +
>  drivers/gpu/drm/i915/i915_drv.h               |   3 +-
>  drivers/gpu/drm/i915/i915_gem_batch_pool.h    |   3 +-
>  drivers/gpu/drm/i915/i915_gem_gtt.h           |   1 +
>  drivers/gpu/drm/i915/i915_gem_object.h        | 295 +-----------------
>  9 files changed, 312 insertions(+), 294 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/gem/Makefile
>  create mode 100644 drivers/gpu/drm/i915/gem/Makefile.header-test
>  create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 58643373495c..def781c9ea69 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -85,6 +85,7 @@ gt-$(CONFIG_DRM_I915_SELFTEST) += \
>  i915-y += $(gt-y)
>  
>  # GEM (Graphics Execution Management) code
> +obj-y += gem/

I think this warrants the same treatment as gt/: posted standalone
instead of hidden in a series, with explicit acks.

Provided the split makes sense to Joonas,

Acked-by: Jani Nikula <jani.nikula@intel.com>


>  i915-y += \
>  	  i915_active.o \
>  	  i915_cmd_parser.o \
> diff --git a/drivers/gpu/drm/i915/gem/Makefile b/drivers/gpu/drm/i915/gem/Makefile
> new file mode 100644
> index 000000000000..07e7b8b840ea
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gem/Makefile
> @@ -0,0 +1 @@
> +include $(src)/Makefile.header-test # Extra header tests
> diff --git a/drivers/gpu/drm/i915/gem/Makefile.header-test b/drivers/gpu/drm/i915/gem/Makefile.header-test
> new file mode 100644
> index 000000000000..61e06cbb4b32
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gem/Makefile.header-test
> @@ -0,0 +1,16 @@
> +# SPDX-License-Identifier: MIT
> +# Copyright © 2019 Intel Corporation
> +
> +# Test the headers are compilable as standalone units
> +header_test := $(notdir $(wildcard $(src)/*.h))
> +
> +quiet_cmd_header_test = HDRTEST $@
> +      cmd_header_test = echo "\#include \"$(<F)\"" > $@
> +
> +header_test_%.c: %.h
> +	$(call cmd,header_test)
> +
> +extra-$(CONFIG_DRM_I915_WERROR) += \
> +	$(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h)))
> +
> +clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h)))
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> new file mode 100644
> index 000000000000..e4b50944f553
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -0,0 +1,285 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2016 Intel Corporation
> + */
> +
> +#ifndef __I915_GEM_OBJECT_TYPES_H__
> +#define __I915_GEM_OBJECT_TYPES_H__
> +
> +#include <linux/reservation.h>
> +
> +#include <drm/drm_gem.h>
> +
> +#include "../i915_active.h"
> +#include "../i915_selftest.h"
> +
> +struct drm_i915_gem_object;
> +
> +/*
> + * struct i915_lut_handle tracks the fast lookups from handle to vma used
> + * for execbuf. Although we use a radixtree for that mapping, in order to
> + * remove them as the object or context is closed, we need a secondary list
> + * and a translation entry (i915_lut_handle).
> + */
> +struct i915_lut_handle {
> +	struct list_head obj_link;
> +	struct list_head ctx_link;
> +	struct i915_gem_context *ctx;
> +	u32 handle;
> +};
> +
> +struct drm_i915_gem_object_ops {
> +	unsigned int flags;
> +#define I915_GEM_OBJECT_HAS_STRUCT_PAGE	BIT(0)
> +#define I915_GEM_OBJECT_IS_SHRINKABLE	BIT(1)
> +#define I915_GEM_OBJECT_IS_PROXY	BIT(2)
> +#define I915_GEM_OBJECT_ASYNC_CANCEL	BIT(3)
> +
> +	/* Interface between the GEM object and its backing storage.
> +	 * get_pages() is called once prior to the use of the associated set
> +	 * of pages before to binding them into the GTT, and put_pages() is
> +	 * called after we no longer need them. As we expect there to be
> +	 * associated cost with migrating pages between the backing storage
> +	 * and making them available for the GPU (e.g. clflush), we may hold
> +	 * onto the pages after they are no longer referenced by the GPU
> +	 * in case they may be used again shortly (for example migrating the
> +	 * pages to a different memory domain within the GTT). put_pages()
> +	 * will therefore most likely be called when the object itself is
> +	 * being released or under memory pressure (where we attempt to
> +	 * reap pages for the shrinker).
> +	 */
> +	int (*get_pages)(struct drm_i915_gem_object *obj);
> +	void (*put_pages)(struct drm_i915_gem_object *obj,
> +			  struct sg_table *pages);
> +
> +	int (*pwrite)(struct drm_i915_gem_object *obj,
> +		      const struct drm_i915_gem_pwrite *arg);
> +
> +	int (*dmabuf_export)(struct drm_i915_gem_object *obj);
> +	void (*release)(struct drm_i915_gem_object *obj);
> +};
> +
> +struct drm_i915_gem_object {
> +	struct drm_gem_object base;
> +
> +	const struct drm_i915_gem_object_ops *ops;
> +
> +	struct {
> +		/**
> +		 * @vma.lock: protect the list/tree of vmas
> +		 */
> +		spinlock_t lock;
> +
> +		/**
> +		 * @vma.list: List of VMAs backed by this object
> +		 *
> +		 * The VMA on this list are ordered by type, all GGTT vma are
> +		 * placed at the head and all ppGTT vma are placed at the tail.
> +		 * The different types of GGTT vma are unordered between
> +		 * themselves, use the @vma.tree (which has a defined order
> +		 * between all VMA) to quickly find an exact match.
> +		 */
> +		struct list_head list;
> +
> +		/**
> +		 * @vma.tree: Ordered tree of VMAs backed by this object
> +		 *
> +		 * All VMA created for this object are placed in the @vma.tree
> +		 * for fast retrieval via a binary search in
> +		 * i915_vma_instance(). They are also added to @vma.list for
> +		 * easy iteration.
> +		 */
> +		struct rb_root tree;
> +	} vma;
> +
> +	/**
> +	 * @lut_list: List of vma lookup entries in use for this object.
> +	 *
> +	 * If this object is closed, we need to remove all of its VMA from
> +	 * the fast lookup index in associated contexts; @lut_list provides
> +	 * this translation from object to context->handles_vma.
> +	 */
> +	struct list_head lut_list;
> +
> +	/** Stolen memory for this object, instead of being backed by shmem. */
> +	struct drm_mm_node *stolen;
> +	union {
> +		struct rcu_head rcu;
> +		struct llist_node freed;
> +	};
> +
> +	/**
> +	 * Whether the object is currently in the GGTT mmap.
> +	 */
> +	unsigned int userfault_count;
> +	struct list_head userfault_link;
> +
> +	struct list_head batch_pool_link;
> +	I915_SELFTEST_DECLARE(struct list_head st_link);
> +
> +	unsigned long flags;
> +
> +	/**
> +	 * Have we taken a reference for the object for incomplete GPU
> +	 * activity?
> +	 */
> +#define I915_BO_ACTIVE_REF 0
> +
> +	/*
> +	 * Is the object to be mapped as read-only to the GPU
> +	 * Only honoured if hardware has relevant pte bit
> +	 */
> +	unsigned int cache_level:3;
> +	unsigned int cache_coherent:2;
> +#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
> +#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
> +	unsigned int cache_dirty:1;
> +
> +	/**
> +	 * @read_domains: Read memory domains.
> +	 *
> +	 * These monitor which caches contain read/write data related to the
> +	 * object. When transitioning from one set of domains to another,
> +	 * the driver is called to ensure that caches are suitably flushed and
> +	 * invalidated.
> +	 */
> +	u16 read_domains;
> +
> +	/**
> +	 * @write_domain: Corresponding unique write memory domain.
> +	 */
> +	u16 write_domain;
> +
> +	atomic_t frontbuffer_bits;
> +	unsigned int frontbuffer_ggtt_origin; /* write once */
> +	struct i915_active_request frontbuffer_write;
> +
> +	/** Current tiling stride for the object, if it's tiled. */
> +	unsigned int tiling_and_stride;
> +#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
> +#define TILING_MASK (FENCE_MINIMUM_STRIDE - 1)
> +#define STRIDE_MASK (~TILING_MASK)
> +
> +	/** Count of VMA actually bound by this object */
> +	unsigned int bind_count;
> +	unsigned int active_count;
> +	/** Count of how many global VMA are currently pinned for use by HW */
> +	unsigned int pin_global;
> +
> +	struct {
> +		struct mutex lock; /* protects the pages and their use */
> +		atomic_t pages_pin_count;
> +
> +		struct sg_table *pages;
> +		void *mapping;
> +
> +		/* TODO: whack some of this into the error state */
> +		struct i915_page_sizes {
> +			/**
> +			 * The sg mask of the pages sg_table. i.e the mask of
> +			 * of the lengths for each sg entry.
> +			 */
> +			unsigned int phys;
> +
> +			/**
> +			 * The gtt page sizes we are allowed to use given the
> +			 * sg mask and the supported page sizes. This will
> +			 * express the smallest unit we can use for the whole
> +			 * object, as well as the larger sizes we may be able
> +			 * to use opportunistically.
> +			 */
> +			unsigned int sg;
> +
> +			/**
> +			 * The actual gtt page size usage. Since we can have
> +			 * multiple vma associated with this object we need to
> +			 * prevent any trampling of state, hence a copy of this
> +			 * struct also lives in each vma, therefore the gtt
> +			 * value here should only be read/write through the vma.
> +			 */
> +			unsigned int gtt;
> +		} page_sizes;
> +
> +		I915_SELFTEST_DECLARE(unsigned int page_mask);
> +
> +		struct i915_gem_object_page_iter {
> +			struct scatterlist *sg_pos;
> +			unsigned int sg_idx; /* in pages, but 32bit eek! */
> +
> +			struct radix_tree_root radix;
> +			struct mutex lock; /* protects this cache */
> +		} get_page;
> +
> +		/**
> +		 * Element within i915->mm.unbound_list or i915->mm.bound_list,
> +		 * locked by i915->mm.obj_lock.
> +		 */
> +		struct list_head link;
> +
> +		/**
> +		 * Advice: are the backing pages purgeable?
> +		 */
> +		unsigned int madv:2;
> +
> +		/**
> +		 * This is set if the object has been written to since the
> +		 * pages were last acquired.
> +		 */
> +		bool dirty:1;
> +
> +		/**
> +		 * This is set if the object has been pinned due to unknown
> +		 * swizzling.
> +		 */
> +		bool quirked:1;
> +	} mm;
> +
> +	/** Breadcrumb of last rendering to the buffer.
> +	 * There can only be one writer, but we allow for multiple readers.
> +	 * If there is a writer that necessarily implies that all other
> +	 * read requests are complete - but we may only be lazily clearing
> +	 * the read requests. A read request is naturally the most recent
> +	 * request on a ring, so we may have two different write and read
> +	 * requests on one ring where the write request is older than the
> +	 * read request. This allows for the CPU to read from an active
> +	 * buffer by only waiting for the write to complete.
> +	 */
> +	struct reservation_object *resv;
> +
> +	/** References from framebuffers, locks out tiling changes. */
> +	unsigned int framebuffer_references;
> +
> +	/** Record of address bit 17 of each page at last unbind. */
> +	unsigned long *bit_17;
> +
> +	union {
> +		struct i915_gem_userptr {
> +			uintptr_t ptr;
> +
> +			struct i915_mm_struct *mm;
> +			struct i915_mmu_object *mmu_object;
> +			struct work_struct *work;
> +		} userptr;
> +
> +		unsigned long scratch;
> +
> +		void *gvt_info;
> +	};
> +
> +	/** for phys allocated objects */
> +	struct drm_dma_handle *phys_handle;
> +
> +	struct reservation_object __builtin_resv;
> +};
> +
> +static inline struct drm_i915_gem_object *
> +to_intel_bo(struct drm_gem_object *gem)
> +{
> +	/* Assert that to_intel_bo(NULL) == NULL */
> +	BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));
> +
> +	return container_of(gem, struct drm_i915_gem_object, base);
> +}
> +
> +#endif
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index ca8d95a5708d..ae73c6596d08 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -29,6 +29,7 @@
>  #define I915_CMD_HASH_ORDER 9
>  
>  struct dma_fence;
> +struct drm_i915_gem_object;
>  struct drm_i915_reg_table;
>  struct i915_gem_context;
>  struct i915_request;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5ca4df9a7428..0cf87495e11b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -80,7 +80,6 @@
>  #include "i915_gem.h"
>  #include "i915_gem_context.h"
>  #include "i915_gem_fence_reg.h"
> -#include "i915_gem_object.h"
>  #include "i915_gem_gtt.h"
>  #include "i915_gpu_error.h"
>  #include "i915_request.h"
> @@ -135,6 +134,8 @@ bool i915_error_injected(void);
>  	__i915_printk(i915, i915_error_injected() ? KERN_DEBUG : KERN_ERR, \
>  		      fmt, ##__VA_ARGS__)
>  
> +struct drm_i915_gem_object;
> +
>  enum hpd_pin {
>  	HPD_NONE = 0,
>  	HPD_TV = HPD_NONE,     /* TV is known to be unreliable */
> diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
> index 56947daaaf65..feeeeeaa54d8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h
> +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
> @@ -9,6 +9,7 @@
>  
>  #include <linux/types.h>
>  
> +struct drm_i915_gem_object;
>  struct intel_engine_cs;
>  
>  struct i915_gem_batch_pool {
> @@ -19,7 +20,7 @@ struct i915_gem_batch_pool {
>  void i915_gem_batch_pool_init(struct i915_gem_batch_pool *pool,
>  			      struct intel_engine_cs *engine);
>  void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
> -struct drm_i915_gem_object*
> +struct drm_i915_gem_object *
>  i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
>  
>  #endif /* I915_GEM_BATCH_POOL_H */
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 2fafa04c45ec..593079f30fbe 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -61,6 +61,7 @@
>  
>  struct drm_i915_file_private;
>  struct drm_i915_fence_reg;
> +struct drm_i915_gem_object;
>  struct i915_vma;
>  
>  typedef u32 gen6_pte_t;
> diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
> index ca93a40c0c87..3666b0c5f6ca 100644
> --- a/drivers/gpu/drm/i915/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/i915_gem_object.h
> @@ -1,308 +1,19 @@
>  /*
> - * Copyright © 2016 Intel Corporation
> - *
> - * Permission is hereby granted, free of charge, to any person obtaining a
> - * copy of this software and associated documentation files (the "Software"),
> - * to deal in the Software without restriction, including without limitation
> - * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> - * and/or sell copies of the Software, and to permit persons to whom the
> - * Software is furnished to do so, subject to the following conditions:
> - *
> - * The above copyright notice and this permission notice (including the next
> - * paragraph) shall be included in all copies or substantial portions of the
> - * Software.
> - *
> - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> - * IN THE SOFTWARE.
> + * SPDX-License-Identifier: MIT
>   *
> + * Copyright © 2016 Intel Corporation
>   */
>  
>  #ifndef __I915_GEM_OBJECT_H__
>  #define __I915_GEM_OBJECT_H__
>  
> -#include <linux/reservation.h>
> -
> -#include <drm/drm_vma_manager.h>
>  #include <drm/drm_gem.h>
>  #include <drm/drm_file.h>
>  #include <drm/drm_device.h>
>  
>  #include <drm/i915_drm.h>
>  
> -#include "i915_request.h"
> -#include "i915_selftest.h"
> -
> -struct drm_i915_gem_object;
> -
> -/*
> - * struct i915_lut_handle tracks the fast lookups from handle to vma used
> - * for execbuf. Although we use a radixtree for that mapping, in order to
> - * remove them as the object or context is closed, we need a secondary list
> - * and a translation entry (i915_lut_handle).
> - */
> -struct i915_lut_handle {
> -	struct list_head obj_link;
> -	struct list_head ctx_link;
> -	struct i915_gem_context *ctx;
> -	u32 handle;
> -};
> -
> -struct drm_i915_gem_object_ops {
> -	unsigned int flags;
> -#define I915_GEM_OBJECT_HAS_STRUCT_PAGE	BIT(0)
> -#define I915_GEM_OBJECT_IS_SHRINKABLE	BIT(1)
> -#define I915_GEM_OBJECT_IS_PROXY	BIT(2)
> -#define I915_GEM_OBJECT_ASYNC_CANCEL	BIT(3)
> -
> -	/* Interface between the GEM object and its backing storage.
> -	 * get_pages() is called once prior to the use of the associated set
> -	 * of pages before to binding them into the GTT, and put_pages() is
> -	 * called after we no longer need them. As we expect there to be
> -	 * associated cost with migrating pages between the backing storage
> -	 * and making them available for the GPU (e.g. clflush), we may hold
> -	 * onto the pages after they are no longer referenced by the GPU
> -	 * in case they may be used again shortly (for example migrating the
> -	 * pages to a different memory domain within the GTT). put_pages()
> -	 * will therefore most likely be called when the object itself is
> -	 * being released or under memory pressure (where we attempt to
> -	 * reap pages for the shrinker).
> -	 */
> -	int (*get_pages)(struct drm_i915_gem_object *);
> -	void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
> -
> -	int (*pwrite)(struct drm_i915_gem_object *,
> -		      const struct drm_i915_gem_pwrite *);
> -
> -	int (*dmabuf_export)(struct drm_i915_gem_object *);
> -	void (*release)(struct drm_i915_gem_object *);
> -};
> -
> -struct drm_i915_gem_object {
> -	struct drm_gem_object base;
> -
> -	const struct drm_i915_gem_object_ops *ops;
> -
> -	struct {
> -		/**
> -		 * @vma.lock: protect the list/tree of vmas
> -		 */
> -		spinlock_t lock;
> -
> -		/**
> -		 * @vma.list: List of VMAs backed by this object
> -		 *
> -		 * The VMA on this list are ordered by type, all GGTT vma are
> -		 * placed at the head and all ppGTT vma are placed at the tail.
> -		 * The different types of GGTT vma are unordered between
> -		 * themselves, use the @vma.tree (which has a defined order
> -		 * between all VMA) to quickly find an exact match.
> -		 */
> -		struct list_head list;
> -
> -		/**
> -		 * @vma.tree: Ordered tree of VMAs backed by this object
> -		 *
> -		 * All VMA created for this object are placed in the @vma.tree
> -		 * for fast retrieval via a binary search in
> -		 * i915_vma_instance(). They are also added to @vma.list for
> -		 * easy iteration.
> -		 */
> -		struct rb_root tree;
> -	} vma;
> -
> -	/**
> -	 * @lut_list: List of vma lookup entries in use for this object.
> -	 *
> -	 * If this object is closed, we need to remove all of its VMA from
> -	 * the fast lookup index in associated contexts; @lut_list provides
> -	 * this translation from object to context->handles_vma.
> -	 */
> -	struct list_head lut_list;
> -
> -	/** Stolen memory for this object, instead of being backed by shmem. */
> -	struct drm_mm_node *stolen;
> -	union {
> -		struct rcu_head rcu;
> -		struct llist_node freed;
> -	};
> -
> -	/**
> -	 * Whether the object is currently in the GGTT mmap.
> -	 */
> -	unsigned int userfault_count;
> -	struct list_head userfault_link;
> -
> -	struct list_head batch_pool_link;
> -	I915_SELFTEST_DECLARE(struct list_head st_link);
> -
> -	unsigned long flags;
> -
> -	/**
> -	 * Have we taken a reference for the object for incomplete GPU
> -	 * activity?
> -	 */
> -#define I915_BO_ACTIVE_REF 0
> -
> -	/*
> -	 * Is the object to be mapped as read-only to the GPU
> -	 * Only honoured if hardware has relevant pte bit
> -	 */
> -	unsigned int cache_level:3;
> -	unsigned int cache_coherent:2;
> -#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
> -#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
> -	unsigned int cache_dirty:1;
> -
> -	/**
> -	 * @read_domains: Read memory domains.
> -	 *
> -	 * These monitor which caches contain read/write data related to the
> -	 * object. When transitioning from one set of domains to another,
> -	 * the driver is called to ensure that caches are suitably flushed and
> -	 * invalidated.
> -	 */
> -	u16 read_domains;
> -
> -	/**
> -	 * @write_domain: Corresponding unique write memory domain.
> -	 */
> -	u16 write_domain;
> -
> -	atomic_t frontbuffer_bits;
> -	unsigned int frontbuffer_ggtt_origin; /* write once */
> -	struct i915_active_request frontbuffer_write;
> -
> -	/** Current tiling stride for the object, if it's tiled. */
> -	unsigned int tiling_and_stride;
> -#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
> -#define TILING_MASK (FENCE_MINIMUM_STRIDE-1)
> -#define STRIDE_MASK (~TILING_MASK)
> -
> -	/** Count of VMA actually bound by this object */
> -	unsigned int bind_count;
> -	unsigned int active_count;
> -	/** Count of how many global VMA are currently pinned for use by HW */
> -	unsigned int pin_global;
> -
> -	struct {
> -		struct mutex lock; /* protects the pages and their use */
> -		atomic_t pages_pin_count;
> -
> -		struct sg_table *pages;
> -		void *mapping;
> -
> -		/* TODO: whack some of this into the error state */
> -		struct i915_page_sizes {
> -			/**
> -			 * The sg mask of the pages sg_table. i.e the mask of
> -			 * of the lengths for each sg entry.
> -			 */
> -			unsigned int phys;
> -
> -			/**
> -			 * The gtt page sizes we are allowed to use given the
> -			 * sg mask and the supported page sizes. This will
> -			 * express the smallest unit we can use for the whole
> -			 * object, as well as the larger sizes we may be able
> -			 * to use opportunistically.
> -			 */
> -			unsigned int sg;
> -
> -			/**
> -			 * The actual gtt page size usage. Since we can have
> -			 * multiple vma associated with this object we need to
> -			 * prevent any trampling of state, hence a copy of this
> -			 * struct also lives in each vma, therefore the gtt
> -			 * value here should only be read/write through the vma.
> -			 */
> -			unsigned int gtt;
> -		} page_sizes;
> -
> -		I915_SELFTEST_DECLARE(unsigned int page_mask);
> -
> -		struct i915_gem_object_page_iter {
> -			struct scatterlist *sg_pos;
> -			unsigned int sg_idx; /* in pages, but 32bit eek! */
> -
> -			struct radix_tree_root radix;
> -			struct mutex lock; /* protects this cache */
> -		} get_page;
> -
> -		/**
> -		 * Element within i915->mm.unbound_list or i915->mm.bound_list,
> -		 * locked by i915->mm.obj_lock.
> -		 */
> -		struct list_head link;
> -
> -		/**
> -		 * Advice: are the backing pages purgeable?
> -		 */
> -		unsigned int madv:2;
> -
> -		/**
> -		 * This is set if the object has been written to since the
> -		 * pages were last acquired.
> -		 */
> -		bool dirty:1;
> -
> -		/**
> -		 * This is set if the object has been pinned due to unknown
> -		 * swizzling.
> -		 */
> -		bool quirked:1;
> -	} mm;
> -
> -	/** Breadcrumb of last rendering to the buffer.
> -	 * There can only be one writer, but we allow for multiple readers.
> -	 * If there is a writer that necessarily implies that all other
> -	 * read requests are complete - but we may only be lazily clearing
> -	 * the read requests. A read request is naturally the most recent
> -	 * request on a ring, so we may have two different write and read
> -	 * requests on one ring where the write request is older than the
> -	 * read request. This allows for the CPU to read from an active
> -	 * buffer by only waiting for the write to complete.
> -	 */
> -	struct reservation_object *resv;
> -
> -	/** References from framebuffers, locks out tiling changes. */
> -	unsigned int framebuffer_references;
> -
> -	/** Record of address bit 17 of each page at last unbind. */
> -	unsigned long *bit_17;
> -
> -	union {
> -		struct i915_gem_userptr {
> -			uintptr_t ptr;
> -
> -			struct i915_mm_struct *mm;
> -			struct i915_mmu_object *mmu_object;
> -			struct work_struct *work;
> -		} userptr;
> -
> -		unsigned long scratch;
> -
> -		void *gvt_info;
> -	};
> -
> -	/** for phys allocated objects */
> -	struct drm_dma_handle *phys_handle;
> -
> -	struct reservation_object __builtin_resv;
> -};
> -
> -static inline struct drm_i915_gem_object *
> -to_intel_bo(struct drm_gem_object *gem)
> -{
> -	/* Assert that to_intel_bo(NULL) == NULL */
> -	BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));
> -
> -	return container_of(gem, struct drm_i915_gem_object, base);
> -}
> +#include "gem/i915_gem_object_types.h"
>  
>  struct drm_i915_gem_object *i915_gem_object_alloc(void);
>  void i915_gem_object_free(struct drm_i915_gem_object *obj);

-- 
Jani Nikula, Intel Open Source Graphics Center

* Re: [PATCH 24/45] drm/i915: Split GEM object type definition to its own header
  2019-04-26 12:12   ` Jani Nikula
@ 2019-04-29  9:36     ` Joonas Lahtinen
  2019-04-29 17:52       ` Rodrigo Vivi
  0 siblings, 1 reply; 74+ messages in thread
From: Joonas Lahtinen @ 2019-04-29  9:36 UTC (permalink / raw)
  To: Chris Wilson, Jani Nikula, intel-gfx; +Cc: Matthew Auld

Quoting Jani Nikula (2019-04-26 15:12:49)
> On Thu, 25 Apr 2019, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > For convenience in avoiding inline spaghetti, keep the type definition
> > as a separate header.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Reviewed-by: Matthew Auld <matthew.auld@intel.com>
> > ---
> >  drivers/gpu/drm/i915/Makefile                 |   1 +
> >  drivers/gpu/drm/i915/gem/Makefile             |   1 +
> >  drivers/gpu/drm/i915/gem/Makefile.header-test |  16 +
> >  .../gpu/drm/i915/gem/i915_gem_object_types.h  | 285 +++++++++++++++++
> >  drivers/gpu/drm/i915/gt/intel_engine_types.h  |   1 +
> >  drivers/gpu/drm/i915/i915_drv.h               |   3 +-
> >  drivers/gpu/drm/i915/i915_gem_batch_pool.h    |   3 +-
> >  drivers/gpu/drm/i915/i915_gem_gtt.h           |   1 +
> >  drivers/gpu/drm/i915/i915_gem_object.h        | 295 +-----------------
> >  9 files changed, 312 insertions(+), 294 deletions(-)
> >  create mode 100644 drivers/gpu/drm/i915/gem/Makefile
> >  create mode 100644 drivers/gpu/drm/i915/gem/Makefile.header-test
> >  create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> >
> > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> > index 58643373495c..def781c9ea69 100644
> > --- a/drivers/gpu/drm/i915/Makefile
> > +++ b/drivers/gpu/drm/i915/Makefile
> > @@ -85,6 +85,7 @@ gt-$(CONFIG_DRM_I915_SELFTEST) += \
> >  i915-y += $(gt-y)
> >  
> >  # GEM (Graphics Execution Management) code
> > +obj-y += gem/
> 
> I think this warrants similar treatment as gt/. Posted standalone
> instead of hidden in a series, explicit acks.
> 
> Provided the split makes sense to Joonas,
> 
> Acked-by: Jani Nikula <jani.nikula@intel.com>

Yeah, makes sense to me.

Acked-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas

> 
> 
> >  i915-y += \
> >         i915_active.o \
> >         i915_cmd_parser.o \
> > diff --git a/drivers/gpu/drm/i915/gem/Makefile b/drivers/gpu/drm/i915/gem/Makefile
> > new file mode 100644
> > index 000000000000..07e7b8b840ea
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/gem/Makefile
> > @@ -0,0 +1 @@
> > +include $(src)/Makefile.header-test # Extra header tests
> > diff --git a/drivers/gpu/drm/i915/gem/Makefile.header-test b/drivers/gpu/drm/i915/gem/Makefile.header-test
> > new file mode 100644
> > index 000000000000..61e06cbb4b32
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/gem/Makefile.header-test
> > @@ -0,0 +1,16 @@
> > +# SPDX-License-Identifier: MIT
> > +# Copyright © 2019 Intel Corporation
> > +
> > +# Test the headers are compilable as standalone units
> > +header_test := $(notdir $(wildcard $(src)/*.h))
> > +
> > +quiet_cmd_header_test = HDRTEST $@
> > +      cmd_header_test = echo "\#include \"$(<F)\"" > $@
> > +
> > +header_test_%.c: %.h
> > +     $(call cmd,header_test)
> > +
> > +extra-$(CONFIG_DRM_I915_WERROR) += \
> > +     $(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h)))
> > +
> > +clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h)))
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > new file mode 100644
> > index 000000000000..e4b50944f553
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > @@ -0,0 +1,285 @@
> > +/*
> > + * SPDX-License-Identifier: MIT
> > + *
> > + * Copyright © 2016 Intel Corporation
> > + */
> > +
> > +#ifndef __I915_GEM_OBJECT_TYPES_H__
> > +#define __I915_GEM_OBJECT_TYPES_H__
> > +
> > +#include <linux/reservation.h>
> > +
> > +#include <drm/drm_gem.h>
> > +
> > +#include "../i915_active.h"
> > +#include "../i915_selftest.h"
> > +
> > +struct drm_i915_gem_object;
> > +
> > +/*
> > + * struct i915_lut_handle tracks the fast lookups from handle to vma used
> > + * for execbuf. Although we use a radixtree for that mapping, in order to
> > + * remove them as the object or context is closed, we need a secondary list
> > + * and a translation entry (i915_lut_handle).
> > + */
> > +struct i915_lut_handle {
> > +     struct list_head obj_link;
> > +     struct list_head ctx_link;
> > +     struct i915_gem_context *ctx;
> > +     u32 handle;
> > +};
> > +
> > +struct drm_i915_gem_object_ops {
> > +     unsigned int flags;
> > +#define I915_GEM_OBJECT_HAS_STRUCT_PAGE      BIT(0)
> > +#define I915_GEM_OBJECT_IS_SHRINKABLE        BIT(1)
> > +#define I915_GEM_OBJECT_IS_PROXY     BIT(2)
> > +#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(3)
> > +
> > +     /* Interface between the GEM object and its backing storage.
> > +      * get_pages() is called once prior to the use of the associated set
> > +      * of pages before binding them into the GTT, and put_pages() is
> > +      * called after we no longer need them. As we expect there to be
> > +      * associated cost with migrating pages between the backing storage
> > +      * and making them available for the GPU (e.g. clflush), we may hold
> > +      * onto the pages after they are no longer referenced by the GPU
> > +      * in case they may be used again shortly (for example migrating the
> > +      * pages to a different memory domain within the GTT). put_pages()
> > +      * will therefore most likely be called when the object itself is
> > +      * being released or under memory pressure (where we attempt to
> > +      * reap pages for the shrinker).
> > +      */
> > +     int (*get_pages)(struct drm_i915_gem_object *obj);
> > +     void (*put_pages)(struct drm_i915_gem_object *obj,
> > +                       struct sg_table *pages);
> > +
> > +     int (*pwrite)(struct drm_i915_gem_object *obj,
> > +                   const struct drm_i915_gem_pwrite *arg);
> > +
> > +     int (*dmabuf_export)(struct drm_i915_gem_object *obj);
> > +     void (*release)(struct drm_i915_gem_object *obj);
> > +};
> > +
> > +struct drm_i915_gem_object {
> > +     struct drm_gem_object base;
> > +
> > +     const struct drm_i915_gem_object_ops *ops;
> > +
> > +     struct {
> > +             /**
> > +              * @vma.lock: protect the list/tree of vmas
> > +              */
> > +             spinlock_t lock;
> > +
> > +             /**
> > +              * @vma.list: List of VMAs backed by this object
> > +              *
> > +              * The VMA on this list are ordered by type, all GGTT vma are
> > +              * placed at the head and all ppGTT vma are placed at the tail.
> > +              * The different types of GGTT vma are unordered between
> > +              * themselves, use the @vma.tree (which has a defined order
> > +              * between all VMA) to quickly find an exact match.
> > +              */
> > +             struct list_head list;
> > +
> > +             /**
> > +              * @vma.tree: Ordered tree of VMAs backed by this object
> > +              *
> > +              * All VMA created for this object are placed in the @vma.tree
> > +              * for fast retrieval via a binary search in
> > +              * i915_vma_instance(). They are also added to @vma.list for
> > +              * easy iteration.
> > +              */
> > +             struct rb_root tree;
> > +     } vma;
> > +
> > +     /**
> > +      * @lut_list: List of vma lookup entries in use for this object.
> > +      *
> > +      * If this object is closed, we need to remove all of its VMA from
> > +      * the fast lookup index in associated contexts; @lut_list provides
> > +      * this translation from object to context->handles_vma.
> > +      */
> > +     struct list_head lut_list;
> > +
> > +     /** Stolen memory for this object, instead of being backed by shmem. */
> > +     struct drm_mm_node *stolen;
> > +     union {
> > +             struct rcu_head rcu;
> > +             struct llist_node freed;
> > +     };
> > +
> > +     /**
> > +      * Whether the object is currently in the GGTT mmap.
> > +      */
> > +     unsigned int userfault_count;
> > +     struct list_head userfault_link;
> > +
> > +     struct list_head batch_pool_link;
> > +     I915_SELFTEST_DECLARE(struct list_head st_link);
> > +
> > +     unsigned long flags;
> > +
> > +     /**
> > +      * Have we taken a reference for the object for incomplete GPU
> > +      * activity?
> > +      */
> > +#define I915_BO_ACTIVE_REF 0
> > +
> > +     /*
> > +      * Is the object to be mapped as read-only to the GPU
> > +      * Only honoured if hardware has relevant pte bit
> > +      */
> > +     unsigned int cache_level:3;
> > +     unsigned int cache_coherent:2;
> > +#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
> > +#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
> > +     unsigned int cache_dirty:1;
> > +
> > +     /**
> > +      * @read_domains: Read memory domains.
> > +      *
> > +      * These monitor which caches contain read/write data related to the
> > +      * object. When transitioning from one set of domains to another,
> > +      * the driver is called to ensure that caches are suitably flushed and
> > +      * invalidated.
> > +      */
> > +     u16 read_domains;
> > +
> > +     /**
> > +      * @write_domain: Corresponding unique write memory domain.
> > +      */
> > +     u16 write_domain;
> > +
> > +     atomic_t frontbuffer_bits;
> > +     unsigned int frontbuffer_ggtt_origin; /* write once */
> > +     struct i915_active_request frontbuffer_write;
> > +
> > +     /** Current tiling stride for the object, if it's tiled. */
> > +     unsigned int tiling_and_stride;
> > +#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
> > +#define TILING_MASK (FENCE_MINIMUM_STRIDE - 1)
> > +#define STRIDE_MASK (~TILING_MASK)
> > +
> > +     /** Count of VMA actually bound by this object */
> > +     unsigned int bind_count;
> > +     unsigned int active_count;
> > +     /** Count of how many global VMA are currently pinned for use by HW */
> > +     unsigned int pin_global;
> > +
> > +     struct {
> > +             struct mutex lock; /* protects the pages and their use */
> > +             atomic_t pages_pin_count;
> > +
> > +             struct sg_table *pages;
> > +             void *mapping;
> > +
> > +             /* TODO: whack some of this into the error state */
> > +             struct i915_page_sizes {
> > +                     /**
> > +                      * The sg mask of the pages sg_table, i.e. the mask
> > +                      * of the lengths for each sg entry.
> > +                      */
> > +                     unsigned int phys;
> > +
> > +                     /**
> > +                      * The gtt page sizes we are allowed to use given the
> > +                      * sg mask and the supported page sizes. This will
> > +                      * express the smallest unit we can use for the whole
> > +                      * object, as well as the larger sizes we may be able
> > +                      * to use opportunistically.
> > +                      */
> > +                     unsigned int sg;
> > +
> > +                     /**
> > +                      * The actual gtt page size usage. Since we can have
> > +                      * multiple vma associated with this object we need to
> > +                      * prevent any trampling of state, hence a copy of this
> > +                      * struct also lives in each vma, therefore the gtt
> > +                      * value here should only be read/write through the vma.
> > +                      */
> > +                     unsigned int gtt;
> > +             } page_sizes;
> > +
> > +             I915_SELFTEST_DECLARE(unsigned int page_mask);
> > +
> > +             struct i915_gem_object_page_iter {
> > +                     struct scatterlist *sg_pos;
> > +                     unsigned int sg_idx; /* in pages, but 32bit eek! */
> > +
> > +                     struct radix_tree_root radix;
> > +                     struct mutex lock; /* protects this cache */
> > +             } get_page;
> > +
> > +             /**
> > +              * Element within i915->mm.unbound_list or i915->mm.bound_list,
> > +              * locked by i915->mm.obj_lock.
> > +              */
> > +             struct list_head link;
> > +
> > +             /**
> > +              * Advice: are the backing pages purgeable?
> > +              */
> > +             unsigned int madv:2;
> > +
> > +             /**
> > +              * This is set if the object has been written to since the
> > +              * pages were last acquired.
> > +              */
> > +             bool dirty:1;
> > +
> > +             /**
> > +              * This is set if the object has been pinned due to unknown
> > +              * swizzling.
> > +              */
> > +             bool quirked:1;
> > +     } mm;
> > +
> > +     /** Breadcrumb of last rendering to the buffer.
> > +      * There can only be one writer, but we allow for multiple readers.
> > +      * If there is a writer that necessarily implies that all other
> > +      * read requests are complete - but we may only be lazily clearing
> > +      * the read requests. A read request is naturally the most recent
> > +      * request on a ring, so we may have two different write and read
> > +      * requests on one ring where the write request is older than the
> > +      * read request. This allows for the CPU to read from an active
> > +      * buffer by only waiting for the write to complete.
> > +      */
> > +     struct reservation_object *resv;
> > +
> > +     /** References from framebuffers, locks out tiling changes. */
> > +     unsigned int framebuffer_references;
> > +
> > +     /** Record of address bit 17 of each page at last unbind. */
> > +     unsigned long *bit_17;
> > +
> > +     union {
> > +             struct i915_gem_userptr {
> > +                     uintptr_t ptr;
> > +
> > +                     struct i915_mm_struct *mm;
> > +                     struct i915_mmu_object *mmu_object;
> > +                     struct work_struct *work;
> > +             } userptr;
> > +
> > +             unsigned long scratch;
> > +
> > +             void *gvt_info;
> > +     };
> > +
> > +     /** for phys allocated objects */
> > +     struct drm_dma_handle *phys_handle;
> > +
> > +     struct reservation_object __builtin_resv;
> > +};
> > +
> > +static inline struct drm_i915_gem_object *
> > +to_intel_bo(struct drm_gem_object *gem)
> > +{
> > +     /* Assert that to_intel_bo(NULL) == NULL */
> > +     BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));
> > +
> > +     return container_of(gem, struct drm_i915_gem_object, base);
> > +}
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > index ca8d95a5708d..ae73c6596d08 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > @@ -29,6 +29,7 @@
> >  #define I915_CMD_HASH_ORDER 9
> >  
> >  struct dma_fence;
> > +struct drm_i915_gem_object;
> >  struct drm_i915_reg_table;
> >  struct i915_gem_context;
> >  struct i915_request;
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 5ca4df9a7428..0cf87495e11b 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -80,7 +80,6 @@
> >  #include "i915_gem.h"
> >  #include "i915_gem_context.h"
> >  #include "i915_gem_fence_reg.h"
> > -#include "i915_gem_object.h"
> >  #include "i915_gem_gtt.h"
> >  #include "i915_gpu_error.h"
> >  #include "i915_request.h"
> > @@ -135,6 +134,8 @@ bool i915_error_injected(void);
> >       __i915_printk(i915, i915_error_injected() ? KERN_DEBUG : KERN_ERR, \
> >                     fmt, ##__VA_ARGS__)
> >  
> > +struct drm_i915_gem_object;
> > +
> >  enum hpd_pin {
> >       HPD_NONE = 0,
> >       HPD_TV = HPD_NONE,     /* TV is known to be unreliable */
> > diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
> > index 56947daaaf65..feeeeeaa54d8 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h
> > +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
> > @@ -9,6 +9,7 @@
> >  
> >  #include <linux/types.h>
> >  
> > +struct drm_i915_gem_object;
> >  struct intel_engine_cs;
> >  
> >  struct i915_gem_batch_pool {
> > @@ -19,7 +20,7 @@ struct i915_gem_batch_pool {
> >  void i915_gem_batch_pool_init(struct i915_gem_batch_pool *pool,
> >                             struct intel_engine_cs *engine);
> >  void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
> > -struct drm_i915_gem_object*
> > +struct drm_i915_gem_object *
> >  i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
> >  
> >  #endif /* I915_GEM_BATCH_POOL_H */
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > index 2fafa04c45ec..593079f30fbe 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > @@ -61,6 +61,7 @@
> >  
> >  struct drm_i915_file_private;
> >  struct drm_i915_fence_reg;
> > +struct drm_i915_gem_object;
> >  struct i915_vma;
> >  
> >  typedef u32 gen6_pte_t;
> > diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
> > index ca93a40c0c87..3666b0c5f6ca 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_object.h
> > +++ b/drivers/gpu/drm/i915/i915_gem_object.h
> > @@ -1,308 +1,19 @@
> >  /*
> > - * Copyright © 2016 Intel Corporation
> > - *
> > - * Permission is hereby granted, free of charge, to any person obtaining a
> > - * copy of this software and associated documentation files (the "Software"),
> > - * to deal in the Software without restriction, including without limitation
> > - * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> > - * and/or sell copies of the Software, and to permit persons to whom the
> > - * Software is furnished to do so, subject to the following conditions:
> > - *
> > - * The above copyright notice and this permission notice (including the next
> > - * paragraph) shall be included in all copies or substantial portions of the
> > - * Software.
> > - *
> > - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> > - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> > - * IN THE SOFTWARE.
> > + * SPDX-License-Identifier: MIT
> >   *
> > + * Copyright © 2016 Intel Corporation
> >   */
> >  
> >  #ifndef __I915_GEM_OBJECT_H__
> >  #define __I915_GEM_OBJECT_H__
> >  
> > -#include <linux/reservation.h>
> > -
> > -#include <drm/drm_vma_manager.h>
> >  #include <drm/drm_gem.h>
> >  #include <drm/drm_file.h>
> >  #include <drm/drm_device.h>
> >  
> >  #include <drm/i915_drm.h>
> >  
> > -#include "i915_request.h"
> > -#include "i915_selftest.h"
> > -
> > -struct drm_i915_gem_object;
> > -
> > -/*
> > - * struct i915_lut_handle tracks the fast lookups from handle to vma used
> > - * for execbuf. Although we use a radixtree for that mapping, in order to
> > - * remove them as the object or context is closed, we need a secondary list
> > - * and a translation entry (i915_lut_handle).
> > - */
> > -struct i915_lut_handle {
> > -     struct list_head obj_link;
> > -     struct list_head ctx_link;
> > -     struct i915_gem_context *ctx;
> > -     u32 handle;
> > -};
> > -
> > -struct drm_i915_gem_object_ops {
> > -     unsigned int flags;
> > -#define I915_GEM_OBJECT_HAS_STRUCT_PAGE      BIT(0)
> > -#define I915_GEM_OBJECT_IS_SHRINKABLE        BIT(1)
> > -#define I915_GEM_OBJECT_IS_PROXY     BIT(2)
> > -#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(3)
> > -
> > -     /* Interface between the GEM object and its backing storage.
> > -      * get_pages() is called once prior to the use of the associated set
> > -      * of pages before to binding them into the GTT, and put_pages() is
> > -      * called after we no longer need them. As we expect there to be
> > -      * associated cost with migrating pages between the backing storage
> > -      * and making them available for the GPU (e.g. clflush), we may hold
> > -      * onto the pages after they are no longer referenced by the GPU
> > -      * in case they may be used again shortly (for example migrating the
> > -      * pages to a different memory domain within the GTT). put_pages()
> > -      * will therefore most likely be called when the object itself is
> > -      * being released or under memory pressure (where we attempt to
> > -      * reap pages for the shrinker).
> > -      */
> > -     int (*get_pages)(struct drm_i915_gem_object *);
> > -     void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
> > -
> > -     int (*pwrite)(struct drm_i915_gem_object *,
> > -                   const struct drm_i915_gem_pwrite *);
> > -
> > -     int (*dmabuf_export)(struct drm_i915_gem_object *);
> > -     void (*release)(struct drm_i915_gem_object *);
> > -};
> > -
> > -struct drm_i915_gem_object {
> > -     struct drm_gem_object base;
> > -
> > -     const struct drm_i915_gem_object_ops *ops;
> > -
> > -     struct {
> > -             /**
> > -              * @vma.lock: protect the list/tree of vmas
> > -              */
> > -             spinlock_t lock;
> > -
> > -             /**
> > -              * @vma.list: List of VMAs backed by this object
> > -              *
> > -              * The VMA on this list are ordered by type, all GGTT vma are
> > -              * placed at the head and all ppGTT vma are placed at the tail.
> > -              * The different types of GGTT vma are unordered between
> > -              * themselves, use the @vma.tree (which has a defined order
> > -              * between all VMA) to quickly find an exact match.
> > -              */
> > -             struct list_head list;
> > -
> > -             /**
> > -              * @vma.tree: Ordered tree of VMAs backed by this object
> > -              *
> > -              * All VMA created for this object are placed in the @vma.tree
> > -              * for fast retrieval via a binary search in
> > -              * i915_vma_instance(). They are also added to @vma.list for
> > -              * easy iteration.
> > -              */
> > -             struct rb_root tree;
> > -     } vma;
> > -
> > -     /**
> > -      * @lut_list: List of vma lookup entries in use for this object.
> > -      *
> > -      * If this object is closed, we need to remove all of its VMA from
> > -      * the fast lookup index in associated contexts; @lut_list provides
> > -      * this translation from object to context->handles_vma.
> > -      */
> > -     struct list_head lut_list;
> > -
> > -     /** Stolen memory for this object, instead of being backed by shmem. */
> > -     struct drm_mm_node *stolen;
> > -     union {
> > -             struct rcu_head rcu;
> > -             struct llist_node freed;
> > -     };
> > -
> > -     /**
> > -      * Whether the object is currently in the GGTT mmap.
> > -      */
> > -     unsigned int userfault_count;
> > -     struct list_head userfault_link;
> > -
> > -     struct list_head batch_pool_link;
> > -     I915_SELFTEST_DECLARE(struct list_head st_link);
> > -
> > -     unsigned long flags;
> > -
> > -     /**
> > -      * Have we taken a reference for the object for incomplete GPU
> > -      * activity?
> > -      */
> > -#define I915_BO_ACTIVE_REF 0
> > -
> > -     /*
> > -      * Is the object to be mapped as read-only to the GPU
> > -      * Only honoured if hardware has relevant pte bit
> > -      */
> > -     unsigned int cache_level:3;
> > -     unsigned int cache_coherent:2;
> > -#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
> > -#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
> > -     unsigned int cache_dirty:1;
> > -
> > -     /**
> > -      * @read_domains: Read memory domains.
> > -      *
> > -      * These monitor which caches contain read/write data related to the
> > -      * object. When transitioning from one set of domains to another,
> > -      * the driver is called to ensure that caches are suitably flushed and
> > -      * invalidated.
> > -      */
> > -     u16 read_domains;
> > -
> > -     /**
> > -      * @write_domain: Corresponding unique write memory domain.
> > -      */
> > -     u16 write_domain;
> > -
> > -     atomic_t frontbuffer_bits;
> > -     unsigned int frontbuffer_ggtt_origin; /* write once */
> > -     struct i915_active_request frontbuffer_write;
> > -
> > -     /** Current tiling stride for the object, if it's tiled. */
> > -     unsigned int tiling_and_stride;
> > -#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
> > -#define TILING_MASK (FENCE_MINIMUM_STRIDE-1)
> > -#define STRIDE_MASK (~TILING_MASK)
> > -
> > -     /** Count of VMA actually bound by this object */
> > -     unsigned int bind_count;
> > -     unsigned int active_count;
> > -     /** Count of how many global VMA are currently pinned for use by HW */
> > -     unsigned int pin_global;
> > -
> > -     struct {
> > -             struct mutex lock; /* protects the pages and their use */
> > -             atomic_t pages_pin_count;
> > -
> > -             struct sg_table *pages;
> > -             void *mapping;
> > -
> > -             /* TODO: whack some of this into the error state */
> > -             struct i915_page_sizes {
> > -                     /**
> > -                      * The sg mask of the pages sg_table. i.e the mask of
> > -                      * of the lengths for each sg entry.
> > -                      */
> > -                     unsigned int phys;
> > -
> > -                     /**
> > -                      * The gtt page sizes we are allowed to use given the
> > -                      * sg mask and the supported page sizes. This will
> > -                      * express the smallest unit we can use for the whole
> > -                      * object, as well as the larger sizes we may be able
> > -                      * to use opportunistically.
> > -                      */
> > -                     unsigned int sg;
> > -
> > -                     /**
> > -                      * The actual gtt page size usage. Since we can have
> > -                      * multiple vma associated with this object we need to
> > -                      * prevent any trampling of state, hence a copy of this
> > -                      * struct also lives in each vma, therefore the gtt
> > -                      * value here should only be read/write through the vma.
> > -                      */
> > -                     unsigned int gtt;
> > -             } page_sizes;
> > -
> > -             I915_SELFTEST_DECLARE(unsigned int page_mask);
> > -
> > -             struct i915_gem_object_page_iter {
> > -                     struct scatterlist *sg_pos;
> > -                     unsigned int sg_idx; /* in pages, but 32bit eek! */
> > -
> > -                     struct radix_tree_root radix;
> > -                     struct mutex lock; /* protects this cache */
> > -             } get_page;
> > -
> > -             /**
> > -              * Element within i915->mm.unbound_list or i915->mm.bound_list,
> > -              * locked by i915->mm.obj_lock.
> > -              */
> > -             struct list_head link;
> > -
> > -             /**
> > -              * Advice: are the backing pages purgeable?
> > -              */
> > -             unsigned int madv:2;
> > -
> > -             /**
> > -              * This is set if the object has been written to since the
> > -              * pages were last acquired.
> > -              */
> > -             bool dirty:1;
> > -
> > -             /**
> > -              * This is set if the object has been pinned due to unknown
> > -              * swizzling.
> > -              */
> > -             bool quirked:1;
> > -     } mm;
> > -
> > -     /** Breadcrumb of last rendering to the buffer.
> > -      * There can only be one writer, but we allow for multiple readers.
> > -      * If there is a writer that necessarily implies that all other
> > -      * read requests are complete - but we may only be lazily clearing
> > -      * the read requests. A read request is naturally the most recent
> > -      * request on a ring, so we may have two different write and read
> > -      * requests on one ring where the write request is older than the
> > -      * read request. This allows for the CPU to read from an active
> > -      * buffer by only waiting for the write to complete.
> > -      */
> > -     struct reservation_object *resv;
> > -
> > -     /** References from framebuffers, locks out tiling changes. */
> > -     unsigned int framebuffer_references;
> > -
> > -     /** Record of address bit 17 of each page at last unbind. */
> > -     unsigned long *bit_17;
> > -
> > -     union {
> > -             struct i915_gem_userptr {
> > -                     uintptr_t ptr;
> > -
> > -                     struct i915_mm_struct *mm;
> > -                     struct i915_mmu_object *mmu_object;
> > -                     struct work_struct *work;
> > -             } userptr;
> > -
> > -             unsigned long scratch;
> > -
> > -             void *gvt_info;
> > -     };
> > -
> > -     /** for phys allocated objects */
> > -     struct drm_dma_handle *phys_handle;
> > -
> > -     struct reservation_object __builtin_resv;
> > -};
> > -
> > -static inline struct drm_i915_gem_object *
> > -to_intel_bo(struct drm_gem_object *gem)
> > -{
> > -     /* Assert that to_intel_bo(NULL) == NULL */
> > -     BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));
> > -
> > -     return container_of(gem, struct drm_i915_gem_object, base);
> > -}
> > +#include "gem/i915_gem_object_types.h"
> >  
> >  struct drm_i915_gem_object *i915_gem_object_alloc(void);
> >  void i915_gem_object_free(struct drm_i915_gem_object *obj);
> 
> -- 
> Jani Nikula, Intel Open Source Graphics Center

* Re: [PATCH 19/45] drm/i915: Load balancing across a virtual engine
  2019-04-25  9:19 ` [PATCH 19/45] drm/i915: Load balancing across a virtual engine Chris Wilson
  2019-04-25 12:14   ` Tvrtko Ursulin
@ 2019-04-29 13:43   ` Tvrtko Ursulin
  1 sibling, 0 replies; 74+ messages in thread
From: Tvrtko Ursulin @ 2019-04-29 13:43 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 25/04/2019 10:19, Chris Wilson wrote:
> Having allowed the user to define a set of engines that they will want
> to only use, we go one step further and allow them to bind those engines
> into a single virtual instance. Submitting a batch to the virtual engine
> will then forward it to any one of the set in whatever manner best
> distributes load.  The virtual engine has a single timeline across all
> engines (it operates as a single queue), so it is not able to concurrently
> run batches across multiple engines by itself; that is left up to the user
> to submit multiple concurrent batches to multiple queues. Multiple users
> will be load balanced across the system.
> 
> The mechanism used for load balancing in this patch is a late greedy
> balancer. When a request is ready for execution, it is added to each
> engine's queue, and when an engine is ready for its next request it
> claims it from the virtual engine. The first engine to do so wins, i.e.
> the request is executed at the earliest opportunity (idle moment) in the
> system.
> 
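
As a rough illustration of the late greedy claim described above: the sketch
below uses made-up toy types (toy_request, toy_virtual_engine), not the
driver's structures, and only shows that the single pending request goes to
whichever sibling asks for work first; in the real code the handover happens
under the virtual engine's timeline lock.

#include <stddef.h>
#include <stdio.h>

struct toy_request {
	int seqno;
};

struct toy_virtual_engine {
	struct toy_request *request;	/* at most one pending request */
	unsigned int num_siblings;
};

/* Called by a physical sibling when it has room for another request. */
static struct toy_request *toy_claim(struct toy_virtual_engine *ve)
{
	struct toy_request *rq = ve->request;

	if (rq)
		ve->request = NULL;	/* first claimant wins */

	return rq;
}

int main(void)
{
	struct toy_request rq = { .seqno = 1 };
	struct toy_virtual_engine ve = { .request = &rq, .num_siblings = 2 };
	unsigned int n;

	/* Both siblings go idle in turn; only the first to ask gets the work. */
	for (n = 0; n < ve.num_siblings; n++) {
		struct toy_request *claimed = toy_claim(&ve);

		printf("sibling%u: %s\n", n,
		       claimed ? "claimed the request" : "nothing pending");
	}

	return 0;
}
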
> As not all HW is created equal, the user is still able to skip the
> virtual engine and execute the batch on a specific engine, all within the
> same queue. It will then be executed in order on the correct engine,
> with execution on other virtual engines being moved away due to the load
> detection.
> 
> A couple of areas for potential improvement left!
> 
> - The virtual engine always takes priority over equal-priority tasks.
> Mostly broken up by applying FQ_CODEL rules for prioritising new clients,
> and hopefully the virtual and real engines are not then congested (i.e.
> all work is via virtual engines, or all work is to the real engine).
> 
> - We require the breadcrumb irq around every virtual engine request. For
> normal engines, we eliminate the need for the slow round trip via
> interrupt by using the submit fence and queueing in order. For virtual
> engines, we have to allow any job to transfer to a new ring, and cannot
> coalesce the submissions, so require the completion fence instead,
> forcing the persistent use of interrupts.
> 
> - We only drip feed single requests through each virtual engine and onto
> the physical engines, even if there was enough work to fill all ELSP,
> leaving small stalls with an idle CS event at the end of every request.
> Could we be greedy and fill both slots? Being lazy is virtuous for load
> distribution on less-than-full workloads though.
> 
> Other areas of improvement are more general, such as reducing lock
> contention, reducing dispatch overhead, looking at direct submission
> rather than bouncing around tasklets etc.
> 
> sseu: Lift the restriction to allow sseu to be reconfigured on virtual
> engines composed of RENDER_CLASS (rcs).
> 
> v2: macroize check_user_mbz()
> v3: Cancel virtual engines on wedging
> v4: Commence commenting
> v5: Replace 64b sibling_mask with a list of class:instance
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_breadcrumbs.c  |   6 +-
>   drivers/gpu/drm/i915/gt/intel_engine_types.h |   8 +
>   drivers/gpu/drm/i915/gt/intel_lrc.c          | 656 ++++++++++++++++++-
>   drivers/gpu/drm/i915/gt/intel_lrc.h          |   9 +
>   drivers/gpu/drm/i915/gt/selftest_lrc.c       | 180 +++++
>   drivers/gpu/drm/i915/i915_gem.h              |   5 +
>   drivers/gpu/drm/i915/i915_gem_context.c      | 118 +++-
>   drivers/gpu/drm/i915/i915_scheduler.c        |  18 +-
>   drivers/gpu/drm/i915/i915_timeline_types.h   |   1 +
>   include/uapi/drm/i915_drm.h                  |  39 ++
>   10 files changed, 1012 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> index 4283224249d4..35eef02b3f09 100644
> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> @@ -290,8 +290,12 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
>   				break;
>   		}
>   		list_add(&rq->signal_link, pos);
> -		if (pos == &ce->signals) /* catch transitions from empty list */
> +		if (pos == &ce->signals) { /* catch transitions from empty */
>   			list_move_tail(&ce->signal_link, &b->signalers);
> +		} else if (ce->engine != rq->engine) { /* virtualised */
> +			list_move_tail(&ce->signal_link, &b->signalers);
> +			intel_engine_queue_breadcrumbs(rq->engine);
> +		}
>   
>   		set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
>   		spin_unlock(&b->irq_lock);
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 9d64e33f8427..b07d3685e2cb 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -227,6 +227,7 @@ struct intel_engine_execlists {
>   	 * @queue: queue of requests, in priority lists
>   	 */
>   	struct rb_root_cached queue;
> +	struct rb_root_cached virtual;
>   
>   	/**
>   	 * @csb_write: control register for Context Switch buffer
> @@ -445,6 +446,7 @@ struct intel_engine_cs {
>   #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
>   #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
>   #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
> +#define I915_ENGINE_IS_VIRTUAL       BIT(5)
>   	unsigned int flags;
>   
>   	/*
> @@ -534,6 +536,12 @@ intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
>   	return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
>   }
>   
> +static inline bool
> +intel_engine_is_virtual(const struct intel_engine_cs *engine)
> +{
> +	return engine->flags & I915_ENGINE_IS_VIRTUAL;
> +}
> +
>   #define instdone_slice_mask(dev_priv__) \
>   	(IS_GEN(dev_priv__, 7) ? \
>   	 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 2e74c792104e..e8faf3417f9c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -136,6 +136,7 @@
>   #include "i915_drv.h"
>   #include "i915_gem_render_state.h"
>   #include "i915_vgpu.h"
> +#include "intel_engine_pm.h"
>   #include "intel_lrc_reg.h"
>   #include "intel_mocs.h"
>   #include "intel_reset.h"
> @@ -165,6 +166,41 @@
>   
>   #define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT | I915_PRIORITY_NOSEMAPHORE)
>   
> +struct virtual_engine {
> +	struct intel_engine_cs base;
> +	struct intel_context context;
> +
> +	/*
> +	 * We allow only a single request through the virtual engine at a time
> +	 * (each request in the timeline waits for the completion fence of
> +	 * the previous before being submitted). By restricting ourselves to
> +	 * only submitting a single request, each request is placed on to a
> +	 * physical engine to maximise load spreading (by virtue of the late
> +	 * greedy scheduling -- each real engine takes the next available
> +	 * request upon idling).
> +	 */
> +	struct i915_request *request;
> +
> +	/*
> +	 * We keep a rbtree of available virtual engines inside each physical
> +	 * engine, sorted by priority. Here we preallocate the nodes we need
> +	 * for the virtual engine, indexed by physical_engine->id.
> +	 */
> +	struct ve_node {
> +		struct rb_node rb;
> +		int prio;
> +	} nodes[I915_NUM_ENGINES];
> +
> +	/* And finally, which physical engines this virtual engine maps onto. */
> +	unsigned int num_siblings;
> +	struct intel_engine_cs *siblings[0];
> +};
> +
> +static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
> +{

Would GEM_BUG_ON(!(engine->flags & ..VIRTUAL..)) be too much?
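
Something along these lines, perhaps -- just a sketch, reusing the
I915_ENGINE_IS_VIRTUAL flag and intel_engine_is_virtual() helper added
earlier in this patch:

static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
{
	/* Only engines created as virtual may be downcast like this. */
	GEM_BUG_ON(!intel_engine_is_virtual(engine));

	return container_of(engine, struct virtual_engine, base);
}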

Regards,

Tvrtko

> +	return container_of(engine, struct virtual_engine, base);
> +}
> +
>   static int execlists_context_deferred_alloc(struct intel_context *ce,
>   					    struct intel_engine_cs *engine);
>   static void execlists_init_reg_state(u32 *reg_state,
> @@ -228,7 +264,8 @@ static int queue_prio(const struct intel_engine_execlists *execlists)
>   }
>   
>   static inline bool need_preempt(const struct intel_engine_cs *engine,
> -				const struct i915_request *rq)
> +				const struct i915_request *rq,
> +				struct rb_node *rb)
>   {
>   	int last_prio;
>   
> @@ -263,6 +300,25 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
>   	    rq_prio(list_next_entry(rq, link)) > last_prio)
>   		return true;
>   
> +	if (rb) {
> +		struct virtual_engine *ve =
> +			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> +		bool preempt = false;
> +
> +		if (engine == ve->siblings[0]) { /* only preempt one sibling */
> +			struct i915_request *next;
> +
> +			rcu_read_lock();
> +			next = READ_ONCE(ve->request);
> +			if (next)
> +				preempt = rq_prio(next) > last_prio;
> +			rcu_read_unlock();
> +		}
> +
> +		if (preempt)
> +			return preempt;
> +	}
> +
>   	/*
>   	 * If the inflight context did not trigger the preemption, then maybe
>   	 * it was the set of queued requests? Pick the highest priority in
> @@ -381,6 +437,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   	list_for_each_entry_safe_reverse(rq, rn,
>   					 &engine->timeline.requests,
>   					 link) {
> +		struct intel_engine_cs *owner;
> +
>   		if (i915_request_completed(rq))
>   			break;
>   
> @@ -389,16 +447,32 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   
>   		GEM_BUG_ON(rq->hw_context->active);
>   
> -		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
> -		if (rq_prio(rq) != prio) {
> -			prio = rq_prio(rq);
> -			pl = i915_sched_lookup_priolist(engine, prio);
> -		}
> -		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
> +		/*
> +		 * Push the request back into the queue for later resubmission.
> +		 * If this request is not native to this physical engine (i.e.
> +		 * it came from a virtual source), push it back onto the virtual
> +		 * engine so that it can be moved across onto another physical
> +		 * engine as load dictates.
> +		 */
> +		owner = rq->hw_context->engine;
> +		if (likely(owner == engine)) {
> +			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
> +			if (rq_prio(rq) != prio) {
> +				prio = rq_prio(rq);
> +				pl = i915_sched_lookup_priolist(engine, prio);
> +			}
> +			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
>   
> -		list_add(&rq->sched.link, pl);
> +			list_add(&rq->sched.link, pl);
> +			active = rq;
> +		} else {
> +			if (__i915_request_has_started(rq))
> +				rq->sched.attr.priority |= ACTIVE_PRIORITY;
>   
> -		active = rq;
> +			rq->engine = owner;
> +			owner->submit_request(rq);
> +			active = NULL;
> +		}
>   	}
>   
>   	/*
> @@ -418,7 +492,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   	 * in the priority queue, but they will not gain immediate access to
>   	 * the GPU.
>   	 */
> -	if (~prio & ACTIVE_PRIORITY && __i915_request_has_started(active)) {
> +	if ( ~prio & ACTIVE_PRIORITY &&
> +	    active && __i915_request_has_started(active)) {
>   		prio |= ACTIVE_PRIORITY;
>   		active->sched.attr.priority = prio;
>   		list_move_tail(&active->sched.link,
> @@ -658,6 +733,72 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
>   						  execlists));
>   }
>   
> +static void virtual_update_register_offsets(u32 *regs,
> +					    struct intel_engine_cs *engine)
> +{
> +	u32 base = engine->mmio_base;
> +
> +	regs[CTX_CONTEXT_CONTROL] =
> +		i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base));
> +	regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
> +	regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base));
> +	regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base));
> +	regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base));
> +
> +	regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
> +	regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
> +	regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
> +	regs[CTX_SECOND_BB_HEAD_U] =
> +		i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
> +	regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
> +	regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
> +
> +	regs[CTX_CTX_TIMESTAMP] =
> +		i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
> +	regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3));
> +	regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3));
> +	regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2));
> +	regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 2));
> +	regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 1));
> +	regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 1));
> +	regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
> +	regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
> +
> +	if (engine->class == RENDER_CLASS) {
> +		regs[CTX_RCS_INDIRECT_CTX] =
> +			i915_mmio_reg_offset(RING_INDIRECT_CTX(base));
> +		regs[CTX_RCS_INDIRECT_CTX_OFFSET] =
> +			i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base));
> +		regs[CTX_BB_PER_CTX_PTR] =
> +			i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));
> +
> +		regs[CTX_R_PWR_CLK_STATE] =
> +			i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
> +	}
> +}
> +
> +static bool virtual_matches(const struct virtual_engine *ve,
> +			    const struct i915_request *rq,
> +			    const struct intel_engine_cs *engine)
> +{
> +	const struct intel_engine_cs *active;
> +
> +	/*
> +	 * We track when the HW has completed saving the context image
> +	 * (i.e. when we have seen the final CS event switching out of
> +	 * the context) and must not overwrite the context image before
> +	 * then. This restricts us to only using the active engine
> +	 * while the previous virtualized request is inflight (so
> +	 * we reuse the register offsets). This is a very small
> +	 * hysteresis on the greedy selection algorithm.
> +	 */
> +	active = READ_ONCE(ve->context.active);
> +	if (active && active != engine)
> +		return false;
> +
> +	return true;
> +}
> +
>   static void execlists_dequeue(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
> @@ -690,6 +831,26 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	 * and context switches) submission.
>   	 */
>   
> +	for (rb = rb_first_cached(&execlists->virtual); rb; ) {
> +		struct virtual_engine *ve =
> +			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> +		struct i915_request *rq = READ_ONCE(ve->request);
> +
> +		if (!rq) { /* lazily cleanup after another engine handled rq */
> +			rb_erase_cached(rb, &execlists->virtual);
> +			RB_CLEAR_NODE(rb);
> +			rb = rb_first_cached(&execlists->virtual);
> +			continue;
> +		}
> +
> +		if (!virtual_matches(ve, rq, engine)) {
> +			rb = rb_next(rb);
> +			continue;
> +		}
> +
> +		break;
> +	}
> +
>   	if (last) {
>   		/*
>   		 * Don't resubmit or switch until all outstanding
> @@ -711,7 +872,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
>   			return;
>   
> -		if (need_preempt(engine, last)) {
> +		if (need_preempt(engine, last, rb)) {
>   			inject_preempt_context(engine);
>   			return;
>   		}
> @@ -751,6 +912,89 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		last->tail = last->wa_tail;
>   	}
>   
> +	while (rb) { /* XXX virtual is always taking precedence */
> +		struct virtual_engine *ve =
> +			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> +		struct i915_request *rq;
> +
> +		spin_lock(&ve->base.timeline.lock);
> +
> +		rq = ve->request;
> +		if (unlikely(!rq)) { /* lost the race to a sibling */
> +			spin_unlock(&ve->base.timeline.lock);
> +			rb_erase_cached(rb, &execlists->virtual);
> +			RB_CLEAR_NODE(rb);
> +			rb = rb_first_cached(&execlists->virtual);
> +			continue;
> +		}
> +
> +		GEM_BUG_ON(rq != ve->request);
> +		GEM_BUG_ON(rq->engine != &ve->base);
> +		GEM_BUG_ON(rq->hw_context != &ve->context);
> +
> +		if (rq_prio(rq) >= queue_prio(execlists)) {
> +			if (!virtual_matches(ve, rq, engine)) {
> +				spin_unlock(&ve->base.timeline.lock);
> +				rb = rb_next(rb);
> +				continue;
> +			}
> +
> +			if (last && !can_merge_rq(last, rq)) {
> +				spin_unlock(&ve->base.timeline.lock);
> +				return; /* leave this rq for another engine */
> +			}
> +
> +			GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
> +				  engine->name,
> +				  rq->fence.context,
> +				  rq->fence.seqno,
> +				  i915_request_completed(rq) ? "!" :
> +				  i915_request_started(rq) ? "*" :
> +				  "",
> +				  yesno(engine != ve->siblings[0]));
> +
> +			ve->request = NULL;
> +			ve->base.execlists.queue_priority_hint = INT_MIN;
> +			rb_erase_cached(rb, &execlists->virtual);
> +			RB_CLEAR_NODE(rb);
> +
> +			rq->engine = engine;
> +
> +			if (engine != ve->siblings[0]) {
> +				u32 *regs = ve->context.lrc_reg_state;
> +				unsigned int n;
> +
> +				GEM_BUG_ON(READ_ONCE(ve->context.active));
> +				virtual_update_register_offsets(regs, engine);
> +
> +				/*
> +				 * Move the bound engine to the top of the list
> +				 * for future execution. We then kick this
> +				 * tasklet first before checking others, so that
> +				 * we preferentially reuse this set of bound
> +				 * registers.
> +				 */
> +				for (n = 1; n < ve->num_siblings; n++) {
> +					if (ve->siblings[n] == engine) {
> +						swap(ve->siblings[n],
> +						     ve->siblings[0]);
> +						break;
> +					}
> +				}
> +
> +				GEM_BUG_ON(ve->siblings[0] != engine);
> +			}
> +
> +			__i915_request_submit(rq);
> +			trace_i915_request_in(rq, port_index(port, execlists));
> +			submit = true;
> +			last = rq;
> +		}
> +
> +		spin_unlock(&ve->base.timeline.lock);
> +		break;
> +	}
> +
>   	while ((rb = rb_first_cached(&execlists->queue))) {
>   		struct i915_priolist *p = to_priolist(rb);
>   		struct i915_request *rq, *rn;
> @@ -1874,6 +2118,25 @@ static void reset_csb_pointers(struct intel_engine_execlists *execlists)
>   			       &execlists->csb_status[reset_value]);
>   }
>   
> +static struct i915_request *active_request(struct i915_request *rq)
> +{
> +	const struct list_head * const list = &rq->engine->timeline.requests;
> +	const struct intel_context * const context = rq->hw_context;
> +	struct i915_request *active = NULL;
> +
> +	list_for_each_entry_from_reverse(rq, list, link) {
> +		if (i915_request_completed(rq))
> +			break;
> +
> +		if (rq->hw_context != context)
> +			break;
> +
> +		active = rq;
> +	}
> +
> +	return active;
> +}
> +
>   static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
> @@ -1894,7 +2157,8 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
>   	if (!port_isset(execlists->port))
>   		goto out_clear;
>   
> -	ce = port_request(execlists->port)->hw_context;
> +	rq = port_request(execlists->port);
> +	ce = rq->hw_context;
>   
>   	/*
>   	 * Catch up with any missed context-switch interrupts.
> @@ -1907,16 +2171,10 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
>   	 */
>   	execlists_cancel_port_requests(execlists);
>   
> -	/* Push back any incomplete requests for replay after the reset. */
> -	rq = __unwind_incomplete_requests(engine);
> +	rq = active_request(rq);
>   	if (!rq)
>   		goto out_replay;
>   
> -	if (rq->hw_context != ce) { /* caught just before a CS event */
> -		rq = NULL;
> -		goto out_replay;
> -	}
> -
>   	/*
>   	 * If this request hasn't started yet, e.g. it is waiting on a
>   	 * semaphore, we need to avoid skipping the request or else we
> @@ -1963,13 +2221,16 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
>   	}
>   	execlists_init_reg_state(regs, ce, engine, ce->ring);
>   
> -	/* Rerun the request; its payload has been neutered (if guilty). */
>   out_replay:
> +	/* Rerun the request; its payload has been neutered (if guilty). */
>   	ce->ring->head =
>   		rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail;
>   	intel_ring_update_space(ce->ring);
>   	__execlists_update_reg_state(ce, engine);
>   
> +	/* Push back any incomplete requests for replay after the reset. */
> +	__unwind_incomplete_requests(engine);
> +
>   out_clear:
>   	execlists_clear_all_active(execlists);
>   }
> @@ -2043,6 +2304,26 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>   		i915_priolist_free(p);
>   	}
>   
> +	/* Cancel all attached virtual engines */
> +	while ((rb = rb_first_cached(&execlists->virtual))) {
> +		struct virtual_engine *ve =
> +			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> +
> +		rb_erase_cached(rb, &execlists->virtual);
> +		RB_CLEAR_NODE(rb);
> +
> +		spin_lock(&ve->base.timeline.lock);
> +		if (ve->request) {
> +			ve->request->engine = engine;
> +			__i915_request_submit(ve->request);
> +			dma_fence_set_error(&ve->request->fence, -EIO);
> +			i915_request_mark_complete(ve->request);
> +			ve->base.execlists.queue_priority_hint = INT_MIN;
> +			ve->request = NULL;
> +		}
> +		spin_unlock(&ve->base.timeline.lock);
> +	}
> +
>   	/* Remaining _unready_ requests will be nop'ed when submitted */
>   
>   	execlists->queue_priority_hint = INT_MIN;
> @@ -2760,6 +3041,318 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
>   	return ret;
>   }
>   
> +static void virtual_context_destroy(struct kref *kref)
> +{
> +	struct virtual_engine *ve =
> +		container_of(kref, typeof(*ve), context.ref);
> +	unsigned int n;
> +
> +	GEM_BUG_ON(ve->request);
> +	GEM_BUG_ON(ve->context.active);
> +
> +	for (n = 0; n < ve->num_siblings; n++) {
> +		struct intel_engine_cs *sibling = ve->siblings[n];
> +		struct rb_node *node = &ve->nodes[sibling->id].rb;
> +
> +		if (RB_EMPTY_NODE(node))
> +			continue;
> +
> +		spin_lock_irq(&sibling->timeline.lock);
> +
> +		if (!RB_EMPTY_NODE(node))
> +			rb_erase_cached(node, &sibling->execlists.virtual);
> +
> +		spin_unlock_irq(&sibling->timeline.lock);
> +	}
> +	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
> +
> +	if (ve->context.state)
> +		__execlists_context_fini(&ve->context);
> +
> +	i915_timeline_fini(&ve->base.timeline);
> +	kfree(ve);
> +}
> +
> +static void virtual_engine_initial_hint(struct virtual_engine *ve)
> +{
> +	int swp;
> +
> +	/*
> +	 * Pick a random sibling on starting to help spread the load around.
> +	 *
> +	 * New contexts are typically created with exactly the same order
> +	 * of siblings, and often started in batches. Due to the way we iterate
> +	 * the array of siblings when submitting requests, sibling[0] is
> +	 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
> +	 * randomised across the system, we also help spread the load by the
> +	 * first engine we inspect being different each time.
> +	 *
> +	 * NB This does not force us to execute on this engine, it will just
> +	 * typically be the first we inspect for submission.
> +	 */
> +	swp = prandom_u32_max(ve->num_siblings);
> +	if (!swp)
> +		return;
> +
> +	swap(ve->siblings[swp], ve->siblings[0]);
> +	virtual_update_register_offsets(ve->context.lrc_reg_state,
> +					ve->siblings[0]);
> +}
> +
> +static int virtual_context_pin(struct intel_context *ce)
> +{
> +	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
> +	int err;
> +
> +	/* Note: we must use a real engine class for setting up reg state */
> +	err = __execlists_context_pin(ce, ve->siblings[0]);
> +	if (err)
> +		return err;
> +
> +	virtual_engine_initial_hint(ve);
> +	return 0;
> +}
> +
> +static void virtual_context_enter(struct intel_context *ce)
> +{
> +	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
> +	unsigned int n;
> +
> +	for (n = 0; n < ve->num_siblings; n++)
> +		intel_engine_pm_get(ve->siblings[n]);
> +}
> +
> +static void virtual_context_exit(struct intel_context *ce)
> +{
> +	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
> +	unsigned int n;
> +
> +	for (n = 0; n < ve->num_siblings; n++)
> +		intel_engine_pm_put(ve->siblings[n]);
> +}
> +
> +static const struct intel_context_ops virtual_context_ops = {
> +	.pin = virtual_context_pin,
> +	.unpin = execlists_context_unpin,
> +
> +	.enter = virtual_context_enter,
> +	.exit = virtual_context_exit,
> +
> +	.destroy = virtual_context_destroy,
> +};
> +
> +static void virtual_submission_tasklet(unsigned long data)
> +{
> +	struct virtual_engine * const ve = (struct virtual_engine *)data;
> +	const int prio = ve->base.execlists.queue_priority_hint;
> +	unsigned int n;
> +
> +	local_irq_disable();
> +	for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
> +		struct intel_engine_cs *sibling = ve->siblings[n];
> +		struct ve_node * const node = &ve->nodes[sibling->id];
> +		struct rb_node **parent, *rb;
> +		bool first;
> +
> +		spin_lock(&sibling->timeline.lock);
> +
> +		if (!RB_EMPTY_NODE(&node->rb)) {
> +			/*
> +			 * Cheat and avoid rebalancing the tree if we can
> +			 * reuse this node in situ.
> +			 */
> +			first = rb_first_cached(&sibling->execlists.virtual) ==
> +				&node->rb;
> +			if (prio == node->prio || (prio > node->prio && first))
> +				goto submit_engine;
> +
> +			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
> +		}
> +
> +		rb = NULL;
> +		first = true;
> +		parent = &sibling->execlists.virtual.rb_root.rb_node;
> +		while (*parent) {
> +			struct ve_node *other;
> +
> +			rb = *parent;
> +			other = rb_entry(rb, typeof(*other), rb);
> +			if (prio > other->prio) {
> +				parent = &rb->rb_left;
> +			} else {
> +				parent = &rb->rb_right;
> +				first = false;
> +			}
> +		}
> +
> +		rb_link_node(&node->rb, rb, parent);
> +		rb_insert_color_cached(&node->rb,
> +				       &sibling->execlists.virtual,
> +				       first);
> +
> +submit_engine:
> +		GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
> +		node->prio = prio;
> +		if (first && prio > sibling->execlists.queue_priority_hint) {
> +			sibling->execlists.queue_priority_hint = prio;
> +			tasklet_hi_schedule(&sibling->execlists.tasklet);
> +		}
> +
> +		spin_unlock(&sibling->timeline.lock);
> +	}
> +	local_irq_enable();
> +}
> +
> +static void virtual_submit_request(struct i915_request *rq)
> +{
> +	struct virtual_engine *ve = to_virtual_engine(rq->engine);
> +
> +	GEM_TRACE("%s: rq=%llx:%lld\n",
> +		  ve->base.name,
> +		  rq->fence.context,
> +		  rq->fence.seqno);
> +
> +	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
> +
> +	GEM_BUG_ON(ve->request);
> +	ve->base.execlists.queue_priority_hint = rq_prio(rq);
> +	WRITE_ONCE(ve->request, rq);
> +
> +	tasklet_schedule(&ve->base.execlists.tasklet);
> +}
> +
> +struct intel_context *
> +intel_execlists_create_virtual(struct i915_gem_context *ctx,
> +			       struct intel_engine_cs **siblings,
> +			       unsigned int count)
> +{
> +	struct virtual_engine *ve;
> +	unsigned int n;
> +	int err;
> +
> +	if (count == 0)
> +		return ERR_PTR(-EINVAL);
> +
> +	if (count == 1)
> +		return intel_context_create(ctx, siblings[0]);
> +
> +	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
> +	if (!ve)
> +		return ERR_PTR(-ENOMEM);
> +
> +	ve->base.i915 = ctx->i915;
> +	ve->base.id = -1;
> +	ve->base.class = OTHER_CLASS;
> +	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
> +	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
> +	ve->base.flags = I915_ENGINE_IS_VIRTUAL;
> +
> +	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
> +
> +	err = i915_timeline_init(ctx->i915, &ve->base.timeline, NULL);
> +	if (err)
> +		goto err_put;
> +	i915_timeline_set_subclass(&ve->base.timeline, TIMELINE_VIRTUAL);
> +
> +	intel_engine_init_execlists(&ve->base);
> +
> +	ve->base.cops = &virtual_context_ops;
> +	ve->base.request_alloc = execlists_request_alloc;
> +
> +	ve->base.schedule = i915_schedule;
> +	ve->base.submit_request = virtual_submit_request;
> +
> +	ve->base.execlists.queue_priority_hint = INT_MIN;
> +	tasklet_init(&ve->base.execlists.tasklet,
> +		     virtual_submission_tasklet,
> +		     (unsigned long)ve);
> +
> +	intel_context_init(&ve->context, ctx, &ve->base);
> +
> +	for (n = 0; n < count; n++) {
> +		struct intel_engine_cs *sibling = siblings[n];
> +
> +		GEM_BUG_ON(!is_power_of_2(sibling->mask));
> +		if (sibling->mask & ve->base.mask) {
> +			DRM_DEBUG("duplicate %s entry in load balancer\n",
> +				  sibling->name);
> +			err = -EINVAL;
> +			goto err_put;
> +		}
> +
> +		/*
> +		 * The virtual engine implementation is tightly coupled to
> +		 * the execlists backend -- we push requests directly
> +		 * into a tree inside each physical engine. We could support
> +		 * layering if we handle cloning of the requests and
> +		 * submitting a copy into each backend.
> +		 */
> +		if (sibling->execlists.tasklet.func !=
> +		    execlists_submission_tasklet) {
> +			err = -ENODEV;
> +			goto err_put;
> +		}
> +
> +		GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
> +		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
> +
> +		ve->siblings[ve->num_siblings++] = sibling;
> +		ve->base.mask |= sibling->mask;
> +
> +		/*
> +		 * All physical engines must be compatible for their emission
> +		 * functions (as we build the instructions during request
> +		 * construction and do not alter them before submission
> +		 * on the physical engine). We use the engine class as a guide
> +		 * here, although that could be refined.
> +		 */
> +		if (ve->base.class != OTHER_CLASS) {
> +			if (ve->base.class != sibling->class) {
> +				DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
> +					  sibling->class, ve->base.class);
> +				err = -EINVAL;
> +				goto err_put;
> +			}
> +			continue;
> +		}
> +
> +		ve->base.class = sibling->class;
> +		ve->base.uabi_class = sibling->uabi_class;
> +		snprintf(ve->base.name, sizeof(ve->base.name),
> +			 "v%dx%d", ve->base.class, count);
> +		ve->base.context_size = sibling->context_size;
> +
> +		ve->base.emit_bb_start = sibling->emit_bb_start;
> +		ve->base.emit_flush = sibling->emit_flush;
> +		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
> +		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
> +		ve->base.emit_fini_breadcrumb_dw =
> +			sibling->emit_fini_breadcrumb_dw;
> +	}
> +
> +	return &ve->context;
> +
> +err_put:
> +	intel_context_put(&ve->context);
> +	return ERR_PTR(err);
> +}
> +
> +struct intel_context *
> +intel_execlists_clone_virtual(struct i915_gem_context *ctx,
> +			      struct intel_engine_cs *src)
> +{
> +	struct virtual_engine *se = to_virtual_engine(src);
> +	struct intel_context *dst;
> +
> +	dst = intel_execlists_create_virtual(ctx,
> +					     se->siblings,
> +					     se->num_siblings);
> +	if (IS_ERR(dst))
> +		return dst;
> +
> +	return dst;
> +}
> +
>   void intel_execlists_show_requests(struct intel_engine_cs *engine,
>   				   struct drm_printer *m,
>   				   void (*show_request)(struct drm_printer *m,
> @@ -2817,6 +3410,29 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
>   		show_request(m, last, "\t\tQ ");
>   	}
>   
> +	last = NULL;
> +	count = 0;
> +	for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
> +		struct virtual_engine *ve =
> +			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> +		struct i915_request *rq = READ_ONCE(ve->request);
> +
> +		if (rq) {
> +			if (count++ < max - 1)
> +				show_request(m, rq, "\t\tV ");
> +			else
> +				last = rq;
> +		}
> +	}
> +	if (last) {
> +		if (count > max) {
> +			drm_printf(m,
> +				   "\t\t...skipping %d virtual requests...\n",
> +				   count - max);
> +		}
> +		show_request(m, last, "\t\tV ");
> +	}
> +
>   	spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
> index a0dc907a7249..5530606052e5 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
> @@ -114,4 +114,13 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
>   							const char *prefix),
>   				   unsigned int max);
>   
> +struct intel_context *
> +intel_execlists_create_virtual(struct i915_gem_context *ctx,
> +			       struct intel_engine_cs **siblings,
> +			       unsigned int count);
> +
> +struct intel_context *
> +intel_execlists_clone_virtual(struct i915_gem_context *ctx,
> +			      struct intel_engine_cs *src);
> +
>   #endif /* _INTEL_LRC_H_ */
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 6f223f7facde..558399e5c7bb 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -1301,6 +1301,185 @@ static int live_preempt_smoke(void *arg)
>   	return err;
>   }
>   
> +static int nop_virtual_engine(struct drm_i915_private *i915,
> +			      struct intel_engine_cs **siblings,
> +			      unsigned int nsibling,
> +			      unsigned int nctx,
> +			      unsigned int flags)
> +#define CHAIN BIT(0)
> +{
> +	IGT_TIMEOUT(end_time);
> +	struct i915_request *request[16];
> +	struct i915_gem_context *ctx[16];
> +	struct intel_context *ve[16];
> +	unsigned long n, prime, nc;
> +	struct igt_live_test t;
> +	ktime_t times[2] = {};
> +	int err;
> +
> +	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx));
> +
> +	for (n = 0; n < nctx; n++) {
> +		ctx[n] = kernel_context(i915);
> +		if (!ctx[n]) {
> +			err = -ENOMEM;
> +			nctx = n;
> +			goto out;
> +		}
> +
> +		ve[n] = intel_execlists_create_virtual(ctx[n],
> +						       siblings, nsibling);
> +		if (IS_ERR(ve[n])) {
> +			kernel_context_close(ctx[n]);
> +			err = PTR_ERR(ve[n]);
> +			nctx = n;
> +			goto out;
> +		}
> +
> +		err = intel_context_pin(ve[n]);
> +		if (err) {
> +			intel_context_put(ve[n]);
> +			kernel_context_close(ctx[n]);
> +			nctx = n;
> +			goto out;
> +		}
> +	}
> +
> +	err = igt_live_test_begin(&t, i915, __func__, ve[0]->engine->name);
> +	if (err)
> +		goto out;
> +
> +	for_each_prime_number_from(prime, 1, 8192) {
> +		times[1] = ktime_get_raw();
> +
> +		if (flags & CHAIN) {
> +			for (nc = 0; nc < nctx; nc++) {
> +				for (n = 0; n < prime; n++) {
> +					request[nc] =
> +						i915_request_create(ve[nc]);
> +					if (IS_ERR(request[nc])) {
> +						err = PTR_ERR(request[nc]);
> +						goto out;
> +					}
> +
> +					i915_request_add(request[nc]);
> +				}
> +			}
> +		} else {
> +			for (n = 0; n < prime; n++) {
> +				for (nc = 0; nc < nctx; nc++) {
> +					request[nc] =
> +						i915_request_create(ve[nc]);
> +					if (IS_ERR(request[nc])) {
> +						err = PTR_ERR(request[nc]);
> +						goto out;
> +					}
> +
> +					i915_request_add(request[nc]);
> +				}
> +			}
> +		}
> +
> +		for (nc = 0; nc < nctx; nc++) {
> +			if (i915_request_wait(request[nc],
> +					      I915_WAIT_LOCKED,
> +					      HZ / 10) < 0) {
> +				pr_err("%s(%s): wait for %llx:%lld timed out\n",
> +				       __func__, ve[0]->engine->name,
> +				       request[nc]->fence.context,
> +				       request[nc]->fence.seqno);
> +
> +				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
> +					  __func__, ve[0]->engine->name,
> +					  request[nc]->fence.context,
> +					  request[nc]->fence.seqno);
> +				GEM_TRACE_DUMP();
> +				i915_gem_set_wedged(i915);
> +				break;
> +			}
> +		}
> +
> +		times[1] = ktime_sub(ktime_get_raw(), times[1]);
> +		if (prime == 1)
> +			times[0] = times[1];
> +
> +		if (__igt_timeout(end_time, NULL))
> +			break;
> +	}
> +
> +	err = igt_live_test_end(&t);
> +	if (err)
> +		goto out;
> +
> +	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
> +		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
> +		prime, div64_u64(ktime_to_ns(times[1]), prime));
> +
> +out:
> +	if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +		err = -EIO;
> +
> +	for (nc = 0; nc < nctx; nc++) {
> +		intel_context_unpin(ve[nc]);
> +		intel_context_put(ve[nc]);
> +		kernel_context_close(ctx[nc]);
> +	}
> +	return err;
> +}
> +
> +static int live_virtual_engine(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	unsigned int class, inst;
> +	int err = -ENODEV;
> +
> +	if (USES_GUC_SUBMISSION(i915))
> +		return 0;
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +
> +	for_each_engine(engine, i915, id) {
> +		err = nop_virtual_engine(i915, &engine, 1, 1, 0);
> +		if (err) {
> +			pr_err("Failed to wrap engine %s: err=%d\n",
> +			       engine->name, err);
> +			goto out_unlock;
> +		}
> +	}
> +
> +	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
> +		int nsibling, n;
> +
> +		nsibling = 0;
> +		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
> +			if (!i915->engine_class[class][inst])
> +				break;
> +
> +			siblings[nsibling++] = i915->engine_class[class][inst];
> +		}
> +		if (nsibling < 2)
> +			continue;
> +
> +		for (n = 1; n <= nsibling + 1; n++) {
> +			err = nop_virtual_engine(i915, siblings, nsibling,
> +						 n, 0);
> +			if (err)
> +				goto out_unlock;
> +		}
> +
> +		err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN);
> +		if (err)
> +			goto out_unlock;
> +	}
> +
> +out_unlock:
> +	mutex_unlock(&i915->drm.struct_mutex);
> +	return err;
> +}
> +
>   int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   {
>   	static const struct i915_subtest tests[] = {
> @@ -1313,6 +1492,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   		SUBTEST(live_chain_preempt),
>   		SUBTEST(live_preempt_hang),
>   		SUBTEST(live_preempt_smoke),
> +		SUBTEST(live_virtual_engine),
>   	};
>   
>   	if (!HAS_EXECLISTS(i915))
> diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
> index 67f8a4a807a0..fe82d3571072 100644
> --- a/drivers/gpu/drm/i915/i915_gem.h
> +++ b/drivers/gpu/drm/i915/i915_gem.h
> @@ -91,4 +91,9 @@ static inline bool __tasklet_enable(struct tasklet_struct *t)
>   	return atomic_dec_and_test(&t->count);
>   }
>   
> +static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
> +{
> +	return test_bit(TASKLET_STATE_SCHED, &t->state);
> +}
> +
>   #endif /* __I915_GEM_H__ */
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index ba7582d955d1..57b09f624bb4 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -86,6 +86,7 @@
>    */
>   
>   #include <linux/log2.h>
> +#include <linux/nospec.h>
>   
>   #include <drm/i915_drm.h>
>   
> @@ -1209,7 +1210,6 @@ __intel_context_reconfigure_sseu(struct intel_context *ce,
>   	int ret;
>   
>   	GEM_BUG_ON(INTEL_GEN(ce->gem_context->i915) < 8);
> -	GEM_BUG_ON(ce->engine->id != RCS0);
>   
>   	ret = intel_context_lock_pinned(ce);
>   	if (ret)
> @@ -1397,7 +1397,102 @@ struct set_engines {
>   	struct i915_gem_engines *engines;
>   };
>   
> +static int
> +set_engines__load_balance(struct i915_user_extension __user *base, void *data)
> +{
> +	struct i915_context_engines_load_balance __user *ext =
> +		container_of_user(base, typeof(*ext), base);
> +	const struct set_engines *set = data;
> +	struct intel_engine_cs *stack[16];
> +	struct intel_engine_cs **siblings;
> +	struct intel_context *ce;
> +	u16 num_siblings, idx;
> +	unsigned int n;
> +	int err;
> +
> +	if (!HAS_EXECLISTS(set->ctx->i915))
> +		return -ENODEV;
> +
> +	if (USES_GUC_SUBMISSION(set->ctx->i915))
> +		return -ENODEV; /* not implemented yet */
> +
> +	if (get_user(idx, &ext->engine_index))
> +		return -EFAULT;
> +
> +	if (idx >= set->engines->num_engines) {
> +		DRM_DEBUG("Invalid placement value, %d >= %d\n",
> +			  idx, set->engines->num_engines);
> +		return -EINVAL;
> +	}
> +
> +	idx = array_index_nospec(idx, set->engines->num_engines);
> +	if (set->engines->engines[idx]) {
> +		DRM_DEBUG("Invalid placement[%d], already occupied\n", idx);
> +		return -EEXIST;
> +	}
> +
> +	if (get_user(num_siblings, &ext->num_siblings))
> +		return -EFAULT;
> +
> +	err = check_user_mbz(&ext->flags);
> +	if (err)
> +		return err;
> +
> +	for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
> +		err = check_user_mbz(&ext->mbz64[n]);
> +		if (err)
> +			return err;
> +	}
> +
> +	siblings = stack;
> +	if (num_siblings > ARRAY_SIZE(stack)) {
> +		siblings = kmalloc_array(num_siblings,
> +					 sizeof(*siblings),
> +					 GFP_KERNEL);
> +		if (!siblings)
> +			return -ENOMEM;
> +	}
> +
> +	for (n = 0; n < num_siblings; n++) {
> +		struct i915_engine_class_instance ci;
> +
> +		if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) {
> +			err = -EFAULT;
> +			goto out_siblings;
> +		}
> +
> +		siblings[n] = intel_engine_lookup_user(set->ctx->i915,
> +						       ci.engine_class,
> +						       ci.engine_instance);
> +		if (!siblings[n]) {
> +			DRM_DEBUG("Invalid sibling[%d]: { class:%d, inst:%d }\n",
> +				  n, ci.engine_class, ci.engine_instance);
> +			err = -EINVAL;
> +			goto out_siblings;
> +		}
> +	}
> +
> +	ce = intel_execlists_create_virtual(set->ctx, siblings, n);
> +	if (IS_ERR(ce)) {
> +		err = PTR_ERR(ce);
> +		goto out_siblings;
> +	}
> +
> +	if (cmpxchg(&set->engines->engines[idx], NULL, ce)) {
> +		intel_context_put(ce);
> +		err = -EEXIST;
> +		goto out_siblings;
> +	}
> +
> +out_siblings:
> +	if (siblings != stack)
> +		kfree(siblings);
> +
> +	return err;
> +}
> +
>   static const i915_user_extension_fn set_engines__extensions[] = {
> +	[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
>   };
>   
>   static int
> @@ -1696,14 +1791,29 @@ static int clone_engines(struct i915_gem_context *dst,
>   
>   	clone->i915 = dst->i915;
>   	for (n = 0; n < e->num_engines; n++) {
> +		struct intel_engine_cs *engine;
> +
>   		if (!e->engines[n]) {
>   			clone->engines[n] = NULL;
>   			continue;
>   		}
> +		engine = e->engines[n]->engine;
>   
> -		clone->engines[n] =
> -			intel_context_create(dst, e->engines[n]->engine);
> -		if (!clone->engines[n]) {
> +		/*
> +		 * Virtual engines are singletons; they can only exist
> +		 * inside a single context, because they embed their
> +		 * HW context... As each virtual context implies a single
> +		 * timeline (each engine can only dequeue a single request
> +		 * at any time), it would be surprising for two contexts
> +		 * to use the same engine. So let's create a copy of
> +		 * the virtual engine instead.
> +		 */
> +		if (intel_engine_is_virtual(engine))
> +			clone->engines[n] =
> +				intel_execlists_clone_virtual(dst, engine);
> +		else
> +			clone->engines[n] = intel_context_create(dst, engine);
> +		if (IS_ERR_OR_NULL(clone->engines[n])) {
>   			__free_engines(clone, n);
>   			goto err_unlock;
>   		}
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 39bc4f54e272..b58d9c23a876 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -248,17 +248,26 @@ sched_lock_engine(const struct i915_sched_node *node,
>   		  struct intel_engine_cs *locked,
>   		  struct sched_cache *cache)
>   {
> -	struct intel_engine_cs *engine = node_to_request(node)->engine;
> +	const struct i915_request *rq = node_to_request(node);
> +	struct intel_engine_cs *engine;
>   
>   	GEM_BUG_ON(!locked);
>   
> -	if (engine != locked) {
> +	/*
> +	 * Virtual engines complicate acquiring the engine timeline lock,
> +	 * as their rq->engine pointer is not stable until under that
> +	 * engine lock. The simple ploy we use is to take the lock then
> +	 * check that the rq still belongs to the newly locked engine.
> +	 */
> +	while (locked != (engine = READ_ONCE(rq->engine))) {
>   		spin_unlock(&locked->timeline.lock);
>   		memset(cache, 0, sizeof(*cache));
>   		spin_lock(&engine->timeline.lock);
> +		locked = engine;
>   	}
>   
> -	return engine;
> +	GEM_BUG_ON(locked != engine);
> +	return locked;
>   }
>   
>   static bool inflight(const struct i915_request *rq,
> @@ -371,8 +380,11 @@ static void __i915_schedule(struct i915_request *rq,
>   		if (prio <= node->attr.priority || node_signaled(node))
>   			continue;
>   
> +		GEM_BUG_ON(node_to_request(node)->engine != engine);
> +
>   		node->attr.priority = prio;
>   		if (!list_empty(&node->link)) {
> +			GEM_BUG_ON(intel_engine_is_virtual(engine));
>   			if (!cache.priolist)
>   				cache.priolist =
>   					i915_sched_lookup_priolist(engine,
> diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h
> index 5256a0b5c5f7..1688705f4a2b 100644
> --- a/drivers/gpu/drm/i915/i915_timeline_types.h
> +++ b/drivers/gpu/drm/i915/i915_timeline_types.h
> @@ -26,6 +26,7 @@ struct i915_timeline {
>   	spinlock_t lock;
>   #define TIMELINE_CLIENT 0 /* default subclass */
>   #define TIMELINE_ENGINE 1
> +#define TIMELINE_VIRTUAL 2
>   	struct mutex mutex; /* protects the flow of requests */
>   
>   	unsigned int pin_count;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 7694113362d4..ff2ababc0984 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -137,6 +137,7 @@ struct i915_engine_class_instance {
>   	__u16 engine_class; /* see enum drm_i915_gem_engine_class */
>   	__u16 engine_instance;
>   #define I915_ENGINE_CLASS_INVALID_NONE -1
> +#define I915_ENGINE_CLASS_INVALID_VIRTUAL -2
>   };
>   
>   /**
> @@ -1607,8 +1608,46 @@ struct drm_i915_gem_context_param_sseu {
>   	__u32 rsvd;
>   };
>   
> +/*
> + * i915_context_engines_load_balance:
> + *
> + * Enable load balancing across this set of engines.
> + *
> + * Into the I915_EXEC_DEFAULT slot [0], a virtual engine is created that when
> + * used will proxy the execbuffer request onto one of the set of engines
> + * in such a way as to distribute the load evenly across the set.
> + *
> + * The set of engines must be compatible (e.g. the same HW class) as they
> + * will share the same logical GPU context and ring.
> + *
> + * To intermix rendering with the virtual engine and direct rendering onto
> + * the backing engines (bypassing the load balancing proxy), the context must
> + * be defined to use a single timeline for all engines.
> + */
> +struct i915_context_engines_load_balance {
> +	struct i915_user_extension base;
> +
> +	__u16 engine_index;
> +	__u16 num_siblings;
> +	__u32 flags; /* all undefined flags must be zero */
> +
> +	__u64 mbz64[1]; /* reserved for future use; must be zero */
> +
> +	struct i915_engine_class_instance engines[0];
> +} __attribute__((packed));
> +
> +#define I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(name__, N__) struct { \
> +	struct i915_user_extension base; \
> +	__u16 engine_index; \
> +	__u16 num_siblings; \
> +	__u32 flags; \
> +	__u64 mbz64[1]; \
> +	struct i915_engine_class_instance engines[N__]; \
> +} __attribute__((packed)) name__
> +
>   struct i915_context_param_engines {
>   	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
> +#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
>   	struct i915_engine_class_instance engines[0];
>   } __attribute__((packed));
>   
> 
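
For a sense of how userspace is expected to consume the load-balance
extension documented above, here is a rough, uncompiled sketch (not part
of the patch; the drm fd, the context id, function scope and error
handling are assumed, and it relies on the engine map accepting an
invalid class/instance placeholder for the slot the virtual engine will
occupy, as in the uapi above):

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <drm/i915_drm.h>

    /* Hypothetical: balance slot 0 of ctx->engines[] across VCS0/VCS1. */
    I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balance, 2) = {
    	.base = { .name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE },
    	.engine_index = 0,
    	.num_siblings = 2,
    	.engines = {
    		{ I915_ENGINE_CLASS_VIDEO, 0 },
    		{ I915_ENGINE_CLASS_VIDEO, 1 },
    	},
    };

    struct {
    	__u64 extensions;
    	struct i915_engine_class_instance engines[1];
    } __attribute__((packed)) engines = {
    	.extensions = (uintptr_t)&balance,
    	/* slot 0 is left invalid so the virtual engine can occupy it */
    	.engines = {
    		{ I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE },
    	},
    };

    struct drm_i915_gem_context_param p = {
    	.ctx_id = ctx_id, /* assumed: an existing GEM context */
    	.param = I915_CONTEXT_PARAM_ENGINES,
    	.size = sizeof(engines),
    	.value = (uintptr_t)&engines,
    };

    ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p); /* assumed: open drm fd */

Execbuffers that then target engine index 0 of this context are proxied
onto whichever of VCS0/VCS1 the balancer picks.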

* Re: [PATCH 20/45] drm/i915: Apply an execution_mask to the virtual_engine
  2019-04-25  9:19 ` [PATCH 20/45] drm/i915: Apply an execution_mask to the virtual_engine Chris Wilson
@ 2019-04-29 14:12   ` Tvrtko Ursulin
  2019-05-07 16:59     ` Chris Wilson
  0 siblings, 1 reply; 74+ messages in thread
From: Tvrtko Ursulin @ 2019-04-29 14:12 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 25/04/2019 10:19, Chris Wilson wrote:
> Allow the user to direct which physical engines of the virtual engine
> they wish to execute on, as sometimes it is necessary to override the
> load balancing algorithm.
> 
> v2: Only kick the virtual engines on context-out if required
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_lrc.c    |  67 +++++++++++++
>   drivers/gpu/drm/i915/gt/selftest_lrc.c | 131 +++++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_request.c    |   1 +
>   drivers/gpu/drm/i915/i915_request.h    |   3 +
>   4 files changed, 202 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index e8faf3417f9c..8acf9d1c6f2f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -549,6 +549,15 @@ execlists_context_schedule_in(struct i915_request *rq)
>   	rq->hw_context->active = rq->engine;
>   }
>   
> +static void kick_siblings(struct i915_request *rq)
> +{
> +	struct virtual_engine *ve = to_virtual_engine(rq->hw_context->engine);
> +	struct i915_request *next = READ_ONCE(ve->request);
> +
> +	if (next && next->execution_mask & ~rq->execution_mask)
> +		tasklet_schedule(&ve->base.execlists.tasklet);
> +}
> +
>   static inline void
>   execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
>   {
> @@ -556,6 +565,18 @@ execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
>   	intel_engine_context_out(rq->engine);
>   	execlists_context_status_change(rq, status);
>   	trace_i915_request_out(rq);
> +
> +	/*
> +	 * If this is part of a virtual engine, its next request may have
> +	 * been blocked waiting for access to the active context. We have
> +	 * to kick all the siblings again in case we need to switch (e.g.
> +	 * the next request is not runnable on this engine). Hopefully,
> +	 * we will already have submitted the next request before the
> +	 * tasklet runs and do not need to rebuild each virtual tree
> +	 * and kick everyone again.
> +	 */
> +	if (rq->engine != rq->hw_context->engine)
> +		kick_siblings(rq);
>   }
>   
>   static u64 execlists_update_context(struct i915_request *rq)
> @@ -783,6 +804,9 @@ static bool virtual_matches(const struct virtual_engine *ve,
>   {
>   	const struct intel_engine_cs *active;
>   
> +	if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
> +		return false;
> +
>   	/*
>   	 * We track when the HW has completed saving the context image
>   	 * (i.e. when we have seen the final CS event switching out of
> @@ -3141,12 +3165,44 @@ static const struct intel_context_ops virtual_context_ops = {
>   	.destroy = virtual_context_destroy,
>   };
>   
> +static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
> +{
> +	struct i915_request *rq;
> +	intel_engine_mask_t mask;
> +
> +	rq = READ_ONCE(ve->request);
> +	if (!rq)
> +		return 0;
> +
> +	/* The rq is ready for submission; rq->execution_mask is now stable. */
> +	mask = rq->execution_mask;
> +	if (unlikely(!mask)) {
> +		/* Invalid selection, submit to a random engine in error */
> +		i915_request_skip(rq, -ENODEV);
> +		mask = ve->siblings[0]->mask;
> +	}
> +
> +	GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
> +		  ve->base.name,
> +		  rq->fence.context, rq->fence.seqno,
> +		  mask, ve->base.execlists.queue_priority_hint);
> +
> +	return mask;
> +}
> +
>   static void virtual_submission_tasklet(unsigned long data)
>   {
>   	struct virtual_engine * const ve = (struct virtual_engine *)data;
>   	const int prio = ve->base.execlists.queue_priority_hint;
> +	intel_engine_mask_t mask;
>   	unsigned int n;
>   
> +	rcu_read_lock();
> +	mask = virtual_submission_mask(ve);
> +	rcu_read_unlock();
> +	if (unlikely(!mask))

Is the rcu_lock thing solely for the same protection against wedging in 
submit_notify?

Regards,

Tvrtko

> +		return;
> +
>   	local_irq_disable();
>   	for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
>   		struct intel_engine_cs *sibling = ve->siblings[n];
> @@ -3154,6 +3210,17 @@ static void virtual_submission_tasklet(unsigned long data)
>   		struct rb_node **parent, *rb;
>   		bool first;
>   
> +		if (unlikely(!(mask & sibling->mask))) {
> +			if (!RB_EMPTY_NODE(&node->rb)) {
> +				spin_lock(&sibling->timeline.lock);
> +				rb_erase_cached(&node->rb,
> +						&sibling->execlists.virtual);
> +				RB_CLEAR_NODE(&node->rb);
> +				spin_unlock(&sibling->timeline.lock);
> +			}
> +			continue;
> +		}
> +
>   		spin_lock(&sibling->timeline.lock);
>   
>   		if (!RB_EMPTY_NODE(&node->rb)) {
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 558399e5c7bb..73308749936c 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -1480,6 +1480,136 @@ static int live_virtual_engine(void *arg)
>   	return err;
>   }
>   
> +static int mask_virtual_engine(struct drm_i915_private *i915,
> +			       struct intel_engine_cs **siblings,
> +			       unsigned int nsibling)
> +{
> +	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
> +	struct i915_gem_context *ctx;
> +	struct intel_context *ve;
> +	struct igt_live_test t;
> +	unsigned int n;
> +	int err;
> +
> +	/*
> +	 * Check that by setting the execution mask on a request, we can
> +	 * restrict it to our desired engine within the virtual engine.
> +	 */
> +
> +	ctx = kernel_context(i915);
> +	if (!ctx)
> +		return -ENOMEM;
> +
> +	ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
> +	if (IS_ERR(ve)) {
> +		err = PTR_ERR(ve);
> +		goto out_close;
> +	}
> +
> +	err = intel_context_pin(ve);
> +	if (err)
> +		goto out_put;
> +
> +	err = igt_live_test_begin(&t, i915, __func__, ve->engine->name);
> +	if (err)
> +		goto out_unpin;
> +
> +	for (n = 0; n < nsibling; n++) {
> +		request[n] = i915_request_create(ve);
> +		if (IS_ERR(request[n])) {
> +			err = PTR_ERR(request[n]);
> +			nsibling = n;
> +			goto out;
> +		}
> +
> +		/* Reverse order as it's more likely to be unnatural */
> +		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
> +
> +		i915_request_get(request[n]);
> +		i915_request_add(request[n]);
> +	}
> +
> +	for (n = 0; n < nsibling; n++) {
> +		if (i915_request_wait(request[n], I915_WAIT_LOCKED, HZ / 10) < 0) {
> +			pr_err("%s(%s): wait for %llx:%lld timed out\n",
> +			       __func__, ve->engine->name,
> +			       request[n]->fence.context,
> +			       request[n]->fence.seqno);
> +
> +			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
> +				  __func__, ve->engine->name,
> +				  request[n]->fence.context,
> +				  request[n]->fence.seqno);
> +			GEM_TRACE_DUMP();
> +			i915_gem_set_wedged(i915);
> +			err = -EIO;
> +			goto out;
> +		}
> +
> +		if (request[n]->engine != siblings[nsibling - n - 1]) {
> +			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
> +			       request[n]->engine->name,
> +			       siblings[nsibling - n - 1]->name);
> +			err = -EINVAL;
> +			goto out;
> +		}
> +	}
> +
> +	err = igt_live_test_end(&t);
> +	if (err)
> +		goto out;
> +
> +out:
> +	if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +		err = -EIO;
> +
> +	for (n = 0; n < nsibling; n++)
> +		i915_request_put(request[n]);
> +
> +out_unpin:
> +	intel_context_unpin(ve);
> +out_put:
> +	intel_context_put(ve);
> +out_close:
> +	kernel_context_close(ctx);
> +	return err;
> +}
> +
> +static int live_virtual_mask(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
> +	unsigned int class, inst;
> +	int err = 0;
> +
> +	if (USES_GUC_SUBMISSION(i915))
> +		return 0;
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +
> +	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
> +		unsigned int nsibling;
> +
> +		nsibling = 0;
> +		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
> +			if (!i915->engine_class[class][inst])
> +				break;
> +
> +			siblings[nsibling++] = i915->engine_class[class][inst];
> +		}
> +		if (nsibling < 2)
> +			continue;
> +
> +		err = mask_virtual_engine(i915, siblings, nsibling);
> +		if (err)
> +			goto out_unlock;
> +	}
> +
> +out_unlock:
> +	mutex_unlock(&i915->drm.struct_mutex);
> +	return err;
> +}
> +
>   int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   {
>   	static const struct i915_subtest tests[] = {
> @@ -1493,6 +1623,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   		SUBTEST(live_preempt_hang),
>   		SUBTEST(live_preempt_smoke),
>   		SUBTEST(live_virtual_engine),
> +		SUBTEST(live_virtual_mask),
>   	};
>   
>   	if (!HAS_EXECLISTS(i915))
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index a5a2ccc23958..06c2b89d0b0a 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -688,6 +688,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
>   	rq->batch = NULL;
>   	rq->capture_list = NULL;
>   	rq->waitboost = false;
> +	rq->execution_mask = ALL_ENGINES;
>   
>   	INIT_LIST_HEAD(&rq->active_list);
>   	INIT_LIST_HEAD(&rq->execute_cb);
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 8025a89b5999..d7f9b2194568 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -28,6 +28,8 @@
>   #include <linux/dma-fence.h>
>   #include <linux/lockdep.h>
>   
> +#include "gt/intel_engine_types.h"
> +
>   #include "i915_gem.h"
>   #include "i915_scheduler.h"
>   #include "i915_selftest.h"
> @@ -156,6 +158,7 @@ struct i915_request {
>   	 */
>   	struct i915_sched_node sched;
>   	struct i915_dependency dep;
> +	intel_engine_mask_t execution_mask;
>   
>   	/*
>   	 * A convenience pointer to the current breadcrumb value stored in
> 

* Re: [PATCH 22/45] drm/i915/execlists: Virtual engine bonding
  2019-04-25  9:19 ` [PATCH 22/45] drm/i915/execlists: Virtual engine bonding Chris Wilson
@ 2019-04-29 15:58   ` Tvrtko Ursulin
  0 siblings, 0 replies; 74+ messages in thread
From: Tvrtko Ursulin @ 2019-04-29 15:58 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 25/04/2019 10:19, Chris Wilson wrote:
> Some users require that when a master batch is executed on one particular
> engine, a companion batch is run simultaneously on a specific slave
> engine. For this purpose, we introduce virtual engine bonding, allowing
> maps of master:slaves to be constructed to constrain which physical
> engines a virtual engine may select given a fence on a master engine.
> 
> For the moment, we continue to ignore the issue of preemption deferring
> the master request for later. Ideally, we would like to then also remove
> the slave and run something else rather than have it stall the pipeline.
> With load balancing, we should be able to move workload around it, but
> there is a similar stall on the master pipeline while it may wait for
> the slave to be executed. At the cost of more latency for the bonded
> request, it may be interesting to launch both on their engines in
> lockstep. (Bubbles abound.)
> 
> Opens: Also what about bonding an engine as its own master? It doesn't
> break anything internally, so allow the silliness.
> 
> v2: Emancipate the bonds
> v3: Couple in delayed scheduling for the selftests
> v4: Handle invalid mutually exclusive bonding
> v5: Mention what the uapi does
> v6: s/nbond/num_bonds/
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_types.h  |   7 +
>   drivers/gpu/drm/i915/gt/intel_lrc.c           |  98 +++++++++
>   drivers/gpu/drm/i915/gt/intel_lrc.h           |   4 +
>   drivers/gpu/drm/i915/gt/selftest_lrc.c        | 191 ++++++++++++++++++
>   drivers/gpu/drm/i915/i915_gem_context.c       |  84 ++++++++
>   drivers/gpu/drm/i915/selftests/lib_sw_fence.c |   3 +
>   include/uapi/drm/i915_drm.h                   |  44 ++++
>   7 files changed, 431 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index b07d3685e2cb..ca8d95a5708d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -405,6 +405,13 @@ struct intel_engine_cs {
>   	 */
>   	void		(*submit_request)(struct i915_request *rq);
>   
> +	/*
> +	 * Called on signaling of a SUBMIT_FENCE, passing along the signaling
> +	 * request down to the bonded pairs.
> +	 */
> +	void            (*bond_execute)(struct i915_request *rq,
> +					struct dma_fence *signal);
> +
>   	/*
>   	 * Call when the priority on a request has changed and it and its
>   	 * dependencies may need rescheduling. Note the request itself may
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 8acf9d1c6f2f..d793d976402d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -191,6 +191,18 @@ struct virtual_engine {
>   		int prio;
>   	} nodes[I915_NUM_ENGINES];
>   
> +	/*
> +	 * Keep track of bonded pairs -- restrictions upon our selection
> +	 * of physical engines any particular request may be submitted to.
> +	 * If we receive a submit-fence from a master engine, we will only
> +	 * use one of sibling_mask physical engines.
> +	 */
> +	struct ve_bond {
> +		const struct intel_engine_cs *master;
> +		intel_engine_mask_t sibling_mask;
> +	} *bonds;
> +	unsigned int num_bonds;
> +
>   	/* And finally, which physical engines this virtual engine maps onto. */
>   	unsigned int num_siblings;
>   	struct intel_engine_cs *siblings[0];
> @@ -982,6 +994,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   			rb_erase_cached(rb, &execlists->virtual);
>   			RB_CLEAR_NODE(rb);
>   
> +			GEM_BUG_ON(!(rq->execution_mask & engine->mask));
>   			rq->engine = engine;
>   
>   			if (engine != ve->siblings[0]) {
> @@ -3093,6 +3106,8 @@ static void virtual_context_destroy(struct kref *kref)
>   	if (ve->context.state)
>   		__execlists_context_fini(&ve->context);
>   
> +	kfree(ve->bonds);
> +
>   	i915_timeline_fini(&ve->base.timeline);
>   	kfree(ve);
>   }
> @@ -3288,6 +3303,38 @@ static void virtual_submit_request(struct i915_request *rq)
>   	tasklet_schedule(&ve->base.execlists.tasklet);
>   }
>   
> +static struct ve_bond *
> +virtual_find_bond(struct virtual_engine *ve,
> +		  const struct intel_engine_cs *master)
> +{
> +	int i;
> +
> +	for (i = 0; i < ve->num_bonds; i++) {
> +		if (ve->bonds[i].master == master)
> +			return &ve->bonds[i];
> +	}
> +
> +	return NULL;
> +}
> +
> +static void
> +virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
> +{
> +	struct virtual_engine *ve = to_virtual_engine(rq->engine);
> +	struct ve_bond *bond;
> +
> +	bond = virtual_find_bond(ve, to_request(signal)->engine);
> +	if (bond) {
> +		intel_engine_mask_t old, new, cmp;
> +
> +		cmp = READ_ONCE(rq->execution_mask);
> +		do {
> +			old = cmp;
> +			new = cmp & bond->sibling_mask;
> +		} while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old);
> +	}
> +}
> +
>   struct intel_context *
>   intel_execlists_create_virtual(struct i915_gem_context *ctx,
>   			       struct intel_engine_cs **siblings,
> @@ -3328,6 +3375,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
>   
>   	ve->base.schedule = i915_schedule;
>   	ve->base.submit_request = virtual_submit_request;
> +	ve->base.bond_execute = virtual_bond_execute;
>   
>   	ve->base.execlists.queue_priority_hint = INT_MIN;
>   	tasklet_init(&ve->base.execlists.tasklet,
> @@ -3417,9 +3465,59 @@ intel_execlists_clone_virtual(struct i915_gem_context *ctx,
>   	if (IS_ERR(dst))
>   		return dst;
>   
> +	if (se->num_bonds) {
> +		struct virtual_engine *de = to_virtual_engine(dst->engine);
> +
> +		de->bonds = kmemdup(se->bonds,
> +				    sizeof(*se->bonds) * se->num_bonds,
> +				    GFP_KERNEL);
> +		if (!de->bonds) {
> +			intel_context_put(dst);
> +			return ERR_PTR(-ENOMEM);
> +		}
> +
> +		de->num_bonds = se->num_bonds;
> +	}
> +
>   	return dst;
>   }
>   
> +int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
> +				     const struct intel_engine_cs *master,
> +				     const struct intel_engine_cs *sibling)
> +{
> +	struct virtual_engine *ve = to_virtual_engine(engine);
> +	struct ve_bond *bond;
> +	int n;
> +
> +	/* Sanity check the sibling is part of the virtual engine */
> +	for (n = 0; n < ve->num_siblings; n++)
> +		if (sibling == ve->siblings[n])
> +			break;
> +	if (n == ve->num_siblings)
> +		return -EINVAL;
> +
> +	bond = virtual_find_bond(ve, master);
> +	if (bond) {
> +		bond->sibling_mask |= sibling->mask;
> +		return 0;
> +	}
> +
> +	bond = krealloc(ve->bonds,
> +			sizeof(*bond) * (ve->num_bonds + 1),
> +			GFP_KERNEL);
> +	if (!bond)
> +		return -ENOMEM;
> +
> +	bond[ve->num_bonds].master = master;
> +	bond[ve->num_bonds].sibling_mask = sibling->mask;
> +
> +	ve->bonds = bond;
> +	ve->num_bonds++;
> +
> +	return 0;
> +}
> +
>   void intel_execlists_show_requests(struct intel_engine_cs *engine,
>   				   struct drm_printer *m,
>   				   void (*show_request)(struct drm_printer *m,
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
> index 5530606052e5..e029aee87adf 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
> @@ -123,4 +123,8 @@ struct intel_context *
>   intel_execlists_clone_virtual(struct i915_gem_context *ctx,
>   			      struct intel_engine_cs *src);
>   
> +int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
> +				     const struct intel_engine_cs *master,
> +				     const struct intel_engine_cs *sibling);
> +
>   #endif /* _INTEL_LRC_H_ */
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 73308749936c..68d10038f3c9 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -13,6 +13,7 @@
>   #include "selftests/igt_gem_utils.h"
>   #include "selftests/igt_live_test.h"
>   #include "selftests/igt_spinner.h"
> +#include "selftests/lib_sw_fence.h"
>   #include "selftests/mock_context.h"
>   
>   static int live_sanitycheck(void *arg)
> @@ -1610,6 +1611,195 @@ static int live_virtual_mask(void *arg)
>   	return err;
>   }
>   
> +static int bond_virtual_engine(struct drm_i915_private *i915,
> +			       unsigned int class,
> +			       struct intel_engine_cs **siblings,
> +			       unsigned int nsibling,
> +			       unsigned int flags)
> +#define BOND_SCHEDULE BIT(0)
> +{
> +	struct intel_engine_cs *master;
> +	struct i915_gem_context *ctx;
> +	struct i915_request *rq[16];
> +	enum intel_engine_id id;
> +	unsigned long n;
> +	int err;
> +
> +	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
> +
> +	ctx = kernel_context(i915);
> +	if (!ctx)
> +		return -ENOMEM;
> +
> +	err = 0;
> +	rq[0] = ERR_PTR(-ENOMEM);
> +	for_each_engine(master, i915, id) {
> +		struct i915_sw_fence fence = {};
> +
> +		if (master->class == class)
> +			continue;
> +
> +		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
> +
> +		rq[0] = igt_request_alloc(ctx, master);
> +		if (IS_ERR(rq[0])) {
> +			err = PTR_ERR(rq[0]);
> +			goto out;
> +		}
> +		i915_request_get(rq[0]);
> +
> +		if (flags & BOND_SCHEDULE) {
> +			onstack_fence_init(&fence);
> +			err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
> +							       &fence,
> +							       GFP_KERNEL);
> +		}
> +		i915_request_add(rq[0]);
> +		if (err < 0)
> +			goto out;
> +
> +		for (n = 0; n < nsibling; n++) {
> +			struct intel_context *ve;
> +
> +			ve = intel_execlists_create_virtual(ctx,
> +							    siblings,
> +							    nsibling);
> +			if (IS_ERR(ve)) {
> +				err = PTR_ERR(ve);
> +				onstack_fence_fini(&fence);
> +				goto out;
> +			}
> +
> +			err = intel_virtual_engine_attach_bond(ve->engine,
> +							       master,
> +							       siblings[n]);
> +			if (err) {
> +				intel_context_put(ve);
> +				onstack_fence_fini(&fence);
> +				goto out;
> +			}
> +
> +			err = intel_context_pin(ve);
> +			intel_context_put(ve);
> +			if (err) {
> +				onstack_fence_fini(&fence);
> +				goto out;
> +			}
> +
> +			rq[n + 1] = i915_request_create(ve);
> +			intel_context_unpin(ve);
> +			if (IS_ERR(rq[n + 1])) {
> +				err = PTR_ERR(rq[n + 1]);
> +				onstack_fence_fini(&fence);
> +				goto out;
> +			}
> +			i915_request_get(rq[n + 1]);
> +
> +			err = i915_request_await_execution(rq[n + 1],
> +							   &rq[0]->fence,
> +							   ve->engine->bond_execute);
> +			i915_request_add(rq[n + 1]);
> +			if (err < 0) {
> +				onstack_fence_fini(&fence);
> +				goto out;
> +			}
> +		}
> +		onstack_fence_fini(&fence);
> +
> +		if (i915_request_wait(rq[0],
> +				      I915_WAIT_LOCKED,
> +				      HZ / 10) < 0) {
> +			pr_err("Master request did not execute (on %s)!\n",
> +			       rq[0]->engine->name);
> +			err = -EIO;
> +			goto out;
> +		}
> +
> +		for (n = 0; n < nsibling; n++) {
> +			if (i915_request_wait(rq[n + 1],
> +					      I915_WAIT_LOCKED,
> +					      MAX_SCHEDULE_TIMEOUT) < 0) {
> +				err = -EIO;
> +				goto out;
> +			}
> +
> +			if (rq[n + 1]->engine != siblings[n]) {
> +				pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
> +				       siblings[n]->name,
> +				       rq[n + 1]->engine->name,
> +				       rq[0]->engine->name);
> +				err = -EINVAL;
> +				goto out;
> +			}
> +		}
> +
> +		for (n = 0; !IS_ERR(rq[n]); n++)
> +			i915_request_put(rq[n]);
> +		rq[0] = ERR_PTR(-ENOMEM);
> +	}
> +
> +out:
> +	for (n = 0; !IS_ERR(rq[n]); n++)
> +		i915_request_put(rq[n]);
> +	if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +		err = -EIO;
> +
> +	kernel_context_close(ctx);
> +	return err;
> +}
> +
> +static int live_virtual_bond(void *arg)
> +{
> +	static const struct phase {
> +		const char *name;
> +		unsigned int flags;
> +	} phases[] = {
> +		{ "", 0 },
> +		{ "schedule", BOND_SCHEDULE },
> +		{ },
> +	};
> +	struct drm_i915_private *i915 = arg;
> +	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
> +	unsigned int class, inst;
> +	int err = 0;
> +
> +	if (USES_GUC_SUBMISSION(i915))
> +		return 0;
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +
> +	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
> +		const struct phase *p;
> +		int nsibling;
> +
> +		nsibling = 0;
> +		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
> +			if (!i915->engine_class[class][inst])
> +				break;
> +
> +			GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
> +			siblings[nsibling++] = i915->engine_class[class][inst];
> +		}
> +		if (nsibling < 2)
> +			continue;
> +
> +		for (p = phases; p->name; p++) {
> +			err = bond_virtual_engine(i915,
> +						  class, siblings, nsibling,
> +						  p->flags);
> +			if (err) {
> +				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
> +				       __func__, p->name, class, nsibling, err);
> +				goto out_unlock;
> +			}
> +		}
> +	}
> +
> +out_unlock:
> +	mutex_unlock(&i915->drm.struct_mutex);
> +	return err;
> +}
> +
>   int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   {
>   	static const struct i915_subtest tests[] = {
> @@ -1624,6 +1814,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   		SUBTEST(live_preempt_smoke),
>   		SUBTEST(live_virtual_engine),
>   		SUBTEST(live_virtual_mask),
> +		SUBTEST(live_virtual_bond),
>   	};
>   
>   	if (!HAS_EXECLISTS(i915))
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 57b09f624bb4..cbba8f4bd786 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -1491,8 +1491,92 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data)
>   	return err;
>   }
>   
> +static int
> +set_engines__bond(struct i915_user_extension __user *base, void *data)
> +{
> +	struct i915_context_engines_bond __user *ext =
> +		container_of_user(base, typeof(*ext), base);
> +	const struct set_engines *set = data;
> +	struct i915_engine_class_instance ci;
> +	struct intel_engine_cs *virtual;
> +	struct intel_engine_cs *master;
> +	u16 idx, num_bonds;
> +	int err, n;
> +
> +	if (get_user(idx, &ext->virtual_index))
> +		return -EFAULT;
> +
> +	if (idx >= set->engines->num_engines) {
> +		DRM_DEBUG("Invalid index for virtual engine: %d >= %d\n",
> +			  idx, set->engines->num_engines);
> +		return -EINVAL;
> +	}
> +
> +	idx = array_index_nospec(idx, set->engines->num_engines);
> +	if (!set->engines->engines[idx]) {
> +		DRM_DEBUG("Invalid engine at %d\n", idx);
> +		return -EINVAL;
> +	}
> +	virtual = set->engines->engines[idx]->engine;
> +
> +	err = check_user_mbz(&ext->flags);
> +	if (err)
> +		return err;
> +
> +	for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
> +		err = check_user_mbz(&ext->mbz64[n]);
> +		if (err)
> +			return err;
> +	}
> +
> +	if (copy_from_user(&ci, &ext->master, sizeof(ci)))
> +		return -EFAULT;
> +
> +	master = intel_engine_lookup_user(set->ctx->i915,
> +					  ci.engine_class, ci.engine_instance);
> +	if (!master) {
> +		DRM_DEBUG("Unrecognised master engine: { class:%u, instance:%u }\n",
> +			  ci.engine_class, ci.engine_instance);
> +		return -EINVAL;
> +	}
> +
> +	if (get_user(num_bonds, &ext->num_bonds))
> +		return -EFAULT;
> +
> +	for (n = 0; n < num_bonds; n++) {
> +		struct intel_engine_cs *bond;
> +
> +		if (copy_from_user(&ci, &ext->engines[n], sizeof(ci)))
> +			return -EFAULT;
> +
> +		bond = intel_engine_lookup_user(set->ctx->i915,
> +						ci.engine_class,
> +						ci.engine_instance);
> +		if (!bond) {
> +			DRM_DEBUG("Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n",
> +				  n, ci.engine_class, ci.engine_instance);
> +			return -EINVAL;
> +		}
> +
> +		/*
> +		 * A non-virtual engine has no siblings to choose between; and
> +		 * a submit fence will always be directed to the one engine.
> +		 */
> +		if (intel_engine_is_virtual(virtual)) {
> +			err = intel_virtual_engine_attach_bond(virtual,
> +							       master,
> +							       bond);
> +			if (err)
> +				return err;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>   static const i915_user_extension_fn set_engines__extensions[] = {
>   	[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
> +	[I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond,
>   };
>   
>   static int
> diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
> index 2bfa72c1654b..b976c12817c5 100644
> --- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
> +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
> @@ -45,6 +45,9 @@ void __onstack_fence_init(struct i915_sw_fence *fence,
>   
>   void onstack_fence_fini(struct i915_sw_fence *fence)
>   {
> +	if (!fence->flags)
> +		return;
> +
>   	i915_sw_fence_commit(fence);
>   	i915_sw_fence_fini(fence);
>   }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index ff2ababc0984..c0054074b4c3 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1543,6 +1543,10 @@ struct drm_i915_gem_context_param {
>    * sized argument, will revert back to default settings.
>    *
>    * See struct i915_context_param_engines.
> + *
> + * Extensions:
> + *   i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
> + *   i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
>    */
>   #define I915_CONTEXT_PARAM_ENGINES	0xa
>   /* Must be kept compact -- no holes and well documented */
> @@ -1645,9 +1649,49 @@ struct i915_context_engines_load_balance {
>   	struct i915_engine_class_instance engines[N__]; \
>   } __attribute__((packed)) name__
>   
> +/*
> + * i915_context_engines_bond:
> + *
> + * Constructed bonded pairs for execution within a virtual engine.
> + *
> + * All engines are equal, but some are more equal than others. Given
> + * the distribution of resources in the HW, it may be preferable to run
> + * a request on a given subset of engines in parallel to a request on a
> + * specific engine. We enable this selection of engines within a virtual
> + * engine by specifying bonding pairs, for any given master engine we will
> + * only execute on one of the corresponding siblings within the virtual engine.
> + *
> + * To execute a request in parallel on the master engine and a sibling requires
> + * coordination with a I915_EXEC_FENCE_SUBMIT.
> + */
> +struct i915_context_engines_bond {
> +	struct i915_user_extension base;
> +
> +	struct i915_engine_class_instance master;
> +
> +	__u16 virtual_index; /* index of virtual engine in ctx->engines[] */
> +	__u16 num_bonds;
> +
> +	__u64 flags; /* all undefined flags must be zero */
> +	__u64 mbz64[4]; /* reserved for future use; must be zero */
> +
> +	struct i915_engine_class_instance engines[0];
> +} __attribute__((packed));
> +
> +#define I915_DEFINE_CONTEXT_ENGINES_BOND(name__, N__) struct { \
> +	struct i915_user_extension base; \
> +	struct i915_engine_class_instance master; \
> +	__u16 virtual_index; \
> +	__u16 num_bonds; \
> +	__u64 flags; \
> +	__u64 mbz64[4]; \
> +	struct i915_engine_class_instance engines[N__]; \
> +} __attribute__((packed)) name__
> +
>   struct i915_context_param_engines {
>   	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
>   #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
> +#define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */
>   	struct i915_engine_class_instance engines[0];
>   } __attribute__((packed));
>   
> 
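For illustration, a minimal userspace sketch of the extension defined above,
assuming a virtual engine has already been installed at ctx->engines[0] via
I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE; the choice of a VCS0 master with a
VCS1 sibling is hypothetical:

	/*
	 * Bond VCS1 to a VCS0 master: when the master request runs on VCS0,
	 * a request submitted to the virtual engine with
	 * I915_EXEC_FENCE_SUBMIT pointing at the master's fence may only
	 * execute on VCS1.
	 */
	I915_DEFINE_CONTEXT_ENGINES_BOND(bond, 1) = {
		.base = { .name = I915_CONTEXT_ENGINES_EXT_BOND },
		.master = { I915_ENGINE_CLASS_VIDEO, 0 },
		.virtual_index = 0, /* the virtual engine in ctx->engines[] */
		.num_bonds = 1,
		.engines = { { I915_ENGINE_CLASS_VIDEO, 1 } },
	};

The bond is then chained from struct i915_context_param_engines via its
extensions pointer (typically after the load-balance extension), and the
parallel request is submitted with I915_EXEC_FENCE_SUBMIT referencing the
master request's output fence.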

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

* Re: [PATCH 24/45] drm/i915: Split GEM object type definition to its own header
  2019-04-29  9:36     ` Joonas Lahtinen
@ 2019-04-29 17:52       ` Rodrigo Vivi
  0 siblings, 0 replies; 74+ messages in thread
From: Rodrigo Vivi @ 2019-04-29 17:52 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx, Matthew Auld

On Mon, Apr 29, 2019 at 12:36:30PM +0300, Joonas Lahtinen wrote:
> Quoting Jani Nikula (2019-04-26 15:12:49)
> > On Thu, 25 Apr 2019, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > For convenience in avoiding inline spaghetti, keep the type definition
> > > as a separate header.
> > >
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Reviewed-by: Matthew Auld <matthew.auld@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/Makefile                 |   1 +
> > >  drivers/gpu/drm/i915/gem/Makefile             |   1 +
> > >  drivers/gpu/drm/i915/gem/Makefile.header-test |  16 +
> > >  .../gpu/drm/i915/gem/i915_gem_object_types.h  | 285 +++++++++++++++++
> > >  drivers/gpu/drm/i915/gt/intel_engine_types.h  |   1 +
> > >  drivers/gpu/drm/i915/i915_drv.h               |   3 +-
> > >  drivers/gpu/drm/i915/i915_gem_batch_pool.h    |   3 +-
> > >  drivers/gpu/drm/i915/i915_gem_gtt.h           |   1 +
> > >  drivers/gpu/drm/i915/i915_gem_object.h        | 295 +-----------------
> > >  9 files changed, 312 insertions(+), 294 deletions(-)
> > >  create mode 100644 drivers/gpu/drm/i915/gem/Makefile
> > >  create mode 100644 drivers/gpu/drm/i915/gem/Makefile.header-test
> > >  create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > >
> > > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> > > index 58643373495c..def781c9ea69 100644
> > > --- a/drivers/gpu/drm/i915/Makefile
> > > +++ b/drivers/gpu/drm/i915/Makefile
> > > @@ -85,6 +85,7 @@ gt-$(CONFIG_DRM_I915_SELFTEST) += \
> > >  i915-y += $(gt-y)
> > >  
> > >  # GEM (Graphics Execution Management) code
> > > +obj-y += gem/
> > 
> > I think this warrants similar treatment as gt/. Posted standalone
> > instead of hidden in a series, explicit acks.
> > 
> > Provided the split makes sense to Joonas,
> > 
> > Acked-by: Jani Nikula <jani.nikula@intel.com>
> 
> Yeah, makes sense to me.
> 
> Acked-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> 
> Regards, Joonas

I liked the idea of splitting gem from gt, so:

Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>

> 
> > 
> > 
> > >  i915-y += \
> > >         i915_active.o \
> > >         i915_cmd_parser.o \
> > > diff --git a/drivers/gpu/drm/i915/gem/Makefile b/drivers/gpu/drm/i915/gem/Makefile
> > > new file mode 100644
> > > index 000000000000..07e7b8b840ea
> > > --- /dev/null
> > > +++ b/drivers/gpu/drm/i915/gem/Makefile
> > > @@ -0,0 +1 @@
> > > +include $(src)/Makefile.header-test # Extra header tests
> > > diff --git a/drivers/gpu/drm/i915/gem/Makefile.header-test b/drivers/gpu/drm/i915/gem/Makefile.header-test
> > > new file mode 100644
> > > index 000000000000..61e06cbb4b32
> > > --- /dev/null
> > > +++ b/drivers/gpu/drm/i915/gem/Makefile.header-test
> > > @@ -0,0 +1,16 @@
> > > +# SPDX-License-Identifier: MIT
> > > +# Copyright © 2019 Intel Corporation
> > > +
> > > +# Test the headers are compilable as standalone units
> > > +header_test := $(notdir $(wildcard $(src)/*.h))
> > > +
> > > +quiet_cmd_header_test = HDRTEST $@
> > > +      cmd_header_test = echo "\#include \"$(<F)\"" > $@
> > > +
> > > +header_test_%.c: %.h
> > > +     $(call cmd,header_test)
> > > +
> > > +extra-$(CONFIG_DRM_I915_WERROR) += \
> > > +     $(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h)))
> > > +
> > > +clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h)))
> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > > new file mode 100644
> > > index 000000000000..e4b50944f553
> > > --- /dev/null
> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > > @@ -0,0 +1,285 @@
> > > +/*
> > > + * SPDX-License-Identifier: MIT
> > > + *
> > > + * Copyright © 2016 Intel Corporation
> > > + */
> > > +
> > > +#ifndef __I915_GEM_OBJECT_TYPES_H__
> > > +#define __I915_GEM_OBJECT_TYPES_H__
> > > +
> > > +#include <linux/reservation.h>
> > > +
> > > +#include <drm/drm_gem.h>
> > > +
> > > +#include "../i915_active.h"
> > > +#include "../i915_selftest.h"
> > > +
> > > +struct drm_i915_gem_object;
> > > +
> > > +/*
> > > + * struct i915_lut_handle tracks the fast lookups from handle to vma used
> > > + * for execbuf. Although we use a radixtree for that mapping, in order to
> > > + * remove them as the object or context is closed, we need a secondary list
> > > + * and a translation entry (i915_lut_handle).
> > > + */
> > > +struct i915_lut_handle {
> > > +     struct list_head obj_link;
> > > +     struct list_head ctx_link;
> > > +     struct i915_gem_context *ctx;
> > > +     u32 handle;
> > > +};
> > > +
> > > +struct drm_i915_gem_object_ops {
> > > +     unsigned int flags;
> > > +#define I915_GEM_OBJECT_HAS_STRUCT_PAGE      BIT(0)
> > > +#define I915_GEM_OBJECT_IS_SHRINKABLE        BIT(1)
> > > +#define I915_GEM_OBJECT_IS_PROXY     BIT(2)
> > > +#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(3)
> > > +
> > > +     /* Interface between the GEM object and its backing storage.
> > > +      * get_pages() is called once prior to the use of the associated set
> > > +      * of pages before to binding them into the GTT, and put_pages() is
> > > +      * called after we no longer need them. As we expect there to be
> > > +      * associated cost with migrating pages between the backing storage
> > > +      * and making them available for the GPU (e.g. clflush), we may hold
> > > +      * onto the pages after they are no longer referenced by the GPU
> > > +      * in case they may be used again shortly (for example migrating the
> > > +      * pages to a different memory domain within the GTT). put_pages()
> > > +      * will therefore most likely be called when the object itself is
> > > +      * being released or under memory pressure (where we attempt to
> > > +      * reap pages for the shrinker).
> > > +      */
> > > +     int (*get_pages)(struct drm_i915_gem_object *obj);
> > > +     void (*put_pages)(struct drm_i915_gem_object *obj,
> > > +                       struct sg_table *pages);
> > > +
> > > +     int (*pwrite)(struct drm_i915_gem_object *obj,
> > > +                   const struct drm_i915_gem_pwrite *arg);
> > > +
> > > +     int (*dmabuf_export)(struct drm_i915_gem_object *obj);
> > > +     void (*release)(struct drm_i915_gem_object *obj);
> > > +};
> > > +
> > > +struct drm_i915_gem_object {
> > > +     struct drm_gem_object base;
> > > +
> > > +     const struct drm_i915_gem_object_ops *ops;
> > > +
> > > +     struct {
> > > +             /**
> > > +              * @vma.lock: protect the list/tree of vmas
> > > +              */
> > > +             spinlock_t lock;
> > > +
> > > +             /**
> > > +              * @vma.list: List of VMAs backed by this object
> > > +              *
> > > +              * The VMA on this list are ordered by type, all GGTT vma are
> > > +              * placed at the head and all ppGTT vma are placed at the tail.
> > > +              * The different types of GGTT vma are unordered between
> > > +              * themselves, use the @vma.tree (which has a defined order
> > > +              * between all VMA) to quickly find an exact match.
> > > +              */
> > > +             struct list_head list;
> > > +
> > > +             /**
> > > +              * @vma.tree: Ordered tree of VMAs backed by this object
> > > +              *
> > > +              * All VMA created for this object are placed in the @vma.tree
> > > +              * for fast retrieval via a binary search in
> > > +              * i915_vma_instance(). They are also added to @vma.list for
> > > +              * easy iteration.
> > > +              */
> > > +             struct rb_root tree;
> > > +     } vma;
> > > +
> > > +     /**
> > > +      * @lut_list: List of vma lookup entries in use for this object.
> > > +      *
> > > +      * If this object is closed, we need to remove all of its VMA from
> > > +      * the fast lookup index in associated contexts; @lut_list provides
> > > +      * this translation from object to context->handles_vma.
> > > +      */
> > > +     struct list_head lut_list;
> > > +
> > > +     /** Stolen memory for this object, instead of being backed by shmem. */
> > > +     struct drm_mm_node *stolen;
> > > +     union {
> > > +             struct rcu_head rcu;
> > > +             struct llist_node freed;
> > > +     };
> > > +
> > > +     /**
> > > +      * Whether the object is currently in the GGTT mmap.
> > > +      */
> > > +     unsigned int userfault_count;
> > > +     struct list_head userfault_link;
> > > +
> > > +     struct list_head batch_pool_link;
> > > +     I915_SELFTEST_DECLARE(struct list_head st_link);
> > > +
> > > +     unsigned long flags;
> > > +
> > > +     /**
> > > +      * Have we taken a reference for the object for incomplete GPU
> > > +      * activity?
> > > +      */
> > > +#define I915_BO_ACTIVE_REF 0
> > > +
> > > +     /*
> > > +      * Is the object to be mapped as read-only to the GPU
> > > +      * Only honoured if hardware has relevant pte bit
> > > +      */
> > > +     unsigned int cache_level:3;
> > > +     unsigned int cache_coherent:2;
> > > +#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
> > > +#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
> > > +     unsigned int cache_dirty:1;
> > > +
> > > +     /**
> > > +      * @read_domains: Read memory domains.
> > > +      *
> > > +      * These monitor which caches contain read/write data related to the
> > > +      * object. When transitioning from one set of domains to another,
> > > +      * the driver is called to ensure that caches are suitably flushed and
> > > +      * invalidated.
> > > +      */
> > > +     u16 read_domains;
> > > +
> > > +     /**
> > > +      * @write_domain: Corresponding unique write memory domain.
> > > +      */
> > > +     u16 write_domain;
> > > +
> > > +     atomic_t frontbuffer_bits;
> > > +     unsigned int frontbuffer_ggtt_origin; /* write once */
> > > +     struct i915_active_request frontbuffer_write;
> > > +
> > > +     /** Current tiling stride for the object, if it's tiled. */
> > > +     unsigned int tiling_and_stride;
> > > +#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
> > > +#define TILING_MASK (FENCE_MINIMUM_STRIDE - 1)
> > > +#define STRIDE_MASK (~TILING_MASK)
> > > +
> > > +     /** Count of VMA actually bound by this object */
> > > +     unsigned int bind_count;
> > > +     unsigned int active_count;
> > > +     /** Count of how many global VMA are currently pinned for use by HW */
> > > +     unsigned int pin_global;
> > > +
> > > +     struct {
> > > +             struct mutex lock; /* protects the pages and their use */
> > > +             atomic_t pages_pin_count;
> > > +
> > > +             struct sg_table *pages;
> > > +             void *mapping;
> > > +
> > > +             /* TODO: whack some of this into the error state */
> > > +             struct i915_page_sizes {
> > > +                     /**
> > > +                      * The sg mask of the pages sg_table. i.e the mask of
> > > +                      * of the lengths for each sg entry.
> > > +                      */
> > > +                     unsigned int phys;
> > > +
> > > +                     /**
> > > +                      * The gtt page sizes we are allowed to use given the
> > > +                      * sg mask and the supported page sizes. This will
> > > +                      * express the smallest unit we can use for the whole
> > > +                      * object, as well as the larger sizes we may be able
> > > +                      * to use opportunistically.
> > > +                      */
> > > +                     unsigned int sg;
> > > +
> > > +                     /**
> > > +                      * The actual gtt page size usage. Since we can have
> > > +                      * multiple vma associated with this object we need to
> > > +                      * prevent any trampling of state, hence a copy of this
> > > +                      * struct also lives in each vma, therefore the gtt
> > > +                      * value here should only be read/write through the vma.
> > > +                      */
> > > +                     unsigned int gtt;
> > > +             } page_sizes;
> > > +
> > > +             I915_SELFTEST_DECLARE(unsigned int page_mask);
> > > +
> > > +             struct i915_gem_object_page_iter {
> > > +                     struct scatterlist *sg_pos;
> > > +                     unsigned int sg_idx; /* in pages, but 32bit eek! */
> > > +
> > > +                     struct radix_tree_root radix;
> > > +                     struct mutex lock; /* protects this cache */
> > > +             } get_page;
> > > +
> > > +             /**
> > > +              * Element within i915->mm.unbound_list or i915->mm.bound_list,
> > > +              * locked by i915->mm.obj_lock.
> > > +              */
> > > +             struct list_head link;
> > > +
> > > +             /**
> > > +              * Advice: are the backing pages purgeable?
> > > +              */
> > > +             unsigned int madv:2;
> > > +
> > > +             /**
> > > +              * This is set if the object has been written to since the
> > > +              * pages were last acquired.
> > > +              */
> > > +             bool dirty:1;
> > > +
> > > +             /**
> > > +              * This is set if the object has been pinned due to unknown
> > > +              * swizzling.
> > > +              */
> > > +             bool quirked:1;
> > > +     } mm;
> > > +
> > > +     /** Breadcrumb of last rendering to the buffer.
> > > +      * There can only be one writer, but we allow for multiple readers.
> > > +      * If there is a writer that necessarily implies that all other
> > > +      * read requests are complete - but we may only be lazily clearing
> > > +      * the read requests. A read request is naturally the most recent
> > > +      * request on a ring, so we may have two different write and read
> > > +      * requests on one ring where the write request is older than the
> > > +      * read request. This allows for the CPU to read from an active
> > > +      * buffer by only waiting for the write to complete.
> > > +      */
> > > +     struct reservation_object *resv;
> > > +
> > > +     /** References from framebuffers, locks out tiling changes. */
> > > +     unsigned int framebuffer_references;
> > > +
> > > +     /** Record of address bit 17 of each page at last unbind. */
> > > +     unsigned long *bit_17;
> > > +
> > > +     union {
> > > +             struct i915_gem_userptr {
> > > +                     uintptr_t ptr;
> > > +
> > > +                     struct i915_mm_struct *mm;
> > > +                     struct i915_mmu_object *mmu_object;
> > > +                     struct work_struct *work;
> > > +             } userptr;
> > > +
> > > +             unsigned long scratch;
> > > +
> > > +             void *gvt_info;
> > > +     };
> > > +
> > > +     /** for phys allocated objects */
> > > +     struct drm_dma_handle *phys_handle;
> > > +
> > > +     struct reservation_object __builtin_resv;
> > > +};
> > > +
> > > +static inline struct drm_i915_gem_object *
> > > +to_intel_bo(struct drm_gem_object *gem)
> > > +{
> > > +     /* Assert that to_intel_bo(NULL) == NULL */
> > > +     BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));
> > > +
> > > +     return container_of(gem, struct drm_i915_gem_object, base);
> > > +}
> > > +
> > > +#endif
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > > index ca8d95a5708d..ae73c6596d08 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > > @@ -29,6 +29,7 @@
> > >  #define I915_CMD_HASH_ORDER 9
> > >  
> > >  struct dma_fence;
> > > +struct drm_i915_gem_object;
> > >  struct drm_i915_reg_table;
> > >  struct i915_gem_context;
> > >  struct i915_request;
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > > index 5ca4df9a7428..0cf87495e11b 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > @@ -80,7 +80,6 @@
> > >  #include "i915_gem.h"
> > >  #include "i915_gem_context.h"
> > >  #include "i915_gem_fence_reg.h"
> > > -#include "i915_gem_object.h"
> > >  #include "i915_gem_gtt.h"
> > >  #include "i915_gpu_error.h"
> > >  #include "i915_request.h"
> > > @@ -135,6 +134,8 @@ bool i915_error_injected(void);
> > >       __i915_printk(i915, i915_error_injected() ? KERN_DEBUG : KERN_ERR, \
> > >                     fmt, ##__VA_ARGS__)
> > >  
> > > +struct drm_i915_gem_object;
> > > +
> > >  enum hpd_pin {
> > >       HPD_NONE = 0,
> > >       HPD_TV = HPD_NONE,     /* TV is known to be unreliable */
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
> > > index 56947daaaf65..feeeeeaa54d8 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h
> > > +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
> > > @@ -9,6 +9,7 @@
> > >  
> > >  #include <linux/types.h>
> > >  
> > > +struct drm_i915_gem_object;
> > >  struct intel_engine_cs;
> > >  
> > >  struct i915_gem_batch_pool {
> > > @@ -19,7 +20,7 @@ struct i915_gem_batch_pool {
> > >  void i915_gem_batch_pool_init(struct i915_gem_batch_pool *pool,
> > >                             struct intel_engine_cs *engine);
> > >  void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
> > > -struct drm_i915_gem_object*
> > > +struct drm_i915_gem_object *
> > >  i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
> > >  
> > >  #endif /* I915_GEM_BATCH_POOL_H */
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > > index 2fafa04c45ec..593079f30fbe 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> > > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > > @@ -61,6 +61,7 @@
> > >  
> > >  struct drm_i915_file_private;
> > >  struct drm_i915_fence_reg;
> > > +struct drm_i915_gem_object;
> > >  struct i915_vma;
> > >  
> > >  typedef u32 gen6_pte_t;
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
> > > index ca93a40c0c87..3666b0c5f6ca 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_object.h
> > > +++ b/drivers/gpu/drm/i915/i915_gem_object.h
> > > @@ -1,308 +1,19 @@
> > >  /*
> > > - * Copyright © 2016 Intel Corporation
> > > - *
> > > - * Permission is hereby granted, free of charge, to any person obtaining a
> > > - * copy of this software and associated documentation files (the "Software"),
> > > - * to deal in the Software without restriction, including without limitation
> > > - * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> > > - * and/or sell copies of the Software, and to permit persons to whom the
> > > - * Software is furnished to do so, subject to the following conditions:
> > > - *
> > > - * The above copyright notice and this permission notice (including the next
> > > - * paragraph) shall be included in all copies or substantial portions of the
> > > - * Software.
> > > - *
> > > - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > > - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > > - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> > > - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > > - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > > - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> > > - * IN THE SOFTWARE.
> > > + * SPDX-License-Identifier: MIT
> > >   *
> > > + * Copyright © 2016 Intel Corporation
> > >   */
> > >  
> > >  #ifndef __I915_GEM_OBJECT_H__
> > >  #define __I915_GEM_OBJECT_H__
> > >  
> > > -#include <linux/reservation.h>
> > > -
> > > -#include <drm/drm_vma_manager.h>
> > >  #include <drm/drm_gem.h>
> > >  #include <drm/drm_file.h>
> > >  #include <drm/drm_device.h>
> > >  
> > >  #include <drm/i915_drm.h>
> > >  
> > > -#include "i915_request.h"
> > > -#include "i915_selftest.h"
> > > -
> > > -struct drm_i915_gem_object;
> > > -
> > > -/*
> > > - * struct i915_lut_handle tracks the fast lookups from handle to vma used
> > > - * for execbuf. Although we use a radixtree for that mapping, in order to
> > > - * remove them as the object or context is closed, we need a secondary list
> > > - * and a translation entry (i915_lut_handle).
> > > - */
> > > -struct i915_lut_handle {
> > > -     struct list_head obj_link;
> > > -     struct list_head ctx_link;
> > > -     struct i915_gem_context *ctx;
> > > -     u32 handle;
> > > -};
> > > -
> > > -struct drm_i915_gem_object_ops {
> > > -     unsigned int flags;
> > > -#define I915_GEM_OBJECT_HAS_STRUCT_PAGE      BIT(0)
> > > -#define I915_GEM_OBJECT_IS_SHRINKABLE        BIT(1)
> > > -#define I915_GEM_OBJECT_IS_PROXY     BIT(2)
> > > -#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(3)
> > > -
> > > -     /* Interface between the GEM object and its backing storage.
> > > -      * get_pages() is called once prior to the use of the associated set
> > > -      * of pages before to binding them into the GTT, and put_pages() is
> > > -      * called after we no longer need them. As we expect there to be
> > > -      * associated cost with migrating pages between the backing storage
> > > -      * and making them available for the GPU (e.g. clflush), we may hold
> > > -      * onto the pages after they are no longer referenced by the GPU
> > > -      * in case they may be used again shortly (for example migrating the
> > > -      * pages to a different memory domain within the GTT). put_pages()
> > > -      * will therefore most likely be called when the object itself is
> > > -      * being released or under memory pressure (where we attempt to
> > > -      * reap pages for the shrinker).
> > > -      */
> > > -     int (*get_pages)(struct drm_i915_gem_object *);
> > > -     void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
> > > -
> > > -     int (*pwrite)(struct drm_i915_gem_object *,
> > > -                   const struct drm_i915_gem_pwrite *);
> > > -
> > > -     int (*dmabuf_export)(struct drm_i915_gem_object *);
> > > -     void (*release)(struct drm_i915_gem_object *);
> > > -};
> > > -
> > > -struct drm_i915_gem_object {
> > > -     struct drm_gem_object base;
> > > -
> > > -     const struct drm_i915_gem_object_ops *ops;
> > > -
> > > -     struct {
> > > -             /**
> > > -              * @vma.lock: protect the list/tree of vmas
> > > -              */
> > > -             spinlock_t lock;
> > > -
> > > -             /**
> > > -              * @vma.list: List of VMAs backed by this object
> > > -              *
> > > -              * The VMA on this list are ordered by type, all GGTT vma are
> > > -              * placed at the head and all ppGTT vma are placed at the tail.
> > > -              * The different types of GGTT vma are unordered between
> > > -              * themselves, use the @vma.tree (which has a defined order
> > > -              * between all VMA) to quickly find an exact match.
> > > -              */
> > > -             struct list_head list;
> > > -
> > > -             /**
> > > -              * @vma.tree: Ordered tree of VMAs backed by this object
> > > -              *
> > > -              * All VMA created for this object are placed in the @vma.tree
> > > -              * for fast retrieval via a binary search in
> > > -              * i915_vma_instance(). They are also added to @vma.list for
> > > -              * easy iteration.
> > > -              */
> > > -             struct rb_root tree;
> > > -     } vma;
> > > -
> > > -     /**
> > > -      * @lut_list: List of vma lookup entries in use for this object.
> > > -      *
> > > -      * If this object is closed, we need to remove all of its VMA from
> > > -      * the fast lookup index in associated contexts; @lut_list provides
> > > -      * this translation from object to context->handles_vma.
> > > -      */
> > > -     struct list_head lut_list;
> > > -
> > > -     /** Stolen memory for this object, instead of being backed by shmem. */
> > > -     struct drm_mm_node *stolen;
> > > -     union {
> > > -             struct rcu_head rcu;
> > > -             struct llist_node freed;
> > > -     };
> > > -
> > > -     /**
> > > -      * Whether the object is currently in the GGTT mmap.
> > > -      */
> > > -     unsigned int userfault_count;
> > > -     struct list_head userfault_link;
> > > -
> > > -     struct list_head batch_pool_link;
> > > -     I915_SELFTEST_DECLARE(struct list_head st_link);
> > > -
> > > -     unsigned long flags;
> > > -
> > > -     /**
> > > -      * Have we taken a reference for the object for incomplete GPU
> > > -      * activity?
> > > -      */
> > > -#define I915_BO_ACTIVE_REF 0
> > > -
> > > -     /*
> > > -      * Is the object to be mapped as read-only to the GPU
> > > -      * Only honoured if hardware has relevant pte bit
> > > -      */
> > > -     unsigned int cache_level:3;
> > > -     unsigned int cache_coherent:2;
> > > -#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
> > > -#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
> > > -     unsigned int cache_dirty:1;
> > > -
> > > -     /**
> > > -      * @read_domains: Read memory domains.
> > > -      *
> > > -      * These monitor which caches contain read/write data related to the
> > > -      * object. When transitioning from one set of domains to another,
> > > -      * the driver is called to ensure that caches are suitably flushed and
> > > -      * invalidated.
> > > -      */
> > > -     u16 read_domains;
> > > -
> > > -     /**
> > > -      * @write_domain: Corresponding unique write memory domain.
> > > -      */
> > > -     u16 write_domain;
> > > -
> > > -     atomic_t frontbuffer_bits;
> > > -     unsigned int frontbuffer_ggtt_origin; /* write once */
> > > -     struct i915_active_request frontbuffer_write;
> > > -
> > > -     /** Current tiling stride for the object, if it's tiled. */
> > > -     unsigned int tiling_and_stride;
> > > -#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
> > > -#define TILING_MASK (FENCE_MINIMUM_STRIDE-1)
> > > -#define STRIDE_MASK (~TILING_MASK)
> > > -
> > > -     /** Count of VMA actually bound by this object */
> > > -     unsigned int bind_count;
> > > -     unsigned int active_count;
> > > -     /** Count of how many global VMA are currently pinned for use by HW */
> > > -     unsigned int pin_global;
> > > -
> > > -     struct {
> > > -             struct mutex lock; /* protects the pages and their use */
> > > -             atomic_t pages_pin_count;
> > > -
> > > -             struct sg_table *pages;
> > > -             void *mapping;
> > > -
> > > -             /* TODO: whack some of this into the error state */
> > > -             struct i915_page_sizes {
> > > -                     /**
> > > -                      * The sg mask of the pages sg_table. i.e the mask of
> > > -                      * of the lengths for each sg entry.
> > > -                      */
> > > -                     unsigned int phys;
> > > -
> > > -                     /**
> > > -                      * The gtt page sizes we are allowed to use given the
> > > -                      * sg mask and the supported page sizes. This will
> > > -                      * express the smallest unit we can use for the whole
> > > -                      * object, as well as the larger sizes we may be able
> > > -                      * to use opportunistically.
> > > -                      */
> > > -                     unsigned int sg;
> > > -
> > > -                     /**
> > > -                      * The actual gtt page size usage. Since we can have
> > > -                      * multiple vma associated with this object we need to
> > > -                      * prevent any trampling of state, hence a copy of this
> > > -                      * struct also lives in each vma, therefore the gtt
> > > -                      * value here should only be read/write through the vma.
> > > -                      */
> > > -                     unsigned int gtt;
> > > -             } page_sizes;
> > > -
> > > -             I915_SELFTEST_DECLARE(unsigned int page_mask);
> > > -
> > > -             struct i915_gem_object_page_iter {
> > > -                     struct scatterlist *sg_pos;
> > > -                     unsigned int sg_idx; /* in pages, but 32bit eek! */
> > > -
> > > -                     struct radix_tree_root radix;
> > > -                     struct mutex lock; /* protects this cache */
> > > -             } get_page;
> > > -
> > > -             /**
> > > -              * Element within i915->mm.unbound_list or i915->mm.bound_list,
> > > -              * locked by i915->mm.obj_lock.
> > > -              */
> > > -             struct list_head link;
> > > -
> > > -             /**
> > > -              * Advice: are the backing pages purgeable?
> > > -              */
> > > -             unsigned int madv:2;
> > > -
> > > -             /**
> > > -              * This is set if the object has been written to since the
> > > -              * pages were last acquired.
> > > -              */
> > > -             bool dirty:1;
> > > -
> > > -             /**
> > > -              * This is set if the object has been pinned due to unknown
> > > -              * swizzling.
> > > -              */
> > > -             bool quirked:1;
> > > -     } mm;
> > > -
> > > -     /** Breadcrumb of last rendering to the buffer.
> > > -      * There can only be one writer, but we allow for multiple readers.
> > > -      * If there is a writer that necessarily implies that all other
> > > -      * read requests are complete - but we may only be lazily clearing
> > > -      * the read requests. A read request is naturally the most recent
> > > -      * request on a ring, so we may have two different write and read
> > > -      * requests on one ring where the write request is older than the
> > > -      * read request. This allows for the CPU to read from an active
> > > -      * buffer by only waiting for the write to complete.
> > > -      */
> > > -     struct reservation_object *resv;
> > > -
> > > -     /** References from framebuffers, locks out tiling changes. */
> > > -     unsigned int framebuffer_references;
> > > -
> > > -     /** Record of address bit 17 of each page at last unbind. */
> > > -     unsigned long *bit_17;
> > > -
> > > -     union {
> > > -             struct i915_gem_userptr {
> > > -                     uintptr_t ptr;
> > > -
> > > -                     struct i915_mm_struct *mm;
> > > -                     struct i915_mmu_object *mmu_object;
> > > -                     struct work_struct *work;
> > > -             } userptr;
> > > -
> > > -             unsigned long scratch;
> > > -
> > > -             void *gvt_info;
> > > -     };
> > > -
> > > -     /** for phys allocated objects */
> > > -     struct drm_dma_handle *phys_handle;
> > > -
> > > -     struct reservation_object __builtin_resv;
> > > -};
> > > -
> > > -static inline struct drm_i915_gem_object *
> > > -to_intel_bo(struct drm_gem_object *gem)
> > > -{
> > > -     /* Assert that to_intel_bo(NULL) == NULL */
> > > -     BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));
> > > -
> > > -     return container_of(gem, struct drm_i915_gem_object, base);
> > > -}
> > > +#include "gem/i915_gem_object_types.h"
> > >  
> > >  struct drm_i915_gem_object *i915_gem_object_alloc(void);
> > >  void i915_gem_object_free(struct drm_i915_gem_object *obj);
> > 
> > -- 
> > Jani Nikula, Intel Open Source Graphics Center
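As a concrete illustration of the Makefile.header-test rule quoted earlier in
this patch: for every header it emits a one-line generated source file, so the
new gem/i915_gem_object_types.h ends up being compiled standalone as roughly

	/* header_test_i915_gem_object_types.c, generated by cmd_header_test */
	#include "i915_gem_object_types.h"

which is what forces each header to carry its own forward declarations and
includes.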

* Re: [PATCH 38/45] drm/i915: Drop the deferred active reference
  2019-04-25  9:19 ` [PATCH 38/45] drm/i915: Drop the deferred active reference Chris Wilson
@ 2019-05-01 10:34   ` Matthew Auld
  0 siblings, 0 replies; 74+ messages in thread
From: Matthew Auld @ 2019-05-01 10:34 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development

On Thu, 25 Apr 2019 at 10:20, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> An old optimisation to reduce the number of atomics per batch sadly
> relies on struct_mutex for coordination. In order to remove struct_mutex
> from serialising object/context closing, always taking and releasing an
> active reference on first use / last use greatly simplifies the locking.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
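The first-use/last-use scheme described in the commit message boils down to
something like the following; the helper names are hypothetical and the
0 <-> 1 transitions are assumed to be serialised by the caller (as the driver
does with its own locking), so treat this purely as an illustration of the
idea:

	static void active_acquire(struct kref *owner, atomic_t *active)
	{
		if (atomic_inc_return(active) == 1)	/* first use: pin the object */
			kref_get(owner);
	}

	static void active_release(struct kref *owner, atomic_t *active,
				   void (*release)(struct kref *))
	{
		if (atomic_dec_and_test(active))	/* last use: drop the pin */
			kref_put(owner, release);
	}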

* Re: [PATCH 20/45] drm/i915: Apply an execution_mask to the virtual_engine
  2019-04-29 14:12   ` Tvrtko Ursulin
@ 2019-05-07 16:59     ` Chris Wilson
  2019-05-08  8:48       ` Tvrtko Ursulin
  0 siblings, 1 reply; 74+ messages in thread
From: Chris Wilson @ 2019-05-07 16:59 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-04-29 15:12:23)
> 
> On 25/04/2019 10:19, Chris Wilson wrote:
> >   static void virtual_submission_tasklet(unsigned long data)
> >   {
> >       struct virtual_engine * const ve = (struct virtual_engine *)data;
> >       const int prio = ve->base.execlists.queue_priority_hint;
> > +     intel_engine_mask_t mask;
> >       unsigned int n;
> >   
> > +     rcu_read_lock();
> > +     mask = virtual_submission_mask(ve);
> > +     rcu_read_unlock();
> > +     if (unlikely(!mask))
> 
> Is the rcu_lock thing solely for the same protection against wedging in 
> submit_notify?

No. We may still be in the rbtree of the physical engines and
ve->request may be plucked out from underneath us as we read it. And in
the time it takes to track it, that request may have been executed,
retired and freed. To prevent the dangling stale dereference, we use
rcu_read_lock() here as we peek into the request, and spinlocks around
the actual transfer to the execution backend.
-Chris
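
To make the above concrete, a schematic of the peek-under-RCU step; the helper
name is made up and the body is a sketch of the pattern rather than the
driver's exact implementation (the execution_mask field is assumed from the
patch title):

	static intel_engine_mask_t peek_submission_mask(struct virtual_engine *ve)
	{
		struct i915_request *rq;
		intel_engine_mask_t mask = 0;

		rcu_read_lock();
		rq = READ_ONCE(ve->request); /* may be retired/freed once RCU is dropped */
		if (rq)
			mask = rq->execution_mask;
		rcu_read_unlock();

		return mask;
	}

	/* The actual dequeue onto a physical engine is done later, under the
	 * engine's spinlock, not under this RCU read-side section. */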

* Re: [PATCH 20/45] drm/i915: Apply an execution_mask to the virtual_engine
  2019-05-07 16:59     ` Chris Wilson
@ 2019-05-08  8:48       ` Tvrtko Ursulin
  0 siblings, 0 replies; 74+ messages in thread
From: Tvrtko Ursulin @ 2019-05-08  8:48 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/05/2019 17:59, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-04-29 15:12:23)
>>
>> On 25/04/2019 10:19, Chris Wilson wrote:
>>>    static void virtual_submission_tasklet(unsigned long data)
>>>    {
>>>        struct virtual_engine * const ve = (struct virtual_engine *)data;
>>>        const int prio = ve->base.execlists.queue_priority_hint;
>>> +     intel_engine_mask_t mask;
>>>        unsigned int n;
>>>    
>>> +     rcu_read_lock();
>>> +     mask = virtual_submission_mask(ve);
>>> +     rcu_read_unlock();
>>> +     if (unlikely(!mask))
>>
>> Is the rcu_lock thing solely for the same protection against wedging in
>> submit_notify?
> 
> No. We may still be in the rbtree of the physical engines and
> ve->request may be plucked out from underneath us as we read it. And in
> the time it takes to track it, that request may have been executed,
> retired and freed. To prevent the dangling stale dereference, we use
> rcu_read_lock() here as we peek into the request, and spinlocks around
> the actual transfer to the execution backend.

So it's not actually about ve->request as a member pointer, but the 
request object itself. That could make sense, but then wouldn't you need 
to hold the rcu_read_lock over the whole tasklet? There is another 
ve->request read in the for loop just below, although not an actual 
dereference. I guess I just answered it to myself. Okay, looks good then.

Regards,

Tvrtko

end of thread, other threads:[~2019-05-08  8:48 UTC | newest]

Thread overview: 74+ messages
2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
2019-04-25  9:19 ` [PATCH 02/45] drm/i915/gvt: Pin the per-engine GVT shadow contexts Chris Wilson
2019-04-25  9:19 ` [PATCH 03/45] drm/i915: Export intel_context_instance() Chris Wilson
2019-04-25  9:19 ` [PATCH 04/45] drm/i915/selftests: Use the real kernel context for sseu isolation tests Chris Wilson
2019-04-25  9:19 ` [PATCH 05/45] drm/i915/selftests: Pass around intel_context for sseu Chris Wilson
2019-04-25  9:19 ` [PATCH 06/45] drm/i915: Pass intel_context to intel_context_pin_lock() Chris Wilson
2019-04-25  9:19 ` [PATCH 07/45] drm/i915: Split engine setup/init into two phases Chris Wilson
2019-04-25  9:19 ` [PATCH 08/45] drm/i915: Switch back to an array of logical per-engine HW contexts Chris Wilson
2019-04-25  9:19 ` [PATCH 09/45] drm/i915: Remove intel_context.active_link Chris Wilson
2019-04-25  9:19 ` [PATCH 10/45] drm/i915: Move i915_request_alloc into selftests/ Chris Wilson
2019-04-25  9:19 ` [PATCH 11/45] drm/i915/execlists: Flush the tasklet on parking Chris Wilson
2019-04-25  9:19 ` [PATCH 12/45] drm/i915: Move the engine->destroy() vfunc onto the engine Chris Wilson
2019-04-25 11:01   ` Tvrtko Ursulin
2019-04-25 11:24     ` Chris Wilson
2019-04-25 11:29     ` [PATCH v2] " Chris Wilson
2019-04-25 11:46       ` Tvrtko Ursulin
2019-04-25  9:19 ` [PATCH 13/45] drm/i915: Convert inconsistent static engine tables into an init error Chris Wilson
2019-04-25  9:19 ` [PATCH 14/45] drm/i915: Make engine_mask & num_engines static Chris Wilson
2019-04-25  9:30   ` Jani Nikula
2019-04-25 10:20   ` Tvrtko Ursulin
2019-04-25 10:30     ` Chris Wilson
2019-04-25 10:43       ` Tvrtko Ursulin
2019-04-25  9:19 ` [PATCH 15/45] drm/i915: Restore control over ppgtt for context creation ABI Chris Wilson
2019-04-25  9:19 ` [PATCH 16/45] drm/i915: Allow a context to define its set of engines Chris Wilson
2019-04-25  9:19 ` [PATCH 17/45] drm/i915: Re-expose SINGLE_TIMELINE flags for context creation Chris Wilson
2019-04-25  9:19 ` [PATCH 18/45] drm/i915: Allow userspace to clone contexts on creation Chris Wilson
2019-04-25  9:19 ` [PATCH 19/45] drm/i915: Load balancing across a virtual engine Chris Wilson
2019-04-25 12:14   ` Tvrtko Ursulin
2019-04-25 12:23     ` Chris Wilson
2019-04-29 13:43   ` Tvrtko Ursulin
2019-04-25  9:19 ` [PATCH 20/45] drm/i915: Apply an execution_mask to the virtual_engine Chris Wilson
2019-04-29 14:12   ` Tvrtko Ursulin
2019-05-07 16:59     ` Chris Wilson
2019-05-08  8:48       ` Tvrtko Ursulin
2019-04-25  9:19 ` [PATCH 21/45] drm/i915: Extend execution fence to support a callback Chris Wilson
2019-04-25  9:19 ` [PATCH 22/45] drm/i915/execlists: Virtual engine bonding Chris Wilson
2019-04-29 15:58   ` Tvrtko Ursulin
2019-04-25  9:19 ` [PATCH 23/45] drm/i915: Allow specification of parallel execbuf Chris Wilson
2019-04-25  9:19 ` [PATCH 24/45] drm/i915: Split GEM object type definition to its own header Chris Wilson
2019-04-26 12:12   ` Jani Nikula
2019-04-29  9:36     ` Joonas Lahtinen
2019-04-29 17:52       ` Rodrigo Vivi
2019-04-25  9:19 ` [PATCH 25/45] drm/i915: Pull GEM ioctls interface to its own file Chris Wilson
2019-04-25  9:19 ` [PATCH 26/45] drm/i915: Move object->pages API to i915_gem_object.[ch] Chris Wilson
2019-04-25  9:19 ` [PATCH 27/45] drm/i915: Move shmem object setup to its own file Chris Wilson
2019-04-25  9:19 ` [PATCH 28/45] drm/i915: Move phys objects " Chris Wilson
2019-04-25  9:19 ` [PATCH 29/45] drm/i915: Move mmap and friends " Chris Wilson
2019-04-25  9:19 ` [PATCH 30/45] drm/i915: Move GEM domain management " Chris Wilson
2019-04-25  9:19 ` [PATCH 31/45] drm/i915: Move more GEM objects under gem/ Chris Wilson
2019-04-25  9:19 ` [PATCH 32/45] drm/i915: Pull scatterlist utils out of i915_gem.h Chris Wilson
2019-04-25  9:19 ` [PATCH 33/45] lockdep: Swap storage for pin_count and refereneces Chris Wilson
2019-04-25  9:19 ` [PATCH 34/45] drm/i915: Move GEM object domain management from struct_mutex to local Chris Wilson
2019-04-25  9:19 ` [PATCH 35/45] drm/i915: Move GEM object waiting to its own file Chris Wilson
2019-04-25  9:19 ` [PATCH 36/45] drm/i915: Move GEM object busy checking " Chris Wilson
2019-04-25  9:19 ` [PATCH 37/45] drm/i915: Move GEM client throttling " Chris Wilson
2019-04-25  9:19 ` [PATCH 38/45] drm/i915: Drop the deferred active reference Chris Wilson
2019-05-01 10:34   ` Matthew Auld
2019-04-25  9:19 ` [PATCH 39/45] drm/i915: Move object close under its own lock Chris Wilson
2019-04-25  9:19 ` [PATCH 40/45] drm/i915: Rename intel_context.active to .inflight Chris Wilson
2019-04-25  9:20 ` [PATCH 41/45] drm/i915: Keep contexts pinned until after the next kernel context switch Chris Wilson
2019-04-25  9:20 ` [PATCH 42/45] drm/i915: Stop retiring along engine Chris Wilson
2019-04-25  9:20 ` [PATCH 43/45] drm/i915: Replace engine->timeline with a plain list Chris Wilson
2019-04-25  9:20 ` [PATCH 44/45] drm/i915/execlists: Preempt-to-busy Chris Wilson
2019-04-25  9:20 ` [PATCH 45/45] drm/i915/execlists: Minimalistic timeslicing Chris Wilson
2019-04-25 10:35 ` [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Tvrtko Ursulin
2019-04-25 10:42   ` Chris Wilson
2019-04-26 10:40     ` Tvrtko Ursulin
2019-04-25 11:22 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/45] " Patchwork
2019-04-25 11:39 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-04-25 12:07 ` ✓ Fi.CI.BAT: success " Patchwork
2019-04-25 12:09 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling (rev2) Patchwork
2019-04-25 12:25 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-04-25 12:56 ` ✓ Fi.CI.BAT: success " Patchwork
2019-04-25 21:52 ` ✗ Fi.CI.IGT: failure " Patchwork
