All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 01/19] drm/i915: Move request->ctx aside
@ 2018-05-17  7:40 Chris Wilson
  2018-05-17  7:40 ` [PATCH 02/19] drm/i915: Move fiddling with engine->last_retired_context Chris Wilson
                   ` (21 more replies)
  0 siblings, 22 replies; 42+ messages in thread
From: Chris Wilson @ 2018-05-17  7:40 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we want to store the intel_context pointer inside
i915_request, as it is frequently accessed via a convoluted dance when
submitting the request to hw. Having two context pointers inside
i915_request leads to confusion so first rename the existing
i915_gem_context pointer to i915_request.gem_context.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gvt/scheduler.c          |  4 +--
 drivers/gpu/drm/i915/i915_debugfs.c           |  4 +--
 drivers/gpu/drm/i915/i915_gem.c               | 10 +++---
 drivers/gpu/drm/i915/i915_gpu_error.c         | 18 ++++++-----
 drivers/gpu/drm/i915/i915_request.c           | 12 +++----
 drivers/gpu/drm/i915/i915_request.h           |  2 +-
 drivers/gpu/drm/i915/i915_trace.h             | 10 +++---
 drivers/gpu/drm/i915/intel_engine_cs.c        |  2 +-
 drivers/gpu/drm/i915/intel_guc_submission.c   |  7 +++--
 drivers/gpu/drm/i915/intel_lrc.c              | 31 ++++++++++---------
 drivers/gpu/drm/i915/intel_ringbuffer.c       | 12 +++----
 .../gpu/drm/i915/selftests/intel_hangcheck.c  |  5 ++-
 drivers/gpu/drm/i915/selftests/intel_lrc.c    |  2 +-
 13 files changed, 64 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index c2d183b91500..17f9f8d7e148 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -205,7 +205,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 
 static inline bool is_gvt_request(struct i915_request *req)
 {
-	return i915_gem_context_force_single_submission(req->ctx);
+	return i915_gem_context_force_single_submission(req->gem_context);
 }
 
 static void save_ring_hw_state(struct intel_vgpu *vgpu, int ring_id)
@@ -305,7 +305,7 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
 	struct i915_request *req = workload->req;
 
 	if (IS_KABYLAKE(req->i915) &&
-	    is_inhibit_context(req->ctx, req->engine->id))
+	    is_inhibit_context(req->gem_context, req->engine->id))
 		intel_vgpu_restore_inhibit_context(vgpu, req);
 
 	/* allocate shadow ring buffer */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 13e7b9e4a6e6..ee8e2ff2c426 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -542,8 +542,8 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 						   struct i915_request,
 						   client_link);
 		rcu_read_lock();
-		task = pid_task(request && request->ctx->pid ?
-				request->ctx->pid : file->pid,
+		task = pid_task(request && request->gem_context->pid ?
+				request->gem_context->pid : file->pid,
 				PIDTYPE_PID);
 		print_file_stats(m, task ? task->comm : "<unknown>", stats);
 		rcu_read_unlock();
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b0fe452ce17c..a20f8db5729d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3067,7 +3067,7 @@ static void skip_request(struct i915_request *request)
 static void engine_skip_context(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
-	struct i915_gem_context *hung_ctx = request->ctx;
+	struct i915_gem_context *hung_ctx = request->gem_context;
 	struct i915_timeline *timeline = request->timeline;
 	unsigned long flags;
 
@@ -3077,7 +3077,7 @@ static void engine_skip_context(struct i915_request *request)
 	spin_lock_nested(&timeline->lock, SINGLE_DEPTH_NESTING);
 
 	list_for_each_entry_continue(request, &engine->timeline.requests, link)
-		if (request->ctx == hung_ctx)
+		if (request->gem_context == hung_ctx)
 			skip_request(request);
 
 	list_for_each_entry(request, &timeline->requests, link)
@@ -3123,11 +3123,11 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
 	}
 
 	if (stalled) {
-		i915_gem_context_mark_guilty(request->ctx);
+		i915_gem_context_mark_guilty(request->gem_context);
 		skip_request(request);
 
 		/* If this context is now banned, skip all pending requests. */
-		if (i915_gem_context_is_banned(request->ctx))
+		if (i915_gem_context_is_banned(request->gem_context))
 			engine_skip_context(request);
 	} else {
 		/*
@@ -3137,7 +3137,7 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
 		 */
 		request = i915_gem_find_active_request(engine);
 		if (request) {
-			i915_gem_context_mark_innocent(request->ctx);
+			i915_gem_context_mark_innocent(request->gem_context);
 			dma_fence_set_error(&request->fence, -EAGAIN);
 
 			/* Rewind the engine to replay the incomplete rq */
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index df234dc23274..7cc7d3bc731b 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1287,9 +1287,11 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
 static void record_request(struct i915_request *request,
 			   struct drm_i915_error_request *erq)
 {
-	erq->context = request->ctx->hw_id;
+	struct i915_gem_context *ctx = request->gem_context;
+
+	erq->context = ctx->hw_id;
 	erq->sched_attr = request->sched.attr;
-	erq->ban_score = atomic_read(&request->ctx->ban_score);
+	erq->ban_score = atomic_read(&ctx->ban_score);
 	erq->seqno = request->global_seqno;
 	erq->jiffies = request->emitted_jiffies;
 	erq->start = i915_ggtt_offset(request->ring->vma);
@@ -1297,7 +1299,7 @@ static void record_request(struct i915_request *request,
 	erq->tail = request->tail;
 
 	rcu_read_lock();
-	erq->pid = request->ctx->pid ? pid_nr(request->ctx->pid) : 0;
+	erq->pid = ctx->pid ? pid_nr(ctx->pid) : 0;
 	rcu_read_unlock();
 }
 
@@ -1461,12 +1463,12 @@ static void gem_record_rings(struct i915_gpu_state *error)
 
 		request = i915_gem_find_active_request(engine);
 		if (request) {
+			struct i915_gem_context *ctx = request->gem_context;
 			struct intel_ring *ring;
 
-			ee->vm = request->ctx->ppgtt ?
-				&request->ctx->ppgtt->base : &ggtt->base;
+			ee->vm = ctx->ppgtt ? &ctx->ppgtt->base : &ggtt->base;
 
-			record_context(&ee->context, request->ctx);
+			record_context(&ee->context, ctx);
 
 			/* We need to copy these to an anonymous buffer
 			 * as the simplest method to avoid being overwritten
@@ -1483,11 +1485,11 @@ static void gem_record_rings(struct i915_gpu_state *error)
 
 			ee->ctx =
 				i915_error_object_create(i915,
-							 to_intel_context(request->ctx,
+							 to_intel_context(ctx,
 									  engine)->state);
 
 			error->simulated |=
-				i915_gem_context_no_error_capture(request->ctx);
+				i915_gem_context_no_error_capture(ctx);
 
 			ee->rq_head = request->head;
 			ee->rq_post = request->postfix;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 8928894dd9c7..fe8810a6a339 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -384,7 +384,7 @@ static void __retire_engine_request(struct intel_engine_cs *engine,
 	 */
 	if (engine->last_retired_context)
 		intel_context_unpin(engine->last_retired_context, engine);
-	engine->last_retired_context = rq->ctx;
+	engine->last_retired_context = rq->gem_context;
 }
 
 static void __retire_engine_upto(struct intel_engine_cs *engine,
@@ -455,8 +455,8 @@ static void i915_request_retire(struct i915_request *request)
 	i915_request_remove_from_client(request);
 
 	/* Retirement decays the ban score as it is a sign of ctx progress */
-	atomic_dec_if_positive(&request->ctx->ban_score);
-	intel_context_unpin(request->ctx, request->engine);
+	atomic_dec_if_positive(&request->gem_context->ban_score);
+	intel_context_unpin(request->gem_context, request->engine);
 
 	__retire_engine_upto(request->engine, request);
 
@@ -760,7 +760,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	INIT_LIST_HEAD(&rq->active_list);
 	rq->i915 = i915;
 	rq->engine = engine;
-	rq->ctx = ctx;
+	rq->gem_context = ctx;
 	rq->ring = ring;
 	rq->timeline = ring->timeline;
 	GEM_BUG_ON(rq->timeline == &engine->timeline);
@@ -814,7 +814,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 		goto err_unwind;
 
 	/* Keep a second pin for the dual retirement along engine and ring */
-	__intel_context_pin(rq->ctx, engine);
+	__intel_context_pin(rq->gem_context, engine);
 
 	/* Check that we didn't interrupt ourselves with a new request */
 	GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno);
@@ -1113,7 +1113,7 @@ void __i915_request_add(struct i915_request *request, bool flush_caches)
 	local_bh_disable();
 	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
 	if (engine->schedule)
-		engine->schedule(request, &request->ctx->sched);
+		engine->schedule(request, &request->gem_context->sched);
 	rcu_read_unlock();
 	i915_sw_fence_commit(&request->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index eddbd4245cb3..dddecd9ffd0c 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -93,7 +93,7 @@ struct i915_request {
 	 * i915_request_free() will then decrement the refcount on the
 	 * context.
 	 */
-	struct i915_gem_context *ctx;
+	struct i915_gem_context *gem_context;
 	struct intel_engine_cs *engine;
 	struct intel_ring *ring;
 	struct i915_timeline *timeline;
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 8cc3a256f29d..5d4f78765083 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -624,7 +624,7 @@ TRACE_EVENT(i915_request_queue,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->ctx->hw_id;
+			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->ring = rq->engine->id;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
@@ -651,7 +651,7 @@ DECLARE_EVENT_CLASS(i915_request,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->ctx->hw_id;
+			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->ring = rq->engine->id;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
@@ -696,7 +696,7 @@ TRACE_EVENT(i915_request_in,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->ctx->hw_id;
+			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->ring = rq->engine->id;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
@@ -727,7 +727,7 @@ TRACE_EVENT(i915_request_out,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->ctx->hw_id;
+			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->ring = rq->engine->id;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
@@ -815,7 +815,7 @@ TRACE_EVENT(i915_request_wait_begin,
 	     */
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->ctx->hw_id;
+			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->ring = rq->engine->id;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index d4e159ae65a6..faaaf2638bb8 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1020,7 +1020,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
 	 */
 	rq = __i915_gem_active_peek(&engine->timeline.last_request);
 	if (rq)
-		return rq->ctx == kernel_context;
+		return rq->gem_context == kernel_context;
 	else
 		return engine->last_retired_context == kernel_context;
 }
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 637e852888ec..a432a193f3c4 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -513,8 +513,9 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 {
 	struct intel_guc_client *client = guc->execbuf_client;
 	struct intel_engine_cs *engine = rq->engine;
-	u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(rq->ctx,
-								 engine));
+	u32 ctx_desc =
+		lower_32_bits(intel_lr_context_descriptor(rq->gem_context,
+							  engine));
 	u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
 
 	spin_lock(&client->wq_lock);
@@ -725,7 +726,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 		struct i915_request *rq, *rn;
 
 		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
-			if (last && rq->ctx != last->ctx) {
+			if (last && rq->gem_context != last->gem_context) {
 				if (port == last_port) {
 					__list_del_many(&p->requests,
 							&rq->sched.link);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 646ecf267411..3173dc58a3b3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -418,9 +418,10 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
 
 static u64 execlists_update_context(struct i915_request *rq)
 {
-	struct intel_context *ce = to_intel_context(rq->ctx, rq->engine);
+	struct intel_context *ce =
+		to_intel_context(rq->gem_context, rq->engine);
 	struct i915_hw_ppgtt *ppgtt =
-		rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
+		rq->gem_context->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
 	u32 *reg_state = ce->lrc_reg_state;
 
 	reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
@@ -681,7 +682,8 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 			 * second request, and so we never need to tell the
 			 * hardware about the first.
 			 */
-			if (last && !can_merge_ctx(rq->ctx, last->ctx)) {
+			if (last && !can_merge_ctx(rq->gem_context,
+						   last->gem_context)) {
 				/*
 				 * If we are on the second port and cannot
 				 * combine this request with the last, then we
@@ -700,14 +702,14 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 				 * the same context (even though a different
 				 * request) to the second port.
 				 */
-				if (ctx_single_port_submission(last->ctx) ||
-				    ctx_single_port_submission(rq->ctx)) {
+				if (ctx_single_port_submission(last->gem_context) ||
+				    ctx_single_port_submission(rq->gem_context)) {
 					__list_del_many(&p->requests,
 							&rq->sched.link);
 					goto done;
 				}
 
-				GEM_BUG_ON(last->ctx == rq->ctx);
+				GEM_BUG_ON(last->gem_context == rq->gem_context);
 
 				if (submit)
 					port_assign(port, last);
@@ -1438,7 +1440,7 @@ static void execlists_context_unpin(struct intel_engine_cs *engine,
 static int execlists_request_alloc(struct i915_request *request)
 {
 	struct intel_context *ce =
-		to_intel_context(request->ctx, request->engine);
+		to_intel_context(request->gem_context, request->engine);
 	int ret;
 
 	GEM_BUG_ON(!ce->pin_count);
@@ -1955,7 +1957,7 @@ static void execlists_reset(struct intel_engine_cs *engine,
 	 * future request will be after userspace has had the opportunity
 	 * to recreate its own state.
 	 */
-	regs = to_intel_context(request->ctx, engine)->lrc_reg_state;
+	regs = to_intel_context(request->gem_context, engine)->lrc_reg_state;
 	if (engine->default_state) {
 		void *defaults;
 
@@ -1968,7 +1970,8 @@ static void execlists_reset(struct intel_engine_cs *engine,
 			i915_gem_object_unpin_map(engine->default_state);
 		}
 	}
-	execlists_init_reg_state(regs, request->ctx, engine, request->ring);
+	execlists_init_reg_state(regs,
+				 request->gem_context, engine, request->ring);
 
 	/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
 	regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma);
@@ -1990,7 +1993,7 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
 
 static int intel_logical_ring_emit_pdps(struct i915_request *rq)
 {
-	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
+	struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt;
 	struct intel_engine_cs *engine = rq->engine;
 	const int num_lri_cmds = GEN8_3LVL_PDPES * 2;
 	u32 *cs;
@@ -2029,15 +2032,15 @@ static int gen8_emit_bb_start(struct i915_request *rq,
 	 * it is unsafe in case of lite-restore (because the ctx is
 	 * not idle). PML4 is allocated during ppgtt init so this is
 	 * not needed in 48-bit.*/
-	if (rq->ctx->ppgtt &&
-	    (intel_engine_flag(rq->engine) & rq->ctx->ppgtt->pd_dirty_rings) &&
-	    !i915_vm_is_48bit(&rq->ctx->ppgtt->base) &&
+	if (rq->gem_context->ppgtt &&
+	    (intel_engine_flag(rq->engine) & rq->gem_context->ppgtt->pd_dirty_rings) &&
+	    !i915_vm_is_48bit(&rq->gem_context->ppgtt->base) &&
 	    !intel_vgpu_active(rq->i915)) {
 		ret = intel_logical_ring_emit_pdps(rq);
 		if (ret)
 			return ret;
 
-		rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
+		rq->gem_context->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
 	}
 
 	cs = intel_ring_begin(rq, 6);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 6f200a747176..53703012ec75 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -571,8 +571,8 @@ static void reset_ring(struct intel_engine_cs *engine,
 	 */
 	if (request) {
 		struct drm_i915_private *dev_priv = request->i915;
-		struct intel_context *ce = to_intel_context(request->ctx,
-							    engine);
+		struct intel_context *ce =
+			to_intel_context(request->gem_context, engine);
 		struct i915_hw_ppgtt *ppgtt;
 
 		if (ce->state) {
@@ -584,7 +584,7 @@ static void reset_ring(struct intel_engine_cs *engine,
 				   CCID_EN);
 		}
 
-		ppgtt = request->ctx->ppgtt ?: engine->i915->mm.aliasing_ppgtt;
+		ppgtt = request->gem_context->ppgtt ?: engine->i915->mm.aliasing_ppgtt;
 		if (ppgtt) {
 			u32 pd_offset = ppgtt->pd.base.ggtt_offset << 10;
 
@@ -1458,7 +1458,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 
 	*cs++ = MI_NOOP;
 	*cs++ = MI_SET_CONTEXT;
-	*cs++ = i915_ggtt_offset(to_intel_context(rq->ctx, engine)->state) | flags;
+	*cs++ = i915_ggtt_offset(to_intel_context(rq->gem_context, engine)->state) | flags;
 	/*
 	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
 	 * WaMiSetContext_Hang:snb,ivb,vlv
@@ -1526,7 +1526,7 @@ static int remap_l3(struct i915_request *rq, int slice)
 static int switch_context(struct i915_request *rq)
 {
 	struct intel_engine_cs *engine = rq->engine;
-	struct i915_gem_context *to_ctx = rq->ctx;
+	struct i915_gem_context *to_ctx = rq->gem_context;
 	struct i915_hw_ppgtt *to_mm =
 		to_ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
 	struct i915_gem_context *from_ctx = engine->legacy_active_context;
@@ -1597,7 +1597,7 @@ static int ring_request_alloc(struct i915_request *request)
 {
 	int ret;
 
-	GEM_BUG_ON(!to_intel_context(request->ctx, request->engine)->pin_count);
+	GEM_BUG_ON(!to_intel_context(request->gem_context, request->engine)->pin_count);
 
 	/* Flush enough space to reduce the likelihood of waiting after
 	 * we start building the request - in which case we will just
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 438e0b045a2c..2c4e77c050dc 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -105,7 +105,10 @@ static int emit_recurse_batch(struct hang *h,
 			      struct i915_request *rq)
 {
 	struct drm_i915_private *i915 = h->i915;
-	struct i915_address_space *vm = rq->ctx->ppgtt ? &rq->ctx->ppgtt->base : &i915->ggtt.base;
+	struct i915_address_space *vm =
+		rq->gem_context->ppgtt ?
+		&rq->gem_context->ppgtt->base :
+		&i915->ggtt.base;
 	struct i915_vma *hws, *vma;
 	unsigned int flags;
 	u32 *batch;
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 1b8a07125150..68cb9126b3e1 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -83,7 +83,7 @@ static int emit_recurse_batch(struct spinner *spin,
 			      struct i915_request *rq,
 			      u32 arbitration_command)
 {
-	struct i915_address_space *vm = &rq->ctx->ppgtt->base;
+	struct i915_address_space *vm = &rq->gem_context->ppgtt->base;
 	struct i915_vma *hws, *vma;
 	u32 *batch;
 	int err;
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 02/19] drm/i915: Move fiddling with engine->last_retired_context
  2018-05-17  7:40 [PATCH 01/19] drm/i915: Move request->ctx aside Chris Wilson
@ 2018-05-17  7:40 ` Chris Wilson
  2018-05-17  7:40 ` [PATCH 03/19] drm/i915: Store a pointer to intel_context in i915_request Chris Wilson
                   ` (20 subsequent siblings)
  21 siblings, 0 replies; 42+ messages in thread
From: Chris Wilson @ 2018-05-17  7:40 UTC (permalink / raw)
  To: intel-gfx

Move the knowledge about resetting the current context tracking on the
engine from inside i915_gem_context.c into intel_engine_cs.c

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c | 12 ++----------
 drivers/gpu/drm/i915/intel_engine_cs.c  | 23 +++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 +
 3 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 4bf18b5c6f1d..9e70f4dfa703 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -514,16 +514,8 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
-	for_each_engine(engine, dev_priv, id) {
-		engine->legacy_active_context = NULL;
-		engine->legacy_active_ppgtt = NULL;
-
-		if (!engine->last_retired_context)
-			continue;
-
-		intel_context_unpin(engine->last_retired_context, engine);
-		engine->last_retired_context = NULL;
-	}
+	for_each_engine(engine, dev_priv, id)
+		intel_engine_lost_context(engine);
 }
 
 void i915_gem_contexts_fini(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index faaaf2638bb8..f510fa196e17 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1096,6 +1096,29 @@ void intel_engines_unpark(struct drm_i915_private *i915)
 	}
 }
 
+/**
+ * intel_engine_lost_context: called when the GPU is reset into unknown state
+ * @engine: the engine
+ *
+ * We have either reset the GPU or otherwise about to lose state tracking of
+ * the current GPU logical state (e.g. suspend). On next use, it is therefore
+ * imperative that we make no presumptions about the current state and load
+ * from scratch.
+ */
+void intel_engine_lost_context(struct intel_engine_cs *engine)
+{
+	struct i915_gem_context *ctx;
+
+	lockdep_assert_held(&engine->i915->drm.struct_mutex);
+
+	engine->legacy_active_context = NULL;
+	engine->legacy_active_ppgtt = NULL;
+
+	ctx = fetch_and_zero(&engine->last_retired_context);
+	if (ctx)
+		intel_context_unpin(ctx, engine);
+}
+
 bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
 {
 	switch (INTEL_GEN(engine->i915)) {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 61f385a92484..2b16185e36c4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -1053,6 +1053,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 
 bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine);
+void intel_engine_lost_context(struct intel_engine_cs *engine);
 
 void intel_engines_park(struct drm_i915_private *i915);
 void intel_engines_unpark(struct drm_i915_private *i915);
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 03/19] drm/i915: Store a pointer to intel_context in i915_request
  2018-05-17  7:40 [PATCH 01/19] drm/i915: Move request->ctx aside Chris Wilson
  2018-05-17  7:40 ` [PATCH 02/19] drm/i915: Move fiddling with engine->last_retired_context Chris Wilson
@ 2018-05-17  7:40 ` Chris Wilson
  2018-05-17  7:40 ` [PATCH 04/19] drm/i915: Pull the context->pin_count dec into the common intel_context_unpin Chris Wilson
                   ` (19 subsequent siblings)
  21 siblings, 0 replies; 42+ messages in thread
From: Chris Wilson @ 2018-05-17  7:40 UTC (permalink / raw)
  To: intel-gfx

To ease the frequent and ugly pointer dance of
&request->gem_context->engine[request->engine->id] during request
submission, store that pointer as request->hw_context. One major
advantage that we will exploit later is that this decouples the logical
context state from the engine itself.

v2: Set mock_context->ops so we don't crash and burn in selftests.
    Cleanups from Tvrtko.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gvt/mmio_context.c       |   6 +-
 drivers/gpu/drm/i915/gvt/mmio_context.h       |   2 +-
 drivers/gpu/drm/i915/gvt/scheduler.c          | 141 +++++++-----------
 drivers/gpu/drm/i915/gvt/scheduler.h          |   1 -
 drivers/gpu/drm/i915/i915_drv.h               |   1 +
 drivers/gpu/drm/i915/i915_gem.c               |  12 +-
 drivers/gpu/drm/i915/i915_gem_context.c       |  17 ++-
 drivers/gpu/drm/i915/i915_gem_context.h       |  21 ++-
 drivers/gpu/drm/i915/i915_gpu_error.c         |   3 +-
 drivers/gpu/drm/i915/i915_perf.c              |  25 ++--
 drivers/gpu/drm/i915/i915_request.c           |  34 ++---
 drivers/gpu/drm/i915/i915_request.h           |   1 +
 drivers/gpu/drm/i915/intel_engine_cs.c        |  54 ++++---
 drivers/gpu/drm/i915/intel_guc_submission.c   |  10 +-
 drivers/gpu/drm/i915/intel_lrc.c              | 125 +++++++++-------
 drivers/gpu/drm/i915/intel_lrc.h              |   7 -
 drivers/gpu/drm/i915/intel_ringbuffer.c       | 100 ++++++++-----
 drivers/gpu/drm/i915/intel_ringbuffer.h       |   9 +-
 drivers/gpu/drm/i915/selftests/mock_context.c |   7 +
 drivers/gpu/drm/i915/selftests/mock_engine.c  |  41 +++--
 20 files changed, 321 insertions(+), 296 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 0f949554d118..708170e61625 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -446,9 +446,9 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next,
 
 #define CTX_CONTEXT_CONTROL_VAL	0x03
 
-bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id)
+bool is_inhibit_context(struct intel_context *ce)
 {
-	u32 *reg_state = ctx->__engine[ring_id].lrc_reg_state;
+	const u32 *reg_state = ce->lrc_reg_state;
 	u32 inhibit_mask =
 		_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
 
@@ -501,7 +501,7 @@ static void switch_mmio(struct intel_vgpu *pre,
 			 * itself.
 			 */
 			if (mmio->in_context &&
-			    !is_inhibit_context(s->shadow_ctx, ring_id))
+			    !is_inhibit_context(&s->shadow_ctx->__engine[ring_id]))
 				continue;
 
 			if (mmio->mask)
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.h b/drivers/gpu/drm/i915/gvt/mmio_context.h
index 0439eb8057a8..5c3b9ff9f96a 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.h
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.h
@@ -49,7 +49,7 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
 
 void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt);
 
-bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id);
+bool is_inhibit_context(struct intel_context *ce);
 
 int intel_vgpu_restore_inhibit_context(struct intel_vgpu *vgpu,
 				       struct i915_request *req);
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 17f9f8d7e148..e1760030dda1 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -54,11 +54,8 @@ static void set_context_pdp_root_pointer(
 
 static void update_shadow_pdps(struct intel_vgpu_workload *workload)
 {
-	struct intel_vgpu *vgpu = workload->vgpu;
-	int ring_id = workload->ring_id;
-	struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx;
 	struct drm_i915_gem_object *ctx_obj =
-		shadow_ctx->__engine[ring_id].state->obj;
+		workload->req->hw_context->state->obj;
 	struct execlist_ring_context *shadow_ring_context;
 	struct page *page;
 
@@ -128,9 +125,8 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_gvt *gvt = vgpu->gvt;
 	int ring_id = workload->ring_id;
-	struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx;
 	struct drm_i915_gem_object *ctx_obj =
-		shadow_ctx->__engine[ring_id].state->obj;
+		workload->req->hw_context->state->obj;
 	struct execlist_ring_context *shadow_ring_context;
 	struct page *page;
 	void *dst;
@@ -280,10 +276,8 @@ static int shadow_context_status_change(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
-static void shadow_context_descriptor_update(struct i915_gem_context *ctx,
-		struct intel_engine_cs *engine)
+static void shadow_context_descriptor_update(struct intel_context *ce)
 {
-	struct intel_context *ce = to_intel_context(ctx, engine);
 	u64 desc = 0;
 
 	desc = ce->lrc_desc;
@@ -292,7 +286,7 @@ static void shadow_context_descriptor_update(struct i915_gem_context *ctx,
 	 * like GEN8_CTX_* cached in desc_template
 	 */
 	desc &= U64_MAX << 12;
-	desc |= ctx->desc_template & ((1ULL << 12) - 1);
+	desc |= ce->gem_context->desc_template & ((1ULL << 12) - 1);
 
 	ce->lrc_desc = desc;
 }
@@ -300,12 +294,11 @@ static void shadow_context_descriptor_update(struct i915_gem_context *ctx,
 static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
+	struct i915_request *req = workload->req;
 	void *shadow_ring_buffer_va;
 	u32 *cs;
-	struct i915_request *req = workload->req;
 
-	if (IS_KABYLAKE(req->i915) &&
-	    is_inhibit_context(req->gem_context, req->engine->id))
+	if (IS_KABYLAKE(req->i915) && is_inhibit_context(req->hw_context))
 		intel_vgpu_restore_inhibit_context(vgpu, req);
 
 	/* allocate shadow ring buffer */
@@ -353,60 +346,56 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
 	struct intel_vgpu_submission *s = &vgpu->submission;
 	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
-	int ring_id = workload->ring_id;
-	struct intel_engine_cs *engine = dev_priv->engine[ring_id];
-	struct intel_ring *ring;
+	struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id];
+	struct intel_context *ce;
 	int ret;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
-	if (workload->shadowed)
+	if (workload->req)
 		return 0;
 
+	/* pin shadow context by gvt even though the shadow context will be pinned
+	 * when i915 allocates a request. That is because gvt will update the guest
+	 * context from shadow context when workload is completed, and at that
+	 * moment, i915 may already have unpinned the shadow context to make the
+	 * shadow_ctx pages invalid. So gvt needs to pin itself. After updating
+	 * the guest context, gvt can unpin the shadow_ctx safely.
+	 */
+	ce = intel_context_pin(shadow_ctx, engine);
+	if (IS_ERR(ce)) {
+		gvt_vgpu_err("fail to pin shadow context\n");
+		return PTR_ERR(ce);
+	}
+
 	shadow_ctx->desc_template &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT);
 	shadow_ctx->desc_template |= workload->ctx_desc.addressing_mode <<
 				    GEN8_CTX_ADDRESSING_MODE_SHIFT;
 
-	if (!test_and_set_bit(ring_id, s->shadow_ctx_desc_updated))
-		shadow_context_descriptor_update(shadow_ctx,
-					dev_priv->engine[ring_id]);
+	if (!test_and_set_bit(workload->ring_id, s->shadow_ctx_desc_updated))
+		shadow_context_descriptor_update(ce);
 
 	ret = intel_gvt_scan_and_shadow_ringbuffer(workload);
 	if (ret)
-		goto err_scan;
+		goto err_unpin;
 
 	if ((workload->ring_id == RCS) &&
 	    (workload->wa_ctx.indirect_ctx.size != 0)) {
 		ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
 		if (ret)
-			goto err_scan;
-	}
-
-	/* pin shadow context by gvt even the shadow context will be pinned
-	 * when i915 alloc request. That is because gvt will update the guest
-	 * context from shadow context when workload is completed, and at that
-	 * moment, i915 may already unpined the shadow context to make the
-	 * shadow_ctx pages invalid. So gvt need to pin itself. After update
-	 * the guest context, gvt can unpin the shadow_ctx safely.
-	 */
-	ring = intel_context_pin(shadow_ctx, engine);
-	if (IS_ERR(ring)) {
-		ret = PTR_ERR(ring);
-		gvt_vgpu_err("fail to pin shadow context\n");
-		goto err_shadow;
+			goto err_shadow;
 	}
 
 	ret = populate_shadow_context(workload);
 	if (ret)
-		goto err_unpin;
-	workload->shadowed = true;
+		goto err_shadow;
+
 	return 0;
 
-err_unpin:
-	intel_context_unpin(shadow_ctx, engine);
 err_shadow:
 	release_shadow_wa_ctx(&workload->wa_ctx);
-err_scan:
+err_unpin:
+	intel_context_unpin(ce);
 	return ret;
 }
 
@@ -414,7 +403,6 @@ static int intel_gvt_generate_request(struct intel_vgpu_workload *workload)
 {
 	int ring_id = workload->ring_id;
 	struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
-	struct intel_engine_cs *engine = dev_priv->engine[ring_id];
 	struct i915_request *rq;
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_vgpu_submission *s = &vgpu->submission;
@@ -437,7 +425,6 @@ static int intel_gvt_generate_request(struct intel_vgpu_workload *workload)
 	return 0;
 
 err_unpin:
-	intel_context_unpin(shadow_ctx, engine);
 	release_shadow_wa_ctx(&workload->wa_ctx);
 	return ret;
 }
@@ -517,21 +504,13 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 	return ret;
 }
 
-static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
+static void update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 {
-	struct intel_vgpu_workload *workload = container_of(wa_ctx,
-					struct intel_vgpu_workload,
-					wa_ctx);
-	int ring_id = workload->ring_id;
-	struct intel_vgpu_submission *s = &workload->vgpu->submission;
-	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
-	struct drm_i915_gem_object *ctx_obj =
-		shadow_ctx->__engine[ring_id].state->obj;
-	struct execlist_ring_context *shadow_ring_context;
-	struct page *page;
-
-	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
-	shadow_ring_context = kmap_atomic(page);
+	struct intel_vgpu_workload *workload =
+		container_of(wa_ctx, struct intel_vgpu_workload, wa_ctx);
+	struct i915_request *rq = workload->req;
+	struct execlist_ring_context *shadow_ring_context =
+		(struct execlist_ring_context *)rq->hw_context->lrc_reg_state;
 
 	shadow_ring_context->bb_per_ctx_ptr.val =
 		(shadow_ring_context->bb_per_ctx_ptr.val &
@@ -539,9 +518,6 @@ static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 	shadow_ring_context->rcs_indirect_ctx.val =
 		(shadow_ring_context->rcs_indirect_ctx.val &
 		(~INDIRECT_CTX_ADDR_MASK)) | wa_ctx->indirect_ctx.shadow_gma;
-
-	kunmap_atomic(shadow_ring_context);
-	return 0;
 }
 
 static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
@@ -670,12 +646,9 @@ static int prepare_workload(struct intel_vgpu_workload *workload)
 static int dispatch_workload(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
-	struct intel_vgpu_submission *s = &vgpu->submission;
-	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	int ring_id = workload->ring_id;
-	struct intel_engine_cs *engine = dev_priv->engine[ring_id];
-	int ret = 0;
+	int ret;
 
 	gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n",
 		ring_id, workload);
@@ -687,10 +660,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 		goto out;
 
 	ret = prepare_workload(workload);
-	if (ret) {
-		intel_context_unpin(shadow_ctx, engine);
-		goto out;
-	}
 
 out:
 	if (ret)
@@ -765,27 +734,23 @@ static struct intel_vgpu_workload *pick_next_workload(
 
 static void update_guest_context(struct intel_vgpu_workload *workload)
 {
+	struct i915_request *rq = workload->req;
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_gvt *gvt = vgpu->gvt;
-	struct intel_vgpu_submission *s = &vgpu->submission;
-	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
-	int ring_id = workload->ring_id;
-	struct drm_i915_gem_object *ctx_obj =
-		shadow_ctx->__engine[ring_id].state->obj;
+	struct drm_i915_gem_object *ctx_obj = rq->hw_context->state->obj;
 	struct execlist_ring_context *shadow_ring_context;
 	struct page *page;
 	void *src;
 	unsigned long context_gpa, context_page_num;
 	int i;
 
-	gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id,
-			workload->ctx_desc.lrca);
-
-	context_page_num = gvt->dev_priv->engine[ring_id]->context_size;
+	gvt_dbg_sched("ring id %d workload lrca %x\n", rq->engine->id,
+		      workload->ctx_desc.lrca);
 
+	context_page_num = rq->engine->context_size;
 	context_page_num = context_page_num >> PAGE_SHIFT;
 
-	if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS)
+	if (IS_BROADWELL(gvt->dev_priv) && rq->engine->id == RCS)
 		context_page_num = 19;
 
 	i = 2;
@@ -858,6 +823,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
 		scheduler->current_workload[ring_id];
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct i915_request *rq;
 	int event;
 
 	mutex_lock(&gvt->lock);
@@ -866,11 +832,8 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
 	 * switch to make sure request is completed.
 	 * For the workload w/o request, directly complete the workload.
 	 */
-	if (workload->req) {
-		struct drm_i915_private *dev_priv =
-			workload->vgpu->gvt->dev_priv;
-		struct intel_engine_cs *engine =
-			dev_priv->engine[workload->ring_id];
+	rq = fetch_and_zero(&workload->req);
+	if (rq) {
 		wait_event(workload->shadow_ctx_status_wq,
 			   !atomic_read(&workload->shadow_ctx_active));
 
@@ -886,8 +849,6 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
 				workload->status = 0;
 		}
 
-		i915_request_put(fetch_and_zero(&workload->req));
-
 		if (!workload->status && !(vgpu->resetting_eng &
 					   ENGINE_MASK(ring_id))) {
 			update_guest_context(workload);
@@ -896,10 +857,13 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
 					 INTEL_GVT_EVENT_MAX)
 				intel_vgpu_trigger_virtual_event(vgpu, event);
 		}
-		mutex_lock(&dev_priv->drm.struct_mutex);
+
 		/* unpin shadow ctx as the shadow_ctx update is done */
-		intel_context_unpin(s->shadow_ctx, engine);
-		mutex_unlock(&dev_priv->drm.struct_mutex);
+		mutex_lock(&rq->i915->drm.struct_mutex);
+		intel_context_unpin(rq->hw_context);
+		mutex_unlock(&rq->i915->drm.struct_mutex);
+
+		i915_request_put(rq);
 	}
 
 	gvt_dbg_sched("ring id %d complete workload %p status %d\n",
@@ -1270,7 +1234,6 @@ alloc_workload(struct intel_vgpu *vgpu)
 	atomic_set(&workload->shadow_ctx_active, 0);
 
 	workload->status = -EINPROGRESS;
-	workload->shadowed = false;
 	workload->vgpu = vgpu;
 
 	return workload;
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h
index 6c644782193e..21eddab4a9cd 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.h
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -83,7 +83,6 @@ struct intel_vgpu_workload {
 	struct i915_request *req;
 	/* if this workload has been dispatched to i915? */
 	bool dispatched;
-	bool shadowed;
 	int status;
 
 	struct intel_vgpu_mm *shadow_mm;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 34c125e2d90c..e33c380b43e3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1950,6 +1950,7 @@ struct drm_i915_private {
 			 */
 			struct i915_perf_stream *exclusive_stream;
 
+			struct intel_context *pinned_ctx;
 			u32 specific_ctx_id;
 
 			struct hrtimer poll_check_timer;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a20f8db5729d..03874b50ada9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3181,14 +3181,14 @@ void i915_gem_reset(struct drm_i915_private *dev_priv,
 	i915_retire_requests(dev_priv);
 
 	for_each_engine(engine, dev_priv, id) {
-		struct i915_gem_context *ctx;
+		struct intel_context *ce;
 
 		i915_gem_reset_engine(engine,
 				      engine->hangcheck.active_request,
 				      stalled_mask & ENGINE_MASK(id));
-		ctx = fetch_and_zero(&engine->last_retired_context);
-		if (ctx)
-			intel_context_unpin(ctx, engine);
+		ce = fetch_and_zero(&engine->last_retired_context);
+		if (ce)
+			intel_context_unpin(ce);
 
 		/*
 		 * Ostensibily, we always want a context loaded for powersaving,
@@ -4897,13 +4897,13 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
 
 static void assert_kernel_context_is_current(struct drm_i915_private *i915)
 {
-	struct i915_gem_context *kernel_context = i915->kernel_context;
+	struct i915_gem_context *kctx = i915->kernel_context;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
 	for_each_engine(engine, i915, id) {
 		GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request));
-		GEM_BUG_ON(engine->last_retired_context != kernel_context);
+		GEM_BUG_ON(engine->last_retired_context->gem_context != kctx);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 9e70f4dfa703..b69b18ef8120 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -127,14 +127,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) {
 		struct intel_context *ce = &ctx->__engine[n];
 
-		if (!ce->state)
-			continue;
-
-		WARN_ON(ce->pin_count);
-		if (ce->ring)
-			intel_ring_free(ce->ring);
-
-		__i915_gem_object_release_unless_active(ce->state->obj);
+		if (ce->ops)
+			ce->ops->destroy(ce);
 	}
 
 	kfree(ctx->name);
@@ -266,6 +260,7 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 		    struct drm_i915_file_private *file_priv)
 {
 	struct i915_gem_context *ctx;
+	unsigned int n;
 	int ret;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -283,6 +278,12 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	ctx->i915 = dev_priv;
 	ctx->sched.priority = I915_PRIORITY_NORMAL;
 
+	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) {
+		struct intel_context *ce = &ctx->__engine[n];
+
+		ce->gem_context = ctx;
+	}
+
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
 	INIT_LIST_HEAD(&ctx->handles_list);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index ace3b129c189..749a4ff566f5 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -45,6 +45,11 @@ struct intel_ring;
 
 #define DEFAULT_CONTEXT_HANDLE 0
 
+struct intel_context_ops {
+	void (*unpin)(struct intel_context *ce);
+	void (*destroy)(struct intel_context *ce);
+};
+
 /**
  * struct i915_gem_context - client state
  *
@@ -144,11 +149,14 @@ struct i915_gem_context {
 
 	/** engine: per-engine logical HW state */
 	struct intel_context {
+		struct i915_gem_context *gem_context;
 		struct i915_vma *state;
 		struct intel_ring *ring;
 		u32 *lrc_reg_state;
 		u64 lrc_desc;
 		int pin_count;
+
+		const struct intel_context_ops *ops;
 	} __engine[I915_NUM_ENGINES];
 
 	/** ring_size: size for allocating the per-engine ring buffer */
@@ -263,25 +271,22 @@ to_intel_context(struct i915_gem_context *ctx,
 	return &ctx->__engine[engine->id];
 }
 
-static inline struct intel_ring *
+static inline struct intel_context *
 intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 {
 	return engine->context_pin(engine, ctx);
 }
 
-static inline void __intel_context_pin(struct i915_gem_context *ctx,
-				       const struct intel_engine_cs *engine)
+static inline void __intel_context_pin(struct intel_context *ce)
 {
-	struct intel_context *ce = to_intel_context(ctx, engine);
-
 	GEM_BUG_ON(!ce->pin_count);
 	ce->pin_count++;
 }
 
-static inline void intel_context_unpin(struct i915_gem_context *ctx,
-				       struct intel_engine_cs *engine)
+static inline void intel_context_unpin(struct intel_context *ce)
 {
-	engine->context_unpin(engine, ctx);
+	GEM_BUG_ON(!ce->ops);
+	ce->ops->unpin(ce);
 }
 
 /* i915_gem_context.c */
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 7cc7d3bc731b..145823f0b48e 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1485,8 +1485,7 @@ static void gem_record_rings(struct i915_gpu_state *error)
 
 			ee->ctx =
 				i915_error_object_create(i915,
-							 to_intel_context(ctx,
-									  engine)->state);
+							 request->hw_context->state);
 
 			error->simulated |=
 				i915_gem_context_no_error_capture(ctx);
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 019bd2d073ad..4f0eb84b3c00 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1221,7 +1221,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 		dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id;
 	} else {
 		struct intel_engine_cs *engine = dev_priv->engine[RCS];
-		struct intel_ring *ring;
+		struct intel_context *ce;
 		int ret;
 
 		ret = i915_mutex_lock_interruptible(&dev_priv->drm);
@@ -1234,19 +1234,19 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 		 *
 		 * NB: implied RCS engine...
 		 */
-		ring = intel_context_pin(stream->ctx, engine);
+		ce = intel_context_pin(stream->ctx, engine);
 		mutex_unlock(&dev_priv->drm.struct_mutex);
-		if (IS_ERR(ring))
-			return PTR_ERR(ring);
+		if (IS_ERR(ce))
+			return PTR_ERR(ce);
 
+		dev_priv->perf.oa.pinned_ctx = ce;
 
 		/*
 		 * Explicitly track the ID (instead of calling
 		 * i915_ggtt_offset() on the fly) considering the difference
 		 * with gen8+ and execlists
 		 */
-		dev_priv->perf.oa.specific_ctx_id =
-			i915_ggtt_offset(to_intel_context(stream->ctx, engine)->state);
+		dev_priv->perf.oa.specific_ctx_id = i915_ggtt_offset(ce->state);
 	}
 
 	return 0;
@@ -1262,17 +1262,14 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct intel_context *ce;
 
-	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
-		dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
-	} else {
-		struct intel_engine_cs *engine = dev_priv->engine[RCS];
+	dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
 
+	ce = fetch_and_zero(&dev_priv->perf.oa.pinned_ctx);
+	if (ce) {
 		mutex_lock(&dev_priv->drm.struct_mutex);
-
-		dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
-		intel_context_unpin(stream->ctx, engine);
-
+		intel_context_unpin(ce);
 		mutex_unlock(&dev_priv->drm.struct_mutex);
 	}
 }
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index fe8810a6a339..fc499bcbd105 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -383,8 +383,8 @@ static void __retire_engine_request(struct intel_engine_cs *engine,
 	 * the subsequent request.
 	 */
 	if (engine->last_retired_context)
-		intel_context_unpin(engine->last_retired_context, engine);
-	engine->last_retired_context = rq->gem_context;
+		intel_context_unpin(engine->last_retired_context);
+	engine->last_retired_context = rq->hw_context;
 }
 
 static void __retire_engine_upto(struct intel_engine_cs *engine,
@@ -456,7 +456,7 @@ static void i915_request_retire(struct i915_request *request)
 
 	/* Retirement decays the ban score as it is a sign of ctx progress */
 	atomic_dec_if_positive(&request->gem_context->ban_score);
-	intel_context_unpin(request->gem_context, request->engine);
+	intel_context_unpin(request->hw_context);
 
 	__retire_engine_upto(request->engine, request);
 
@@ -657,7 +657,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 {
 	struct drm_i915_private *i915 = engine->i915;
 	struct i915_request *rq;
-	struct intel_ring *ring;
+	struct intel_context *ce;
 	int ret;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
@@ -681,22 +681,21 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 * GGTT space, so do this first before we reserve a seqno for
 	 * ourselves.
 	 */
-	ring = intel_context_pin(ctx, engine);
-	if (IS_ERR(ring))
-		return ERR_CAST(ring);
-	GEM_BUG_ON(!ring);
+	ce = intel_context_pin(ctx, engine);
+	if (IS_ERR(ce))
+		return ERR_CAST(ce);
 
 	ret = reserve_gt(i915);
 	if (ret)
 		goto err_unpin;
 
-	ret = intel_ring_wait_for_space(ring, MIN_SPACE_FOR_ADD_REQUEST);
+	ret = intel_ring_wait_for_space(ce->ring, MIN_SPACE_FOR_ADD_REQUEST);
 	if (ret)
 		goto err_unreserve;
 
 	/* Move our oldest request to the slab-cache (if not in use!) */
-	rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link);
-	if (!list_is_last(&rq->ring_link, &ring->request_list) &&
+	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
+	if (!list_is_last(&rq->ring_link, &ce->ring->request_list) &&
 	    i915_request_completed(rq))
 		i915_request_retire(rq);
 
@@ -761,8 +760,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	rq->i915 = i915;
 	rq->engine = engine;
 	rq->gem_context = ctx;
-	rq->ring = ring;
-	rq->timeline = ring->timeline;
+	rq->hw_context = ce;
+	rq->ring = ce->ring;
+	rq->timeline = ce->ring->timeline;
 	GEM_BUG_ON(rq->timeline == &engine->timeline);
 
 	spin_lock_init(&rq->lock);
@@ -814,14 +814,14 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 		goto err_unwind;
 
 	/* Keep a second pin for the dual retirement along engine and ring */
-	__intel_context_pin(rq->gem_context, engine);
+	__intel_context_pin(ce);
 
 	/* Check that we didn't interrupt ourselves with a new request */
 	GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno);
 	return rq;
 
 err_unwind:
-	rq->ring->emit = rq->head;
+	ce->ring->emit = rq->head;
 
 	/* Make sure we didn't add ourselves to external state before freeing */
 	GEM_BUG_ON(!list_empty(&rq->active_list));
@@ -832,7 +832,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 err_unreserve:
 	unreserve_gt(i915);
 err_unpin:
-	intel_context_unpin(ctx, engine);
+	intel_context_unpin(ce);
 	return ERR_PTR(ret);
 }
 
@@ -1018,8 +1018,8 @@ i915_request_await_object(struct i915_request *to,
 void __i915_request_add(struct i915_request *request, bool flush_caches)
 {
 	struct intel_engine_cs *engine = request->engine;
-	struct intel_ring *ring = request->ring;
 	struct i915_timeline *timeline = request->timeline;
+	struct intel_ring *ring = request->ring;
 	struct i915_request *prev;
 	u32 *cs;
 	int err;
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index dddecd9ffd0c..1bbbb7a9fa03 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -95,6 +95,7 @@ struct i915_request {
 	 */
 	struct i915_gem_context *gem_context;
 	struct intel_engine_cs *engine;
+	struct intel_context *hw_context;
 	struct intel_ring *ring;
 	struct i915_timeline *timeline;
 	struct intel_signal_node signaling;
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index f510fa196e17..828b7377d0d0 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -645,6 +645,12 @@ static int init_phys_status_page(struct intel_engine_cs *engine)
 	return 0;
 }
 
+static void __intel_context_unpin(struct i915_gem_context *ctx,
+				  struct intel_engine_cs *engine)
+{
+	intel_context_unpin(to_intel_context(ctx, engine));
+}
+
 /**
  * intel_engines_init_common - initialize cengine state which might require hw access
  * @engine: Engine to initialize.
@@ -658,7 +664,8 @@ static int init_phys_status_page(struct intel_engine_cs *engine)
  */
 int intel_engine_init_common(struct intel_engine_cs *engine)
 {
-	struct intel_ring *ring;
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_context *ce;
 	int ret;
 
 	engine->set_default_submission(engine);
@@ -670,18 +677,18 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 	 * be available. To avoid this we always pin the default
 	 * context.
 	 */
-	ring = intel_context_pin(engine->i915->kernel_context, engine);
-	if (IS_ERR(ring))
-		return PTR_ERR(ring);
+	ce = intel_context_pin(i915->kernel_context, engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
 
 	/*
 	 * Similarly the preempt context must always be available so that
 	 * we can interrupt the engine at any time.
 	 */
-	if (engine->i915->preempt_context) {
-		ring = intel_context_pin(engine->i915->preempt_context, engine);
-		if (IS_ERR(ring)) {
-			ret = PTR_ERR(ring);
+	if (i915->preempt_context) {
+		ce = intel_context_pin(i915->preempt_context, engine);
+		if (IS_ERR(ce)) {
+			ret = PTR_ERR(ce);
 			goto err_unpin_kernel;
 		}
 	}
@@ -690,7 +697,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 	if (ret)
 		goto err_unpin_preempt;
 
-	if (HWS_NEEDS_PHYSICAL(engine->i915))
+	if (HWS_NEEDS_PHYSICAL(i915))
 		ret = init_phys_status_page(engine);
 	else
 		ret = init_status_page(engine);
@@ -702,10 +709,11 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 err_breadcrumbs:
 	intel_engine_fini_breadcrumbs(engine);
 err_unpin_preempt:
-	if (engine->i915->preempt_context)
-		intel_context_unpin(engine->i915->preempt_context, engine);
+	if (i915->preempt_context)
+		__intel_context_unpin(i915->preempt_context, engine);
+
 err_unpin_kernel:
-	intel_context_unpin(engine->i915->kernel_context, engine);
+	__intel_context_unpin(i915->kernel_context, engine);
 	return ret;
 }
 
@@ -718,6 +726,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
  */
 void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 {
+	struct drm_i915_private *i915 = engine->i915;
+
 	intel_engine_cleanup_scratch(engine);
 
 	if (HWS_NEEDS_PHYSICAL(engine->i915))
@@ -732,9 +742,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 	if (engine->default_state)
 		i915_gem_object_put(engine->default_state);
 
-	if (engine->i915->preempt_context)
-		intel_context_unpin(engine->i915->preempt_context, engine);
-	intel_context_unpin(engine->i915->kernel_context, engine);
+	if (i915->preempt_context)
+		__intel_context_unpin(i915->preempt_context, engine);
+	__intel_context_unpin(i915->kernel_context, engine);
 
 	i915_timeline_fini(&engine->timeline);
 }
@@ -1007,8 +1017,8 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
  */
 bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
 {
-	const struct i915_gem_context * const kernel_context =
-		engine->i915->kernel_context;
+	const struct intel_context *kernel_context =
+		to_intel_context(engine->i915->kernel_context, engine);
 	struct i915_request *rq;
 
 	lockdep_assert_held(&engine->i915->drm.struct_mutex);
@@ -1020,7 +1030,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
 	 */
 	rq = __i915_gem_active_peek(&engine->timeline.last_request);
 	if (rq)
-		return rq->gem_context == kernel_context;
+		return rq->hw_context == kernel_context;
 	else
 		return engine->last_retired_context == kernel_context;
 }
@@ -1107,16 +1117,16 @@ void intel_engines_unpark(struct drm_i915_private *i915)
  */
 void intel_engine_lost_context(struct intel_engine_cs *engine)
 {
-	struct i915_gem_context *ctx;
+	struct intel_context *ce;
 
 	lockdep_assert_held(&engine->i915->drm.struct_mutex);
 
 	engine->legacy_active_context = NULL;
 	engine->legacy_active_ppgtt = NULL;
 
-	ctx = fetch_and_zero(&engine->last_retired_context);
-	if (ctx)
-		intel_context_unpin(ctx, engine);
+	ce = fetch_and_zero(&engine->last_retired_context);
+	if (ce)
+		intel_context_unpin(ce);
 }
 
 bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index a432a193f3c4..133367a17863 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -513,9 +513,7 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 {
 	struct intel_guc_client *client = guc->execbuf_client;
 	struct intel_engine_cs *engine = rq->engine;
-	u32 ctx_desc =
-		lower_32_bits(intel_lr_context_descriptor(rq->gem_context,
-							  engine));
+	u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc);
 	u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
 
 	spin_lock(&client->wq_lock);
@@ -553,8 +551,8 @@ static void inject_preempt_context(struct work_struct *work)
 					     preempt_work[engine->id]);
 	struct intel_guc_client *client = guc->preempt_client;
 	struct guc_stage_desc *stage_desc = __get_stage_desc(client);
-	u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(client->owner,
-								 engine));
+	u32 ctx_desc = lower_32_bits(to_intel_context(client->owner,
+						      engine)->lrc_desc);
 	u32 data[7];
 
 	/*
@@ -726,7 +724,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 		struct i915_request *rq, *rn;
 
 		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
-			if (last && rq->gem_context != last->gem_context) {
+			if (last && rq->hw_context != last->hw_context) {
 				if (port == last_port) {
 					__list_del_many(&p->requests,
 							&rq->sched.link);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3173dc58a3b3..960948617748 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -164,7 +164,8 @@
 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
 
 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
-					    struct intel_engine_cs *engine);
+					    struct intel_engine_cs *engine,
+					    struct intel_context *ce);
 static void execlists_init_reg_state(u32 *reg_state,
 				     struct i915_gem_context *ctx,
 				     struct intel_engine_cs *engine,
@@ -189,12 +190,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 		!i915_request_completed(last));
 }
 
-/**
- * intel_lr_context_descriptor_update() - calculate & cache the descriptor
- * 					  descriptor for a pinned context
- * @ctx: Context to work on
- * @engine: Engine the descriptor will be used with
- *
+/*
  * The context descriptor encodes various attributes of a context,
  * including its GTT address and some flags. Because it's fairly
  * expensive to calculate, we'll just do it once and cache the result,
@@ -222,9 +218,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
  */
 static void
 intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
-				   struct intel_engine_cs *engine)
+				   struct intel_engine_cs *engine,
+				   struct intel_context *ce)
 {
-	struct intel_context *ce = to_intel_context(ctx, engine);
 	u64 desc;
 
 	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
@@ -418,8 +414,7 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
 
 static u64 execlists_update_context(struct i915_request *rq)
 {
-	struct intel_context *ce =
-		to_intel_context(rq->gem_context, rq->engine);
+	struct intel_context *ce = rq->hw_context;
 	struct i915_hw_ppgtt *ppgtt =
 		rq->gem_context->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
 	u32 *reg_state = ce->lrc_reg_state;
@@ -496,14 +491,14 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
 }
 
-static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
+static bool ctx_single_port_submission(const struct intel_context *ce)
 {
 	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
-		i915_gem_context_force_single_submission(ctx));
+		i915_gem_context_force_single_submission(ce->gem_context));
 }
 
-static bool can_merge_ctx(const struct i915_gem_context *prev,
-			  const struct i915_gem_context *next)
+static bool can_merge_ctx(const struct intel_context *prev,
+			  const struct intel_context *next)
 {
 	if (prev != next)
 		return false;
@@ -682,8 +677,8 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 			 * second request, and so we never need to tell the
 			 * hardware about the first.
 			 */
-			if (last && !can_merge_ctx(rq->gem_context,
-						   last->gem_context)) {
+			if (last &&
+			    !can_merge_ctx(rq->hw_context, last->hw_context)) {
 				/*
 				 * If we are on the second port and cannot
 				 * combine this request with the last, then we
@@ -702,14 +697,14 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 				 * the same context (even though a different
 				 * request) to the second port.
 				 */
-				if (ctx_single_port_submission(last->gem_context) ||
-				    ctx_single_port_submission(rq->gem_context)) {
+				if (ctx_single_port_submission(last->hw_context) ||
+				    ctx_single_port_submission(rq->hw_context)) {
 					__list_del_many(&p->requests,
 							&rq->sched.link);
 					goto done;
 				}
 
-				GEM_BUG_ON(last->gem_context == rq->gem_context);
+				GEM_BUG_ON(last->hw_context == rq->hw_context);
 
 				if (submit)
 					port_assign(port, last);
@@ -1340,6 +1335,37 @@ static void execlists_schedule(struct i915_request *request,
 	spin_unlock_irq(&engine->timeline.lock);
 }
 
+static void execlists_context_destroy(struct intel_context *ce)
+{
+	GEM_BUG_ON(!ce->state);
+	GEM_BUG_ON(ce->pin_count);
+
+	intel_ring_free(ce->ring);
+	__i915_gem_object_release_unless_active(ce->state->obj);
+}
+
+static void __execlists_context_unpin(struct intel_context *ce)
+{
+	intel_ring_unpin(ce->ring);
+
+	ce->state->obj->pin_global--;
+	i915_gem_object_unpin_map(ce->state->obj);
+	i915_vma_unpin(ce->state);
+
+	i915_gem_context_put(ce->gem_context);
+}
+
+static void execlists_context_unpin(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->gem_context->i915->drm.struct_mutex);
+	GEM_BUG_ON(ce->pin_count == 0);
+
+	if (--ce->pin_count)
+		return;
+
+	__execlists_context_unpin(ce);
+}
+
 static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
 {
 	unsigned int flags;
@@ -1363,21 +1389,15 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
 	return i915_vma_pin(vma, 0, GEN8_LR_CONTEXT_ALIGN, flags);
 }
 
-static struct intel_ring *
-execlists_context_pin(struct intel_engine_cs *engine,
-		      struct i915_gem_context *ctx)
+static struct intel_context *
+__execlists_context_pin(struct intel_engine_cs *engine,
+			struct i915_gem_context *ctx,
+			struct intel_context *ce)
 {
-	struct intel_context *ce = to_intel_context(ctx, engine);
 	void *vaddr;
 	int ret;
 
-	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
-
-	if (likely(ce->pin_count++))
-		goto out;
-	GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
-
-	ret = execlists_context_deferred_alloc(ctx, engine);
+	ret = execlists_context_deferred_alloc(ctx, engine, ce);
 	if (ret)
 		goto err;
 	GEM_BUG_ON(!ce->state);
@@ -1396,7 +1416,7 @@ execlists_context_pin(struct intel_engine_cs *engine,
 	if (ret)
 		goto unpin_map;
 
-	intel_lr_context_descriptor_update(ctx, engine);
+	intel_lr_context_descriptor_update(ctx, engine, ce);
 
 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
 	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
@@ -1405,8 +1425,7 @@ execlists_context_pin(struct intel_engine_cs *engine,
 
 	ce->state->obj->pin_global++;
 	i915_gem_context_get(ctx);
-out:
-	return ce->ring;
+	return ce;
 
 unpin_map:
 	i915_gem_object_unpin_map(ce->state->obj);
@@ -1417,33 +1436,33 @@ execlists_context_pin(struct intel_engine_cs *engine,
 	return ERR_PTR(ret);
 }
 
-static void execlists_context_unpin(struct intel_engine_cs *engine,
-				    struct i915_gem_context *ctx)
+static const struct intel_context_ops execlists_context_ops = {
+	.unpin = execlists_context_unpin,
+	.destroy = execlists_context_destroy,
+};
+
+static struct intel_context *
+execlists_context_pin(struct intel_engine_cs *engine,
+		      struct i915_gem_context *ctx)
 {
 	struct intel_context *ce = to_intel_context(ctx, engine);
 
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
-	GEM_BUG_ON(ce->pin_count == 0);
 
-	if (--ce->pin_count)
-		return;
-
-	intel_ring_unpin(ce->ring);
+	if (likely(ce->pin_count++))
+		return ce;
+	GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
 
-	ce->state->obj->pin_global--;
-	i915_gem_object_unpin_map(ce->state->obj);
-	i915_vma_unpin(ce->state);
+	ce->ops = &execlists_context_ops;
 
-	i915_gem_context_put(ctx);
+	return __execlists_context_pin(engine, ctx, ce);
 }
 
 static int execlists_request_alloc(struct i915_request *request)
 {
-	struct intel_context *ce =
-		to_intel_context(request->gem_context, request->engine);
 	int ret;
 
-	GEM_BUG_ON(!ce->pin_count);
+	GEM_BUG_ON(!request->hw_context->pin_count);
 
 	/* Flush enough space to reduce the likelihood of waiting after
 	 * we start building the request - in which case we will just
@@ -1957,7 +1976,7 @@ static void execlists_reset(struct intel_engine_cs *engine,
 	 * future request will be after userspace has had the opportunity
 	 * to recreate its own state.
 	 */
-	regs = to_intel_context(request->gem_context, engine)->lrc_reg_state;
+	regs = request->hw_context->lrc_reg_state;
 	if (engine->default_state) {
 		void *defaults;
 
@@ -2328,8 +2347,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 	engine->reset.finish = execlists_reset_finish;
 
 	engine->context_pin = execlists_context_pin;
-	engine->context_unpin = execlists_context_unpin;
-
 	engine->request_alloc = execlists_request_alloc;
 
 	engine->emit_flush = gen8_emit_flush;
@@ -2564,7 +2581,7 @@ static void execlists_init_reg_state(u32 *regs,
 	struct drm_i915_private *dev_priv = engine->i915;
 	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: dev_priv->mm.aliasing_ppgtt;
 	u32 base = engine->mmio_base;
-	bool rcs = engine->id == RCS;
+	bool rcs = engine->class == RENDER_CLASS;
 
 	/* A context is actually a big batch buffer with several
 	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
@@ -2711,10 +2728,10 @@ populate_lr_context(struct i915_gem_context *ctx,
 }
 
 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
-					    struct intel_engine_cs *engine)
+					    struct intel_engine_cs *engine,
+					    struct intel_context *ce)
 {
 	struct drm_i915_gem_object *ctx_obj;
-	struct intel_context *ce = to_intel_context(ctx, engine);
 	struct i915_vma *vma;
 	uint32_t context_size;
 	struct intel_ring *ring;
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 4ec7d8dd13c8..1593194e930c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -104,11 +104,4 @@ struct i915_gem_context;
 
 void intel_lr_context_resume(struct drm_i915_private *dev_priv);
 
-static inline uint64_t
-intel_lr_context_descriptor(struct i915_gem_context *ctx,
-			    struct intel_engine_cs *engine)
-{
-	return to_intel_context(ctx, engine)->lrc_desc;
-}
-
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 53703012ec75..0c0c9f531e4e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -571,8 +571,7 @@ static void reset_ring(struct intel_engine_cs *engine,
 	 */
 	if (request) {
 		struct drm_i915_private *dev_priv = request->i915;
-		struct intel_context *ce =
-			to_intel_context(request->gem_context, engine);
+		struct intel_context *ce = request->hw_context;
 		struct i915_hw_ppgtt *ppgtt;
 
 		if (ce->state) {
@@ -1186,7 +1185,31 @@ intel_ring_free(struct intel_ring *ring)
 	kfree(ring);
 }
 
-static int context_pin(struct intel_context *ce)
+static void intel_ring_context_destroy(struct intel_context *ce)
+{
+	GEM_BUG_ON(ce->pin_count);
+
+	if (ce->state)
+		__i915_gem_object_release_unless_active(ce->state->obj);
+}
+
+static void intel_ring_context_unpin(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->gem_context->i915->drm.struct_mutex);
+	GEM_BUG_ON(ce->pin_count == 0);
+
+	if (--ce->pin_count)
+		return;
+
+	if (ce->state) {
+		ce->state->obj->pin_global--;
+		i915_vma_unpin(ce->state);
+	}
+
+	i915_gem_context_put(ce->gem_context);
+}
+
+static int __context_pin(struct intel_context *ce)
 {
 	struct i915_vma *vma = ce->state;
 	int ret;
@@ -1275,25 +1298,19 @@ alloc_context_vma(struct intel_engine_cs *engine)
 	return ERR_PTR(err);
 }
 
-static struct intel_ring *
-intel_ring_context_pin(struct intel_engine_cs *engine,
-		       struct i915_gem_context *ctx)
+static struct intel_context *
+__ring_context_pin(struct intel_engine_cs *engine,
+		   struct i915_gem_context *ctx,
+		   struct intel_context *ce)
 {
-	struct intel_context *ce = to_intel_context(ctx, engine);
-	int ret;
-
-	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
-
-	if (likely(ce->pin_count++))
-		goto out;
-	GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
+	int err;
 
 	if (!ce->state && engine->context_size) {
 		struct i915_vma *vma;
 
 		vma = alloc_context_vma(engine);
 		if (IS_ERR(vma)) {
-			ret = PTR_ERR(vma);
+			err = PTR_ERR(vma);
 			goto err;
 		}
 
@@ -1301,8 +1318,8 @@ intel_ring_context_pin(struct intel_engine_cs *engine,
 	}
 
 	if (ce->state) {
-		ret = context_pin(ce);
-		if (ret)
+		err = __context_pin(ce);
+		if (err)
 			goto err;
 
 		ce->state->obj->pin_global++;
@@ -1310,32 +1327,37 @@ intel_ring_context_pin(struct intel_engine_cs *engine,
 
 	i915_gem_context_get(ctx);
 
-out:
 	/* One ringbuffer to rule them all */
-	return engine->buffer;
+	GEM_BUG_ON(!engine->buffer);
+	ce->ring = engine->buffer;
+
+	return ce;
 
 err:
 	ce->pin_count = 0;
-	return ERR_PTR(ret);
+	return ERR_PTR(err);
 }
 
-static void intel_ring_context_unpin(struct intel_engine_cs *engine,
-				     struct i915_gem_context *ctx)
+static const struct intel_context_ops ring_context_ops = {
+	.unpin = intel_ring_context_unpin,
+	.destroy = intel_ring_context_destroy,
+};
+
+static struct intel_context *
+intel_ring_context_pin(struct intel_engine_cs *engine,
+		       struct i915_gem_context *ctx)
 {
 	struct intel_context *ce = to_intel_context(ctx, engine);
 
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
-	GEM_BUG_ON(ce->pin_count == 0);
 
-	if (--ce->pin_count)
-		return;
+	if (likely(ce->pin_count++))
+		return ce;
+	GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
 
-	if (ce->state) {
-		ce->state->obj->pin_global--;
-		i915_vma_unpin(ce->state);
-	}
+	ce->ops = &ring_context_ops;
 
-	i915_gem_context_put(ctx);
+	return __ring_context_pin(engine, ctx, ce);
 }
 
 static int intel_init_ring_buffer(struct intel_engine_cs *engine)
@@ -1346,10 +1368,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 
 	intel_engine_setup_common(engine);
 
-	err = intel_engine_init_common(engine);
-	if (err)
-		goto err;
-
 	timeline = i915_timeline_create(engine->i915, engine->name);
 	if (IS_ERR(timeline)) {
 		err = PTR_ERR(timeline);
@@ -1371,8 +1389,14 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 	GEM_BUG_ON(engine->buffer);
 	engine->buffer = ring;
 
+	err = intel_engine_init_common(engine);
+	if (err)
+		goto err_unpin;
+
 	return 0;
 
+err_unpin:
+	intel_ring_unpin(ring);
 err_ring:
 	intel_ring_free(ring);
 err:
@@ -1458,7 +1482,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 
 	*cs++ = MI_NOOP;
 	*cs++ = MI_SET_CONTEXT;
-	*cs++ = i915_ggtt_offset(to_intel_context(rq->gem_context, engine)->state) | flags;
+	*cs++ = i915_ggtt_offset(rq->hw_context->state) | flags;
 	/*
 	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
 	 * WaMiSetContext_Hang:snb,ivb,vlv
@@ -1549,7 +1573,7 @@ static int switch_context(struct i915_request *rq)
 		hw_flags = MI_FORCE_RESTORE;
 	}
 
-	if (to_intel_context(to_ctx, engine)->state &&
+	if (rq->hw_context->state &&
 	    (to_ctx != from_ctx || hw_flags & MI_FORCE_RESTORE)) {
 		GEM_BUG_ON(engine->id != RCS);
 
@@ -1597,7 +1621,7 @@ static int ring_request_alloc(struct i915_request *request)
 {
 	int ret;
 
-	GEM_BUG_ON(!to_intel_context(request->gem_context, request->engine)->pin_count);
+	GEM_BUG_ON(!request->hw_context->pin_count);
 
 	/* Flush enough space to reduce the likelihood of waiting after
 	 * we start building the request - in which case we will just
@@ -2028,8 +2052,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 	engine->reset.finish = reset_finish;
 
 	engine->context_pin = intel_ring_context_pin;
-	engine->context_unpin = intel_ring_context_unpin;
-
 	engine->request_alloc = ring_request_alloc;
 
 	engine->emit_breadcrumb = i9xx_emit_breadcrumb;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2b16185e36c4..20c4e13efc0d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -436,10 +436,9 @@ struct intel_engine_cs {
 
 	void		(*set_default_submission)(struct intel_engine_cs *engine);
 
-	struct intel_ring *(*context_pin)(struct intel_engine_cs *engine,
-					  struct i915_gem_context *ctx);
-	void		(*context_unpin)(struct intel_engine_cs *engine,
-					 struct i915_gem_context *ctx);
+	struct intel_context *(*context_pin)(struct intel_engine_cs *engine,
+					     struct i915_gem_context *ctx);
+
 	int		(*request_alloc)(struct i915_request *rq);
 	int		(*init_context)(struct i915_request *rq);
 
@@ -555,7 +554,7 @@ struct intel_engine_cs {
 	 * to the kernel context and trash it as the save may not happen
 	 * before the hardware is powered down.
 	 */
-	struct i915_gem_context *last_retired_context;
+	struct intel_context *last_retired_context;
 
 	/* We track the current MI_SET_CONTEXT in order to eliminate
 	 * redudant context switches. This presumes that requests are not
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index 501becc47c0c..8904f1ce64e3 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -30,6 +30,7 @@ mock_context(struct drm_i915_private *i915,
 	     const char *name)
 {
 	struct i915_gem_context *ctx;
+	unsigned int n;
 	int ret;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -43,6 +44,12 @@ mock_context(struct drm_i915_private *i915,
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
 	INIT_LIST_HEAD(&ctx->handles_list);
 
+	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) {
+		struct intel_context *ce = &ctx->__engine[n];
+
+		ce->gem_context = ctx;
+	}
+
 	ret = ida_simple_get(&i915->contexts.hw_ida,
 			     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
 	if (ret < 0)
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 26bf29d97007..33eddfc1f8ce 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -72,25 +72,37 @@ static void hw_delay_complete(struct timer_list *t)
 	spin_unlock(&engine->hw_lock);
 }
 
-static struct intel_ring *
-mock_context_pin(struct intel_engine_cs *engine,
-		 struct i915_gem_context *ctx)
+static void mock_context_unpin(struct intel_context *ce)
 {
-	struct intel_context *ce = to_intel_context(ctx, engine);
+	if (--ce->pin_count)
+		return;
 
-	if (!ce->pin_count++)
-		i915_gem_context_get(ctx);
+	i915_gem_context_put(ce->gem_context);
+}
 
-	return engine->buffer;
+static void mock_context_destroy(struct intel_context *ce)
+{
+	GEM_BUG_ON(ce->pin_count);
 }
 
-static void mock_context_unpin(struct intel_engine_cs *engine,
-			       struct i915_gem_context *ctx)
+static const struct intel_context_ops mock_context_ops = {
+	.unpin = mock_context_unpin,
+	.destroy = mock_context_destroy,
+};
+
+static struct intel_context *
+mock_context_pin(struct intel_engine_cs *engine,
+		 struct i915_gem_context *ctx)
 {
 	struct intel_context *ce = to_intel_context(ctx, engine);
 
-	if (!--ce->pin_count)
-		i915_gem_context_put(ctx);
+	if (!ce->pin_count++) {
+		i915_gem_context_get(ctx);
+		ce->ring = engine->buffer;
+		ce->ops = &mock_context_ops;
+	}
+
+	return ce;
 }
 
 static int mock_request_alloc(struct i915_request *request)
@@ -185,7 +197,6 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	engine->base.status_page.page_addr = (void *)(engine + 1);
 
 	engine->base.context_pin = mock_context_pin;
-	engine->base.context_unpin = mock_context_unpin;
 	engine->base.request_alloc = mock_request_alloc;
 	engine->base.emit_flush = mock_emit_flush;
 	engine->base.emit_breadcrumb = mock_emit_breadcrumb;
@@ -238,11 +249,13 @@ void mock_engine_free(struct intel_engine_cs *engine)
 {
 	struct mock_engine *mock =
 		container_of(engine, typeof(*mock), base);
+	struct intel_context *ce;
 
 	GEM_BUG_ON(timer_pending(&mock->hw_delay));
 
-	if (engine->last_retired_context)
-		intel_context_unpin(engine->last_retired_context, engine);
+	ce = fetch_and_zero(&engine->last_retired_context);
+	if (ce)
+		intel_context_unpin(ce);
 
 	mock_ring_free(engine->buffer);
 
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 04/19] drm/i915: Pull the context->pin_count dec into the common intel_context_unpin
  2018-05-17  7:40 [PATCH 01/19] drm/i915: Move request->ctx aside Chris Wilson
  2018-05-17  7:40 ` [PATCH 02/19] drm/i915: Move fiddling with engine->last_retired_context Chris Wilson
  2018-05-17  7:40 ` [PATCH 03/19] drm/i915: Store a pointer to intel_context in i915_request Chris Wilson
@ 2018-05-17  7:40 ` Chris Wilson
  2018-05-17 10:20   ` Tvrtko Ursulin
  2018-05-17  7:40 ` [PATCH 05/19] drm/i915: Be irqsafe inside reset Chris Wilson
                   ` (18 subsequent siblings)
  21 siblings, 1 reply; 42+ messages in thread
From: Chris Wilson @ 2018-05-17  7:40 UTC (permalink / raw)
  To: intel-gfx

As all backends implement the same pin_count mechanism and do a
dec-and-test as their first step, pull that into the common
intel_context_unpin(). This also pulls into the caller, eliminating the
indirect call in the usual steady state case. The intel_context_pin()
side is a little more complicated as it combines the lookup/alloc as
well as pinning the state, and so is left for a later date.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.h      |  4 ++++
 drivers/gpu/drm/i915/intel_lrc.c             | 13 +------------
 drivers/gpu/drm/i915/intel_ringbuffer.c      |  6 ------
 drivers/gpu/drm/i915/selftests/mock_engine.c |  3 ---
 4 files changed, 5 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 749a4ff566f5..c3262b4dd2ee 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -285,6 +285,10 @@ static inline void __intel_context_pin(struct intel_context *ce)
 
 static inline void intel_context_unpin(struct intel_context *ce)
 {
+	GEM_BUG_ON(!ce->pin_count);
+	if (--ce->pin_count)
+		return;
+
 	GEM_BUG_ON(!ce->ops);
 	ce->ops->unpin(ce);
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 960948617748..f3470b95d64e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1344,7 +1344,7 @@ static void execlists_context_destroy(struct intel_context *ce)
 	__i915_gem_object_release_unless_active(ce->state->obj);
 }
 
-static void __execlists_context_unpin(struct intel_context *ce)
+static void execlists_context_unpin(struct intel_context *ce)
 {
 	intel_ring_unpin(ce->ring);
 
@@ -1355,17 +1355,6 @@ static void __execlists_context_unpin(struct intel_context *ce)
 	i915_gem_context_put(ce->gem_context);
 }
 
-static void execlists_context_unpin(struct intel_context *ce)
-{
-	lockdep_assert_held(&ce->gem_context->i915->drm.struct_mutex);
-	GEM_BUG_ON(ce->pin_count == 0);
-
-	if (--ce->pin_count)
-		return;
-
-	__execlists_context_unpin(ce);
-}
-
 static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
 {
 	unsigned int flags;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 0c0c9f531e4e..001cf6bcb349 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1195,12 +1195,6 @@ static void intel_ring_context_destroy(struct intel_context *ce)
 
 static void intel_ring_context_unpin(struct intel_context *ce)
 {
-	lockdep_assert_held(&ce->gem_context->i915->drm.struct_mutex);
-	GEM_BUG_ON(ce->pin_count == 0);
-
-	if (--ce->pin_count)
-		return;
-
 	if (ce->state) {
 		ce->state->obj->pin_global--;
 		i915_vma_unpin(ce->state);
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 33eddfc1f8ce..f1ac7453053e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -74,9 +74,6 @@ static void hw_delay_complete(struct timer_list *t)
 
 static void mock_context_unpin(struct intel_context *ce)
 {
-	if (--ce->pin_count)
-		return;
-
 	i915_gem_context_put(ce->gem_context);
 }
 
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 05/19] drm/i915: Be irqsafe inside reset
  2018-05-17  7:40 [PATCH 01/19] drm/i915: Move request->ctx aside Chris Wilson
                   ` (2 preceding siblings ...)
  2018-05-17  7:40 ` [PATCH 04/19] drm/i915: Pull the context->pin_count dec into the common intel_context_unpin Chris Wilson
@ 2018-05-17  7:40 ` Chris Wilson
  2018-05-17 10:27   ` Tvrtko Ursulin
  2018-05-17  7:40 ` [PATCH 06/19] drm/i915: Make intel_engine_dump irqsafe Chris Wilson
                   ` (17 subsequent siblings)
  21 siblings, 1 reply; 42+ messages in thread
From: Chris Wilson @ 2018-05-17  7:40 UTC (permalink / raw)
  To: intel-gfx

As we want to be able to call i915_reset_engine and co from a softirq or
timer context, we need to be irqsafe at all times. So we have to forgo
the simple spin_lock_irq for the full spin_lock_irqsave.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 03874b50ada9..a3885adec78a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3137,15 +3137,17 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
 		 */
 		request = i915_gem_find_active_request(engine);
 		if (request) {
+			unsigned long flags;
+
 			i915_gem_context_mark_innocent(request->gem_context);
 			dma_fence_set_error(&request->fence, -EAGAIN);
 
 			/* Rewind the engine to replay the incomplete rq */
-			spin_lock_irq(&engine->timeline.lock);
+			spin_lock_irqsave(&engine->timeline.lock, flags);
 			request = list_prev_entry(request, link);
 			if (&request->link == &engine->timeline.requests)
 				request = NULL;
-			spin_unlock_irq(&engine->timeline.lock);
+			spin_unlock_irqrestore(&engine->timeline.lock, flags);
 		}
 	}
 
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 06/19] drm/i915: Make intel_engine_dump irqsafe
  2018-05-17  7:40 [PATCH 01/19] drm/i915: Move request->ctx aside Chris Wilson
                   ` (3 preceding siblings ...)
  2018-05-17  7:40 ` [PATCH 05/19] drm/i915: Be irqsafe inside reset Chris Wilson
@ 2018-05-17  7:40 ` Chris Wilson
  2018-05-17 10:28   ` Tvrtko Ursulin
  2018-05-17  7:40 ` [PATCH 07/19] drm/i915/execlists: Handle copying default context state for atomic reset Chris Wilson
                   ` (16 subsequent siblings)
  21 siblings, 1 reply; 42+ messages in thread
From: Chris Wilson @ 2018-05-17  7:40 UTC (permalink / raw)
  To: intel-gfx

To be useful later, enable intel_engine_dump() to be called from irq
context (i.e. saving and restoring the irq state rather than assuming
we enter with irqs enabled).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_engine_cs.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 828b7377d0d0..333318b340e1 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1358,6 +1358,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	const struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
 	struct i915_request *rq, *last;
+	unsigned long flags;
 	struct rb_node *rb;
 	int count;
 
@@ -1424,7 +1425,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
 	}
 
-	spin_lock_irq(&engine->timeline.lock);
+	local_irq_save(flags);
+	spin_lock(&engine->timeline.lock);
 
 	last = NULL;
 	count = 0;
@@ -1466,16 +1468,17 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 		print_request(m, last, "\t\tQ ");
 	}
 
-	spin_unlock_irq(&engine->timeline.lock);
+	spin_unlock(&engine->timeline.lock);
 
-	spin_lock_irq(&b->rb_lock);
+	spin_lock(&b->rb_lock);
 	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
 		struct intel_wait *w = rb_entry(rb, typeof(*w), node);
 
 		drm_printf(m, "\t%s [%d] waiting for %x\n",
 			   w->tsk->comm, w->tsk->pid, w->seqno);
 	}
-	spin_unlock_irq(&b->rb_lock);
+	spin_unlock(&b->rb_lock);
+	local_irq_restore(flags);
 
 	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n",
 		   engine->irq_posted,
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 07/19] drm/i915/execlists: Handle copying default context state for atomic reset
  2018-05-17  7:40 [PATCH 01/19] drm/i915: Move request->ctx aside Chris Wilson
                   ` (4 preceding siblings ...)
  2018-05-17  7:40 ` [PATCH 06/19] drm/i915: Make intel_engine_dump irqsafe Chris Wilson
@ 2018-05-17  7:40 ` Chris Wilson
  2018-05-17 10:37   ` Tvrtko Ursulin
  2018-05-17  7:40 ` [PATCH 08/19] drm/i915: Allow init_breadcrumbs to be used from irq context Chris Wilson
                   ` (15 subsequent siblings)
  21 siblings, 1 reply; 42+ messages in thread
From: Chris Wilson @ 2018-05-17  7:40 UTC (permalink / raw)
  To: intel-gfx

We want to be able to reset the GPU from inside a timer callback
(hardirq context). One step requires us to copy the default context
state over to the guilty context, which means we need to plan in advance
to have that object accessible from within an atomic context. The atomic
context prevents us from pinning the object or in peeking into the
shmemfs backing store (all may sleep), so we choose to pin the
default_state into memory when the engine becomes active. This
compromise allows us to swap out the default state when idle, if
required.

References: 5692251c254a ("drm/i915/lrc: Scrub the GPU state of the guilty hanging request")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_engine_cs.c  | 15 +++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.c        | 15 ++++-----------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 +
 3 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 333318b340e1..b1a1ca0758ce 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1082,6 +1082,11 @@ void intel_engines_park(struct drm_i915_private *i915)
 		if (engine->park)
 			engine->park(engine);
 
+		if (engine->pinned_default_state) {
+			i915_gem_object_unpin_map(engine->default_state);
+			engine->pinned_default_state = NULL;
+		}
+
 		i915_gem_batch_pool_fini(&engine->batch_pool);
 		engine->execlists.no_priolist = false;
 	}
@@ -1099,6 +1104,16 @@ void intel_engines_unpark(struct drm_i915_private *i915)
 	enum intel_engine_id id;
 
 	for_each_engine(engine, i915, id) {
+		void *map;
+
+		/* Pin the default state for fast resets from atomic context. */
+		map = NULL;
+		if (engine->default_state)
+			map = i915_gem_object_pin_map(engine->default_state,
+						      I915_MAP_WB);
+		if (!IS_ERR_OR_NULL(map))
+			engine->pinned_default_state = map;
+
 		if (engine->unpark)
 			engine->unpark(engine);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f3470b95d64e..49283b3d3ebb 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1966,17 +1966,10 @@ static void execlists_reset(struct intel_engine_cs *engine,
 	 * to recreate its own state.
 	 */
 	regs = request->hw_context->lrc_reg_state;
-	if (engine->default_state) {
-		void *defaults;
-
-		defaults = i915_gem_object_pin_map(engine->default_state,
-						   I915_MAP_WB);
-		if (!IS_ERR(defaults)) {
-			memcpy(regs, /* skip restoring the vanilla PPHWSP */
-			       defaults + LRC_STATE_PN * PAGE_SIZE,
-			       engine->context_size - PAGE_SIZE);
-			i915_gem_object_unpin_map(engine->default_state);
-		}
+	if (engine->pinned_default_state) {
+		memcpy(regs, /* skip restoring the vanilla PPHWSP */
+		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
+		       engine->context_size - PAGE_SIZE);
 	}
 	execlists_init_reg_state(regs,
 				 request->gem_context, engine, request->ring);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 20c4e13efc0d..acef385c4c80 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -342,6 +342,7 @@ struct intel_engine_cs {
 	struct i915_timeline timeline;
 
 	struct drm_i915_gem_object *default_state;
+	void *pinned_default_state;
 
 	atomic_t irq_count;
 	unsigned long irq_posted;
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 08/19] drm/i915: Allow init_breadcrumbs to be used from irq context
  2018-05-17  7:40 [PATCH 01/19] drm/i915: Move request->ctx aside Chris Wilson
                   ` (5 preceding siblings ...)
  2018-05-17  7:40 ` [PATCH 07/19] drm/i915/execlists: Handle copying default context state for atomic reset Chris Wilson
@ 2018-05-17  7:40 ` Chris Wilson
  2018-05-17 10:40   ` Tvrtko Ursulin
  2018-05-17  7:40 ` [PATCH 09/19] drm/i915/execlists: HWACK checking superseded checking port[0].count Chris Wilson
                   ` (14 subsequent siblings)
  21 siblings, 1 reply; 42+ messages in thread
From: Chris Wilson @ 2018-05-17  7:40 UTC (permalink / raw)
  To: intel-gfx

In order to support engine reset from irq (timer) context, we need to be
able to re-initialise the breadcrumbs. So we need to promote the plain
spin_lock_irq to a safe spin_lock_irqsave.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 18e643df523e..86a987b8ac66 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -846,8 +846,9 @@ static void cancel_fake_irq(struct intel_engine_cs *engine)
 void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	unsigned long flags;
 
-	spin_lock_irq(&b->irq_lock);
+	spin_lock_irqsave(&b->irq_lock, flags);
 
 	/*
 	 * Leave the fake_irq timer enabled (if it is running), but clear the
@@ -871,7 +872,7 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
 	 */
 	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
 
-	spin_unlock_irq(&b->irq_lock);
+	spin_unlock_irqrestore(&b->irq_lock, flags);
 }
 
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 09/19] drm/i915/execlists: HWACK checking superseded checking port[0].count
  2018-05-17  7:40 [PATCH 01/19] drm/i915: Move request->ctx aside Chris Wilson
                   ` (6 preceding siblings ...)
  2018-05-17  7:40 ` [PATCH 08/19] drm/i915: Allow init_breadcrumbs to be used from irq context Chris Wilson
@ 2018-05-17  7:40 ` Chris Wilson
  2018-05-17 10:55   ` Tvrtko Ursulin
  2018-05-17  7:40 ` [PATCH 10/19] drm/i915: Remove USES_GUC_SUBMISSION() pointer chasing from gen8_cs_irq_handler Chris Wilson
                   ` (13 subsequent siblings)
  21 siblings, 1 reply; 42+ messages in thread
From: Chris Wilson @ 2018-05-17  7:40 UTC (permalink / raw)
  To: intel-gfx

The HWACK bit more generically solves the problem of resubmitting ELSP
while the hardware is still processing the current ELSP write. We no
longer need to check port[0].count itself.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 49283b3d3ebb..857ab04452f0 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -608,8 +608,6 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 		GEM_BUG_ON(!execlists_is_active(execlists,
 						EXECLISTS_ACTIVE_USER));
 		GEM_BUG_ON(!port_count(&port[0]));
-		if (port_count(&port[0]) > 1)
-			return false;
 
 		/*
 		 * If we write to ELSP a second time before the HW has had
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 10/19] drm/i915: Remove USES_GUC_SUBMISSION() pointer chasing from gen8_cs_irq_handler
  2018-05-17  7:40 [PATCH 01/19] drm/i915: Move request->ctx aside Chris Wilson
                   ` (7 preceding siblings ...)
  2018-05-17  7:40 ` [PATCH 09/19] drm/i915/execlists: HWACK checking superseded checking port[0].count Chris Wilson
@ 2018-05-17  7:40 ` Chris Wilson
  2018-05-17 10:58   ` Tvrtko Ursulin
  2018-05-17  7:40 ` [PATCH 11/19] drm/i915/execlists: Double check rpm wakeref Chris Wilson
                   ` (12 subsequent siblings)
  21 siblings, 1 reply; 42+ messages in thread
From: Chris Wilson @ 2018-05-17  7:40 UTC (permalink / raw)
  To: intel-gfx

Store whether or not we need to kick the guc's execlists emulation on
the engine itself to avoid chasing the device info.

gen8_cs_irq_handler                          512     428     -84

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_irq.c             | 4 +++-
 drivers/gpu/drm/i915/intel_guc_submission.c | 1 +
 drivers/gpu/drm/i915/intel_lrc.c            | 1 +
 drivers/gpu/drm/i915/intel_ringbuffer.h     | 7 +++++++
 4 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index f9bc3aaa90d0..460878572515 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1472,8 +1472,10 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 	}
 
 	if (iir & GT_RENDER_USER_INTERRUPT) {
+		if (intel_engine_uses_guc(engine))
+			tasklet = true;
+
 		notify_ring(engine);
-		tasklet |= USES_GUC_SUBMISSION(engine->i915);
 	}
 
 	if (tasklet)
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 133367a17863..d9fcd5db4ea4 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -1312,6 +1312,7 @@ int intel_guc_submission_enable(struct intel_guc *guc)
 		engine->unpark = guc_submission_unpark;
 
 		engine->flags &= ~I915_ENGINE_SUPPORTS_STATS;
+		engine->flags |= I915_ENGINE_USES_GUC;
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 857ab04452f0..4928e9ad7826 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2305,6 +2305,7 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->park = NULL;
 	engine->unpark = NULL;
 
+	engine->flags &= ~I915_ENGINE_USES_GUC;
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
 	if (engine->i915->preempt_context)
 		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index acef385c4c80..4ad9c5842575 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -574,6 +574,7 @@ struct intel_engine_cs {
 #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
+#define I915_ENGINE_USES_GUC         BIT(3)
 	unsigned int flags;
 
 	/*
@@ -651,6 +652,12 @@ intel_engine_has_preemption(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
 }
 
+static inline bool
+intel_engine_uses_guc(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_USES_GUC;
+}
+
 static inline bool __execlists_need_preempt(int prio, int last)
 {
 	return prio > max(0, last);
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 11/19] drm/i915/execlists: Double check rpm wakeref
  2018-05-17  7:40 [PATCH 01/19] drm/i915: Move request->ctx aside Chris Wilson
                   ` (8 preceding siblings ...)
  2018-05-17  7:40 ` [PATCH 10/19] drm/i915: Remove USES_GUC_SUBMISSION() pointer chasing from gen8_cs_irq_handler Chris Wilson
@ 2018-05-17  7:40 ` Chris Wilson
  2018-05-17 11:04   ` Tvrtko Ursulin
  2018-05-17  7:40 ` [PATCH 12/19] drm/i915: After reset on sanitization, reset the engine backends Chris Wilson
                   ` (11 subsequent siblings)
  21 siblings, 1 reply; 42+ messages in thread
From: Chris Wilson @ 2018-05-17  7:40 UTC (permalink / raw)
  To: intel-gfx

As we are splitting processing the CSB events from submitting the ELSP,
we also need to duplicate the check that we hold a device wakeref for our
hardware access to the disjoint locations.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 4928e9ad7826..6d3b03299b0c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -449,6 +449,16 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 	struct execlist_port *port = execlists->port;
 	unsigned int n;
 
+	/*
+	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
+	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
+	 * not be relinquished until the device is idle (see
+	 * i915_gem_idle_work_handler()). As a precaution, we make sure
+	 * that all ELSP are drained i.e. we have processed the CSB,
+	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
+	 */
+	GEM_BUG_ON(!engine->i915->gt.awake);
+
 	/*
 	 * ELSQ note: the submit queue is not cleared after being submitted
 	 * to the HW so we need to make sure we always clean it up. This is
@@ -959,6 +969,12 @@ static void process_csb(struct intel_engine_cs *engine)
 	struct drm_i915_private *i915 = engine->i915;
 	bool fw = false;
 
+	/*
+	 * We must never release our device wakeref until after we have
+	 * finished processing all potential interrupts from the hardware.
+	 */
+	GEM_BUG_ON(!engine->i915->gt.awake);
+
 	do {
 		/* The HWSP contains a (cacheable) mirror of the CSB */
 		const u32 *buf =
@@ -1139,16 +1155,6 @@ static void execlists_submission_tasklet(unsigned long data)
 		  engine->execlists.active,
 		  test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
 
-	/*
-	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
-	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
-	 * not be relinquished until the device is idle (see
-	 * i915_gem_idle_work_handler()). As a precaution, we make sure
-	 * that all ELSP are drained i.e. we have processed the CSB,
-	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
-	 */
-	GEM_BUG_ON(!engine->i915->gt.awake);
-
 	/*
 	 * Prefer doing test_and_clear_bit() as a two stage operation to avoid
 	 * imposing the cost of a locked atomic transaction when submitting a
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 12/19] drm/i915: After reset on sanitization, reset the engine backends
  2018-05-17  7:40 [PATCH 01/19] drm/i915: Move request->ctx aside Chris Wilson
                   ` (9 preceding siblings ...)
  2018-05-17  7:40 ` [PATCH 11/19] drm/i915/execlists: Double check rpm wakeref Chris Wilson
@ 2018-05-17  7:40 ` Chris Wilson
  2018-05-17  7:40 ` [PATCH 13/19] drm/i915/execlists: Reset the CSB head tracking on reset/sanitization Chris Wilson
                   ` (10 subsequent siblings)
  21 siblings, 0 replies; 42+ messages in thread
From: Chris Wilson @ 2018-05-17  7:40 UTC (permalink / raw)
  To: intel-gfx

As we reset the GPU on suspend/resume, we also do need to reset the
engine state tracking so call into the engine backends. This is
especially important so that we can also sanitize the state tracking
across resume.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a3885adec78a..fa09837d0569 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4911,11 +4911,15 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915)
 
 void i915_gem_sanitize(struct drm_i915_private *i915)
 {
-	if (i915_terminally_wedged(&i915->gpu_error)) {
-		mutex_lock(&i915->drm.struct_mutex);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err;
+
+	mutex_lock(&i915->drm.struct_mutex);
+	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
+
+	if (i915_terminally_wedged(&i915->gpu_error))
 		i915_gem_unset_wedged(i915);
-		mutex_unlock(&i915->drm.struct_mutex);
-	}
 
 	/*
 	 * If we inherit context state from the BIOS or earlier occupants
@@ -4925,8 +4929,19 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 	 * it may impact the display and we are uncertain about the stability
 	 * of the reset, so this could be applied to even earlier gen.
 	 */
-	if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915))
-		WARN_ON(intel_gpu_reset(i915, ALL_ENGINES));
+	err = -ENODEV;
+	if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) {
+		if (!WARN_ON(intel_gpu_reset(i915, ALL_ENGINES))) {
+			for_each_engine(engine, i915, id) {
+				if (engine->reset.reset)
+					engine->reset.reset(engine, NULL);
+			}
+			err = 0;
+		}
+	}
+
+	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
+	mutex_unlock(&i915->drm.struct_mutex);
 }
 
 int i915_gem_suspend(struct drm_i915_private *dev_priv)
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 13/19] drm/i915/execlists: Reset the CSB head tracking on reset/sanitization
  2018-05-17  7:40 [PATCH 01/19] drm/i915: Move request->ctx aside Chris Wilson
                   ` (10 preceding siblings ...)
  2018-05-17  7:40 ` [PATCH 12/19] drm/i915: After reset on sanitization, reset the engine backends Chris Wilson
@ 2018-05-17  7:40 ` Chris Wilson
  2018-05-17  7:40 ` [PATCH 14/19] drm/i915/execlists: Pull submit after dequeue under timeline lock Chris Wilson
                   ` (9 subsequent siblings)
  21 siblings, 0 replies; 42+ messages in thread
From: Chris Wilson @ 2018-05-17  7:40 UTC (permalink / raw)
  To: intel-gfx

We can avoid the mmio read of the CSB pointers after reset based on the
knowledge that the HW always starts writing at entry 0 in the CSB buffer.
We need to reset our CSB head tracking after GPU reset (and on
sanitization after resume) so that we are expecting to read from entry
0.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6d3b03299b0c..14149d0912fb 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -981,22 +981,19 @@ static void process_csb(struct intel_engine_cs *engine)
 			&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
 		unsigned int head, tail;
 
-		if (unlikely(execlists->csb_use_mmio)) {
-			buf = (u32 * __force)
-				(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
-			execlists->csb_head = -1; /* force mmio read of CSB */
-		}
-
 		/* Clear before reading to catch new interrupts */
 		clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 		smp_mb__after_atomic();
 
-		if (unlikely(execlists->csb_head == -1)) { /* after a reset */
+		if (unlikely(execlists->csb_use_mmio)) {
 			if (!fw) {
 				intel_uncore_forcewake_get(i915, execlists->fw_domains);
 				fw = true;
 			}
 
+			buf = (u32 * __force)
+				(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+
 			head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
 			tail = GEN8_CSB_WRITE_PTR(head);
 			head = GEN8_CSB_READ_PTR(head);
@@ -1790,9 +1787,6 @@ static void enable_execlists(struct intel_engine_cs *engine)
 	I915_WRITE(RING_HWS_PGA(engine->mmio_base),
 		   engine->status_page.ggtt_offset);
 	POSTING_READ(RING_HWS_PGA(engine->mmio_base));
-
-	/* Following the reset, we need to reload the CSB read/write pointers */
-	engine->execlists.csb_head = -1;
 }
 
 static int gen8_init_common_ring(struct intel_engine_cs *engine)
@@ -1945,6 +1939,9 @@ static void execlists_reset(struct intel_engine_cs *engine,
 	__unwind_incomplete_requests(engine);
 	spin_unlock(&engine->timeline.lock);
 
+	/* Following the reset, we need to reload the CSB read/write pointers */
+	engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1;
+
 	local_irq_restore(flags);
 
 	/*
@@ -2436,6 +2433,8 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 			upper_32_bits(ce->lrc_desc);
 	}
 
+	engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1;
+
 	return 0;
 
 error:
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 14/19] drm/i915/execlists: Pull submit after dequeue under timeline lock
  2018-05-17  7:40 [PATCH 01/19] drm/i915: Move request->ctx aside Chris Wilson
                   ` (11 preceding siblings ...)
  2018-05-17  7:40 ` [PATCH 13/19] drm/i915/execlists: Reset the CSB head tracking on reset/sanitization Chris Wilson
@ 2018-05-17  7:40 ` Chris Wilson
  2018-05-17  7:40 ` [PATCH 15/19] drm/i915/execlists: Process one CSB interrupt at a time Chris Wilson
                   ` (8 subsequent siblings)
  21 siblings, 0 replies; 42+ messages in thread
From: Chris Wilson @ 2018-05-17  7:40 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we will begin processing the CSB from inside the
interrupt handler. This means that updating the execlists->port[] will
no longer be locked by the tasklet but by the engine->timeline.lock
instead. Pull dequeue and submit under the same lock for protection.
(An alternative, future, plan is to keep the in/out arrays separate for
concurrent processing and reduced lock coverage.)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 32 ++++++++++++--------------------
 1 file changed, 12 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 14149d0912fb..6c050d553d4e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -572,7 +572,7 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
-static bool __execlists_dequeue(struct intel_engine_cs *engine)
+static void __execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
@@ -627,11 +627,11 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 		 * the HW to indicate that it has had a chance to respond.
 		 */
 		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
-			return false;
+			return;
 
 		if (need_preempt(engine, last, execlists->queue_priority)) {
 			inject_preempt_context(engine);
-			return false;
+			return;
 		}
 
 		/*
@@ -656,7 +656,7 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 		 * priorities of the ports haven't been switch.
 		 */
 		if (port_count(&port[1]))
-			return false;
+			return;
 
 		/*
 		 * WaIdleLiteRestore:bdw,skl
@@ -756,8 +756,10 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 		port != execlists->port ? rq_prio(last) : INT_MIN;
 
 	execlists->first = rb;
-	if (submit)
+	if (submit) {
 		port_assign(port, last);
+		execlists_submit_ports(engine);
+	}
 
 	/* We must always keep the beast fed if we have work piled up */
 	GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
@@ -766,24 +768,19 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 	if (last)
 		execlists_user_begin(execlists, execlists->port);
 
-	return submit;
+	/* If the engine is now idle, so should be the flag; and vice versa. */
+	GEM_BUG_ON(execlists_is_active(&engine->execlists,
+				       EXECLISTS_ACTIVE_USER) ==
+		   !port_isset(engine->execlists.port));
 }
 
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
-	struct intel_engine_execlists * const execlists = &engine->execlists;
 	unsigned long flags;
-	bool submit;
 
 	spin_lock_irqsave(&engine->timeline.lock, flags);
-	submit = __execlists_dequeue(engine);
+	__execlists_dequeue(engine);
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
-
-	if (submit)
-		execlists_submit_ports(engine);
-
-	GEM_BUG_ON(port_isset(execlists->port) &&
-		   !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
 }
 
 void
@@ -1162,11 +1159,6 @@ static void execlists_submission_tasklet(unsigned long data)
 
 	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
 		execlists_dequeue(engine);
-
-	/* If the engine is now idle, so should be the flag; and vice versa. */
-	GEM_BUG_ON(execlists_is_active(&engine->execlists,
-				       EXECLISTS_ACTIVE_USER) ==
-		   !port_isset(engine->execlists.port));
 }
 
 static void queue_request(struct intel_engine_cs *engine,
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 15/19] drm/i915/execlists: Process one CSB interrupt at a time
  2018-05-17  7:40 [PATCH 01/19] drm/i915: Move request->ctx aside Chris Wilson
                   ` (12 preceding siblings ...)
  2018-05-17  7:40 ` [PATCH 14/19] drm/i915/execlists: Pull submit after dequeue under timeline lock Chris Wilson
@ 2018-05-17  7:40 ` Chris Wilson
  2018-05-17  7:40 ` [PATCH 16/19] drm/i915/execlists: Unify CSB access pointers Chris Wilson
                   ` (7 subsequent siblings)
  21 siblings, 0 replies; 42+ messages in thread
From: Chris Wilson @ 2018-05-17  7:40 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we will process the CSB events directly from the CS
interrupt handler, being called for each interrupt. Hence, we will no
longer have the need for a loop until the has-interrupt bit is clear,
and in the meantime can remove that small optimisation.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 274 +++++++++++++++----------------
 1 file changed, 135 insertions(+), 139 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6c050d553d4e..0c7108249244 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -964,6 +964,11 @@ static void process_csb(struct intel_engine_cs *engine)
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
 	struct drm_i915_private *i915 = engine->i915;
+
+	/* The HWSP contains a (cacheable) mirror of the CSB */
+	const u32 *buf =
+		&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+	unsigned int head, tail;
 	bool fw = false;
 
 	/*
@@ -972,164 +977,155 @@ static void process_csb(struct intel_engine_cs *engine)
 	 */
 	GEM_BUG_ON(!engine->i915->gt.awake);
 
-	do {
-		/* The HWSP contains a (cacheable) mirror of the CSB */
-		const u32 *buf =
-			&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
-		unsigned int head, tail;
+	/* Clear before reading to catch new interrupts */
+	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+	smp_mb__after_atomic();
 
-		/* Clear before reading to catch new interrupts */
-		clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-		smp_mb__after_atomic();
+	if (unlikely(execlists->csb_use_mmio)) {
+		intel_uncore_forcewake_get(i915, execlists->fw_domains);
+		fw = true;
 
-		if (unlikely(execlists->csb_use_mmio)) {
-			if (!fw) {
-				intel_uncore_forcewake_get(i915, execlists->fw_domains);
-				fw = true;
-			}
+		buf = (u32 * __force)
+			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
 
-			buf = (u32 * __force)
-				(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+		head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+		tail = GEN8_CSB_WRITE_PTR(head);
+		head = GEN8_CSB_READ_PTR(head);
+		execlists->csb_head = head;
+	} else {
+		const int write_idx =
+			intel_hws_csb_write_index(i915) -
+			I915_HWS_CSB_BUF0_INDEX;
 
-			head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
-			tail = GEN8_CSB_WRITE_PTR(head);
-			head = GEN8_CSB_READ_PTR(head);
-			execlists->csb_head = head;
-		} else {
-			const int write_idx =
-				intel_hws_csb_write_index(i915) -
-				I915_HWS_CSB_BUF0_INDEX;
+		head = execlists->csb_head;
+		tail = READ_ONCE(buf[write_idx]);
+		rmb(); /* Hopefully paired with a wmb() in HW */
+	}
+	GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
+		  engine->name,
+		  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
+		  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
 
-			head = execlists->csb_head;
-			tail = READ_ONCE(buf[write_idx]);
-			rmb(); /* Hopefully paired with a wmb() in HW */
-		}
-		GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
-			  engine->name,
-			  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
-			  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
+	while (head != tail) {
+		struct i915_request *rq;
+		unsigned int status;
+		unsigned int count;
 
-		while (head != tail) {
-			struct i915_request *rq;
-			unsigned int status;
-			unsigned int count;
+		if (++head == GEN8_CSB_ENTRIES)
+			head = 0;
 
-			if (++head == GEN8_CSB_ENTRIES)
-				head = 0;
+		/*
+		 * We are flying near dragons again.
+		 *
+		 * We hold a reference to the request in execlist_port[]
+		 * but no more than that. We are operating in softirq
+		 * context and so cannot hold any mutex or sleep. That
+		 * prevents us stopping the requests we are processing
+		 * in port[] from being retired simultaneously (the
+		 * breadcrumb will be complete before we see the
+		 * context-switch). As we only hold the reference to the
+		 * request, any pointer chasing underneath the request
+		 * is subject to a potential use-after-free. Thus we
+		 * store all of the bookkeeping within port[] as
+		 * required, and avoid using unguarded pointers beneath
+		 * request itself. The same applies to the atomic
+		 * status notifier.
+		 */
 
-			/*
-			 * We are flying near dragons again.
-			 *
-			 * We hold a reference to the request in execlist_port[]
-			 * but no more than that. We are operating in softirq
-			 * context and so cannot hold any mutex or sleep. That
-			 * prevents us stopping the requests we are processing
-			 * in port[] from being retired simultaneously (the
-			 * breadcrumb will be complete before we see the
-			 * context-switch). As we only hold the reference to the
-			 * request, any pointer chasing underneath the request
-			 * is subject to a potential use-after-free. Thus we
-			 * store all of the bookkeeping within port[] as
-			 * required, and avoid using unguarded pointers beneath
-			 * request itself. The same applies to the atomic
-			 * status notifier.
-			 */
+		status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
+		GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
+			  engine->name, head,
+			  status, buf[2*head + 1],
+			  execlists->active);
+
+		if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+			      GEN8_CTX_STATUS_PREEMPTED))
+			execlists_set_active(execlists,
+					     EXECLISTS_ACTIVE_HWACK);
+		if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+			execlists_clear_active(execlists,
+					       EXECLISTS_ACTIVE_HWACK);
+
+		if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+			continue;
 
-			status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
-			GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
-				  engine->name, head,
-				  status, buf[2*head + 1],
-				  execlists->active);
-
-			if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-				      GEN8_CTX_STATUS_PREEMPTED))
-				execlists_set_active(execlists,
-						     EXECLISTS_ACTIVE_HWACK);
-			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
-				execlists_clear_active(execlists,
-						       EXECLISTS_ACTIVE_HWACK);
-
-			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-				continue;
+		/* We should never get a COMPLETED | IDLE_ACTIVE! */
+		GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
 
-			/* We should never get a COMPLETED | IDLE_ACTIVE! */
-			GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+		if (status & GEN8_CTX_STATUS_COMPLETE &&
+		    buf[2*head + 1] == execlists->preempt_complete_status) {
+			GEM_TRACE("%s preempt-idle\n", engine->name);
+			complete_preempt_context(execlists);
+			continue;
+		}
 
-			if (status & GEN8_CTX_STATUS_COMPLETE &&
-			    buf[2*head + 1] == execlists->preempt_complete_status) {
-				GEM_TRACE("%s preempt-idle\n", engine->name);
-				complete_preempt_context(execlists);
-				continue;
-			}
+		if (status & GEN8_CTX_STATUS_PREEMPTED &&
+		    execlists_is_active(execlists,
+					EXECLISTS_ACTIVE_PREEMPT))
+			continue;
 
-			if (status & GEN8_CTX_STATUS_PREEMPTED &&
-			    execlists_is_active(execlists,
-						EXECLISTS_ACTIVE_PREEMPT))
-				continue;
+		GEM_BUG_ON(!execlists_is_active(execlists,
+						EXECLISTS_ACTIVE_USER));
 
-			GEM_BUG_ON(!execlists_is_active(execlists,
-							EXECLISTS_ACTIVE_USER));
+		rq = port_unpack(port, &count);
+		GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
+			  engine->name,
+			  port->context_id, count,
+			  rq ? rq->global_seqno : 0,
+			  rq ? rq->fence.context : 0,
+			  rq ? rq->fence.seqno : 0,
+			  intel_engine_get_seqno(engine),
+			  rq ? rq_prio(rq) : 0);
+
+		/* Check the context/desc id for this event matches */
+		GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
+
+		GEM_BUG_ON(count == 0);
+		if (--count == 0) {
+			/*
+			 * On the final event corresponding to the
+			 * submission of this context, we expect either
+			 * an element-switch event or a completion
+			 * event (and on completion, the active-idle
+			 * marker). No more preemptions, lite-restore
+			 * or otherwise.
+			 */
+			GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
+			GEM_BUG_ON(port_isset(&port[1]) &&
+				   !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
+			GEM_BUG_ON(!port_isset(&port[1]) &&
+				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
 
-			rq = port_unpack(port, &count);
-			GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
-				  engine->name,
-				  port->context_id, count,
-				  rq ? rq->global_seqno : 0,
-				  rq ? rq->fence.context : 0,
-				  rq ? rq->fence.seqno : 0,
-				  intel_engine_get_seqno(engine),
-				  rq ? rq_prio(rq) : 0);
+			/*
+			 * We rely on the hardware being strongly
+			 * ordered, that the breadcrumb write is
+			 * coherent (visible from the CPU) before the
+			 * user interrupt and CSB is processed.
+			 */
+			GEM_BUG_ON(!i915_request_completed(rq));
 
-			/* Check the context/desc id for this event matches */
-			GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
+			execlists_context_schedule_out(rq,
+						       INTEL_CONTEXT_SCHEDULE_OUT);
+			i915_request_put(rq);
 
-			GEM_BUG_ON(count == 0);
-			if (--count == 0) {
-				/*
-				 * On the final event corresponding to the
-				 * submission of this context, we expect either
-				 * an element-switch event or a completion
-				 * event (and on completion, the active-idle
-				 * marker). No more preemptions, lite-restore
-				 * or otherwise.
-				 */
-				GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
-				GEM_BUG_ON(port_isset(&port[1]) &&
-					   !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
-				GEM_BUG_ON(!port_isset(&port[1]) &&
-					   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+			GEM_TRACE("%s completed ctx=%d\n",
+				  engine->name, port->context_id);
 
-				/*
-				 * We rely on the hardware being strongly
-				 * ordered, that the breadcrumb write is
-				 * coherent (visible from the CPU) before the
-				 * user interrupt and CSB is processed.
-				 */
-				GEM_BUG_ON(!i915_request_completed(rq));
-
-				execlists_context_schedule_out(rq,
-							       INTEL_CONTEXT_SCHEDULE_OUT);
-				i915_request_put(rq);
-
-				GEM_TRACE("%s completed ctx=%d\n",
-					  engine->name, port->context_id);
-
-				port = execlists_port_complete(execlists, port);
-				if (port_isset(port))
-					execlists_user_begin(execlists, port);
-				else
-					execlists_user_end(execlists);
-			} else {
-				port_set(port, port_pack(rq, count));
-			}
+			port = execlists_port_complete(execlists, port);
+			if (port_isset(port))
+				execlists_user_begin(execlists, port);
+			else
+				execlists_user_end(execlists);
+		} else {
+			port_set(port, port_pack(rq, count));
 		}
+	}
 
-		if (head != execlists->csb_head) {
-			execlists->csb_head = head;
-			writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
-			       i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
-		}
-	} while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
+	if (head != execlists->csb_head) {
+		execlists->csb_head = head;
+		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
+		       i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+	}
 
 	if (unlikely(fw))
 		intel_uncore_forcewake_put(i915, execlists->fw_domains);
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 16/19] drm/i915/execlists: Unify CSB access pointers
  2018-05-17  7:40 [PATCH 01/19] drm/i915: Move request->ctx aside Chris Wilson
                   ` (13 preceding siblings ...)
  2018-05-17  7:40 ` [PATCH 15/19] drm/i915/execlists: Process one CSB interrupt at a time Chris Wilson
@ 2018-05-17  7:40 ` Chris Wilson
  2018-05-17  7:40 ` [PATCH 17/19] drm/i915/execlists: Process the CSB directly from inside the irq handler Chris Wilson
                   ` (6 subsequent siblings)
  21 siblings, 0 replies; 42+ messages in thread
From: Chris Wilson @ 2018-05-17  7:40 UTC (permalink / raw)
  To: intel-gfx

Following the removal of the last workarounds, the only CSB mmio access
is for the old vGPU interface. The mmio registers presented by vGPU do
not require forcewake and can be treated as ordinary volatile memory,
i.e. they behave just like the HWSP access just at a different location.
We can reduce the CSB access inside the irq handler to a set of
read/write/buffer pointers and treat the various paths identically and
not worry about forcewake.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_engine_cs.c  |  12 ---
 drivers/gpu/drm/i915/intel_lrc.c        | 110 ++++++++++--------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  23 +++--
 3 files changed, 62 insertions(+), 83 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index b1a1ca0758ce..ad6508e991f3 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -25,7 +25,6 @@
 #include <drm/drm_print.h>
 
 #include "i915_drv.h"
-#include "i915_vgpu.h"
 #include "intel_ringbuffer.h"
 #include "intel_lrc.h"
 
@@ -456,21 +455,10 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
 	i915_gem_batch_pool_init(&engine->batch_pool, engine);
 }
 
-static bool csb_force_mmio(struct drm_i915_private *i915)
-{
-	/* Older GVT emulation depends upon intercepting CSB mmio */
-	if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915))
-		return true;
-
-	return false;
-}
-
 static void intel_engine_init_execlist(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 
-	execlists->csb_use_mmio = csb_force_mmio(engine->i915);
-
 	execlists->port_mask = 1;
 	BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
 	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 0c7108249244..6701778c7dc1 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -137,6 +137,7 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
+#include "i915_vgpu.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
 #include "intel_workarounds.h"
@@ -963,13 +964,8 @@ static void process_csb(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
-	struct drm_i915_private *i915 = engine->i915;
-
-	/* The HWSP contains a (cacheable) mirror of the CSB */
-	const u32 *buf =
-		&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+	const u32 * const buf = execlists->csb_status;
 	unsigned int head, tail;
-	bool fw = false;
 
 	/*
 	 * We must never release our device wakeref until after we have
@@ -981,32 +977,16 @@ static void process_csb(struct intel_engine_cs *engine)
 	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 	smp_mb__after_atomic();
 
-	if (unlikely(execlists->csb_use_mmio)) {
-		intel_uncore_forcewake_get(i915, execlists->fw_domains);
-		fw = true;
-
-		buf = (u32 * __force)
-			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
-
-		head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
-		tail = GEN8_CSB_WRITE_PTR(head);
-		head = GEN8_CSB_READ_PTR(head);
-		execlists->csb_head = head;
-	} else {
-		const int write_idx =
-			intel_hws_csb_write_index(i915) -
-			I915_HWS_CSB_BUF0_INDEX;
+	/* Note that csb_write, csb_status may be either in HWSP or mmio */
+	head = execlists->csb_head;
+	tail = READ_ONCE(*execlists->csb_write);
+	GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
+	if (unlikely(head == tail))
+		return;
 
-		head = execlists->csb_head;
-		tail = READ_ONCE(buf[write_idx]);
-		rmb(); /* Hopefully paired with a wmb() in HW */
-	}
-	GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
-		  engine->name,
-		  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
-		  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
+	rmb(); /* Hopefully paired with a wmb() in HW */
 
-	while (head != tail) {
+	do {
 		struct i915_request *rq;
 		unsigned int status;
 		unsigned int count;
@@ -1119,16 +1099,11 @@ static void process_csb(struct intel_engine_cs *engine)
 		} else {
 			port_set(port, port_pack(rq, count));
 		}
-	}
-
-	if (head != execlists->csb_head) {
-		execlists->csb_head = head;
-		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
-		       i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
-	}
+	} while (head != tail);
 
-	if (unlikely(fw))
-		intel_uncore_forcewake_put(i915, execlists->fw_domains);
+	writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
+	       execlists->csb_read);
+	execlists->csb_head = head;
 }
 
 /*
@@ -2365,28 +2340,11 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
 static void
 logical_ring_setup(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-	enum forcewake_domains fw_domains;
-
 	intel_engine_setup_common(engine);
 
 	/* Intentionally left blank. */
 	engine->buffer = NULL;
 
-	fw_domains = intel_uncore_forcewake_for_reg(dev_priv,
-						    RING_ELSP(engine),
-						    FW_REG_WRITE);
-
-	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
-						     RING_CONTEXT_STATUS_PTR(engine),
-						     FW_REG_READ | FW_REG_WRITE);
-
-	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
-						     RING_CONTEXT_STATUS_BUF_BASE(engine),
-						     FW_REG_READ);
-
-	engine->execlists.fw_domains = fw_domains;
-
 	tasklet_init(&engine->execlists.tasklet,
 		     execlists_submission_tasklet, (unsigned long)engine);
 
@@ -2394,34 +2352,56 @@ logical_ring_setup(struct intel_engine_cs *engine)
 	logical_ring_default_irqs(engine);
 }
 
+static bool csb_force_mmio(struct drm_i915_private *i915)
+{
+	/* Older GVT emulation depends upon intercepting CSB mmio */
+	return intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915);
+}
+
 static int logical_ring_init(struct intel_engine_cs *engine)
 {
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_engine_execlists * const execlists = &engine->execlists;
 	int ret;
 
 	ret = intel_engine_init_common(engine);
 	if (ret)
 		goto error;
 
-	if (HAS_LOGICAL_RING_ELSQ(engine->i915)) {
-		engine->execlists.submit_reg = engine->i915->regs +
+	if (HAS_LOGICAL_RING_ELSQ(i915)) {
+		execlists->submit_reg = i915->regs +
 			i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(engine));
-		engine->execlists.ctrl_reg = engine->i915->regs +
+		execlists->ctrl_reg = i915->regs +
 			i915_mmio_reg_offset(RING_EXECLIST_CONTROL(engine));
 	} else {
-		engine->execlists.submit_reg = engine->i915->regs +
+		execlists->submit_reg = i915->regs +
 			i915_mmio_reg_offset(RING_ELSP(engine));
 	}
 
-	engine->execlists.preempt_complete_status = ~0u;
-	if (engine->i915->preempt_context) {
+	execlists->preempt_complete_status = ~0u;
+	if (i915->preempt_context) {
 		struct intel_context *ce =
-			to_intel_context(engine->i915->preempt_context, engine);
+			to_intel_context(i915->preempt_context, engine);
 
-		engine->execlists.preempt_complete_status =
+		execlists->preempt_complete_status =
 			upper_32_bits(ce->lrc_desc);
 	}
 
-	engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1;
+	execlists->csb_head = GEN8_CSB_ENTRIES - 1;
+	execlists->csb_read =
+		i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
+	if (csb_force_mmio(i915)) {
+		execlists->csb_status =
+			i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0));
+
+		execlists->csb_write = execlists->csb_read;
+	} else {
+		execlists->csb_status =
+			&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+
+		execlists->csb_write =
+			&engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
+	}
 
 	return 0;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 4ad9c5842575..0a96088f522f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -299,19 +299,30 @@ struct intel_engine_execlists {
 	struct rb_node *first;
 
 	/**
-	 * @fw_domains: forcewake domains for irq tasklet
+	 * @csb_head: context status buffer head
 	 */
-	unsigned int fw_domains;
+	unsigned int csb_head;
 
 	/**
-	 * @csb_head: context status buffer head
+	 * @csb_read: control register for Context Switch buffer
+	 *
+	 * Note this register is always in mmio.
 	 */
-	unsigned int csb_head;
+	u32 *csb_read;
 
 	/**
-	 * @csb_use_mmio: access csb through mmio, instead of hwsp
+	 * @csb_write: control register for Context Switch buffer
+	 *
+	 * Note this register may be either mmio or HWSP shadow.
+	 */
+	u32 *csb_write;
+
+	/**
+	 * @csb_status: status array for Context Switch buffer
+	 *
+	 * Note these registers may be either mmio or HWSP shadow.
 	 */
-	bool csb_use_mmio;
+	u32 *csb_status;
 
 	/**
 	 * @preempt_complete_status: expected CSB upon completing preemption
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 17/19] drm/i915/execlists: Process the CSB directly from inside the irq handler
  2018-05-17  7:40 [PATCH 01/19] drm/i915: Move request->ctx aside Chris Wilson
                   ` (14 preceding siblings ...)
  2018-05-17  7:40 ` [PATCH 16/19] drm/i915/execlists: Unify CSB access pointers Chris Wilson
@ 2018-05-17  7:40 ` Chris Wilson
  2018-05-17  7:40 ` [PATCH 18/19] drm/i915/execlists: Direct submission (avoid tasklet/ksoftirqd) Chris Wilson
                   ` (5 subsequent siblings)
  21 siblings, 0 replies; 42+ messages in thread
From: Chris Wilson @ 2018-05-17  7:40 UTC (permalink / raw)
  To: intel-gfx

As we now only use the cached HWSP access to read the CSB buffer and no
longer use any forcewaked mmio, processing the CSB is fast and can be
done directly from inside the CS interrupt handler.

We have to rearrange the irq handler slightly as we wish to preserve the
single threaded access to the event buffer and ELSP, so we need to
perform the processing with the master interrupt still disabled to avoid
concurrent CS interrupts on other CPUs. We also have to ensure that no
one else is updating the execlist->port[] simultaneously (i.e.
execlists_dequeue) and so encompass ourselves inside the
engine->timeline.lock (although we may lift this restriction in the
future by separating the two phases to operate on distinct port[]), and
similarly must flush the interrupt handler and disable further processing
during a reset.

References: https://bugs.freedesktop.org/show_bug.cgi?id=106373
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.h         |  5 +++
 drivers/gpu/drm/i915/i915_irq.c         | 14 +++-----
 drivers/gpu/drm/i915/intel_engine_cs.c  |  8 ++---
 drivers/gpu/drm/i915/intel_lrc.c        | 45 +++++++++++--------------
 drivers/gpu/drm/i915/intel_lrc.h        |  1 +
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 -
 6 files changed, 32 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 5bf24cfc218c..5b098acec566 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -79,4 +79,9 @@ static inline void __tasklet_disable_sync_once(struct tasklet_struct *t)
 		tasklet_unlock_wait(t);
 }
 
+static inline bool __tasklet_is_enabled(const struct tasklet_struct *t)
+{
+	return likely(!atomic_read(&t->count));
+}
+
 #endif /* __I915_GEM_H__ */
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 460878572515..3f139ff64385 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1462,13 +1462,11 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
 static void
 gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 {
-	struct intel_engine_execlists * const execlists = &engine->execlists;
 	bool tasklet = false;
 
 	if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
-		if (READ_ONCE(engine->execlists.active))
-			tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST,
-						    &engine->irq_posted);
+		intel_engine_handle_execlists_irq(engine);
+		tasklet = true;
 	}
 
 	if (iir & GT_RENDER_USER_INTERRUPT) {
@@ -1479,7 +1477,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 	}
 
 	if (tasklet)
-		tasklet_hi_schedule(&execlists->tasklet);
+		tasklet_hi_schedule(&engine->execlists.tasklet);
 }
 
 static void gen8_gt_irq_ack(struct drm_i915_private *i915,
@@ -2165,6 +2163,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
 		I915_WRITE(VLV_IER, 0);
 
 		gen8_gt_irq_ack(dev_priv, master_ctl, gt_iir);
+		gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
 
 		if (iir & I915_DISPLAY_PORT_INTERRUPT)
 			hotplug_status = i9xx_hpd_irq_ack(dev_priv);
@@ -2189,8 +2188,6 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
 		I915_WRITE(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL);
 		POSTING_READ(GEN8_MASTER_IRQ);
 
-		gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
-
 		if (hotplug_status)
 			i9xx_hpd_irq_handler(dev_priv, hotplug_status);
 
@@ -2761,6 +2758,7 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
 
 	/* Find, clear, then process each source of interrupt */
 	gen8_gt_irq_ack(dev_priv, master_ctl, gt_iir);
+	gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
 
 	/* IRQs are synced during runtime_suspend, we don't require a wakeref */
 	if (master_ctl & ~GEN8_GT_IRQS) {
@@ -2771,8 +2769,6 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
 
 	I915_WRITE_FW(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL);
 
-	gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
-
 	return IRQ_HANDLED;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index ad6508e991f3..7165a3d21443 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1297,12 +1297,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
 		ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
 		read = GEN8_CSB_READ_PTR(ptr);
 		write = GEN8_CSB_WRITE_PTR(ptr);
-		drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s, tasklet queued? %s (%s)\n",
+		drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], tasklet queued? %s (%s)\n",
 			   read, execlists->csb_head,
 			   write,
 			   intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
-			   yesno(test_bit(ENGINE_IRQ_EXECLIST,
-					  &engine->irq_posted)),
 			   yesno(test_bit(TASKLET_STATE_SCHED,
 					  &engine->execlists.tasklet.state)),
 			   enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
@@ -1483,11 +1481,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	spin_unlock(&b->rb_lock);
 	local_irq_restore(flags);
 
-	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n",
+	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s)\n",
 		   engine->irq_posted,
 		   yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
-				  &engine->irq_posted)),
-		   yesno(test_bit(ENGINE_IRQ_EXECLIST,
 				  &engine->irq_posted)));
 
 	drm_printf(m, "HWSP:\n");
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6701778c7dc1..954eb3a71051 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -567,8 +567,10 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
 {
 	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
 
+	__unwind_incomplete_requests(container_of(execlists,
+						  typeof(struct intel_engine_cs),
+						  execlists));
 	execlists_cancel_port_requests(execlists);
-	execlists_unwind_incomplete_requests(execlists);
 
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
@@ -880,14 +882,6 @@ static void reset_irq(struct intel_engine_cs *engine)
 	synchronize_hardirq(engine->i915->drm.irq);
 
 	clear_gtiir(engine);
-
-	/*
-	 * The port is checked prior to scheduling a tasklet, but
-	 * just in case we have suspended the tasklet to do the
-	 * wedging make sure that when it wakes, it decides there
-	 * is no work to do by clearing the irq_posted bit.
-	 */
-	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 }
 
 static void execlists_cancel_requests(struct intel_engine_cs *engine)
@@ -960,7 +954,13 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	local_irq_restore(flags);
 }
 
-static void process_csb(struct intel_engine_cs *engine)
+static inline bool
+reset_in_progress(const struct intel_engine_execlists *execlists)
+{
+	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
+}
+
+void intel_engine_handle_execlists_irq(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
@@ -973,9 +973,8 @@ static void process_csb(struct intel_engine_cs *engine)
 	 */
 	GEM_BUG_ON(!engine->i915->gt.awake);
 
-	/* Clear before reading to catch new interrupts */
-	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-	smp_mb__after_atomic();
+	if (reset_in_progress(execlists))
+		return;
 
 	/* Note that csb_write, csb_status may be either in HWSP or mmio */
 	head = execlists->csb_head;
@@ -986,6 +985,8 @@ static void process_csb(struct intel_engine_cs *engine)
 
 	rmb(); /* Hopefully paired with a wmb() in HW */
 
+	spin_lock(&engine->timeline.lock);
+
 	do {
 		struct i915_request *rq;
 		unsigned int status;
@@ -1104,6 +1105,8 @@ static void process_csb(struct intel_engine_cs *engine)
 	writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
 	       execlists->csb_read);
 	execlists->csb_head = head;
+
+	spin_unlock(&engine->timeline.lock);
 }
 
 /*
@@ -1114,19 +1117,10 @@ static void execlists_submission_tasklet(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 
-	GEM_TRACE("%s awake?=%d, active=%x, irq-posted?=%d\n",
+	GEM_TRACE("%s awake?=%d, active=%x\n",
 		  engine->name,
 		  engine->i915->gt.awake,
-		  engine->execlists.active,
-		  test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
-
-	/*
-	 * Prefer doing test_and_clear_bit() as a two stage operation to avoid
-	 * imposing the cost of a locked atomic transaction when submitting a
-	 * new request (outside of the context-switch interrupt).
-	 */
-	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
-		process_csb(engine);
+		  engine->execlists.active);
 
 	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
 		execlists_dequeue(engine);
@@ -1836,8 +1830,7 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
 	 * and avoid blaming an innocent request if the stall was due to the
 	 * preemption itself.
 	 */
-	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
-		process_csb(engine);
+	synchronize_hardirq(engine->i915->drm.irq);
 
 	/*
 	 * The last active request can then be no later than the last request
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 1593194e930c..772ca3993e51 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -102,6 +102,7 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine);
 struct drm_i915_private;
 struct i915_gem_context;
 
+void intel_engine_handle_execlists_irq(struct intel_engine_cs *engine);
 void intel_lr_context_resume(struct drm_i915_private *dev_priv);
 
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 0a96088f522f..42a136810e15 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -358,7 +358,6 @@ struct intel_engine_cs {
 	atomic_t irq_count;
 	unsigned long irq_posted;
 #define ENGINE_IRQ_BREADCRUMB 0
-#define ENGINE_IRQ_EXECLIST 1
 
 	/* Rather than have every client wait upon all user interrupts,
 	 * with the herd waking after every interrupt and each doing the
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 18/19] drm/i915/execlists: Direct submission (avoid tasklet/ksoftirqd)
  2018-05-17  7:40 [PATCH 01/19] drm/i915: Move request->ctx aside Chris Wilson
                   ` (15 preceding siblings ...)
  2018-05-17  7:40 ` [PATCH 17/19] drm/i915/execlists: Process the CSB directly from inside the irq handler Chris Wilson
@ 2018-05-17  7:40 ` Chris Wilson
  2018-05-17 13:13   ` Tvrtko Ursulin
  2018-05-17  7:40 ` [PATCH 19/19] drm/i915: Combine gt irq ack/handlers Chris Wilson
                   ` (4 subsequent siblings)
  21 siblings, 1 reply; 42+ messages in thread
From: Chris Wilson @ 2018-05-17  7:40 UTC (permalink / raw)
  To: intel-gfx

Back in commit 27af5eea54d1 ("drm/i915: Move execlists irq handler to a
bottom half"), we came to the conclusion that running our CSB processing
and ELSP submission from inside the irq handler was a bad idea. A really
bad idea as we could impose nearly 1s latency on other users of the
system, on average! Deferring our work to a tasklet allowed us to do the
processing with irqs enabled, reducing the impact to an average of about
50us.

We have since eradicated the use of forcewaked mmio from inside the CSB
processing and ELSP submission, bringing the impact down to around 5us
(on Kabylake); an order of magnitude better than our measurements 2
years ago on Broadwell and only about 2x worse on average than the
gem_syslatency on an unladen system.

Comparing the impact on the maximum latency observed over a 120s interval,
repeated several times (using gem_syslatency, similar to RT's cyclictest)
while the system is fully laden with i915 nops, we see that direct
submission definitely worsens the response but not to the same outlandish
degree as before.

x Unladen baseline
+ Using tasklet
* Direct submission

+------------------------------------------------------------------------+
|xx x          ++    +++ +                           *  * *   ** *** *  *|
||A|              |__AM__|                               |_____A_M___|   |
+------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x  10             5            18            10           9.3     3.6530049
+  10            72           120           108         102.9     15.758243
*  10           255           348           316         305.7      28.74814

And with a background load

+------------------------------------------------------------------------+
|x                          +           *              *                 |
|x                    +     + + + +  + +* * ** ++      * *   *          *|
|A                        |_______A_____|__|_______A___M______|          |
+------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x  10             4            11             9           8.5     2.1730675
+  10           633          1388           972           993     243.33744
*  10          1152          2109          1608        1488.3     314.80719

References: 27af5eea54d1 ("drm/i915: Move execlists irq handler to a bottom half")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c  | 11 ++------
 drivers/gpu/drm/i915/intel_lrc.c | 44 +++++++++++++++++---------------
 2 files changed, 26 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 3f139ff64385..8b61ebf5cb4a 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1462,22 +1462,15 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
 static void
 gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 {
-	bool tasklet = false;
-
-	if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
+	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
 		intel_engine_handle_execlists_irq(engine);
-		tasklet = true;
-	}
 
 	if (iir & GT_RENDER_USER_INTERRUPT) {
 		if (intel_engine_uses_guc(engine))
-			tasklet = true;
+			tasklet_hi_schedule(&engine->execlists.tasklet);
 
 		notify_ring(engine);
 	}
-
-	if (tasklet)
-		tasklet_hi_schedule(&engine->execlists.tasklet);
 }
 
 static void gen8_gt_irq_ack(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 954eb3a71051..37839d89e03a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -575,7 +575,7 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
-static void __execlists_dequeue(struct intel_engine_cs *engine)
+static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
@@ -587,7 +587,11 @@ static void __execlists_dequeue(struct intel_engine_cs *engine)
 
 	lockdep_assert_held(&engine->timeline.lock);
 
-	/* Hardware submission is through 2 ports. Conceptually each port
+	if (execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
+		return;
+
+	/*
+	 * Hardware submission is through 2 ports. Conceptually each port
 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
 	 * static for a context, and unique to each, so we only execute
 	 * requests belonging to a single context from each ring. RING_HEAD
@@ -777,15 +781,6 @@ static void __execlists_dequeue(struct intel_engine_cs *engine)
 		   !port_isset(engine->execlists.port));
 }
 
-static void execlists_dequeue(struct intel_engine_cs *engine)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&engine->timeline.lock, flags);
-	__execlists_dequeue(engine);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
-}
-
 void
 execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
 {
@@ -1106,6 +1101,7 @@ void intel_engine_handle_execlists_irq(struct intel_engine_cs *engine)
 	       execlists->csb_read);
 	execlists->csb_head = head;
 
+	execlists_dequeue(engine);
 	spin_unlock(&engine->timeline.lock);
 }
 
@@ -1122,8 +1118,9 @@ static void execlists_submission_tasklet(unsigned long data)
 		  engine->i915->gt.awake,
 		  engine->execlists.active);
 
-	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
-		execlists_dequeue(engine);
+	spin_lock_irq(&engine->timeline.lock);
+	execlists_dequeue(engine);
+	spin_unlock_irq(&engine->timeline.lock);
 }
 
 static void queue_request(struct intel_engine_cs *engine,
@@ -1134,16 +1131,20 @@ static void queue_request(struct intel_engine_cs *engine,
 		      &lookup_priolist(engine, prio)->requests);
 }
 
-static void __submit_queue(struct intel_engine_cs *engine, int prio)
+static void __update_queue(struct intel_engine_cs *engine, int prio)
 {
 	engine->execlists.queue_priority = prio;
-	tasklet_hi_schedule(&engine->execlists.tasklet);
 }
 
 static void submit_queue(struct intel_engine_cs *engine, int prio)
 {
-	if (prio > engine->execlists.queue_priority)
-		__submit_queue(engine, prio);
+	if (prio > engine->execlists.queue_priority) {
+		__update_queue(engine, prio);
+		if (!intel_engine_uses_guc(engine))
+			execlists_dequeue(engine);
+		else
+			tasklet_hi_schedule(&engine->execlists.tasklet);
+	}
 }
 
 static void execlists_submit_request(struct i915_request *request)
@@ -1155,11 +1156,12 @@ static void execlists_submit_request(struct i915_request *request)
 	spin_lock_irqsave(&engine->timeline.lock, flags);
 
 	queue_request(engine, &request->sched, rq_prio(request));
-	submit_queue(engine, rq_prio(request));
 
 	GEM_BUG_ON(!engine->execlists.first);
 	GEM_BUG_ON(list_empty(&request->sched.link));
 
+	submit_queue(engine, rq_prio(request));
+
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
@@ -1286,8 +1288,10 @@ static void execlists_schedule(struct i915_request *request,
 		}
 
 		if (prio > engine->execlists.queue_priority &&
-		    i915_sw_fence_done(&sched_to_request(node)->submit))
-			__submit_queue(engine, prio);
+		    i915_sw_fence_done(&sched_to_request(node)->submit)) {
+			__update_queue(engine, prio);
+			tasklet_hi_schedule(&engine->execlists.tasklet);
+		}
 	}
 
 	spin_unlock_irq(&engine->timeline.lock);
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 19/19] drm/i915: Combine gt irq ack/handlers
  2018-05-17  7:40 [PATCH 01/19] drm/i915: Move request->ctx aside Chris Wilson
                   ` (16 preceding siblings ...)
  2018-05-17  7:40 ` [PATCH 18/19] drm/i915/execlists: Direct submission (avoid tasklet/ksoftirqd) Chris Wilson
@ 2018-05-17  7:40 ` Chris Wilson
  2018-05-17  8:01 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/19] drm/i915: Move request->ctx aside Patchwork
                   ` (3 subsequent siblings)
  21 siblings, 0 replies; 42+ messages in thread
From: Chris Wilson @ 2018-05-17  7:40 UTC (permalink / raw)
  To: intel-gfx

Having abandoned the split approach of acking then handling the GT irqs
(sacrificed to use the interrupt handler to guarantee exclusive access
to the irq data), pull the two routines into one to let the compiler
eliminate the redundant storage.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_irq.c | 87 ++++++++++++++-------------------
 1 file changed, 37 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 8b61ebf5cb4a..e6c4007ecfd4 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1473,10 +1473,10 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 	}
 }
 
-static void gen8_gt_irq_ack(struct drm_i915_private *i915,
-			    u32 master_ctl, u32 gt_iir[4])
+static void gen8_gt_irq_handler(struct drm_i915_private *i915, u32 master_ctl)
 {
 	void __iomem * const regs = i915->regs;
+	u32 iir;
 
 #define GEN8_GT_IRQS (GEN8_GT_RCS_IRQ | \
 		      GEN8_GT_BCS_IRQ | \
@@ -1487,58 +1487,49 @@ static void gen8_gt_irq_ack(struct drm_i915_private *i915,
 		      GEN8_GT_GUC_IRQ)
 
 	if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
-		gt_iir[0] = raw_reg_read(regs, GEN8_GT_IIR(0));
-		if (likely(gt_iir[0]))
-			raw_reg_write(regs, GEN8_GT_IIR(0), gt_iir[0]);
+		iir = raw_reg_read(regs, GEN8_GT_IIR(0));
+		if (likely(iir)) {
+			raw_reg_write(regs, GEN8_GT_IIR(0), iir);
+
+			gen8_cs_irq_handler(i915->engine[RCS],
+					    iir >> GEN8_RCS_IRQ_SHIFT);
+			gen8_cs_irq_handler(i915->engine[BCS],
+					    iir >> GEN8_BCS_IRQ_SHIFT);
+		}
 	}
 
 	if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) {
-		gt_iir[1] = raw_reg_read(regs, GEN8_GT_IIR(1));
-		if (likely(gt_iir[1]))
-			raw_reg_write(regs, GEN8_GT_IIR(1), gt_iir[1]);
-	}
-
-	if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) {
-		gt_iir[2] = raw_reg_read(regs, GEN8_GT_IIR(2));
-		if (likely(gt_iir[2] & (i915->pm_rps_events |
-					i915->pm_guc_events)))
-			raw_reg_write(regs, GEN8_GT_IIR(2),
-				      gt_iir[2] & (i915->pm_rps_events |
-						   i915->pm_guc_events));
+		iir = raw_reg_read(regs, GEN8_GT_IIR(1));
+		if (likely(iir)) {
+			raw_reg_write(regs, GEN8_GT_IIR(1), iir);
+
+			gen8_cs_irq_handler(i915->engine[VCS],
+					    iir >> GEN8_VCS1_IRQ_SHIFT);
+			gen8_cs_irq_handler(i915->engine[VCS2],
+					    iir >> GEN8_VCS2_IRQ_SHIFT);
+		}
 	}
 
 	if (master_ctl & GEN8_GT_VECS_IRQ) {
-		gt_iir[3] = raw_reg_read(regs, GEN8_GT_IIR(3));
-		if (likely(gt_iir[3]))
-			raw_reg_write(regs, GEN8_GT_IIR(3), gt_iir[3]);
-	}
-}
+		iir = raw_reg_read(regs, GEN8_GT_IIR(3));
+		if (likely(iir)) {
+			raw_reg_write(regs, GEN8_GT_IIR(3), iir);
 
-static void gen8_gt_irq_handler(struct drm_i915_private *i915,
-				u32 master_ctl, u32 gt_iir[4])
-{
-	if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
-		gen8_cs_irq_handler(i915->engine[RCS],
-				    gt_iir[0] >> GEN8_RCS_IRQ_SHIFT);
-		gen8_cs_irq_handler(i915->engine[BCS],
-				    gt_iir[0] >> GEN8_BCS_IRQ_SHIFT);
-	}
-
-	if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) {
-		gen8_cs_irq_handler(i915->engine[VCS],
-				    gt_iir[1] >> GEN8_VCS1_IRQ_SHIFT);
-		gen8_cs_irq_handler(i915->engine[VCS2],
-				    gt_iir[1] >> GEN8_VCS2_IRQ_SHIFT);
-	}
-
-	if (master_ctl & GEN8_GT_VECS_IRQ) {
-		gen8_cs_irq_handler(i915->engine[VECS],
-				    gt_iir[3] >> GEN8_VECS_IRQ_SHIFT);
+			gen8_cs_irq_handler(i915->engine[VECS],
+					    iir >> GEN8_VECS_IRQ_SHIFT);
+		}
 	}
 
 	if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) {
-		gen6_rps_irq_handler(i915, gt_iir[2]);
-		gen9_guc_irq_handler(i915, gt_iir[2]);
+		iir = raw_reg_read(regs, GEN8_GT_IIR(2));
+		if (likely(iir & (i915->pm_rps_events | i915->pm_guc_events))) {
+			raw_reg_write(regs, GEN8_GT_IIR(2),
+				      iir & (i915->pm_rps_events |
+					     i915->pm_guc_events));
+
+			gen6_rps_irq_handler(i915, iir);
+			gen9_guc_irq_handler(i915, iir);
+		}
 	}
 }
 
@@ -2127,7 +2118,6 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
 		u32 master_ctl, iir;
 		u32 pipe_stats[I915_MAX_PIPES] = {};
 		u32 hotplug_status = 0;
-		u32 gt_iir[4];
 		u32 ier = 0;
 
 		master_ctl = I915_READ(GEN8_MASTER_IRQ) & ~GEN8_MASTER_IRQ_CONTROL;
@@ -2155,8 +2145,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
 		ier = I915_READ(VLV_IER);
 		I915_WRITE(VLV_IER, 0);
 
-		gen8_gt_irq_ack(dev_priv, master_ctl, gt_iir);
-		gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
+		gen8_gt_irq_handler(dev_priv, master_ctl);
 
 		if (iir & I915_DISPLAY_PORT_INTERRUPT)
 			hotplug_status = i9xx_hpd_irq_ack(dev_priv);
@@ -2737,7 +2726,6 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
 {
 	struct drm_i915_private *dev_priv = to_i915(arg);
 	u32 master_ctl;
-	u32 gt_iir[4];
 
 	if (!intel_irqs_enabled(dev_priv))
 		return IRQ_NONE;
@@ -2750,8 +2738,7 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
 	I915_WRITE_FW(GEN8_MASTER_IRQ, 0);
 
 	/* Find, clear, then process each source of interrupt */
-	gen8_gt_irq_ack(dev_priv, master_ctl, gt_iir);
-	gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
+	gen8_gt_irq_handler(dev_priv, master_ctl);
 
 	/* IRQs are synced during runtime_suspend, we don't require a wakeref */
 	if (master_ctl & ~GEN8_GT_IRQS) {
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/19] drm/i915: Move request->ctx aside
  2018-05-17  7:40 [PATCH 01/19] drm/i915: Move request->ctx aside Chris Wilson
                   ` (17 preceding siblings ...)
  2018-05-17  7:40 ` [PATCH 19/19] drm/i915: Combine gt irq ack/handlers Chris Wilson
@ 2018-05-17  8:01 ` Patchwork
  2018-05-17  8:06 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (2 subsequent siblings)
  21 siblings, 0 replies; 42+ messages in thread
From: Patchwork @ 2018-05-17  8:01 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/19] drm/i915: Move request->ctx aside
URL   : https://patchwork.freedesktop.org/series/43307/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
61b8422e1e2d drm/i915: Move request->ctx aside
2bb24771dc50 drm/i915: Move fiddling with engine->last_retired_context
e413648660a8 drm/i915: Store a pointer to intel_context in i915_request
cdb7874e5959 drm/i915: Pull the context->pin_count dec into the common intel_context_unpin
881c9a393191 drm/i915: Be irqsafe inside reset
7f53d8f97784 drm/i915: Make intel_engine_dump irqsafe
22e514de28e8 drm/i915/execlists: Handle copying default context state for atomic reset
-:17: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#17: 
References: 5692251c254a ("drm/i915/lrc: Scrub the GPU state of the guilty hanging request")

-:17: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 5692251c254a ("drm/i915/lrc: Scrub the GPU state of the guilty hanging request")'
#17: 
References: 5692251c254a ("drm/i915/lrc: Scrub the GPU state of the guilty hanging request")

total: 1 errors, 1 warnings, 0 checks, 55 lines checked
4510e41550db drm/i915: Allow init_breadcrumbs to be used from irq context
90e61963b796 drm/i915/execlists: HWACK checking superseded checking port[0].count
838e29f5244a drm/i915: Remove USES_GUC_SUBMISSION() pointer chasing from gen8_cs_irq_handler
1a251707029f drm/i915/execlists: Double check rpm wakeref
3a17d4cca75a drm/i915: After reset on sanitization, reset the engine backends
17100b91e099 drm/i915/execlists: Reset the CSB head tracking on reset/sanitization
-:41: WARNING:LONG_LINE: line over 100 characters
#41: FILE: drivers/gpu/drm/i915/intel_lrc.c:995:
+				(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));

total: 0 errors, 1 warnings, 0 checks, 52 lines checked
4fc2d904fb93 drm/i915/execlists: Pull submit after dequeue under timeline lock
97b89d846cee drm/i915/execlists: Process one CSB interrupt at a time
-:40: WARNING:MEMORY_BARRIER: memory barrier without comment
#40: FILE: drivers/gpu/drm/i915/intel_lrc.c:982:
+	smp_mb__after_atomic();

-:82: WARNING:LONG_LINE: line over 100 characters
#82: FILE: drivers/gpu/drm/i915/intel_lrc.c:1006:
+		  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",

-:83: WARNING:LONG_LINE: line over 100 characters
#83: FILE: drivers/gpu/drm/i915/intel_lrc.c:1007:
+		  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");

-:145: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#145: FILE: drivers/gpu/drm/i915/intel_lrc.c:1038:
+			  status, buf[2*head + 1],
 			               ^

-:181: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#181: FILE: drivers/gpu/drm/i915/intel_lrc.c:1056:
+		    buf[2*head + 1] == execlists->preempt_complete_status) {
 		         ^

total: 0 errors, 3 warnings, 2 checks, 305 lines checked
0764078e971c drm/i915/execlists: Unify CSB access pointers
c3ad3709a86e drm/i915/execlists: Process the CSB directly from inside the irq handler
9af36d245179 drm/i915/execlists: Direct submission (avoid tasklet/ksoftirqd)
-:52: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#52: 
References: 27af5eea54d1 ("drm/i915: Move execlists irq handler to a bottom half")

-:52: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 27af5eea54d1 ("drm/i915: Move execlists irq handler to a bottom half")'
#52: 
References: 27af5eea54d1 ("drm/i915: Move execlists irq handler to a bottom half")

total: 1 errors, 1 warnings, 0 checks, 126 lines checked
caac3766a115 drm/i915: Combine gt irq ack/handlers

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [01/19] drm/i915: Move request->ctx aside
  2018-05-17  7:40 [PATCH 01/19] drm/i915: Move request->ctx aside Chris Wilson
                   ` (18 preceding siblings ...)
  2018-05-17  8:01 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/19] drm/i915: Move request->ctx aside Patchwork
@ 2018-05-17  8:06 ` Patchwork
  2018-05-17  8:16 ` ✓ Fi.CI.BAT: success " Patchwork
  2018-05-17 11:05 ` ✗ Fi.CI.IGT: failure " Patchwork
  21 siblings, 0 replies; 42+ messages in thread
From: Patchwork @ 2018-05-17  8:06 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/19] drm/i915: Move request->ctx aside
URL   : https://patchwork.freedesktop.org/series/43307/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915: Move request->ctx aside
Okay!

Commit: drm/i915: Move fiddling with engine->last_retired_context
Okay!

Commit: drm/i915: Store a pointer to intel_context in i915_request
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3663:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3664:16: warning: expression using sizeof(void)

Commit: drm/i915: Pull the context->pin_count dec into the common intel_context_unpin
Okay!

Commit: drm/i915: Be irqsafe inside reset
Okay!

Commit: drm/i915: Make intel_engine_dump irqsafe
Okay!

Commit: drm/i915/execlists: Handle copying default context state for atomic reset
Okay!

Commit: drm/i915: Allow init_breadcrumbs to be used from irq context
Okay!

Commit: drm/i915/execlists: HWACK checking superseded checking port[0].count
Okay!

Commit: drm/i915: Remove USES_GUC_SUBMISSION() pointer chasing from gen8_cs_irq_handler
-O:drivers/gpu/drm/i915/intel_ringbuffer.h:656:23: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/intel_ringbuffer.h:656:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/intel_ringbuffer.h:663:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/intel_ringbuffer.h:663:23: warning: expression using sizeof(void)

Commit: drm/i915/execlists: Double check rpm wakeref
Okay!

Commit: drm/i915: After reset on sanitization, reset the engine backends
Okay!

Commit: drm/i915/execlists: Reset the CSB head tracking on reset/sanitization
Okay!

Commit: drm/i915/execlists: Pull submit after dequeue under timeline lock
Okay!

Commit: drm/i915/execlists: Process one CSB interrupt at a time
Okay!

Commit: drm/i915/execlists: Unify CSB access pointers
+drivers/gpu/drm/i915/intel_lrc.c:1105:25:    expected void volatile [noderef] <asn:2>*addr
+drivers/gpu/drm/i915/intel_lrc.c:1105:25:    got unsigned int [usertype] *csb_read
+drivers/gpu/drm/i915/intel_lrc.c:1105:25: warning: incorrect type in argument 2 (different address spaces)
+drivers/gpu/drm/i915/intel_lrc.c:2391:29:    expected unsigned int [usertype] *csb_read
+drivers/gpu/drm/i915/intel_lrc.c:2391:29:    got void [noderef] <asn:2>*
+drivers/gpu/drm/i915/intel_lrc.c:2391:29: warning: incorrect type in assignment (different address spaces)
+drivers/gpu/drm/i915/intel_lrc.c:2394:39:    expected unsigned int [usertype] *csb_status
+drivers/gpu/drm/i915/intel_lrc.c:2394:39:    got void [noderef] <asn:2>*
+drivers/gpu/drm/i915/intel_lrc.c:2394:39: warning: incorrect type in assignment (different address spaces)

Commit: drm/i915/execlists: Process the CSB directly from inside the irq handler
-O:drivers/gpu/drm/i915/intel_lrc.c:1105:25:    expected void volatile [noderef] <asn:2>*addr
-O:drivers/gpu/drm/i915/intel_lrc.c:1105:25:    got unsigned int [usertype] *csb_read
-O:drivers/gpu/drm/i915/intel_lrc.c:1105:25: warning: incorrect type in argument 2 (different address spaces)
+drivers/gpu/drm/i915/intel_lrc.c:1106:25:    expected void volatile [noderef] <asn:2>*addr
+drivers/gpu/drm/i915/intel_lrc.c:1106:25:    got unsigned int [usertype] *csb_read
+drivers/gpu/drm/i915/intel_lrc.c:1106:25: warning: incorrect type in argument 2 (different address spaces)

Commit: drm/i915/execlists: Direct submission (avoid tasklet/ksoftirqd)
-O:drivers/gpu/drm/i915/intel_lrc.c:1106:25:    expected void volatile [noderef] <asn:2>*addr
-O:drivers/gpu/drm/i915/intel_lrc.c:1106:25:    got unsigned int [usertype] *csb_read
-O:drivers/gpu/drm/i915/intel_lrc.c:1106:25: warning: incorrect type in argument 2 (different address spaces)
+drivers/gpu/drm/i915/intel_lrc.c:1101:25:    expected void volatile [noderef] <asn:2>*addr
+drivers/gpu/drm/i915/intel_lrc.c:1101:25:    got unsigned int [usertype] *csb_read
+drivers/gpu/drm/i915/intel_lrc.c:1101:25: warning: incorrect type in argument 2 (different address spaces)

Commit: drm/i915: Combine gt irq ack/handlers
Okay!

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [01/19] drm/i915: Move request->ctx aside
  2018-05-17  7:40 [PATCH 01/19] drm/i915: Move request->ctx aside Chris Wilson
                   ` (19 preceding siblings ...)
  2018-05-17  8:06 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-05-17  8:16 ` Patchwork
  2018-05-17 11:05 ` ✗ Fi.CI.IGT: failure " Patchwork
  21 siblings, 0 replies; 42+ messages in thread
From: Patchwork @ 2018-05-17  8:16 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/19] drm/i915: Move request->ctx aside
URL   : https://patchwork.freedesktop.org/series/43307/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4195 -> Patchwork_9025 =

== Summary - WARNING ==

  Minor unknown changes coming with Patchwork_9025 need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_9025, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/43307/revisions/1/mbox/

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_9025:

  === IGT changes ===

    ==== Warnings ====

    igt@gem_exec_gttfill@basic:
      fi-pnv-d510:        SKIP -> PASS

    
== Known issues ==

  Here are the changes found in Patchwork_9025 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@gem_mmap_gtt@basic-small-bo-tiledx:
      fi-gdg-551:         PASS -> FAIL (fdo#102575)

    igt@kms_pipe_crc_basic@read-crc-pipe-c:
      fi-skl-guc:         PASS -> FAIL (fdo#103191, fdo#104724)

    
    ==== Possible fixes ====

    igt@kms_flip@basic-flip-vs-wf_vblank:
      fi-glk-j4005:       FAIL (fdo#100368) -> PASS
      fi-skl-6770hq:      FAIL (fdo#100368) -> PASS

    igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c:
      fi-bxt-dsi:         INCOMPLETE (fdo#103927) -> PASS

    
  fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
  fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575
  fdo#103191 https://bugs.freedesktop.org/show_bug.cgi?id=103191
  fdo#103927 https://bugs.freedesktop.org/show_bug.cgi?id=103927
  fdo#104724 https://bugs.freedesktop.org/show_bug.cgi?id=104724


== Participating hosts (40 -> 37) ==

  Additional (1): fi-snb-2520m 
  Missing    (4): fi-ctg-p8600 fi-ilk-m540 fi-bsw-cyan fi-skl-6700hq 


== Build changes ==

    * Linux: CI_DRM_4195 -> Patchwork_9025

  CI_DRM_4195: d39f8e221f7d4187e7948b6af02a1f45110e817c @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4487: eccae1360d6d01e73c6af2bd97122cef708207ef @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9025: caac3766a115f814cca90e6cd47abcc27243dff1 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4487: 6ab75f7eb5e1dccbb773e1739beeb2d7cbd6ad0d @ git://anongit.freedesktop.org/piglit


== Linux commits ==

caac3766a115 drm/i915: Combine gt irq ack/handlers
9af36d245179 drm/i915/execlists: Direct submission (avoid tasklet/ksoftirqd)
c3ad3709a86e drm/i915/execlists: Process the CSB directly from inside the irq handler
0764078e971c drm/i915/execlists: Unify CSB access pointers
97b89d846cee drm/i915/execlists: Process one CSB interrupt at a time
4fc2d904fb93 drm/i915/execlists: Pull submit after dequeue under timeline lock
17100b91e099 drm/i915/execlists: Reset the CSB head tracking on reset/sanitization
3a17d4cca75a drm/i915: After reset on sanitization, reset the engine backends
1a251707029f drm/i915/execlists: Double check rpm wakeref
838e29f5244a drm/i915: Remove USES_GUC_SUBMISSION() pointer chasing from gen8_cs_irq_handler
90e61963b796 drm/i915/execlists: HWACK checking superseded checking port[0].count
4510e41550db drm/i915: Allow init_breadcrumbs to be used from irq context
22e514de28e8 drm/i915/execlists: Handle copying default context state for atomic reset
7f53d8f97784 drm/i915: Make intel_engine_dump irqsafe
881c9a393191 drm/i915: Be irqsafe inside reset
cdb7874e5959 drm/i915: Pull the context->pin_count dec into the common intel_context_unpin
e413648660a8 drm/i915: Store a pointer to intel_context in i915_request
2bb24771dc50 drm/i915: Move fiddling with engine->last_retired_context
61b8422e1e2d drm/i915: Move request->ctx aside

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9025/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 04/19] drm/i915: Pull the context->pin_count dec into the common intel_context_unpin
  2018-05-17  7:40 ` [PATCH 04/19] drm/i915: Pull the context->pin_count dec into the common intel_context_unpin Chris Wilson
@ 2018-05-17 10:20   ` Tvrtko Ursulin
  2018-05-17 10:35     ` Chris Wilson
  0 siblings, 1 reply; 42+ messages in thread
From: Tvrtko Ursulin @ 2018-05-17 10:20 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/05/2018 08:40, Chris Wilson wrote:
> As all backends implement the same pin_count mechanism and do a
> dec-and-test as their first step, pull that into the common
> intel_context_unpin(). This also pulls into the caller, eliminating the
> indirect call in the usual steady state case. The intel_context_pin()
> side is a little more complicated as it combines the lookup/alloc as
> well as pinning the state, and so is left for a later date.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.h      |  4 ++++
>   drivers/gpu/drm/i915/intel_lrc.c             | 13 +------------
>   drivers/gpu/drm/i915/intel_ringbuffer.c      |  6 ------
>   drivers/gpu/drm/i915/selftests/mock_engine.c |  3 ---
>   4 files changed, 5 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 749a4ff566f5..c3262b4dd2ee 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -285,6 +285,10 @@ static inline void __intel_context_pin(struct intel_context *ce)
>   
>   static inline void intel_context_unpin(struct intel_context *ce)
>   {
> +	GEM_BUG_ON(!ce->pin_count);
> +	if (--ce->pin_count)
> +		return;
> +
>   	GEM_BUG_ON(!ce->ops);
>   	ce->ops->unpin(ce);
>   }
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 960948617748..f3470b95d64e 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1344,7 +1344,7 @@ static void execlists_context_destroy(struct intel_context *ce)
>   	__i915_gem_object_release_unless_active(ce->state->obj);
>   }
>   
> -static void __execlists_context_unpin(struct intel_context *ce)
> +static void execlists_context_unpin(struct intel_context *ce)
>   {
>   	intel_ring_unpin(ce->ring);
>   
> @@ -1355,17 +1355,6 @@ static void __execlists_context_unpin(struct intel_context *ce)
>   	i915_gem_context_put(ce->gem_context);
>   }
>   
> -static void execlists_context_unpin(struct intel_context *ce)
> -{
> -	lockdep_assert_held(&ce->gem_context->i915->drm.struct_mutex);

Do you want to preserve these asserts?

> -	GEM_BUG_ON(ce->pin_count == 0);
> -
> -	if (--ce->pin_count)
> -		return;
> -
> -	__execlists_context_unpin(ce);
> -}
> -
>   static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
>   {
>   	unsigned int flags;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 0c0c9f531e4e..001cf6bcb349 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1195,12 +1195,6 @@ static void intel_ring_context_destroy(struct intel_context *ce)
>   
>   static void intel_ring_context_unpin(struct intel_context *ce)
>   {
> -	lockdep_assert_held(&ce->gem_context->i915->drm.struct_mutex);
> -	GEM_BUG_ON(ce->pin_count == 0);
> -
> -	if (--ce->pin_count)
> -		return;
> -
>   	if (ce->state) {
>   		ce->state->obj->pin_global--;
>   		i915_vma_unpin(ce->state);
> diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
> index 33eddfc1f8ce..f1ac7453053e 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_engine.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
> @@ -74,9 +74,6 @@ static void hw_delay_complete(struct timer_list *t)
>   
>   static void mock_context_unpin(struct intel_context *ce)
>   {
> -	if (--ce->pin_count)
> -		return;
> -
>   	i915_gem_context_put(ce->gem_context);
>   }
>   
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 05/19] drm/i915: Be irqsafe inside reset
  2018-05-17  7:40 ` [PATCH 05/19] drm/i915: Be irqsafe inside reset Chris Wilson
@ 2018-05-17 10:27   ` Tvrtko Ursulin
  2018-05-17 10:46     ` Chris Wilson
  0 siblings, 1 reply; 42+ messages in thread
From: Tvrtko Ursulin @ 2018-05-17 10:27 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/05/2018 08:40, Chris Wilson wrote:
> As we want to be able to call i915_reset_engine and co from a softirq or

Just by glancing, i915_reset_engine looks too heavyweight to ever be 
callable from softirq/timer context. There is even a flush_workqueue in 
there.

> timer context, we need to be irqsafe at all times. So we have to forgo
> the simple spin_lock_irq for the full spin_lock_irqsave.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem.c | 6 ++++--
>   1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 03874b50ada9..a3885adec78a 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3137,15 +3137,17 @@ i915_gem_reset_request(struct intel_engine_cs *engine,

Or you meant to write i915_gem_reset_request / i915_gem_reset_engine in 
the commit message?

Regards,

Tvrtko

>   		 */
>   		request = i915_gem_find_active_request(engine);
>   		if (request) {
> +			unsigned long flags;
> +
>   			i915_gem_context_mark_innocent(request->gem_context);
>   			dma_fence_set_error(&request->fence, -EAGAIN);
>   
>   			/* Rewind the engine to replay the incomplete rq */
> -			spin_lock_irq(&engine->timeline.lock);
> +			spin_lock_irqsave(&engine->timeline.lock, flags);
>   			request = list_prev_entry(request, link);
>   			if (&request->link == &engine->timeline.requests)
>   				request = NULL;
> -			spin_unlock_irq(&engine->timeline.lock);
> +			spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   		}
>   	}
>   
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 06/19] drm/i915: Make intel_engine_dump irqsafe
  2018-05-17  7:40 ` [PATCH 06/19] drm/i915: Make intel_engine_dump irqsafe Chris Wilson
@ 2018-05-17 10:28   ` Tvrtko Ursulin
  0 siblings, 0 replies; 42+ messages in thread
From: Tvrtko Ursulin @ 2018-05-17 10:28 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/05/2018 08:40, Chris Wilson wrote:
> To be useful later, enable intel_engine_dump() to be called from irq
> context (i.e. saving and restoring irq state rather than assuming
> we enter with irqs enabled).
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_engine_cs.c | 11 +++++++----
>   1 file changed, 7 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 828b7377d0d0..333318b340e1 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -1358,6 +1358,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   	const struct intel_engine_execlists * const execlists = &engine->execlists;
>   	struct i915_gpu_error * const error = &engine->i915->gpu_error;
>   	struct i915_request *rq, *last;
> +	unsigned long flags;
>   	struct rb_node *rb;
>   	int count;
>   
> @@ -1424,7 +1425,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
>   	}
>   
> -	spin_lock_irq(&engine->timeline.lock);
> +	local_irq_save(flags);
> +	spin_lock(&engine->timeline.lock);
>   
>   	last = NULL;
>   	count = 0;
> @@ -1466,16 +1468,17 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   		print_request(m, last, "\t\tQ ");
>   	}
>   
> -	spin_unlock_irq(&engine->timeline.lock);
> +	spin_unlock(&engine->timeline.lock);
>   
> -	spin_lock_irq(&b->rb_lock);
> +	spin_lock(&b->rb_lock);
>   	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
>   		struct intel_wait *w = rb_entry(rb, typeof(*w), node);
>   
>   		drm_printf(m, "\t%s [%d] waiting for %x\n",
>   			   w->tsk->comm, w->tsk->pid, w->seqno);
>   	}
> -	spin_unlock_irq(&b->rb_lock);
> +	spin_unlock(&b->rb_lock);
> +	local_irq_restore(flags);
>   
>   	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n",
>   		   engine->irq_posted,
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 04/19] drm/i915: Pull the context->pin_count dec into the common intel_context_unpin
  2018-05-17 10:20   ` Tvrtko Ursulin
@ 2018-05-17 10:35     ` Chris Wilson
  0 siblings, 0 replies; 42+ messages in thread
From: Chris Wilson @ 2018-05-17 10:35 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-05-17 11:20:22)
> 
> On 17/05/2018 08:40, Chris Wilson wrote:
> > As all backends implement the same pin_count mechanism and do a
> > dec-and-test as their first step, pull that into the common
> > intel_context_unpin(). This also pulls into the caller, eliminating the
> > indirect call in the usual steady state case. The intel_context_pin()
> > side is a little more complicated as it combines the lookup/alloc as
> > well as pinning the state, and so is left for a later date.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_gem_context.h      |  4 ++++
> >   drivers/gpu/drm/i915/intel_lrc.c             | 13 +------------
> >   drivers/gpu/drm/i915/intel_ringbuffer.c      |  6 ------
> >   drivers/gpu/drm/i915/selftests/mock_engine.c |  3 ---
> >   4 files changed, 5 insertions(+), 21 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> > index 749a4ff566f5..c3262b4dd2ee 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.h
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> > @@ -285,6 +285,10 @@ static inline void __intel_context_pin(struct intel_context *ce)
> >   
> >   static inline void intel_context_unpin(struct intel_context *ce)
> >   {
> > +     GEM_BUG_ON(!ce->pin_count);
> > +     if (--ce->pin_count)
> > +             return;
> > +
> >       GEM_BUG_ON(!ce->ops);
> >       ce->ops->unpin(ce);
> >   }
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index 960948617748..f3470b95d64e 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -1344,7 +1344,7 @@ static void execlists_context_destroy(struct intel_context *ce)
> >       __i915_gem_object_release_unless_active(ce->state->obj);
> >   }
> >   
> > -static void __execlists_context_unpin(struct intel_context *ce)
> > +static void execlists_context_unpin(struct intel_context *ce)
> >   {
> >       intel_ring_unpin(ce->ring);
> >   
> > @@ -1355,17 +1355,6 @@ static void __execlists_context_unpin(struct intel_context *ce)
> >       i915_gem_context_put(ce->gem_context);
> >   }
> >   
> > -static void execlists_context_unpin(struct intel_context *ce)
> > -{
> > -     lockdep_assert_held(&ce->gem_context->i915->drm.struct_mutex);
> 
> Do you want to preserve these asserts?

They were to document ce->pin_count as guarded by the mutex. And our
headers wouldn't accept putting it into the inline.

So ~o~. It lost its immediate relevance, and the unpin branch calls
should each be guarded by the lockdep assert where required. The one
that's missing would be obj->pin_global--. :|

(As for struct_mutex removal I think targeting the unpin branches is
going to be my first step...)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 07/19] drm/i915/execlists: Handle copying default context state for atomic reset
  2018-05-17  7:40 ` [PATCH 07/19] drm/i915/execlists: Handle copying default context state for atomic reset Chris Wilson
@ 2018-05-17 10:37   ` Tvrtko Ursulin
  0 siblings, 0 replies; 42+ messages in thread
From: Tvrtko Ursulin @ 2018-05-17 10:37 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/05/2018 08:40, Chris Wilson wrote:
> We want to be able to reset the GPU from inside a timer callback
> (hardirq context). One step requires us to copy the default context
> state over to the guilty context, which means we need to plan in advance
> to have that object accessible from within an atomic context. The atomic
> context prevents us from pinning the object or in peeking into the
> shmemfs backing store (all may sleep), so we choose to pin the
> default_state into memory when the engine becomes active. This
> compromise allows us to swap out the default state when idle, if
> required.
> 
> References: 5692251c254a ("drm/i915/lrc: Scrub the GPU state of the guilty hanging request")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_engine_cs.c  | 15 +++++++++++++++
>   drivers/gpu/drm/i915/intel_lrc.c        | 15 ++++-----------
>   drivers/gpu/drm/i915/intel_ringbuffer.h |  1 +
>   3 files changed, 20 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 333318b340e1..b1a1ca0758ce 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -1082,6 +1082,11 @@ void intel_engines_park(struct drm_i915_private *i915)
>   		if (engine->park)
>   			engine->park(engine);
>   
> +		if (engine->pinned_default_state) {
> +			i915_gem_object_unpin_map(engine->default_state);
> +			engine->pinned_default_state = NULL;
> +		}
> +
>   		i915_gem_batch_pool_fini(&engine->batch_pool);
>   		engine->execlists.no_priolist = false;
>   	}
> @@ -1099,6 +1104,16 @@ void intel_engines_unpark(struct drm_i915_private *i915)
>   	enum intel_engine_id id;
>   
>   	for_each_engine(engine, i915, id) {
> +		void *map;
> +
> +		/* Pin the default state for fast resets from atomic context. */
> +		map = NULL;
> +		if (engine->default_state)
> +			map = i915_gem_object_pin_map(engine->default_state,
> +						      I915_MAP_WB);
> +		if (!IS_ERR_OR_NULL(map))
> +			engine->pinned_default_state = map;
> +
>   		if (engine->unpark)
>   			engine->unpark(engine);
>   
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index f3470b95d64e..49283b3d3ebb 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1966,17 +1966,10 @@ static void execlists_reset(struct intel_engine_cs *engine,
>   	 * to recreate its own state.
>   	 */
>   	regs = request->hw_context->lrc_reg_state;
> -	if (engine->default_state) {
> -		void *defaults;
> -
> -		defaults = i915_gem_object_pin_map(engine->default_state,
> -						   I915_MAP_WB);
> -		if (!IS_ERR(defaults)) {
> -			memcpy(regs, /* skip restoring the vanilla PPHWSP */
> -			       defaults + LRC_STATE_PN * PAGE_SIZE,
> -			       engine->context_size - PAGE_SIZE);
> -			i915_gem_object_unpin_map(engine->default_state);
> -		}
> +	if (engine->pinned_default_state) {
> +		memcpy(regs, /* skip restoring the vanilla PPHWSP */
> +		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
> +		       engine->context_size - PAGE_SIZE);
>   	}
>   	execlists_init_reg_state(regs,
>   				 request->gem_context, engine, request->ring);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 20c4e13efc0d..acef385c4c80 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -342,6 +342,7 @@ struct intel_engine_cs {
>   	struct i915_timeline timeline;
>   
>   	struct drm_i915_gem_object *default_state;
> +	void *pinned_default_state;
>   
>   	atomic_t irq_count;
>   	unsigned long irq_posted;
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 08/19] drm/i915: Allow init_breadcrumbs to be used from irq context
  2018-05-17  7:40 ` [PATCH 08/19] drm/i915: Allow init_breadcrumbs to be used from irq context Chris Wilson
@ 2018-05-17 10:40   ` Tvrtko Ursulin
  0 siblings, 0 replies; 42+ messages in thread
From: Tvrtko Ursulin @ 2018-05-17 10:40 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/05/2018 08:40, Chris Wilson wrote:
> In order to support engine reset from irq (timer) context, we need to be
> able to re-initialise the breadcrumbs. So we need to promote the plain
> spin_lock_irq to a safe spin_lock_irqsave.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_breadcrumbs.c | 5 +++--
>   1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 18e643df523e..86a987b8ac66 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -846,8 +846,9 @@ static void cancel_fake_irq(struct intel_engine_cs *engine)
>   void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
>   {
>   	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +	unsigned long flags;
>   
> -	spin_lock_irq(&b->irq_lock);
> +	spin_lock_irqsave(&b->irq_lock, flags);
>   
>   	/*
>   	 * Leave the fake_irq timer enabled (if it is running), but clear the
> @@ -871,7 +872,7 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
>   	 */
>   	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);

Could demote them to __clear_bit but that's not new in this patch.

>   
> -	spin_unlock_irq(&b->irq_lock);
> +	spin_unlock_irqrestore(&b->irq_lock, flags);
>   }
>   
>   void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 05/19] drm/i915: Be irqsafe inside reset
  2018-05-17 10:27   ` Tvrtko Ursulin
@ 2018-05-17 10:46     ` Chris Wilson
  0 siblings, 0 replies; 42+ messages in thread
From: Chris Wilson @ 2018-05-17 10:46 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-05-17 11:27:20)
> 
> On 17/05/2018 08:40, Chris Wilson wrote:
> > As we want to be able to call i915_reset_engine and co from a softirq or
> 
> Just by glancing i915_reset_engine looks to heavy weight to ever be 
> callable from softirq/timer context. There is even a flush_workqueue in 
> there.

There isn't by the time we finish :)

> > timer context, we need to be irqsafe at all times. So we have to forgo
> > the simple spin_lock_irq for the full spin_lock_irqsave.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/i915_gem.c | 6 ++++--
> >   1 file changed, 4 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 03874b50ada9..a3885adec78a 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -3137,15 +3137,17 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
> 
> Or you meant to write i915_gem_reset_request / i915_gem_reset_engine in 
> the commit message?

static int try_preempt_reset(struct intel_engine_execlists *execlists)
{
	struct tasklet_struct * const t = &execlists->tasklet;
	int err = -EBUSY;

        if (tasklet_trylock(t)) {
                struct intel_engine_cs *engine =
                        container_of(execlists, typeof(*engine), execlists);
                const unsigned int bit = I915_RESET_ENGINE + engine->id;
                unsigned long *lock = &engine->i915->gpu_error.flags;

                t->func(t->data);
                if (!execlists_is_active(execlists,
                                         EXECLISTS_ACTIVE_PREEMPT_TIMEOUT)) {
                        /* Nothing to do; the tasklet was just delayed. */
                        err = 0;
                } else if (!test_and_set_bit(bit, lock)) {
                        tasklet_disable_nosync(t);
                        err = i915_reset_engine(engine, "preemption time out");
                        tasklet_enable(t);

                        clear_bit(bit, lock);
                        wake_up_bit(lock, bit);
                }

                tasklet_unlock(t);
        }

	return err;
}

is what I'm aiming for.

And even a test case to call it in irq-off and other atomic contexts.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 09/19] drm/i915/execlists: HWACK checking superseded checking port[0].count
  2018-05-17  7:40 ` [PATCH 09/19] drm/i915/execlists: HWACK checking superseded checking port[0].count Chris Wilson
@ 2018-05-17 10:55   ` Tvrtko Ursulin
  2018-05-17 17:03     ` Chris Wilson
  0 siblings, 1 reply; 42+ messages in thread
From: Tvrtko Ursulin @ 2018-05-17 10:55 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/05/2018 08:40, Chris Wilson wrote:
> The HWACK bit more generically solves the problem of resubmitting ELSP
> while the hardware is still processing the current ELSP write. We no
> longer need to check port[0].count itself.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c | 2 --
>   1 file changed, 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 49283b3d3ebb..857ab04452f0 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -608,8 +608,6 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
>   		GEM_BUG_ON(!execlists_is_active(execlists,
>   						EXECLISTS_ACTIVE_USER));
>   		GEM_BUG_ON(!port_count(&port[0]));
> -		if (port_count(&port[0]) > 1)
> -			return false;
>   
>   		/*
>   		 * If we write to ELSP a second time before the HW has had
> 

Looks indeed the same behaviour. Both before and after we wait for 
preempted event before can submit more to the same port.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 10/19] drm/i915: Remove USES_GUC_SUBMISSION() pointer chasing from gen8_cs_irq_handler
  2018-05-17  7:40 ` [PATCH 10/19] drm/i915: Remove USES_GUC_SUBMISSION() pointer chasing from gen8_cs_irq_handler Chris Wilson
@ 2018-05-17 10:58   ` Tvrtko Ursulin
  2018-05-17 11:24     ` Chris Wilson
  0 siblings, 1 reply; 42+ messages in thread
From: Tvrtko Ursulin @ 2018-05-17 10:58 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/05/2018 08:40, Chris Wilson wrote:
> Store whether or not we need to kick the guc's execlists emulation on
> the engine itself to avoid chasing the device info.

We do not chase device info but modparams in this case.

> gen8_cs_irq_handler                          512     428     -84

I guess my point from before, (unfortunately I forgot to reply), was how 
much of the saving remains if GEM_BUG_ON is compiled out?

If nothing or almost nothing, I don't see a need to fiddle with this now.

Regards,

Tvrtko

> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_irq.c             | 4 +++-
>   drivers/gpu/drm/i915/intel_guc_submission.c | 1 +
>   drivers/gpu/drm/i915/intel_lrc.c            | 1 +
>   drivers/gpu/drm/i915/intel_ringbuffer.h     | 7 +++++++
>   4 files changed, 12 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index f9bc3aaa90d0..460878572515 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1472,8 +1472,10 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
>   	}
>   
>   	if (iir & GT_RENDER_USER_INTERRUPT) {
> +		if (intel_engine_uses_guc(engine))
> +			tasklet = true;
> +
>   		notify_ring(engine);
> -		tasklet |= USES_GUC_SUBMISSION(engine->i915);
>   	}
>   
>   	if (tasklet)
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 133367a17863..d9fcd5db4ea4 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -1312,6 +1312,7 @@ int intel_guc_submission_enable(struct intel_guc *guc)
>   		engine->unpark = guc_submission_unpark;
>   
>   		engine->flags &= ~I915_ENGINE_SUPPORTS_STATS;
> +		engine->flags |= I915_ENGINE_USES_GUC;
>   	}
>   
>   	return 0;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 857ab04452f0..4928e9ad7826 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2305,6 +2305,7 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
>   	engine->park = NULL;
>   	engine->unpark = NULL;
>   
> +	engine->flags &= ~I915_ENGINE_USES_GUC;
>   	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
>   	if (engine->i915->preempt_context)
>   		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index acef385c4c80..4ad9c5842575 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -574,6 +574,7 @@ struct intel_engine_cs {
>   #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
>   #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
>   #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
> +#define I915_ENGINE_USES_GUC         BIT(3)
>   	unsigned int flags;
>   
>   	/*
> @@ -651,6 +652,12 @@ intel_engine_has_preemption(const struct intel_engine_cs *engine)
>   	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
>   }
>   
> +static inline bool
> +intel_engine_uses_guc(const struct intel_engine_cs *engine)
> +{
> +	return engine->flags & I915_ENGINE_USES_GUC;
> +}
> +
>   static inline bool __execlists_need_preempt(int prio, int last)
>   {
>   	return prio > max(0, last);
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 11/19] drm/i915/execlists: Double check rpm wakeref
  2018-05-17  7:40 ` [PATCH 11/19] drm/i915/execlists: Double check rpm wakeref Chris Wilson
@ 2018-05-17 11:04   ` Tvrtko Ursulin
  0 siblings, 0 replies; 42+ messages in thread
From: Tvrtko Ursulin @ 2018-05-17 11:04 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/05/2018 08:40, Chris Wilson wrote:
> As we are splitting processing the CSB events from submitting the ELSP,
> we also need to duplicate the check that we hold a device wakeref for our
> hardware access to the disjoint locations.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c | 26 ++++++++++++++++----------
>   1 file changed, 16 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 4928e9ad7826..6d3b03299b0c 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -449,6 +449,16 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
>   	struct execlist_port *port = execlists->port;
>   	unsigned int n;
>   
> +	/*
> +	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
> +	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
> +	 * not be relinquished until the device is idle (see
> +	 * i915_gem_idle_work_handler()). As a precaution, we make sure
> +	 * that all ELSP are drained i.e. we have processed the CSB,
> +	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
> +	 */
> +	GEM_BUG_ON(!engine->i915->gt.awake);

Hmm.. I think it would be better to leave it in the tasklet and just add 
another instance to process_csb.

Having it only deep in execlists_submit_ports could miss confusions when 
upper layers want to submit and the state says it is not possible yet.

> +
>   	/*
>   	 * ELSQ note: the submit queue is not cleared after being submitted
>   	 * to the HW so we need to make sure we always clean it up. This is
> @@ -959,6 +969,12 @@ static void process_csb(struct intel_engine_cs *engine)
>   	struct drm_i915_private *i915 = engine->i915;
>   	bool fw = false;
>   
> +	/*
> +	 * We must never release our device wakeref until after we have
> +	 * finished processing all potential interrupts from the hardware.
> +	 */
> +	GEM_BUG_ON(!engine->i915->gt.awake);
> +
>   	do {
>   		/* The HWSP contains a (cacheable) mirror of the CSB */
>   		const u32 *buf =
> @@ -1139,16 +1155,6 @@ static void execlists_submission_tasklet(unsigned long data)
>   		  engine->execlists.active,
>   		  test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
>   
> -	/*
> -	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
> -	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
> -	 * not be relinquished until the device is idle (see
> -	 * i915_gem_idle_work_handler()). As a precaution, we make sure
> -	 * that all ELSP are drained i.e. we have processed the CSB,
> -	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
> -	 */
> -	GEM_BUG_ON(!engine->i915->gt.awake);
> -
>   	/*
>   	 * Prefer doing test_and_clear_bit() as a two stage operation to avoid
>   	 * imposing the cost of a locked atomic transaction when submitting a
> 

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* ✗ Fi.CI.IGT: failure for series starting with [01/19] drm/i915: Move request->ctx aside
  2018-05-17  7:40 [PATCH 01/19] drm/i915: Move request->ctx aside Chris Wilson
                   ` (20 preceding siblings ...)
  2018-05-17  8:16 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-05-17 11:05 ` Patchwork
  21 siblings, 0 replies; 42+ messages in thread
From: Patchwork @ 2018-05-17 11:05 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/19] drm/i915: Move request->ctx aside
URL   : https://patchwork.freedesktop.org/series/43307/
State : failure

== Summary ==

= CI Bug Log - changes from CI_DRM_4195_full -> Patchwork_9025_full =

== Summary - FAILURE ==

  Serious unknown changes coming with Patchwork_9025_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_9025_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/43307/revisions/1/mbox/

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_9025_full:

  === IGT changes ===

    ==== Possible regressions ====

    igt@gem_eio@reset-stress:
      shard-glk:          PASS -> DMESG-FAIL +2

    igt@gem_eio@wait-immediate:
      shard-apl:          PASS -> DMESG-FAIL +2

    igt@gem_ringfill@basic-default-hang:
      shard-kbl:          PASS -> DMESG-WARN +18

    igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-onoff:
      shard-apl:          PASS -> FAIL +3

    igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-spr-indfb-draw-mmap-gtt:
      shard-glk:          PASS -> FAIL +6

    igt@kms_rotation_crc@sprite-rotation-270:
      shard-kbl:          PASS -> FAIL +3

    igt@kms_vblank@pipe-b-wait-forked-busy-hang:
      shard-glk:          PASS -> DMESG-WARN +19

    igt@kms_vblank@pipe-c-ts-continuation-idle-hang:
      shard-apl:          PASS -> DMESG-WARN +18

    igt@perf_pmu@enable-race-rcs0:
      shard-kbl:          PASS -> DMESG-FAIL +2

    
    ==== Warnings ====

    igt@gem_exec_schedule@preempt-other-vebox:
      shard-apl:          PASS -> SKIP +1
      shard-kbl:          PASS -> SKIP

    igt@kms_flip@2x-flip-vs-modeset-vs-hang:
      shard-glk:          PASS -> SKIP +2

    
== Known issues ==

  Here are the changes found in Patchwork_9025_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@drv_selftest@live_hangcheck:
      shard-apl:          NOTRUN -> INCOMPLETE (fdo#103927)

    igt@gem_eio@execbuf:
      shard-glk:          PASS -> FAIL (fdo#105957) +18

    igt@gem_eio@in-flight-immediate:
      shard-apl:          PASS -> FAIL (fdo#105957) +19

    igt@gem_eio@in-flight-suspend:
      shard-kbl:          PASS -> FAIL (fdo#105957) +20
      shard-apl:          PASS -> INCOMPLETE (fdo#103927)
      shard-glk:          PASS -> INCOMPLETE (fdo#103359, k.org#198133) +1

    igt@kms_atomic_transition@1x-modeset-transitions-nonblocking:
      shard-glk:          PASS -> FAIL (fdo#105703)

    igt@kms_flip_tiling@flip-to-y-tiled:
      shard-glk:          PASS -> FAIL (fdo#104724, fdo#103822)

    igt@perf_pmu@enable-race-vcs1:
      shard-kbl:          PASS -> INCOMPLETE (fdo#103665)

    
    ==== Possible fixes ====

    igt@drv_selftest@live_gtt:
      shard-apl:          INCOMPLETE (fdo#103927) -> PASS

    igt@gem_exec_store@cachelines-bsd:
      shard-hsw:          FAIL (fdo#100007) -> PASS

    igt@kms_flip@2x-plain-flip-fb-recreate-interruptible:
      shard-glk:          FAIL (fdo#100368) -> PASS

    igt@kms_flip@dpms-vs-vblank-race-interruptible:
      shard-hsw:          FAIL (fdo#103060) -> PASS

    igt@kms_flip_tiling@flip-to-x-tiled:
      shard-glk:          FAIL (fdo#104724, fdo#103822) -> PASS

    igt@kms_flip_tiling@flip-y-tiled:
      shard-glk:          FAIL (fdo#104724) -> PASS

    igt@kms_setmode@basic:
      shard-apl:          FAIL (fdo#99912) -> PASS
      shard-kbl:          FAIL (fdo#99912) -> PASS

    
    ==== Warnings ====

    igt@drv_selftest@live_hangcheck:
      shard-kbl:          DMESG-FAIL -> INCOMPLETE (fdo#103665)
      shard-glk:          DMESG-FAIL -> INCOMPLETE (fdo#103359, k.org#198133)

    
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  fdo#100007 https://bugs.freedesktop.org/show_bug.cgi?id=100007
  fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
  fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
  fdo#103359 https://bugs.freedesktop.org/show_bug.cgi?id=103359
  fdo#103665 https://bugs.freedesktop.org/show_bug.cgi?id=103665
  fdo#103822 https://bugs.freedesktop.org/show_bug.cgi?id=103822
  fdo#103927 https://bugs.freedesktop.org/show_bug.cgi?id=103927
  fdo#104724 https://bugs.freedesktop.org/show_bug.cgi?id=104724
  fdo#105703 https://bugs.freedesktop.org/show_bug.cgi?id=105703
  fdo#105957 https://bugs.freedesktop.org/show_bug.cgi?id=105957
  fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912
  k.org#198133 https://bugzilla.kernel.org/show_bug.cgi?id=198133


== Participating hosts (9 -> 9) ==

  No changes in participating hosts


== Build changes ==

    * Linux: CI_DRM_4195 -> Patchwork_9025

  CI_DRM_4195: d39f8e221f7d4187e7948b6af02a1f45110e817c @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4487: eccae1360d6d01e73c6af2bd97122cef708207ef @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9025: caac3766a115f814cca90e6cd47abcc27243dff1 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4487: 6ab75f7eb5e1dccbb773e1739beeb2d7cbd6ad0d @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9025/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 10/19] drm/i915: Remove USES_GUC_SUBMISSION() pointer chasing from gen8_cs_irq_handler
  2018-05-17 10:58   ` Tvrtko Ursulin
@ 2018-05-17 11:24     ` Chris Wilson
  2018-05-17 13:13       ` Tvrtko Ursulin
  0 siblings, 1 reply; 42+ messages in thread
From: Chris Wilson @ 2018-05-17 11:24 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-05-17 11:58:21)
> 
> On 17/05/2018 08:40, Chris Wilson wrote:
> > Store whether or not we need to kick the guc's execlists emulation on
> > the engine itself to avoid chasing the device info.
> 
> We do not chase device info but modparams in this case.
> 
> > gen8_cs_irq_handler                          512     428     -84
> 
> I guess my point from before, (unfortunately I forgot to reply), was how 
> much of the saving remains if GEM_BUG_ON is compiled out?

Remember the motto of killing off checking globals and only checking
derived state? :) (I'm definitely not in favour of sprinkling more global
checking CAPS over the code.)

Out of the debug build

gen8_cs_irq_handler                          170     185     +15
gen8_cs_irq_handler                          170     128     -42 (later)
gen8_cs_irq_handler                          170     128     -42 (+ USES_GUC_SUBMISSION)

Hmm,

With USES_GUC_SUBMISSION:

   0x000000000000a780 <+0>:	callq  0xa785 <gen8_cs_irq_handler+5>
   0x000000000000a785 <+5>:	push   %r12
   0x000000000000a787 <+7>:	mov    %esi,%r12d
   0x000000000000a78a <+10>:	and    $0x1,%r12d
   0x000000000000a78e <+14>:	and    $0x100,%esi
   0x000000000000a794 <+20>:	push   %rbp
   0x000000000000a795 <+21>:	push   %rbx
   0x000000000000a796 <+22>:	mov    %rdi,%rbx
   0x000000000000a799 <+25>:	je     0xa7c6 <gen8_cs_irq_handler+70>
   0x000000000000a79b <+27>:	lea    0x458(%rdi),%rdi
   0x000000000000a7a2 <+34>:	callq  0xa7a7 <gen8_cs_irq_handler+39>
   0x000000000000a7a7 <+39>:	mov    0x458(%rbx),%eax
   0x000000000000a7ad <+45>:	test   %eax,%eax
   0x000000000000a7af <+47>:	je     0xa7c6 <gen8_cs_irq_handler+70>
   0x000000000000a7b1 <+49>:	lock btsq $0x1,0x108(%rbx)
   0x000000000000a7bb <+59>:	setae  %bpl
   0x000000000000a7bf <+63>:	test   %r12d,%r12d
   0x000000000000a7c2 <+66>:	je     0xa7fa <gen8_cs_irq_handler+122>
   0x000000000000a7c4 <+68>:	jmp    0xa7cd <gen8_cs_irq_handler+77>
   0x000000000000a7c6 <+70>:	test   %r12d,%r12d
   0x000000000000a7c9 <+73>:	je     0xa812 <gen8_cs_irq_handler+146>
   0x000000000000a7cb <+75>:	xor    %ebp,%ebp
   0x000000000000a7cd <+77>:	lea    0x240(%rbx),%rdi
   0x000000000000a7d4 <+84>:	callq  0xa7d9 <gen8_cs_irq_handler+89>
   0x000000000000a7d9 <+89>:	testb  $0x1,0x240(%rbx)
   0x000000000000a7e0 <+96>:	jne    0xa820 <gen8_cs_irq_handler+160>
   0x000000000000a7e2 <+98>:	mov    $0x0,%rdi
   0x000000000000a7e9 <+105>:	callq  0xa7ee <gen8_cs_irq_handler+110>
   0x000000000000a7ee <+110>:	movzbl 0x0(%rip),%eax        # 0xa7f5 <gen8_cs_irq_handler+117>
   0x000000000000a7f5 <+117>:	and    $0x1,%eax
   0x000000000000a7f8 <+120>:	or     %eax,%ebp
   0x000000000000a7fa <+122>:	test   %bpl,%bpl
   0x000000000000a7fd <+125>:	je     0xa812 <gen8_cs_irq_handler+146>
   0x000000000000a7ff <+127>:	lea    0x3d8(%rbx),%rdi
   0x000000000000a806 <+134>:	lock btsq $0x0,0x3e0(%rbx)
   0x000000000000a810 <+144>:	jae    0xa817 <gen8_cs_irq_handler+151>
   0x000000000000a812 <+146>:	pop    %rbx
   0x000000000000a813 <+147>:	pop    %rbp
   0x000000000000a814 <+148>:	pop    %r12
   0x000000000000a816 <+150>:	retq   
   0x000000000000a817 <+151>:	pop    %rbx
   0x000000000000a818 <+152>:	pop    %rbp
   0x000000000000a819 <+153>:	pop    %r12
   0x000000000000a81b <+155>:	jmpq   0xa820 <gen8_cs_irq_handler+160>
   0x000000000000a820 <+160>:	mov    %rbx,%rdi
   0x000000000000a823 <+163>:	callq  0xa4a0 <notify_ring>
   0x000000000000a828 <+168>:	jmp    0xa7e2 <gen8_cs_irq_handler+98>

to

   0x000000000000a780 <+0>:	callq  0xa785 <gen8_cs_irq_handler+5>
   0x000000000000a785 <+5>:	push   %r12
   0x000000000000a787 <+7>:	push   %rbp
   0x000000000000a788 <+8>:	mov    %esi,%ebp
   0x000000000000a78a <+10>:	and    $0x1,%ebp
   0x000000000000a78d <+13>:	and    $0x100,%esi
   0x000000000000a793 <+19>:	push   %rbx
   0x000000000000a794 <+20>:	mov    %rdi,%rbx
   0x000000000000a797 <+23>:	je     0xa7cb <gen8_cs_irq_handler+75>
   0x000000000000a799 <+25>:	lea    0x458(%rdi),%rdi
   0x000000000000a7a0 <+32>:	callq  0xa7a5 <gen8_cs_irq_handler+37>
   0x000000000000a7a5 <+37>:	mov    0x458(%rbx),%eax
   0x000000000000a7ab <+43>:	test   %eax,%eax
   0x000000000000a7ad <+45>:	je     0xa7cb <gen8_cs_irq_handler+75>
   0x000000000000a7af <+47>:	lock btsq $0x1,0x108(%rbx)
   0x000000000000a7b9 <+57>:	setae  %r12b
   0x000000000000a7bd <+61>:	test   %ebp,%ebp
   0x000000000000a7bf <+63>:	jne    0xa7d2 <gen8_cs_irq_handler+82>
   0x000000000000a7c1 <+65>:	test   %r12b,%r12b
   0x000000000000a7c4 <+68>:	jne    0xa805 <gen8_cs_irq_handler+133>
   0x000000000000a7c6 <+70>:	pop    %rbx
   0x000000000000a7c7 <+71>:	pop    %rbp
   0x000000000000a7c8 <+72>:	pop    %r12
   0x000000000000a7ca <+74>:	retq   
   0x000000000000a7cb <+75>:	test   %ebp,%ebp
   0x000000000000a7cd <+77>:	je     0xa7c6 <gen8_cs_irq_handler+70>
   0x000000000000a7cf <+79>:	xor    %r12d,%r12d
   0x000000000000a7d2 <+82>:	lea    0x5d8(%rbx),%rdi
   0x000000000000a7d9 <+89>:	callq  0xa7de <gen8_cs_irq_handler+94>
   0x000000000000a7de <+94>:	mov    0x5d8(%rbx),%ebp
   0x000000000000a7e4 <+100>:	lea    0x240(%rbx),%rdi
   0x000000000000a7eb <+107>:	callq  0xa7f0 <gen8_cs_irq_handler+112>
   0x000000000000a7f0 <+112>:	movzbl 0x240(%rbx),%eax
   0x000000000000a7f7 <+119>:	and    $0x8,%ebp
   0x000000000000a7fa <+122>:	and    $0x1,%eax
   0x000000000000a7fd <+125>:	test   %ebp,%ebp
   0x000000000000a7ff <+127>:	je     0xa821 <gen8_cs_irq_handler+161>
   0x000000000000a801 <+129>:	test   %al,%al
   0x000000000000a803 <+131>:	jne    0xa82f <gen8_cs_irq_handler+175>
   0x000000000000a805 <+133>:	lea    0x3d8(%rbx),%rdi
   0x000000000000a80c <+140>:	lock btsq $0x0,0x3e0(%rbx)
   0x000000000000a816 <+150>:	jb     0xa7c6 <gen8_cs_irq_handler+70>
   0x000000000000a818 <+152>:	pop    %rbx
   0x000000000000a819 <+153>:	pop    %rbp
   0x000000000000a81a <+154>:	pop    %r12
   0x000000000000a81c <+156>:	jmpq   0xa821 <gen8_cs_irq_handler+161>
   0x000000000000a821 <+161>:	test   %al,%al
   0x000000000000a823 <+163>:	je     0xa7c1 <gen8_cs_irq_handler+65>
   0x000000000000a825 <+165>:	mov    %rbx,%rdi
   0x000000000000a828 <+168>:	callq  0xa4a0 <notify_ring>
   0x000000000000a82d <+173>:	jmp    0xa7c1 <gen8_cs_irq_handler+65>
   0x000000000000a82f <+175>:	mov    %rbx,%rdi
   0x000000000000a832 <+178>:	callq  0xa4a0 <notify_ring>
   0x000000000000a837 <+183>:	jmp    0xa805 <gen8_cs_irq_handler+133>

And then onwards to

   0x000000000000a680 <+0>:	callq  0xa685 <gen8_cs_irq_handler+5>
   0x000000000000a685 <+5>:	push   %rbx
   0x000000000000a686 <+6>:	mov    %rdi,%rbx
   0x000000000000a689 <+9>:	sub    $0x8,%rsp
   0x000000000000a68d <+13>:	test   $0x100,%esi
   0x000000000000a693 <+19>:	jne    0xa6ca <gen8_cs_irq_handler+74>
   0x000000000000a695 <+21>:	and    $0x1,%esi
   0x000000000000a698 <+24>:	je     0xa6c4 <gen8_cs_irq_handler+68>
   0x000000000000a69a <+26>:	lea    0x5f0(%rbx),%rdi
   0x000000000000a6a1 <+33>:	callq  0xa6a6 <gen8_cs_irq_handler+38>
   0x000000000000a6a6 <+38>:	testb  $0x8,0x5f0(%rbx)
   0x000000000000a6ad <+45>:	jne    0xa6e6 <gen8_cs_irq_handler+102>
   0x000000000000a6af <+47>:	lea    0x240(%rbx),%rdi
   0x000000000000a6b6 <+54>:	callq  0xa6bb <gen8_cs_irq_handler+59>
   0x000000000000a6bb <+59>:	testb  $0x1,0x240(%rbx)
   0x000000000000a6c2 <+66>:	jne    0xa6d9 <gen8_cs_irq_handler+89>
   0x000000000000a6c4 <+68>:	add    $0x8,%rsp
   0x000000000000a6c8 <+72>:	pop    %rbx
   0x000000000000a6c9 <+73>:	retq   
   0x000000000000a6ca <+74>:	mov    %esi,0x4(%rsp)
   0x000000000000a6ce <+78>:	callq  0xa6d3 <gen8_cs_irq_handler+83>
   0x000000000000a6d3 <+83>:	mov    0x4(%rsp),%esi
   0x000000000000a6d7 <+87>:	jmp    0xa695 <gen8_cs_irq_handler+21>
   0x000000000000a6d9 <+89>:	add    $0x8,%rsp
   0x000000000000a6dd <+93>:	mov    %rbx,%rdi
   0x000000000000a6e0 <+96>:	pop    %rbx
   0x000000000000a6e1 <+97>:	jmpq   0xa3a0 <notify_ring>
   0x000000000000a6e6 <+102>:	lea    0x3d8(%rbx),%rdi
   0x000000000000a6ed <+109>:	lock btsq $0x0,0x3e0(%rbx)
   0x000000000000a6f7 <+119>:	jb     0xa6af <gen8_cs_irq_handler+47>
   0x000000000000a6f9 <+121>:	callq  0xa6fe <gen8_cs_irq_handler+126>
   0x000000000000a6fe <+126>:	jmp    0xa6af <gen8_cs_irq_handler+47>

> If nothing or almost nothing, I don't see a need to fiddle with this now.

Also please consider later patches which also need conditionals as
execlists is unfortunately gaining new tricks that are harder to pull
off with the current guc submission :|
(Might be time to stop mixing the two backends?)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 18/19] drm/i915/execlists: Direct submission (avoid tasklet/ksoftirqd)
  2018-05-17  7:40 ` [PATCH 18/19] drm/i915/execlists: Direct submission (avoid tasklet/ksoftirqd) Chris Wilson
@ 2018-05-17 13:13   ` Tvrtko Ursulin
  2018-05-17 17:07     ` Chris Wilson
  2018-05-18 21:21     ` Chris Wilson
  0 siblings, 2 replies; 42+ messages in thread
From: Tvrtko Ursulin @ 2018-05-17 13:13 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/05/2018 08:40, Chris Wilson wrote:
> Back in commit 27af5eea54d1 ("drm/i915: Move execlists irq handler to a
> bottom half"), we came to the conclusion that running our CSB processing
> and ELSP submission from inside the irq handler was a bad idea. A really
> bad idea as we could impose nearly 1s latency on other users of the
> system, on average! Deferring our work to a tasklet allowed us to do the
> processing with irqs enabled, reducing the impact to an average of about
> 50us.
> 
> We have since eradicated the use of forcewaked mmio from inside the CSB
> processing and ELSP submission, bringing the impact down to around 5us
> (on Kabylake); an order of magnitude better than our measurements 2
> years ago on Broadwell and only about 2x worse on average than the
> gem_syslatency on an unladen system.
> 
> Comparing the impact on the maximum latency observed over a 120s interval,
> repeated several times (using gem_syslatency, similar to RT's cyclictest)
> while the system is fully laden with i915 nops, we see that direct
> submission definitely worsens the response but not to the same outlandish
> degree as before.
> 
> x Unladen baseline
> + Using tasklet
> * Direct submission
> 
> +------------------------------------------------------------------------+
> |xx x          ++    +++ +                           *  * *   ** *** *  *|
> ||A|              |__AM__|                               |_____A_M___|   |
> +------------------------------------------------------------------------+

What are these headers? This one and below, I cannot decipher them at all.

>      N           Min           Max        Median           Avg        Stddev
> x  10             5            18            10           9.3     3.6530049
> +  10            72           120           108         102.9     15.758243
> *  10           255           348           316         305.7      28.74814

In micro-seconds? so tasklet is 108us median? Direct submission 316us 
median?

> 
> And with a background load

This is IO background load?

Regards,

Tvrtko

> 
> +------------------------------------------------------------------------+
> |x                          +           *              *                 |
> |x                    +     + + + +  + +* * ** ++      * *   *          *|
> |A                        |_______A_____|__|_______A___M______|          |
> +------------------------------------------------------------------------+
>      N           Min           Max        Median           Avg        Stddev
> x  10             4            11             9           8.5     2.1730675
> +  10           633          1388           972           993     243.33744
> *  10          1152          2109          1608        1488.3     314.80719
> 
> References: 27af5eea54d1 ("drm/i915: Move execlists irq handler to a bottom half")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_irq.c  | 11 ++------
>   drivers/gpu/drm/i915/intel_lrc.c | 44 +++++++++++++++++---------------
>   2 files changed, 26 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 3f139ff64385..8b61ebf5cb4a 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1462,22 +1462,15 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
>   static void
>   gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
>   {
> -	bool tasklet = false;
> -
> -	if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
> +	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
>   		intel_engine_handle_execlists_irq(engine);
> -		tasklet = true;
> -	}
>   
>   	if (iir & GT_RENDER_USER_INTERRUPT) {
>   		if (intel_engine_uses_guc(engine))
> -			tasklet = true;
> +			tasklet_hi_schedule(&engine->execlists.tasklet);
>   
>   		notify_ring(engine);
>   	}
> -
> -	if (tasklet)
> -		tasklet_hi_schedule(&engine->execlists.tasklet);
>   }
>   
>   static void gen8_gt_irq_ack(struct drm_i915_private *i915,
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 954eb3a71051..37839d89e03a 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -575,7 +575,7 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
>   	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
>   }
>   
> -static void __execlists_dequeue(struct intel_engine_cs *engine)
> +static void execlists_dequeue(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	struct execlist_port *port = execlists->port;
> @@ -587,7 +587,11 @@ static void __execlists_dequeue(struct intel_engine_cs *engine)
>   
>   	lockdep_assert_held(&engine->timeline.lock);
>   
> -	/* Hardware submission is through 2 ports. Conceptually each port
> +	if (execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
> +		return;
> +
> +	/*
> +	 * Hardware submission is through 2 ports. Conceptually each port
>   	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
>   	 * static for a context, and unique to each, so we only execute
>   	 * requests belonging to a single context from each ring. RING_HEAD
> @@ -777,15 +781,6 @@ static void __execlists_dequeue(struct intel_engine_cs *engine)
>   		   !port_isset(engine->execlists.port));
>   }
>   
> -static void execlists_dequeue(struct intel_engine_cs *engine)
> -{
> -	unsigned long flags;
> -
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> -	__execlists_dequeue(engine);
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> -}
> -
>   void
>   execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
>   {
> @@ -1106,6 +1101,7 @@ void intel_engine_handle_execlists_irq(struct intel_engine_cs *engine)
>   	       execlists->csb_read);
>   	execlists->csb_head = head;
>   
> +	execlists_dequeue(engine);
>   	spin_unlock(&engine->timeline.lock);
>   }
>   
> @@ -1122,8 +1118,9 @@ static void execlists_submission_tasklet(unsigned long data)
>   		  engine->i915->gt.awake,
>   		  engine->execlists.active);
>   
> -	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
> -		execlists_dequeue(engine);
> +	spin_lock_irq(&engine->timeline.lock);
> +	execlists_dequeue(engine);
> +	spin_unlock_irq(&engine->timeline.lock);
>   }
>   
>   static void queue_request(struct intel_engine_cs *engine,
> @@ -1134,16 +1131,20 @@ static void queue_request(struct intel_engine_cs *engine,
>   		      &lookup_priolist(engine, prio)->requests);
>   }
>   
> -static void __submit_queue(struct intel_engine_cs *engine, int prio)
> +static void __update_queue(struct intel_engine_cs *engine, int prio)
>   {
>   	engine->execlists.queue_priority = prio;
> -	tasklet_hi_schedule(&engine->execlists.tasklet);
>   }
>   
>   static void submit_queue(struct intel_engine_cs *engine, int prio)
>   {
> -	if (prio > engine->execlists.queue_priority)
> -		__submit_queue(engine, prio);
> +	if (prio > engine->execlists.queue_priority) {
> +		__update_queue(engine, prio);
> +		if (!intel_engine_uses_guc(engine))
> +			execlists_dequeue(engine);
> +		else
> +			tasklet_hi_schedule(&engine->execlists.tasklet);
> +	}
>   }
>   
>   static void execlists_submit_request(struct i915_request *request)
> @@ -1155,11 +1156,12 @@ static void execlists_submit_request(struct i915_request *request)
>   	spin_lock_irqsave(&engine->timeline.lock, flags);
>   
>   	queue_request(engine, &request->sched, rq_prio(request));
> -	submit_queue(engine, rq_prio(request));
>   
>   	GEM_BUG_ON(!engine->execlists.first);
>   	GEM_BUG_ON(list_empty(&request->sched.link));
>   
> +	submit_queue(engine, rq_prio(request));
> +
>   	spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   }
>   
> @@ -1286,8 +1288,10 @@ static void execlists_schedule(struct i915_request *request,
>   		}
>   
>   		if (prio > engine->execlists.queue_priority &&
> -		    i915_sw_fence_done(&sched_to_request(node)->submit))
> -			__submit_queue(engine, prio);
> +		    i915_sw_fence_done(&sched_to_request(node)->submit)) {
> +			__update_queue(engine, prio);
> +			tasklet_hi_schedule(&engine->execlists.tasklet);
> +		}
>   	}
>   
>   	spin_unlock_irq(&engine->timeline.lock);
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 10/19] drm/i915: Remove USES_GUC_SUBMISSION() pointer chasing from gen8_cs_irq_handler
  2018-05-17 11:24     ` Chris Wilson
@ 2018-05-17 13:13       ` Tvrtko Ursulin
  0 siblings, 0 replies; 42+ messages in thread
From: Tvrtko Ursulin @ 2018-05-17 13:13 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/05/2018 12:24, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-05-17 11:58:21)
>>
>> On 17/05/2018 08:40, Chris Wilson wrote:
>>> Store whether or not we need to kick the guc's execlists emulation on
>>> the engine itself to avoid chasing the device info.
>>
>> We do not chase device info but modparams in this case.
>>
>>> gen8_cs_irq_handler                          512     428     -84
>>
>> I guess my point from before, (unfortunately I forgot to reply), was how
>> much of the saving remains if GEM_BUG_ON is compiled out?
> 
> Remember the motto of killing off checking globals and only checking
> derived state? :) (I'm definitely not in favour of sprinkling more global
> checking CAPS over the code.)
> 
> Out of the debug build
> 
> gen8_cs_irq_handler                          170     185     +15
> gen8_cs_irq_handler                          170     128     -42 (later)
> gen8_cs_irq_handler                          170     128     -42 (+ USES_GUC_SUBMISSION)

I don't get which is which.

[snip]

>> If nothing or almost nothing, I don't see a need to fiddle with this now.
> 
> Also please consider later patches which also need conditionals as
> execlists is unfortunately gaining new tricks that are harder to pull
> off with the current guc submission :|
> (Might be time to stop mixing the two backends?)

I might say fine if the commit message contained truth. :)

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 09/19] drm/i915/execlists: HWACK checking superseded checking port[0].count
  2018-05-17 10:55   ` Tvrtko Ursulin
@ 2018-05-17 17:03     ` Chris Wilson
  0 siblings, 0 replies; 42+ messages in thread
From: Chris Wilson @ 2018-05-17 17:03 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-05-17 11:55:29)
> 
> On 17/05/2018 08:40, Chris Wilson wrote:
> > The HWACK bit more generically solves the problem of resubmitting ELSP
> > while the hardware is still processing the current ELSP write. We no
> > longer need to check port[0].count itself.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/intel_lrc.c | 2 --
> >   1 file changed, 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index 49283b3d3ebb..857ab04452f0 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -608,8 +608,6 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
> >               GEM_BUG_ON(!execlists_is_active(execlists,
> >                                               EXECLISTS_ACTIVE_USER));
> >               GEM_BUG_ON(!port_count(&port[0]));
> > -             if (port_count(&port[0]) > 1)
> > -                     return false;
> >   
> >               /*
> >                * If we write to ELSP a second time before the HW has had
> > 
> 
> Looks indeed the same behaviour. Both before and after we wait for 
> preempted event before can submit more to the same port.
> 
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Pushed this one as I think it's a nice standalone cleanup and been
meaning to do it for a while.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 18/19] drm/i915/execlists: Direct submission (avoid tasklet/ksoftirqd)
  2018-05-17 13:13   ` Tvrtko Ursulin
@ 2018-05-17 17:07     ` Chris Wilson
  2018-05-18  8:06       ` Tvrtko Ursulin
  2018-05-18 21:21     ` Chris Wilson
  1 sibling, 1 reply; 42+ messages in thread
From: Chris Wilson @ 2018-05-17 17:07 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-05-17 14:13:00)
> 
> On 17/05/2018 08:40, Chris Wilson wrote:
> > Back in commit 27af5eea54d1 ("drm/i915: Move execlists irq handler to a
> > bottom half"), we came to the conclusion that running our CSB processing
> > and ELSP submission from inside the irq handler was a bad idea. A really
> > bad idea as we could impose nearly 1s latency on other users of the
> > system, on average! Deferring our work to a tasklet allowed us to do the
> > processing with irqs enabled, reducing the impact to an average of about
> > 50us.
> > 
> > We have since eradicated the use of forcewaked mmio from inside the CSB
> > processing and ELSP submission, bringing the impact down to around 5us
> > (on Kabylake); an order of magnitude better than our measurements 2
> > years ago on Broadwell and only about 2x worse on average than the
> > gem_syslatency on an unladen system.
> > 
> > Comparing the impact on the maximum latency observed over a 120s interval,
> > repeated several times (using gem_syslatency, similar to RT's cyclictest)
> > while the system is fully laden with i915 nops, we see that direct
> > submission definitely worsens the response but not to the same outlandish
> > degree as before.
> > 
> > x Unladen baseline
> > + Using tasklet
> > * Direct submission
> > 
> > +------------------------------------------------------------------------+
> > |xx x          ++    +++ +                           *  * *   ** *** *  *|
> > ||A|              |__AM__|                               |_____A_M___|   |
> > +------------------------------------------------------------------------+
> 
> What are these headers? This one and below, I cannot decipher them at all.

Ministat histogram. The headers being the label for the charts; it's a
bit flat so hard to tell it's a histogram.

> >      N           Min           Max        Median           Avg        Stddev
> > x  10             5            18            10           9.3     3.6530049
> > +  10            72           120           108         102.9     15.758243
> > *  10           255           348           316         305.7      28.74814
> 
> In micro-seconds? so tasklet is 108us median? Direct submission 316us 
> median?

Yup, more runs required so you have prettier graphs and units.

> > And with a background load
> 
> This is IO background load?

Yes, background writeout.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 18/19] drm/i915/execlists: Direct submission (avoid tasklet/ksoftirqd)
  2018-05-17 17:07     ` Chris Wilson
@ 2018-05-18  8:06       ` Tvrtko Ursulin
  2018-05-18  8:18         ` Chris Wilson
  2018-05-18 19:36         ` Chris Wilson
  0 siblings, 2 replies; 42+ messages in thread
From: Tvrtko Ursulin @ 2018-05-18  8:06 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/05/2018 18:07, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-05-17 14:13:00)
>>
>> On 17/05/2018 08:40, Chris Wilson wrote:
>>> Back in commit 27af5eea54d1 ("drm/i915: Move execlists irq handler to a
>>> bottom half"), we came to the conclusion that running our CSB processing
>>> and ELSP submission from inside the irq handler was a bad idea. A really
>>> bad idea as we could impose nearly 1s latency on other users of the
>>> system, on average! Deferring our work to a tasklet allowed us to do the
>>> processing with irqs enabled, reducing the impact to an average of about
>>> 50us.
>>>
>>> We have since eradicated the use of forcewaked mmio from inside the CSB
>>> processing and ELSP submission, bringing the impact down to around 5us
>>> (on Kabylake); an order of magnitude better than our measurements 2
>>> years ago on Broadwell and only about 2x worse on average than the
>>> gem_syslatency on an unladen system.
>>>
>>> Comparing the impact on the maximum latency observed over a 120s interval,
>>> repeated several times (using gem_syslatency, similar to RT's cyclictest)
>>> while the system is fully laden with i915 nops, we see that direct
>>> submission definitely worsens the response but not to the same outlandish
>>> degree as before.
>>>
>>> x Unladen baseline
>>> + Using tasklet
>>> * Direct submission
>>>
>>> +------------------------------------------------------------------------+
>>> |xx x          ++    +++ +                           *  * *   ** *** *  *|
>>> ||A|              |__AM__|                               |_____A_M___|   |
>>> +------------------------------------------------------------------------+
>>
>> What are these headers? This one and below, I cannot decipher them at all.
> 
> Ministat histogram. The headers being the label for the charts; it's a
> bit flat so hard to tell it's a histogram.
> 
>>>       N           Min           Max        Median           Avg        Stddev
>>> x  10             5            18            10           9.3     3.6530049
>>> +  10            72           120           108         102.9     15.758243
>>> *  10           255           348           316         305.7      28.74814
>>
>> In micro-seconds? so tasklet is 108us median? Direct submission 316us
>> median?
> 
> Yup, more runs required so you have prettier graphs and units.

Biggest problem for me is that in these tests it is only showing as 
significantly worse than the current tasklet. So it is kind of difficult 
to sell the series. :)

If it only solves issues when submitting from a RT task then it is not 
so interesting. RT tasks are rare and which deal with 3d/media/ocl 
probably even rarer. Or you know of any?

Or perhaps you need to add data from gem_latency as well if that shows 
some wins purely on the i915 side?

Regards,

Tvrtko

>>> And with a background load
>>
>> This is IO background load?
> 
> Yes, background writeout.
> -Chris
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 18/19] drm/i915/execlists: Direct submission (avoid tasklet/ksoftirqd)
  2018-05-18  8:06       ` Tvrtko Ursulin
@ 2018-05-18  8:18         ` Chris Wilson
  2018-05-18 19:36         ` Chris Wilson
  1 sibling, 0 replies; 42+ messages in thread
From: Chris Wilson @ 2018-05-18  8:18 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-05-18 09:06:03)
> 
> On 17/05/2018 18:07, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-05-17 14:13:00)
> >>
> >> On 17/05/2018 08:40, Chris Wilson wrote:
> >>> Back in commit 27af5eea54d1 ("drm/i915: Move execlists irq handler to a
> >>> bottom half"), we came to the conclusion that running our CSB processing
> >>> and ELSP submission from inside the irq handler was a bad idea. A really
> >>> bad idea as we could impose nearly 1s latency on other users of the
> >>> system, on average! Deferring our work to a tasklet allowed us to do the
> >>> processing with irqs enabled, reducing the impact to an average of about
> >>> 50us.
> >>>
> >>> We have since eradicated the use of forcewaked mmio from inside the CSB
> >>> processing and ELSP submission, bringing the impact down to around 5us
> >>> (on Kabylake); an order of magnitude better than our measurements 2
> >>> years ago on Broadwell and only about 2x worse on average than the
> >>> gem_syslatency on an unladen system.
> >>>
> >>> Comparing the impact on the maximum latency observed over a 120s interval,
> >>> repeated several times (using gem_syslatency, similar to RT's cyclictest)
> >>> while the system is fully laden with i915 nops, we see that direct
> >>> submission definitely worsens the response but not to the same outlandish
> >>> degree as before.
> >>>
> >>> x Unladen baseline
> >>> + Using tasklet
> >>> * Direct submission
> >>>
> >>> +------------------------------------------------------------------------+
> >>> |xx x          ++    +++ +                           *  * *   ** *** *  *|
> >>> ||A|              |__AM__|                               |_____A_M___|   |
> >>> +------------------------------------------------------------------------+
> >>
> >> What are these headers? This one and below, I cannot decipher them at all.
> > 
> > Ministat histogram. The headers being the label for the charts; it's a
> > bit flat so hard to tell it's a histogram.
> > 
> >>>       N           Min           Max        Median           Avg        Stddev
> >>> x  10             5            18            10           9.3     3.6530049
> >>> +  10            72           120           108         102.9     15.758243
> >>> *  10           255           348           316         305.7      28.74814
> >>
> >> In micro-seconds? so tasklet is 108us median? Direct submission 316us
> >> median?
> > 
> > Yup, more runs required so you have prettier graphs and units.
> 
> Biggest problem for me is that in these tests it is only showing as 
> significantly worse than the current tasklet. So it is kind of difficult 
> to sell the series. :)

Ba! Compared to the previous best state 2 years ago, we're an order of
magnitude better. (Unbelievable!)

I think it's simply a lack of a reference point, we're about 100% faster
in some microbenchmark stress igts, about 10% faster in regular stress
igts, and will not suffer some of the 200ms timeout -> wedged!

Any latency is bad, but at what point do we worry about the trade-off
between our latency and the rest of the system?
 
> If it only solves issues when submitting from a RT task then it is not 
> so interesting. RT tasks are rare and which deal with 3d/media/ocl 
> probably even rarer. Or you know of any?

No, it's not just an issue from RT. We see examples ranging from the
driver being so starved that we declare the system wedged, to much more
mundane artifacts of our throughput being diminished due to the latency
in submitting work.
 
> Or perhaps you need to add data from gem_latency as well if that shows 
> some wins purely on the i915 side?

The patch before (process CSB from interrupt handler) is justifiable
just by the bugs it solves. This patch, I think is justifiable by our
perf gains and the impact it has for low latency requirements. Or we can
wait until the next patch is mature, that tries to reduce the lock
contention by splitting the data structures.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 18/19] drm/i915/execlists: Direct submission (avoid tasklet/ksoftirqd)
  2018-05-18  8:06       ` Tvrtko Ursulin
  2018-05-18  8:18         ` Chris Wilson
@ 2018-05-18 19:36         ` Chris Wilson
  1 sibling, 0 replies; 42+ messages in thread
From: Chris Wilson @ 2018-05-18 19:36 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-05-18 09:06:03)
> 
> On 17/05/2018 18:07, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-05-17 14:13:00)
> >>
> >> On 17/05/2018 08:40, Chris Wilson wrote:
> >>> Back in commit 27af5eea54d1 ("drm/i915: Move execlists irq handler to a
> >>> bottom half"), we came to the conclusion that running our CSB processing
> >>> and ELSP submission from inside the irq handler was a bad idea. A really
> >>> bad idea as we could impose nearly 1s latency on other users of the
> >>> system, on average! Deferring our work to a tasklet allowed us to do the
> >>> processing with irqs enabled, reducing the impact to an average of about
> >>> 50us.
> >>>
> >>> We have since eradicated the use of forcewaked mmio from inside the CSB
> >>> processing and ELSP submission, bringing the impact down to around 5us
> >>> (on Kabylake); an order of magnitude better than our measurements 2
> >>> years ago on Broadwell and only about 2x worse on average than the
> >>> gem_syslatency on an unladen system.
> >>>
> >>> Comparing the impact on the maximum latency observed over a 120s interval,
> >>> repeated several times (using gem_syslatency, similar to RT's cyclictest)
> >>> while the system is fully laden with i915 nops, we see that direct
> >>> submission definitely worsens the response but not to the same outlandish
> >>> degree as before.
> >>>
> >>> x Unladen baseline
> >>> + Using tasklet
> >>> * Direct submission
> >>>
> >>> +------------------------------------------------------------------------+
> >>> |xx x          ++    +++ +                           *  * *   ** *** *  *|
> >>> ||A|              |__AM__|                               |_____A_M___|   |
> >>> +------------------------------------------------------------------------+
> >>
> >> What are these headers? This one and below, I cannot decipher them at all.
> > 
> > Ministat histogram. The headers being the label for the charts; it's a
> > bit flat so hard to tell it's a histogram.
> > 
> >>>       N           Min           Max        Median           Avg        Stddev
> >>> x  10             5            18            10           9.3     3.6530049
> >>> +  10            72           120           108         102.9     15.758243
> >>> *  10           255           348           316         305.7      28.74814
> >>
> >> In micro-seconds? so tasklet is 108us median? Direct submission 316us
> >> median?
> > 
> > Yup, more runs required so you have prettier graphs and units.
> 
> Biggest problem for me is that in these tests it is only showing as 
> significantly worse than the current tasklet. So it is kind of difficult 
> to sell the series. :)

As a reference point, on bdw from before we introduced tasklets:
gem_syslatency: cycles=20849458, latency mean=678.892us max=8308500us
gem_syslatency: cycles=22964600, latency mean=1583.312us max=5991894us
gem_syslatency: cycles=21404190, latency mean=1766.220us max=12423925us
gem_syslatency: cycles=22779405, latency mean=1698.282us max=9110117us
gem_syslatency: cycles=22021655, latency mean=1921.855us max=7193398us

Message from syslogd@broadwell at May 18 18:27:47 ...
 kernel:[ 8991.394488] NMI watchdog: BUG: soft lockup - CPU#2 stuck for
 22s! [gem_syslatency:1800]
 gem_syslatency: cycles=21218381, latency mean=1711.088us max=13932087us
 gem_syslatency: cycles=22189636, latency mean=1724.968us max=15483237us
 gem_syslatency: cycles=21941275, latency mean=1697.738us max=6065099us
 gem_syslatency: cycles=21896529, latency mean=1748.536us max=8007063us
 gem_syslatency: cycles=22053588, latency mean=1677.465us max=6604678us
 gem_syslatency: cycles=21657859, latency mean=1570.085us max=10346811us
 gem_syslatency: cycles=21627270, latency mean=1227.782us max=10489759us
 gem_syslatency: cycles=22441025, latency mean=1635.776us max=5932223us
 gem_syslatency: cycles=21890354, latency mean=1790.042us max=7956373us
 gem_syslatency: cycles=20976620, latency mean=989.215us max=11631632us
 gem_syslatency: cycles=22087242, latency mean=1723.138us max=12337920us
 gem_syslatency: cycles=22800746, latency mean=1749.050us max=7080445us

* at this point, the sata driver ate itself and took out the fs.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 18/19] drm/i915/execlists: Direct submission (avoid tasklet/ksoftirqd)
  2018-05-17 13:13   ` Tvrtko Ursulin
  2018-05-17 17:07     ` Chris Wilson
@ 2018-05-18 21:21     ` Chris Wilson
  1 sibling, 0 replies; 42+ messages in thread
From: Chris Wilson @ 2018-05-18 21:21 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-05-17 14:13:00)
> 
> On 17/05/2018 08:40, Chris Wilson wrote:
> > Back in commit 27af5eea54d1 ("drm/i915: Move execlists irq handler to a
> > bottom half"), we came to the conclusion that running our CSB processing
> > and ELSP submission from inside the irq handler was a bad idea. A really
> > bad idea as we could impose nearly 1s latency on other users of the
> > system, on average! Deferring our work to a tasklet allowed us to do the
> > processing with irqs enabled, reducing the impact to an average of about
> > 50us.
> > 
> > We have since eradicated the use of forcewaked mmio from inside the CSB
> > processing and ELSP submission, bringing the impact down to around 5us
> > (on Kabylake); an order of magnitude better than our measurements 2
> > years ago on Broadwell and only about 2x worse on average than the
> > gem_syslatency on an unladen system.
> > 
> > Comparing the impact on the maximum latency observed over a 120s interval,
> > repeated several times (using gem_syslatency, similar to RT's cyclictest)
> > while the system is fully laden with i915 nops, we see that direct
> > submission definitely worsens the response but not to the same outlandish
> > degree as before.
> > 
> > x Unladen baseline
> > + Using tasklet
> > * Direct submission
> > 
> > +------------------------------------------------------------------------+
> > |xx x          ++    +++ +                           *  * *   ** *** *  *|
> > ||A|              |__AM__|                               |_____A_M___|   |
> > +------------------------------------------------------------------------+
> 
> What are these headers? This one and below, I cannot decipher them at all.
> 
> >      N           Min           Max        Median           Avg        Stddev
> > x  10             5            18            10           9.3     3.6530049
> > +  10            72           120           108         102.9     15.758243
> > *  10           255           348           316         305.7      28.74814
> 
> In micro-seconds? so tasklet is 108us median? Direct submission 316us 
> median?

Perspective, ivb ringbuf:

x syslatency-ringbuf-mean.txt
+ syslatency-ringbuf-max.txt
+------------------------------------------------------------------------+
|   xx                                                           +       |
|   xx                                                           +       |
|   xx                                                           +       |
|   xx                                                          ++       |
|  xxxx                                       +  +      +  ++ + ++       |
|x xxxx      +                             +  +  ++  +  ++ ++++ ++++    +|
|  |AM                                         |__________A__M_______|   |
+------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x  30         5.395        17.893        13.626     13.135367     2.2143809
+  30            33           169           143     135.86667     25.609445

Using execlists+tasklet is on par with irqoff (i.e. max) latency of
ringbuf. (There should be very little irq disabling for ringbuf.) Normal
average latency of execlists+direct_submission is still better than ivb,
but that is almost entirely generational differences.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

end of thread, other threads:[~2018-05-18 21:21 UTC | newest]

Thread overview: 42+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-17  7:40 [PATCH 01/19] drm/i915: Move request->ctx aside Chris Wilson
2018-05-17  7:40 ` [PATCH 02/19] drm/i915: Move fiddling with engine->last_retired_context Chris Wilson
2018-05-17  7:40 ` [PATCH 03/19] drm/i915: Store a pointer to intel_context in i915_request Chris Wilson
2018-05-17  7:40 ` [PATCH 04/19] drm/i915: Pull the context->pin_count dec into the common intel_context_unpin Chris Wilson
2018-05-17 10:20   ` Tvrtko Ursulin
2018-05-17 10:35     ` Chris Wilson
2018-05-17  7:40 ` [PATCH 05/19] drm/i915: Be irqsafe inside reset Chris Wilson
2018-05-17 10:27   ` Tvrtko Ursulin
2018-05-17 10:46     ` Chris Wilson
2018-05-17  7:40 ` [PATCH 06/19] drm/i915: Make intel_engine_dump irqsafe Chris Wilson
2018-05-17 10:28   ` Tvrtko Ursulin
2018-05-17  7:40 ` [PATCH 07/19] drm/i915/execlists: Handle copying default context state for atomic reset Chris Wilson
2018-05-17 10:37   ` Tvrtko Ursulin
2018-05-17  7:40 ` [PATCH 08/19] drm/i915: Allow init_breadcrumbs to be used from irq context Chris Wilson
2018-05-17 10:40   ` Tvrtko Ursulin
2018-05-17  7:40 ` [PATCH 09/19] drm/i915/execlists: HWACK checking superseded checking port[0].count Chris Wilson
2018-05-17 10:55   ` Tvrtko Ursulin
2018-05-17 17:03     ` Chris Wilson
2018-05-17  7:40 ` [PATCH 10/19] drm/i915: Remove USES_GUC_SUBMISSION() pointer chasing from gen8_cs_irq_handler Chris Wilson
2018-05-17 10:58   ` Tvrtko Ursulin
2018-05-17 11:24     ` Chris Wilson
2018-05-17 13:13       ` Tvrtko Ursulin
2018-05-17  7:40 ` [PATCH 11/19] drm/i915/execlists: Double check rpm wakeref Chris Wilson
2018-05-17 11:04   ` Tvrtko Ursulin
2018-05-17  7:40 ` [PATCH 12/19] drm/i915: After reset on sanitization, reset the engine backends Chris Wilson
2018-05-17  7:40 ` [PATCH 13/19] drm/i915/execlists: Reset the CSB head tracking on reset/sanitization Chris Wilson
2018-05-17  7:40 ` [PATCH 14/19] drm/i915/execlists: Pull submit after dequeue under timeline lock Chris Wilson
2018-05-17  7:40 ` [PATCH 15/19] drm/i915/execlists: Process one CSB interrupt at a time Chris Wilson
2018-05-17  7:40 ` [PATCH 16/19] drm/i915/execlists: Unify CSB access pointers Chris Wilson
2018-05-17  7:40 ` [PATCH 17/19] drm/i915/execlists: Process the CSB directly from inside the irq handler Chris Wilson
2018-05-17  7:40 ` [PATCH 18/19] drm/i915/execlists: Direct submission (avoid tasklet/ksoftirqd) Chris Wilson
2018-05-17 13:13   ` Tvrtko Ursulin
2018-05-17 17:07     ` Chris Wilson
2018-05-18  8:06       ` Tvrtko Ursulin
2018-05-18  8:18         ` Chris Wilson
2018-05-18 19:36         ` Chris Wilson
2018-05-18 21:21     ` Chris Wilson
2018-05-17  7:40 ` [PATCH 19/19] drm/i915: Combine gt irq ack/handlers Chris Wilson
2018-05-17  8:01 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/19] drm/i915: Move request->ctx aside Patchwork
2018-05-17  8:06 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-05-17  8:16 ` ✓ Fi.CI.BAT: success " Patchwork
2018-05-17 11:05 ` ✗ Fi.CI.IGT: failure " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.