* [PATCH 001/262] drm/i915: Move request->ctx aside
From: Chris Wilson @ 2018-05-17  6:03 UTC
  To: intel-gfx

In the next patch, we want to store the intel_context pointer inside
i915_request, as it is frequently accessed via a convoluted dance when
submitting the request to hw. Having two context pointers inside
i915_request leads to confusion, so first rename the existing
i915_gem_context pointer to i915_request.gem_context.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
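For orientation before the diff, here is a stand-alone sketch of the rename
in miniature. The structs are heavily reduced, illustrative models, not the
real kernel definitions, and the snippet is ordinary userspace C rather than
part of the patch:

#include <stdio.h>

/* Reduced, illustrative model of the structures involved. */
struct i915_gem_context { int hw_id; };

struct i915_request {
	/*
	 * Before this patch: struct i915_gem_context *ctx;
	 * After the rename the field name states which kind of context
	 * it points at, freeing the shorter name for the intel_context
	 * pointer that the next patch introduces.
	 */
	struct i915_gem_context *gem_context;
};

int main(void)
{
	struct i915_gem_context ctx = { .hw_id = 7 };
	struct i915_request rq = { .gem_context = &ctx };

	/* Every former req->ctx user now spells it req->gem_context. */
	printf("hw_id=%d\n", rq.gem_context->hw_id);
	return 0;
}

The rest of the patch is a mechanical substitution of that access pattern
across the driver, as the diffstat above suggests.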
 drivers/gpu/drm/i915/gvt/scheduler.c          |  4 +--
 drivers/gpu/drm/i915/i915_debugfs.c           |  4 +--
 drivers/gpu/drm/i915/i915_gem.c               | 10 +++---
 drivers/gpu/drm/i915/i915_gpu_error.c         | 18 ++++++-----
 drivers/gpu/drm/i915/i915_request.c           | 12 +++----
 drivers/gpu/drm/i915/i915_request.h           |  2 +-
 drivers/gpu/drm/i915/i915_trace.h             | 10 +++---
 drivers/gpu/drm/i915/intel_engine_cs.c        |  2 +-
 drivers/gpu/drm/i915/intel_guc_submission.c   |  7 +++--
 drivers/gpu/drm/i915/intel_lrc.c              | 31 ++++++++++---------
 drivers/gpu/drm/i915/intel_ringbuffer.c       | 12 +++----
 .../gpu/drm/i915/selftests/intel_hangcheck.c  |  5 ++-
 drivers/gpu/drm/i915/selftests/intel_lrc.c    |  2 +-
 13 files changed, 64 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index c2d183b91500..17f9f8d7e148 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -205,7 +205,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 
 static inline bool is_gvt_request(struct i915_request *req)
 {
-	return i915_gem_context_force_single_submission(req->ctx);
+	return i915_gem_context_force_single_submission(req->gem_context);
 }
 
 static void save_ring_hw_state(struct intel_vgpu *vgpu, int ring_id)
@@ -305,7 +305,7 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
 	struct i915_request *req = workload->req;
 
 	if (IS_KABYLAKE(req->i915) &&
-	    is_inhibit_context(req->ctx, req->engine->id))
+	    is_inhibit_context(req->gem_context, req->engine->id))
 		intel_vgpu_restore_inhibit_context(vgpu, req);
 
 	/* allocate shadow ring buffer */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 13e7b9e4a6e6..ee8e2ff2c426 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -542,8 +542,8 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 						   struct i915_request,
 						   client_link);
 		rcu_read_lock();
-		task = pid_task(request && request->ctx->pid ?
-				request->ctx->pid : file->pid,
+		task = pid_task(request && request->gem_context->pid ?
+				request->gem_context->pid : file->pid,
 				PIDTYPE_PID);
 		print_file_stats(m, task ? task->comm : "<unknown>", stats);
 		rcu_read_unlock();
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b0fe452ce17c..a20f8db5729d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3067,7 +3067,7 @@ static void skip_request(struct i915_request *request)
 static void engine_skip_context(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
-	struct i915_gem_context *hung_ctx = request->ctx;
+	struct i915_gem_context *hung_ctx = request->gem_context;
 	struct i915_timeline *timeline = request->timeline;
 	unsigned long flags;
 
@@ -3077,7 +3077,7 @@ static void engine_skip_context(struct i915_request *request)
 	spin_lock_nested(&timeline->lock, SINGLE_DEPTH_NESTING);
 
 	list_for_each_entry_continue(request, &engine->timeline.requests, link)
-		if (request->ctx == hung_ctx)
+		if (request->gem_context == hung_ctx)
 			skip_request(request);
 
 	list_for_each_entry(request, &timeline->requests, link)
@@ -3123,11 +3123,11 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
 	}
 
 	if (stalled) {
-		i915_gem_context_mark_guilty(request->ctx);
+		i915_gem_context_mark_guilty(request->gem_context);
 		skip_request(request);
 
 		/* If this context is now banned, skip all pending requests. */
-		if (i915_gem_context_is_banned(request->ctx))
+		if (i915_gem_context_is_banned(request->gem_context))
 			engine_skip_context(request);
 	} else {
 		/*
@@ -3137,7 +3137,7 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
 		 */
 		request = i915_gem_find_active_request(engine);
 		if (request) {
-			i915_gem_context_mark_innocent(request->ctx);
+			i915_gem_context_mark_innocent(request->gem_context);
 			dma_fence_set_error(&request->fence, -EAGAIN);
 
 			/* Rewind the engine to replay the incomplete rq */
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index df234dc23274..7cc7d3bc731b 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1287,9 +1287,11 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
 static void record_request(struct i915_request *request,
 			   struct drm_i915_error_request *erq)
 {
-	erq->context = request->ctx->hw_id;
+	struct i915_gem_context *ctx = request->gem_context;
+
+	erq->context = ctx->hw_id;
 	erq->sched_attr = request->sched.attr;
-	erq->ban_score = atomic_read(&request->ctx->ban_score);
+	erq->ban_score = atomic_read(&ctx->ban_score);
 	erq->seqno = request->global_seqno;
 	erq->jiffies = request->emitted_jiffies;
 	erq->start = i915_ggtt_offset(request->ring->vma);
@@ -1297,7 +1299,7 @@ static void record_request(struct i915_request *request,
 	erq->tail = request->tail;
 
 	rcu_read_lock();
-	erq->pid = request->ctx->pid ? pid_nr(request->ctx->pid) : 0;
+	erq->pid = ctx->pid ? pid_nr(ctx->pid) : 0;
 	rcu_read_unlock();
 }
 
@@ -1461,12 +1463,12 @@ static void gem_record_rings(struct i915_gpu_state *error)
 
 		request = i915_gem_find_active_request(engine);
 		if (request) {
+			struct i915_gem_context *ctx = request->gem_context;
 			struct intel_ring *ring;
 
-			ee->vm = request->ctx->ppgtt ?
-				&request->ctx->ppgtt->base : &ggtt->base;
+			ee->vm = ctx->ppgtt ? &ctx->ppgtt->base : &ggtt->base;
 
-			record_context(&ee->context, request->ctx);
+			record_context(&ee->context, ctx);
 
 			/* We need to copy these to an anonymous buffer
 			 * as the simplest method to avoid being overwritten
@@ -1483,11 +1485,11 @@ static void gem_record_rings(struct i915_gpu_state *error)
 
 			ee->ctx =
 				i915_error_object_create(i915,
-							 to_intel_context(request->ctx,
+							 to_intel_context(ctx,
 									  engine)->state);
 
 			error->simulated |=
-				i915_gem_context_no_error_capture(request->ctx);
+				i915_gem_context_no_error_capture(ctx);
 
 			ee->rq_head = request->head;
 			ee->rq_post = request->postfix;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 8928894dd9c7..fe8810a6a339 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -384,7 +384,7 @@ static void __retire_engine_request(struct intel_engine_cs *engine,
 	 */
 	if (engine->last_retired_context)
 		intel_context_unpin(engine->last_retired_context, engine);
-	engine->last_retired_context = rq->ctx;
+	engine->last_retired_context = rq->gem_context;
 }
 
 static void __retire_engine_upto(struct intel_engine_cs *engine,
@@ -455,8 +455,8 @@ static void i915_request_retire(struct i915_request *request)
 	i915_request_remove_from_client(request);
 
 	/* Retirement decays the ban score as it is a sign of ctx progress */
-	atomic_dec_if_positive(&request->ctx->ban_score);
-	intel_context_unpin(request->ctx, request->engine);
+	atomic_dec_if_positive(&request->gem_context->ban_score);
+	intel_context_unpin(request->gem_context, request->engine);
 
 	__retire_engine_upto(request->engine, request);
 
@@ -760,7 +760,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	INIT_LIST_HEAD(&rq->active_list);
 	rq->i915 = i915;
 	rq->engine = engine;
-	rq->ctx = ctx;
+	rq->gem_context = ctx;
 	rq->ring = ring;
 	rq->timeline = ring->timeline;
 	GEM_BUG_ON(rq->timeline == &engine->timeline);
@@ -814,7 +814,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 		goto err_unwind;
 
 	/* Keep a second pin for the dual retirement along engine and ring */
-	__intel_context_pin(rq->ctx, engine);
+	__intel_context_pin(rq->gem_context, engine);
 
 	/* Check that we didn't interrupt ourselves with a new request */
 	GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno);
@@ -1113,7 +1113,7 @@ void __i915_request_add(struct i915_request *request, bool flush_caches)
 	local_bh_disable();
 	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
 	if (engine->schedule)
-		engine->schedule(request, &request->ctx->sched);
+		engine->schedule(request, &request->gem_context->sched);
 	rcu_read_unlock();
 	i915_sw_fence_commit(&request->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index eddbd4245cb3..dddecd9ffd0c 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -93,7 +93,7 @@ struct i915_request {
 	 * i915_request_free() will then decrement the refcount on the
 	 * context.
 	 */
-	struct i915_gem_context *ctx;
+	struct i915_gem_context *gem_context;
 	struct intel_engine_cs *engine;
 	struct intel_ring *ring;
 	struct i915_timeline *timeline;
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 8cc3a256f29d..5d4f78765083 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -624,7 +624,7 @@ TRACE_EVENT(i915_request_queue,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->ctx->hw_id;
+			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->ring = rq->engine->id;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
@@ -651,7 +651,7 @@ DECLARE_EVENT_CLASS(i915_request,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->ctx->hw_id;
+			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->ring = rq->engine->id;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
@@ -696,7 +696,7 @@ TRACE_EVENT(i915_request_in,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->ctx->hw_id;
+			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->ring = rq->engine->id;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
@@ -727,7 +727,7 @@ TRACE_EVENT(i915_request_out,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->ctx->hw_id;
+			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->ring = rq->engine->id;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
@@ -815,7 +815,7 @@ TRACE_EVENT(i915_request_wait_begin,
 	     */
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->ctx->hw_id;
+			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->ring = rq->engine->id;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index d4e159ae65a6..faaaf2638bb8 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1020,7 +1020,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
 	 */
 	rq = __i915_gem_active_peek(&engine->timeline.last_request);
 	if (rq)
-		return rq->ctx == kernel_context;
+		return rq->gem_context == kernel_context;
 	else
 		return engine->last_retired_context == kernel_context;
 }
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 637e852888ec..a432a193f3c4 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -513,8 +513,9 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 {
 	struct intel_guc_client *client = guc->execbuf_client;
 	struct intel_engine_cs *engine = rq->engine;
-	u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(rq->ctx,
-								 engine));
+	u32 ctx_desc =
+		lower_32_bits(intel_lr_context_descriptor(rq->gem_context,
+							  engine));
 	u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
 
 	spin_lock(&client->wq_lock);
@@ -725,7 +726,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 		struct i915_request *rq, *rn;
 
 		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
-			if (last && rq->ctx != last->ctx) {
+			if (last && rq->gem_context != last->gem_context) {
 				if (port == last_port) {
 					__list_del_many(&p->requests,
 							&rq->sched.link);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 646ecf267411..3173dc58a3b3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -418,9 +418,10 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
 
 static u64 execlists_update_context(struct i915_request *rq)
 {
-	struct intel_context *ce = to_intel_context(rq->ctx, rq->engine);
+	struct intel_context *ce =
+		to_intel_context(rq->gem_context, rq->engine);
 	struct i915_hw_ppgtt *ppgtt =
-		rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
+		rq->gem_context->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
 	u32 *reg_state = ce->lrc_reg_state;
 
 	reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
@@ -681,7 +682,8 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 			 * second request, and so we never need to tell the
 			 * hardware about the first.
 			 */
-			if (last && !can_merge_ctx(rq->ctx, last->ctx)) {
+			if (last && !can_merge_ctx(rq->gem_context,
+						   last->gem_context)) {
 				/*
 				 * If we are on the second port and cannot
 				 * combine this request with the last, then we
@@ -700,14 +702,14 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 				 * the same context (even though a different
 				 * request) to the second port.
 				 */
-				if (ctx_single_port_submission(last->ctx) ||
-				    ctx_single_port_submission(rq->ctx)) {
+				if (ctx_single_port_submission(last->gem_context) ||
+				    ctx_single_port_submission(rq->gem_context)) {
 					__list_del_many(&p->requests,
 							&rq->sched.link);
 					goto done;
 				}
 
-				GEM_BUG_ON(last->ctx == rq->ctx);
+				GEM_BUG_ON(last->gem_context == rq->gem_context);
 
 				if (submit)
 					port_assign(port, last);
@@ -1438,7 +1440,7 @@ static void execlists_context_unpin(struct intel_engine_cs *engine,
 static int execlists_request_alloc(struct i915_request *request)
 {
 	struct intel_context *ce =
-		to_intel_context(request->ctx, request->engine);
+		to_intel_context(request->gem_context, request->engine);
 	int ret;
 
 	GEM_BUG_ON(!ce->pin_count);
@@ -1955,7 +1957,7 @@ static void execlists_reset(struct intel_engine_cs *engine,
 	 * future request will be after userspace has had the opportunity
 	 * to recreate its own state.
 	 */
-	regs = to_intel_context(request->ctx, engine)->lrc_reg_state;
+	regs = to_intel_context(request->gem_context, engine)->lrc_reg_state;
 	if (engine->default_state) {
 		void *defaults;
 
@@ -1968,7 +1970,8 @@ static void execlists_reset(struct intel_engine_cs *engine,
 			i915_gem_object_unpin_map(engine->default_state);
 		}
 	}
-	execlists_init_reg_state(regs, request->ctx, engine, request->ring);
+	execlists_init_reg_state(regs,
+				 request->gem_context, engine, request->ring);
 
 	/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
 	regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma);
@@ -1990,7 +1993,7 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
 
 static int intel_logical_ring_emit_pdps(struct i915_request *rq)
 {
-	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
+	struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt;
 	struct intel_engine_cs *engine = rq->engine;
 	const int num_lri_cmds = GEN8_3LVL_PDPES * 2;
 	u32 *cs;
@@ -2029,15 +2032,15 @@ static int gen8_emit_bb_start(struct i915_request *rq,
 	 * it is unsafe in case of lite-restore (because the ctx is
 	 * not idle). PML4 is allocated during ppgtt init so this is
 	 * not needed in 48-bit.*/
-	if (rq->ctx->ppgtt &&
-	    (intel_engine_flag(rq->engine) & rq->ctx->ppgtt->pd_dirty_rings) &&
-	    !i915_vm_is_48bit(&rq->ctx->ppgtt->base) &&
+	if (rq->gem_context->ppgtt &&
+	    (intel_engine_flag(rq->engine) & rq->gem_context->ppgtt->pd_dirty_rings) &&
+	    !i915_vm_is_48bit(&rq->gem_context->ppgtt->base) &&
 	    !intel_vgpu_active(rq->i915)) {
 		ret = intel_logical_ring_emit_pdps(rq);
 		if (ret)
 			return ret;
 
-		rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
+		rq->gem_context->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
 	}
 
 	cs = intel_ring_begin(rq, 6);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 6f200a747176..53703012ec75 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -571,8 +571,8 @@ static void reset_ring(struct intel_engine_cs *engine,
 	 */
 	if (request) {
 		struct drm_i915_private *dev_priv = request->i915;
-		struct intel_context *ce = to_intel_context(request->ctx,
-							    engine);
+		struct intel_context *ce =
+			to_intel_context(request->gem_context, engine);
 		struct i915_hw_ppgtt *ppgtt;
 
 		if (ce->state) {
@@ -584,7 +584,7 @@ static void reset_ring(struct intel_engine_cs *engine,
 				   CCID_EN);
 		}
 
-		ppgtt = request->ctx->ppgtt ?: engine->i915->mm.aliasing_ppgtt;
+		ppgtt = request->gem_context->ppgtt ?: engine->i915->mm.aliasing_ppgtt;
 		if (ppgtt) {
 			u32 pd_offset = ppgtt->pd.base.ggtt_offset << 10;
 
@@ -1458,7 +1458,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 
 	*cs++ = MI_NOOP;
 	*cs++ = MI_SET_CONTEXT;
-	*cs++ = i915_ggtt_offset(to_intel_context(rq->ctx, engine)->state) | flags;
+	*cs++ = i915_ggtt_offset(to_intel_context(rq->gem_context, engine)->state) | flags;
 	/*
 	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
 	 * WaMiSetContext_Hang:snb,ivb,vlv
@@ -1526,7 +1526,7 @@ static int remap_l3(struct i915_request *rq, int slice)
 static int switch_context(struct i915_request *rq)
 {
 	struct intel_engine_cs *engine = rq->engine;
-	struct i915_gem_context *to_ctx = rq->ctx;
+	struct i915_gem_context *to_ctx = rq->gem_context;
 	struct i915_hw_ppgtt *to_mm =
 		to_ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
 	struct i915_gem_context *from_ctx = engine->legacy_active_context;
@@ -1597,7 +1597,7 @@ static int ring_request_alloc(struct i915_request *request)
 {
 	int ret;
 
-	GEM_BUG_ON(!to_intel_context(request->ctx, request->engine)->pin_count);
+	GEM_BUG_ON(!to_intel_context(request->gem_context, request->engine)->pin_count);
 
 	/* Flush enough space to reduce the likelihood of waiting after
 	 * we start building the request - in which case we will just
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 438e0b045a2c..2c4e77c050dc 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -105,7 +105,10 @@ static int emit_recurse_batch(struct hang *h,
 			      struct i915_request *rq)
 {
 	struct drm_i915_private *i915 = h->i915;
-	struct i915_address_space *vm = rq->ctx->ppgtt ? &rq->ctx->ppgtt->base : &i915->ggtt.base;
+	struct i915_address_space *vm =
+		rq->gem_context->ppgtt ?
+		&rq->gem_context->ppgtt->base :
+		&i915->ggtt.base;
 	struct i915_vma *hws, *vma;
 	unsigned int flags;
 	u32 *batch;
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 1b8a07125150..68cb9126b3e1 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -83,7 +83,7 @@ static int emit_recurse_batch(struct spinner *spin,
 			      struct i915_request *rq,
 			      u32 arbitration_command)
 {
-	struct i915_address_space *vm = &rq->ctx->ppgtt->base;
+	struct i915_address_space *vm = &rq->gem_context->ppgtt->base;
 	struct i915_vma *hws, *vma;
 	u32 *batch;
 	int err;
-- 
2.17.0

* [PATCH 002/262] drm/i915: Move fiddling with engine->last_retired_context
From: Chris Wilson @ 2018-05-17  6:03 UTC
  To: intel-gfx

Move the knowledge about resetting the current context tracking on the
engine from inside i915_gem_context.c into intel_engine_cs.c.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
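The new intel_engine_lost_context() below leans on i915's fetch_and_zero()
helper to take-and-clear the last_retired_context pointer. As a reading aid,
here is a stand-alone model of that helper (a GCC statement expression, as
defined in the driver's utility header); the surrounding structs are
hypothetical stand-ins, and note the helper is not atomic on its own, which
is why the function asserts struct_mutex is held:

#include <stdio.h>

/*
 * Model of i915's fetch_and_zero(): read the pointer, clear it, hand
 * back the old value. Callers provide their own locking; in the diff
 * below that is struct_mutex, checked via lockdep_assert_held().
 */
#define fetch_and_zero(ptr) ({			\
	typeof(*(ptr)) __T = *(ptr);		\
	*(ptr) = (typeof(*(ptr)))0;		\
	__T;					\
})

struct ctx { const char *name; };

int main(void)
{
	struct ctx kctx = { "kernel" };
	struct ctx *last_retired = &kctx;

	/* Take ownership of the old pointer and reset the tracking. */
	struct ctx *old = fetch_and_zero(&last_retired);

	printf("old=%s now=%p\n", old->name, (void *)last_retired);
	return 0;
}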
 drivers/gpu/drm/i915/i915_gem_context.c | 12 ++----------
 drivers/gpu/drm/i915/intel_engine_cs.c  | 23 +++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 +
 3 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 4bf18b5c6f1d..9e70f4dfa703 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -514,16 +514,8 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
-	for_each_engine(engine, dev_priv, id) {
-		engine->legacy_active_context = NULL;
-		engine->legacy_active_ppgtt = NULL;
-
-		if (!engine->last_retired_context)
-			continue;
-
-		intel_context_unpin(engine->last_retired_context, engine);
-		engine->last_retired_context = NULL;
-	}
+	for_each_engine(engine, dev_priv, id)
+		intel_engine_lost_context(engine);
 }
 
 void i915_gem_contexts_fini(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index faaaf2638bb8..f510fa196e17 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1096,6 +1096,29 @@ void intel_engines_unpark(struct drm_i915_private *i915)
 	}
 }
 
+/**
+ * intel_engine_lost_context: called when the GPU is reset into unknown state
+ * @engine: the engine
+ *
+ * We have either reset the GPU or otherwise about to lose state tracking of
+ * the current GPU logical state (e.g. suspend). On next use, it is therefore
+ * imperative that we make no presumptions about the current state and load
+ * from scratch.
+ */
+void intel_engine_lost_context(struct intel_engine_cs *engine)
+{
+	struct i915_gem_context *ctx;
+
+	lockdep_assert_held(&engine->i915->drm.struct_mutex);
+
+	engine->legacy_active_context = NULL;
+	engine->legacy_active_ppgtt = NULL;
+
+	ctx = fetch_and_zero(&engine->last_retired_context);
+	if (ctx)
+		intel_context_unpin(ctx, engine);
+}
+
 bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
 {
 	switch (INTEL_GEN(engine->i915)) {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 61f385a92484..2b16185e36c4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -1053,6 +1053,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 
 bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine);
+void intel_engine_lost_context(struct intel_engine_cs *engine);
 
 void intel_engines_park(struct drm_i915_private *i915);
 void intel_engines_unpark(struct drm_i915_private *i915);
-- 
2.17.0

* [PATCH 003/262] drm/i915: Store a pointer to intel_context in i915_request
From: Chris Wilson @ 2018-05-17  6:03 UTC
  To: intel-gfx

To ease the frequent and ugly pointer dance of
&request->gem_context->engine[request->engine->id] during request
submission, store that pointer as request->hw_context. One major
advantage that we will exploit later is that this decouples the logical
context state from the engine itself.

v2: Set mock_context->ops so we don't crash and burn in selftests.
    Cleanups from Tvrtko.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
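To make the "pointer dance" concrete before the diff, here is a reduced,
compilable sketch of the before/after access pattern. The structs are
illustrative stand-ins (the real ones carry far more state), and the engine
count is an arbitrary placeholder:

#include <stdio.h>

#define I915_NUM_ENGINES 4	/* placeholder, not the real value */

/* Heavily reduced models of the structures involved. */
struct intel_context { int pin_count; };

struct i915_gem_context {
	struct intel_context __engine[I915_NUM_ENGINES];
};

struct intel_engine_cs { int id; };

struct i915_request {
	struct i915_gem_context *gem_context;
	struct intel_engine_cs *engine;
	struct intel_context *hw_context;
};

int main(void)
{
	struct i915_gem_context gctx = { 0 };
	struct intel_engine_cs engine = { .id = 2 };
	struct i915_request rq = { .gem_context = &gctx, .engine = &engine };

	/* Before: the dance, repeated at every submission site. */
	struct intel_context *ce = &rq.gem_context->__engine[rq.engine->id];

	/* After: resolved once when the request is constructed, then cached. */
	rq.hw_context = ce;

	printf("same context: %d\n",
	       rq.hw_context == &rq.gem_context->__engine[rq.engine->id]);
	return 0;
}

Caching the pointer at request construction is what later lets the logical
context state be decoupled from the engine, as the commit message notes.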
 drivers/gpu/drm/i915/gvt/mmio_context.c       |   6 +-
 drivers/gpu/drm/i915/gvt/mmio_context.h       |   2 +-
 drivers/gpu/drm/i915/gvt/scheduler.c          | 141 +++++++-----------
 drivers/gpu/drm/i915/gvt/scheduler.h          |   1 -
 drivers/gpu/drm/i915/i915_drv.h               |   1 +
 drivers/gpu/drm/i915/i915_gem.c               |  12 +-
 drivers/gpu/drm/i915/i915_gem_context.c       |  17 ++-
 drivers/gpu/drm/i915/i915_gem_context.h       |  21 ++-
 drivers/gpu/drm/i915/i915_gpu_error.c         |   3 +-
 drivers/gpu/drm/i915/i915_perf.c              |  25 ++--
 drivers/gpu/drm/i915/i915_request.c           |  34 ++---
 drivers/gpu/drm/i915/i915_request.h           |   1 +
 drivers/gpu/drm/i915/intel_engine_cs.c        |  54 ++++---
 drivers/gpu/drm/i915/intel_guc_submission.c   |  10 +-
 drivers/gpu/drm/i915/intel_lrc.c              | 125 +++++++++-------
 drivers/gpu/drm/i915/intel_lrc.h              |   7 -
 drivers/gpu/drm/i915/intel_ringbuffer.c       | 100 ++++++++-----
 drivers/gpu/drm/i915/intel_ringbuffer.h       |   9 +-
 drivers/gpu/drm/i915/selftests/mock_context.c |   7 +
 drivers/gpu/drm/i915/selftests/mock_engine.c  |  41 +++--
 20 files changed, 321 insertions(+), 296 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 0f949554d118..708170e61625 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -446,9 +446,9 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next,
 
 #define CTX_CONTEXT_CONTROL_VAL	0x03
 
-bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id)
+bool is_inhibit_context(struct intel_context *ce)
 {
-	u32 *reg_state = ctx->__engine[ring_id].lrc_reg_state;
+	const u32 *reg_state = ce->lrc_reg_state;
 	u32 inhibit_mask =
 		_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
 
@@ -501,7 +501,7 @@ static void switch_mmio(struct intel_vgpu *pre,
 			 * itself.
 			 */
 			if (mmio->in_context &&
-			    !is_inhibit_context(s->shadow_ctx, ring_id))
+			    !is_inhibit_context(&s->shadow_ctx->__engine[ring_id]))
 				continue;
 
 			if (mmio->mask)
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.h b/drivers/gpu/drm/i915/gvt/mmio_context.h
index 0439eb8057a8..5c3b9ff9f96a 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.h
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.h
@@ -49,7 +49,7 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
 
 void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt);
 
-bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id);
+bool is_inhibit_context(struct intel_context *ce);
 
 int intel_vgpu_restore_inhibit_context(struct intel_vgpu *vgpu,
 				       struct i915_request *req);
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 17f9f8d7e148..e1760030dda1 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -54,11 +54,8 @@ static void set_context_pdp_root_pointer(
 
 static void update_shadow_pdps(struct intel_vgpu_workload *workload)
 {
-	struct intel_vgpu *vgpu = workload->vgpu;
-	int ring_id = workload->ring_id;
-	struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx;
 	struct drm_i915_gem_object *ctx_obj =
-		shadow_ctx->__engine[ring_id].state->obj;
+		workload->req->hw_context->state->obj;
 	struct execlist_ring_context *shadow_ring_context;
 	struct page *page;
 
@@ -128,9 +125,8 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_gvt *gvt = vgpu->gvt;
 	int ring_id = workload->ring_id;
-	struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx;
 	struct drm_i915_gem_object *ctx_obj =
-		shadow_ctx->__engine[ring_id].state->obj;
+		workload->req->hw_context->state->obj;
 	struct execlist_ring_context *shadow_ring_context;
 	struct page *page;
 	void *dst;
@@ -280,10 +276,8 @@ static int shadow_context_status_change(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
-static void shadow_context_descriptor_update(struct i915_gem_context *ctx,
-		struct intel_engine_cs *engine)
+static void shadow_context_descriptor_update(struct intel_context *ce)
 {
-	struct intel_context *ce = to_intel_context(ctx, engine);
 	u64 desc = 0;
 
 	desc = ce->lrc_desc;
@@ -292,7 +286,7 @@ static void shadow_context_descriptor_update(struct i915_gem_context *ctx,
 	 * like GEN8_CTX_* cached in desc_template
 	 */
 	desc &= U64_MAX << 12;
-	desc |= ctx->desc_template & ((1ULL << 12) - 1);
+	desc |= ce->gem_context->desc_template & ((1ULL << 12) - 1);
 
 	ce->lrc_desc = desc;
 }
@@ -300,12 +294,11 @@ static void shadow_context_descriptor_update(struct i915_gem_context *ctx,
 static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
+	struct i915_request *req = workload->req;
 	void *shadow_ring_buffer_va;
 	u32 *cs;
-	struct i915_request *req = workload->req;
 
-	if (IS_KABYLAKE(req->i915) &&
-	    is_inhibit_context(req->gem_context, req->engine->id))
+	if (IS_KABYLAKE(req->i915) && is_inhibit_context(req->hw_context))
 		intel_vgpu_restore_inhibit_context(vgpu, req);
 
 	/* allocate shadow ring buffer */
@@ -353,60 +346,56 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
 	struct intel_vgpu_submission *s = &vgpu->submission;
 	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
-	int ring_id = workload->ring_id;
-	struct intel_engine_cs *engine = dev_priv->engine[ring_id];
-	struct intel_ring *ring;
+	struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id];
+	struct intel_context *ce;
 	int ret;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
-	if (workload->shadowed)
+	if (workload->req)
 		return 0;
 
+	/* pin shadow context by gvt even the shadow context will be pinned
+	 * when i915 alloc request. That is because gvt will update the guest
+	 * context from shadow context when workload is completed, and at that
+	 * moment, i915 may already unpined the shadow context to make the
+	 * shadow_ctx pages invalid. So gvt need to pin itself. After update
+	 * the guest context, gvt can unpin the shadow_ctx safely.
+	 */
+	ce = intel_context_pin(shadow_ctx, engine);
+	if (IS_ERR(ce)) {
+		gvt_vgpu_err("fail to pin shadow context\n");
+		return PTR_ERR(ce);
+	}
+
 	shadow_ctx->desc_template &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT);
 	shadow_ctx->desc_template |= workload->ctx_desc.addressing_mode <<
 				    GEN8_CTX_ADDRESSING_MODE_SHIFT;
 
-	if (!test_and_set_bit(ring_id, s->shadow_ctx_desc_updated))
-		shadow_context_descriptor_update(shadow_ctx,
-					dev_priv->engine[ring_id]);
+	if (!test_and_set_bit(workload->ring_id, s->shadow_ctx_desc_updated))
+		shadow_context_descriptor_update(ce);
 
 	ret = intel_gvt_scan_and_shadow_ringbuffer(workload);
 	if (ret)
-		goto err_scan;
+		goto err_unpin;
 
 	if ((workload->ring_id == RCS) &&
 	    (workload->wa_ctx.indirect_ctx.size != 0)) {
 		ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
 		if (ret)
-			goto err_scan;
-	}
-
-	/* pin shadow context by gvt even the shadow context will be pinned
-	 * when i915 alloc request. That is because gvt will update the guest
-	 * context from shadow context when workload is completed, and at that
-	 * moment, i915 may already unpined the shadow context to make the
-	 * shadow_ctx pages invalid. So gvt need to pin itself. After update
-	 * the guest context, gvt can unpin the shadow_ctx safely.
-	 */
-	ring = intel_context_pin(shadow_ctx, engine);
-	if (IS_ERR(ring)) {
-		ret = PTR_ERR(ring);
-		gvt_vgpu_err("fail to pin shadow context\n");
-		goto err_shadow;
+			goto err_shadow;
 	}
 
 	ret = populate_shadow_context(workload);
 	if (ret)
-		goto err_unpin;
-	workload->shadowed = true;
+		goto err_shadow;
+
 	return 0;
 
-err_unpin:
-	intel_context_unpin(shadow_ctx, engine);
 err_shadow:
 	release_shadow_wa_ctx(&workload->wa_ctx);
-err_scan:
+err_unpin:
+	intel_context_unpin(ce);
 	return ret;
 }
 
@@ -414,7 +403,6 @@ static int intel_gvt_generate_request(struct intel_vgpu_workload *workload)
 {
 	int ring_id = workload->ring_id;
 	struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
-	struct intel_engine_cs *engine = dev_priv->engine[ring_id];
 	struct i915_request *rq;
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_vgpu_submission *s = &vgpu->submission;
@@ -437,7 +425,6 @@ static int intel_gvt_generate_request(struct intel_vgpu_workload *workload)
 	return 0;
 
 err_unpin:
-	intel_context_unpin(shadow_ctx, engine);
 	release_shadow_wa_ctx(&workload->wa_ctx);
 	return ret;
 }
@@ -517,21 +504,13 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 	return ret;
 }
 
-static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
+static void update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 {
-	struct intel_vgpu_workload *workload = container_of(wa_ctx,
-					struct intel_vgpu_workload,
-					wa_ctx);
-	int ring_id = workload->ring_id;
-	struct intel_vgpu_submission *s = &workload->vgpu->submission;
-	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
-	struct drm_i915_gem_object *ctx_obj =
-		shadow_ctx->__engine[ring_id].state->obj;
-	struct execlist_ring_context *shadow_ring_context;
-	struct page *page;
-
-	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
-	shadow_ring_context = kmap_atomic(page);
+	struct intel_vgpu_workload *workload =
+		container_of(wa_ctx, struct intel_vgpu_workload, wa_ctx);
+	struct i915_request *rq = workload->req;
+	struct execlist_ring_context *shadow_ring_context =
+		(struct execlist_ring_context *)rq->hw_context->lrc_reg_state;
 
 	shadow_ring_context->bb_per_ctx_ptr.val =
 		(shadow_ring_context->bb_per_ctx_ptr.val &
@@ -539,9 +518,6 @@ static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 	shadow_ring_context->rcs_indirect_ctx.val =
 		(shadow_ring_context->rcs_indirect_ctx.val &
 		(~INDIRECT_CTX_ADDR_MASK)) | wa_ctx->indirect_ctx.shadow_gma;
-
-	kunmap_atomic(shadow_ring_context);
-	return 0;
 }
 
 static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
@@ -670,12 +646,9 @@ static int prepare_workload(struct intel_vgpu_workload *workload)
 static int dispatch_workload(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
-	struct intel_vgpu_submission *s = &vgpu->submission;
-	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	int ring_id = workload->ring_id;
-	struct intel_engine_cs *engine = dev_priv->engine[ring_id];
-	int ret = 0;
+	int ret;
 
 	gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n",
 		ring_id, workload);
@@ -687,10 +660,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 		goto out;
 
 	ret = prepare_workload(workload);
-	if (ret) {
-		intel_context_unpin(shadow_ctx, engine);
-		goto out;
-	}
 
 out:
 	if (ret)
@@ -765,27 +734,23 @@ static struct intel_vgpu_workload *pick_next_workload(
 
 static void update_guest_context(struct intel_vgpu_workload *workload)
 {
+	struct i915_request *rq = workload->req;
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_gvt *gvt = vgpu->gvt;
-	struct intel_vgpu_submission *s = &vgpu->submission;
-	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
-	int ring_id = workload->ring_id;
-	struct drm_i915_gem_object *ctx_obj =
-		shadow_ctx->__engine[ring_id].state->obj;
+	struct drm_i915_gem_object *ctx_obj = rq->hw_context->state->obj;
 	struct execlist_ring_context *shadow_ring_context;
 	struct page *page;
 	void *src;
 	unsigned long context_gpa, context_page_num;
 	int i;
 
-	gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id,
-			workload->ctx_desc.lrca);
-
-	context_page_num = gvt->dev_priv->engine[ring_id]->context_size;
+	gvt_dbg_sched("ring id %d workload lrca %x\n", rq->engine->id,
+		      workload->ctx_desc.lrca);
 
+	context_page_num = rq->engine->context_size;
 	context_page_num = context_page_num >> PAGE_SHIFT;
 
-	if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS)
+	if (IS_BROADWELL(gvt->dev_priv) && rq->engine->id == RCS)
 		context_page_num = 19;
 
 	i = 2;
@@ -858,6 +823,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
 		scheduler->current_workload[ring_id];
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct i915_request *rq;
 	int event;
 
 	mutex_lock(&gvt->lock);
@@ -866,11 +832,8 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
 	 * switch to make sure request is completed.
 	 * For the workload w/o request, directly complete the workload.
 	 */
-	if (workload->req) {
-		struct drm_i915_private *dev_priv =
-			workload->vgpu->gvt->dev_priv;
-		struct intel_engine_cs *engine =
-			dev_priv->engine[workload->ring_id];
+	rq = fetch_and_zero(&workload->req);
+	if (rq) {
 		wait_event(workload->shadow_ctx_status_wq,
 			   !atomic_read(&workload->shadow_ctx_active));
 
@@ -886,8 +849,6 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
 				workload->status = 0;
 		}
 
-		i915_request_put(fetch_and_zero(&workload->req));
-
 		if (!workload->status && !(vgpu->resetting_eng &
 					   ENGINE_MASK(ring_id))) {
 			update_guest_context(workload);
@@ -896,10 +857,13 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
 					 INTEL_GVT_EVENT_MAX)
 				intel_vgpu_trigger_virtual_event(vgpu, event);
 		}
-		mutex_lock(&dev_priv->drm.struct_mutex);
+
 		/* unpin shadow ctx as the shadow_ctx update is done */
-		intel_context_unpin(s->shadow_ctx, engine);
-		mutex_unlock(&dev_priv->drm.struct_mutex);
+		mutex_lock(&rq->i915->drm.struct_mutex);
+		intel_context_unpin(rq->hw_context);
+		mutex_unlock(&rq->i915->drm.struct_mutex);
+
+		i915_request_put(rq);
 	}
 
 	gvt_dbg_sched("ring id %d complete workload %p status %d\n",
@@ -1270,7 +1234,6 @@ alloc_workload(struct intel_vgpu *vgpu)
 	atomic_set(&workload->shadow_ctx_active, 0);
 
 	workload->status = -EINPROGRESS;
-	workload->shadowed = false;
 	workload->vgpu = vgpu;
 
 	return workload;
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h
index 6c644782193e..21eddab4a9cd 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.h
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -83,7 +83,6 @@ struct intel_vgpu_workload {
 	struct i915_request *req;
 	/* if this workload has been dispatched to i915? */
 	bool dispatched;
-	bool shadowed;
 	int status;
 
 	struct intel_vgpu_mm *shadow_mm;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 34c125e2d90c..e33c380b43e3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1950,6 +1950,7 @@ struct drm_i915_private {
 			 */
 			struct i915_perf_stream *exclusive_stream;
 
+			struct intel_context *pinned_ctx;
 			u32 specific_ctx_id;
 
 			struct hrtimer poll_check_timer;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a20f8db5729d..03874b50ada9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3181,14 +3181,14 @@ void i915_gem_reset(struct drm_i915_private *dev_priv,
 	i915_retire_requests(dev_priv);
 
 	for_each_engine(engine, dev_priv, id) {
-		struct i915_gem_context *ctx;
+		struct intel_context *ce;
 
 		i915_gem_reset_engine(engine,
 				      engine->hangcheck.active_request,
 				      stalled_mask & ENGINE_MASK(id));
-		ctx = fetch_and_zero(&engine->last_retired_context);
-		if (ctx)
-			intel_context_unpin(ctx, engine);
+		ce = fetch_and_zero(&engine->last_retired_context);
+		if (ce)
+			intel_context_unpin(ce);
 
 		/*
 		 * Ostensibily, we always want a context loaded for powersaving,
@@ -4897,13 +4897,13 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
 
 static void assert_kernel_context_is_current(struct drm_i915_private *i915)
 {
-	struct i915_gem_context *kernel_context = i915->kernel_context;
+	struct i915_gem_context *kctx = i915->kernel_context;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
 	for_each_engine(engine, i915, id) {
 		GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request));
-		GEM_BUG_ON(engine->last_retired_context != kernel_context);
+		GEM_BUG_ON(engine->last_retired_context->gem_context != kctx);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 9e70f4dfa703..b69b18ef8120 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -127,14 +127,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) {
 		struct intel_context *ce = &ctx->__engine[n];
 
-		if (!ce->state)
-			continue;
-
-		WARN_ON(ce->pin_count);
-		if (ce->ring)
-			intel_ring_free(ce->ring);
-
-		__i915_gem_object_release_unless_active(ce->state->obj);
+		if (ce->ops)
+			ce->ops->destroy(ce);
 	}
 
 	kfree(ctx->name);
@@ -266,6 +260,7 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 		    struct drm_i915_file_private *file_priv)
 {
 	struct i915_gem_context *ctx;
+	unsigned int n;
 	int ret;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -283,6 +278,12 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	ctx->i915 = dev_priv;
 	ctx->sched.priority = I915_PRIORITY_NORMAL;
 
+	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) {
+		struct intel_context *ce = &ctx->__engine[n];
+
+		ce->gem_context = ctx;
+	}
+
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
 	INIT_LIST_HEAD(&ctx->handles_list);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index ace3b129c189..749a4ff566f5 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -45,6 +45,11 @@ struct intel_ring;
 
 #define DEFAULT_CONTEXT_HANDLE 0
 
+struct intel_context_ops {
+	void (*unpin)(struct intel_context *ce);
+	void (*destroy)(struct intel_context *ce);
+};
+
 /**
  * struct i915_gem_context - client state
  *
@@ -144,11 +149,14 @@ struct i915_gem_context {
 
 	/** engine: per-engine logical HW state */
 	struct intel_context {
+		struct i915_gem_context *gem_context;
 		struct i915_vma *state;
 		struct intel_ring *ring;
 		u32 *lrc_reg_state;
 		u64 lrc_desc;
 		int pin_count;
+
+		const struct intel_context_ops *ops;
 	} __engine[I915_NUM_ENGINES];
 
 	/** ring_size: size for allocating the per-engine ring buffer */
@@ -263,25 +271,22 @@ to_intel_context(struct i915_gem_context *ctx,
 	return &ctx->__engine[engine->id];
 }
 
-static inline struct intel_ring *
+static inline struct intel_context *
 intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 {
 	return engine->context_pin(engine, ctx);
 }
 
-static inline void __intel_context_pin(struct i915_gem_context *ctx,
-				       const struct intel_engine_cs *engine)
+static inline void __intel_context_pin(struct intel_context *ce)
 {
-	struct intel_context *ce = to_intel_context(ctx, engine);
-
 	GEM_BUG_ON(!ce->pin_count);
 	ce->pin_count++;
 }
 
-static inline void intel_context_unpin(struct i915_gem_context *ctx,
-				       struct intel_engine_cs *engine)
+static inline void intel_context_unpin(struct intel_context *ce)
 {
-	engine->context_unpin(engine, ctx);
+	GEM_BUG_ON(!ce->ops);
+	ce->ops->unpin(ce);
 }
 
 /* i915_gem_context.c */
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 7cc7d3bc731b..145823f0b48e 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1485,8 +1485,7 @@ static void gem_record_rings(struct i915_gpu_state *error)
 
 			ee->ctx =
 				i915_error_object_create(i915,
-							 to_intel_context(ctx,
-									  engine)->state);
+							 request->hw_context->state);
 
 			error->simulated |=
 				i915_gem_context_no_error_capture(ctx);
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 019bd2d073ad..4f0eb84b3c00 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1221,7 +1221,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 		dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id;
 	} else {
 		struct intel_engine_cs *engine = dev_priv->engine[RCS];
-		struct intel_ring *ring;
+		struct intel_context *ce;
 		int ret;
 
 		ret = i915_mutex_lock_interruptible(&dev_priv->drm);
@@ -1234,19 +1234,19 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 		 *
 		 * NB: implied RCS engine...
 		 */
-		ring = intel_context_pin(stream->ctx, engine);
+		ce = intel_context_pin(stream->ctx, engine);
 		mutex_unlock(&dev_priv->drm.struct_mutex);
-		if (IS_ERR(ring))
-			return PTR_ERR(ring);
+		if (IS_ERR(ce))
+			return PTR_ERR(ce);
 
+		dev_priv->perf.oa.pinned_ctx = ce;
 
 		/*
 		 * Explicitly track the ID (instead of calling
 		 * i915_ggtt_offset() on the fly) considering the difference
 		 * with gen8+ and execlists
 		 */
-		dev_priv->perf.oa.specific_ctx_id =
-			i915_ggtt_offset(to_intel_context(stream->ctx, engine)->state);
+		dev_priv->perf.oa.specific_ctx_id = i915_ggtt_offset(ce->state);
 	}
 
 	return 0;
@@ -1262,17 +1262,14 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct intel_context *ce;
 
-	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
-		dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
-	} else {
-		struct intel_engine_cs *engine = dev_priv->engine[RCS];
+	dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
 
+	ce = fetch_and_zero(&dev_priv->perf.oa.pinned_ctx);
+	if (ce) {
 		mutex_lock(&dev_priv->drm.struct_mutex);
-
-		dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
-		intel_context_unpin(stream->ctx, engine);
-
+		intel_context_unpin(ce);
 		mutex_unlock(&dev_priv->drm.struct_mutex);
 	}
 }
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index fe8810a6a339..fc499bcbd105 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -383,8 +383,8 @@ static void __retire_engine_request(struct intel_engine_cs *engine,
 	 * the subsequent request.
 	 */
 	if (engine->last_retired_context)
-		intel_context_unpin(engine->last_retired_context, engine);
-	engine->last_retired_context = rq->gem_context;
+		intel_context_unpin(engine->last_retired_context);
+	engine->last_retired_context = rq->hw_context;
 }
 
 static void __retire_engine_upto(struct intel_engine_cs *engine,
@@ -456,7 +456,7 @@ static void i915_request_retire(struct i915_request *request)
 
 	/* Retirement decays the ban score as it is a sign of ctx progress */
 	atomic_dec_if_positive(&request->gem_context->ban_score);
-	intel_context_unpin(request->gem_context, request->engine);
+	intel_context_unpin(request->hw_context);
 
 	__retire_engine_upto(request->engine, request);
 
@@ -657,7 +657,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 {
 	struct drm_i915_private *i915 = engine->i915;
 	struct i915_request *rq;
-	struct intel_ring *ring;
+	struct intel_context *ce;
 	int ret;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
@@ -681,22 +681,21 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 * GGTT space, so do this first before we reserve a seqno for
 	 * ourselves.
 	 */
-	ring = intel_context_pin(ctx, engine);
-	if (IS_ERR(ring))
-		return ERR_CAST(ring);
-	GEM_BUG_ON(!ring);
+	ce = intel_context_pin(ctx, engine);
+	if (IS_ERR(ce))
+		return ERR_CAST(ce);
 
 	ret = reserve_gt(i915);
 	if (ret)
 		goto err_unpin;
 
-	ret = intel_ring_wait_for_space(ring, MIN_SPACE_FOR_ADD_REQUEST);
+	ret = intel_ring_wait_for_space(ce->ring, MIN_SPACE_FOR_ADD_REQUEST);
 	if (ret)
 		goto err_unreserve;
 
 	/* Move our oldest request to the slab-cache (if not in use!) */
-	rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link);
-	if (!list_is_last(&rq->ring_link, &ring->request_list) &&
+	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
+	if (!list_is_last(&rq->ring_link, &ce->ring->request_list) &&
 	    i915_request_completed(rq))
 		i915_request_retire(rq);
 
@@ -761,8 +760,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	rq->i915 = i915;
 	rq->engine = engine;
 	rq->gem_context = ctx;
-	rq->ring = ring;
-	rq->timeline = ring->timeline;
+	rq->hw_context = ce;
+	rq->ring = ce->ring;
+	rq->timeline = ce->ring->timeline;
 	GEM_BUG_ON(rq->timeline == &engine->timeline);
 
 	spin_lock_init(&rq->lock);
@@ -814,14 +814,14 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 		goto err_unwind;
 
 	/* Keep a second pin for the dual retirement along engine and ring */
-	__intel_context_pin(rq->gem_context, engine);
+	__intel_context_pin(ce);
 
 	/* Check that we didn't interrupt ourselves with a new request */
 	GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno);
 	return rq;
 
 err_unwind:
-	rq->ring->emit = rq->head;
+	ce->ring->emit = rq->head;
 
 	/* Make sure we didn't add ourselves to external state before freeing */
 	GEM_BUG_ON(!list_empty(&rq->active_list));
@@ -832,7 +832,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 err_unreserve:
 	unreserve_gt(i915);
 err_unpin:
-	intel_context_unpin(ctx, engine);
+	intel_context_unpin(ce);
 	return ERR_PTR(ret);
 }
 
@@ -1018,8 +1018,8 @@ i915_request_await_object(struct i915_request *to,
 void __i915_request_add(struct i915_request *request, bool flush_caches)
 {
 	struct intel_engine_cs *engine = request->engine;
-	struct intel_ring *ring = request->ring;
 	struct i915_timeline *timeline = request->timeline;
+	struct intel_ring *ring = request->ring;
 	struct i915_request *prev;
 	u32 *cs;
 	int err;
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index dddecd9ffd0c..1bbbb7a9fa03 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -95,6 +95,7 @@ struct i915_request {
 	 */
 	struct i915_gem_context *gem_context;
 	struct intel_engine_cs *engine;
+	struct intel_context *hw_context;
 	struct intel_ring *ring;
 	struct i915_timeline *timeline;
 	struct intel_signal_node signaling;
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index f510fa196e17..828b7377d0d0 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -645,6 +645,12 @@ static int init_phys_status_page(struct intel_engine_cs *engine)
 	return 0;
 }
 
+static void __intel_context_unpin(struct i915_gem_context *ctx,
+				  struct intel_engine_cs *engine)
+{
+	intel_context_unpin(to_intel_context(ctx, engine));
+}
+
 /**
  * intel_engines_init_common - initialize cengine state which might require hw access
  * @engine: Engine to initialize.
@@ -658,7 +664,8 @@ static int init_phys_status_page(struct intel_engine_cs *engine)
  */
 int intel_engine_init_common(struct intel_engine_cs *engine)
 {
-	struct intel_ring *ring;
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_context *ce;
 	int ret;
 
 	engine->set_default_submission(engine);
@@ -670,18 +677,18 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 	 * be available. To avoid this we always pin the default
 	 * context.
 	 */
-	ring = intel_context_pin(engine->i915->kernel_context, engine);
-	if (IS_ERR(ring))
-		return PTR_ERR(ring);
+	ce = intel_context_pin(i915->kernel_context, engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
 
 	/*
 	 * Similarly the preempt context must always be available so that
 	 * we can interrupt the engine at any time.
 	 */
-	if (engine->i915->preempt_context) {
-		ring = intel_context_pin(engine->i915->preempt_context, engine);
-		if (IS_ERR(ring)) {
-			ret = PTR_ERR(ring);
+	if (i915->preempt_context) {
+		ce = intel_context_pin(i915->preempt_context, engine);
+		if (IS_ERR(ce)) {
+			ret = PTR_ERR(ce);
 			goto err_unpin_kernel;
 		}
 	}
@@ -690,7 +697,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 	if (ret)
 		goto err_unpin_preempt;
 
-	if (HWS_NEEDS_PHYSICAL(engine->i915))
+	if (HWS_NEEDS_PHYSICAL(i915))
 		ret = init_phys_status_page(engine);
 	else
 		ret = init_status_page(engine);
@@ -702,10 +709,11 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 err_breadcrumbs:
 	intel_engine_fini_breadcrumbs(engine);
 err_unpin_preempt:
-	if (engine->i915->preempt_context)
-		intel_context_unpin(engine->i915->preempt_context, engine);
+	if (i915->preempt_context)
+		__intel_context_unpin(i915->preempt_context, engine);
+
 err_unpin_kernel:
-	intel_context_unpin(engine->i915->kernel_context, engine);
+	__intel_context_unpin(i915->kernel_context, engine);
 	return ret;
 }
 
@@ -718,6 +726,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
  */
 void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 {
+	struct drm_i915_private *i915 = engine->i915;
+
 	intel_engine_cleanup_scratch(engine);
 
 	if (HWS_NEEDS_PHYSICAL(engine->i915))
@@ -732,9 +742,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 	if (engine->default_state)
 		i915_gem_object_put(engine->default_state);
 
-	if (engine->i915->preempt_context)
-		intel_context_unpin(engine->i915->preempt_context, engine);
-	intel_context_unpin(engine->i915->kernel_context, engine);
+	if (i915->preempt_context)
+		__intel_context_unpin(i915->preempt_context, engine);
+	__intel_context_unpin(i915->kernel_context, engine);
 
 	i915_timeline_fini(&engine->timeline);
 }
@@ -1007,8 +1017,8 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
  */
 bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
 {
-	const struct i915_gem_context * const kernel_context =
-		engine->i915->kernel_context;
+	const struct intel_context *kernel_context =
+		to_intel_context(engine->i915->kernel_context, engine);
 	struct i915_request *rq;
 
 	lockdep_assert_held(&engine->i915->drm.struct_mutex);
@@ -1020,7 +1030,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
 	 */
 	rq = __i915_gem_active_peek(&engine->timeline.last_request);
 	if (rq)
-		return rq->gem_context == kernel_context;
+		return rq->hw_context == kernel_context;
 	else
 		return engine->last_retired_context == kernel_context;
 }
@@ -1107,16 +1117,16 @@ void intel_engines_unpark(struct drm_i915_private *i915)
  */
 void intel_engine_lost_context(struct intel_engine_cs *engine)
 {
-	struct i915_gem_context *ctx;
+	struct intel_context *ce;
 
 	lockdep_assert_held(&engine->i915->drm.struct_mutex);
 
 	engine->legacy_active_context = NULL;
 	engine->legacy_active_ppgtt = NULL;
 
-	ctx = fetch_and_zero(&engine->last_retired_context);
-	if (ctx)
-		intel_context_unpin(ctx, engine);
+	ce = fetch_and_zero(&engine->last_retired_context);
+	if (ce)
+		intel_context_unpin(ce);
 }
 
 bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index a432a193f3c4..133367a17863 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -513,9 +513,7 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 {
 	struct intel_guc_client *client = guc->execbuf_client;
 	struct intel_engine_cs *engine = rq->engine;
-	u32 ctx_desc =
-		lower_32_bits(intel_lr_context_descriptor(rq->gem_context,
-							  engine));
+	u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc);
 	u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
 
 	spin_lock(&client->wq_lock);
@@ -553,8 +551,8 @@ static void inject_preempt_context(struct work_struct *work)
 					     preempt_work[engine->id]);
 	struct intel_guc_client *client = guc->preempt_client;
 	struct guc_stage_desc *stage_desc = __get_stage_desc(client);
-	u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(client->owner,
-								 engine));
+	u32 ctx_desc = lower_32_bits(to_intel_context(client->owner,
+						      engine)->lrc_desc);
 	u32 data[7];
 
 	/*
@@ -726,7 +724,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 		struct i915_request *rq, *rn;
 
 		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
-			if (last && rq->gem_context != last->gem_context) {
+			if (last && rq->hw_context != last->hw_context) {
 				if (port == last_port) {
 					__list_del_many(&p->requests,
 							&rq->sched.link);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3173dc58a3b3..960948617748 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -164,7 +164,8 @@
 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
 
 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
-					    struct intel_engine_cs *engine);
+					    struct intel_engine_cs *engine,
+					    struct intel_context *ce);
 static void execlists_init_reg_state(u32 *reg_state,
 				     struct i915_gem_context *ctx,
 				     struct intel_engine_cs *engine,
@@ -189,12 +190,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 		!i915_request_completed(last));
 }
 
-/**
- * intel_lr_context_descriptor_update() - calculate & cache the descriptor
- * 					  descriptor for a pinned context
- * @ctx: Context to work on
- * @engine: Engine the descriptor will be used with
- *
+/*
  * The context descriptor encodes various attributes of a context,
  * including its GTT address and some flags. Because it's fairly
  * expensive to calculate, we'll just do it once and cache the result,
@@ -222,9 +218,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
  */
 static void
 intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
-				   struct intel_engine_cs *engine)
+				   struct intel_engine_cs *engine,
+				   struct intel_context *ce)
 {
-	struct intel_context *ce = to_intel_context(ctx, engine);
 	u64 desc;
 
 	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
@@ -418,8 +414,7 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
 
 static u64 execlists_update_context(struct i915_request *rq)
 {
-	struct intel_context *ce =
-		to_intel_context(rq->gem_context, rq->engine);
+	struct intel_context *ce = rq->hw_context;
 	struct i915_hw_ppgtt *ppgtt =
 		rq->gem_context->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
 	u32 *reg_state = ce->lrc_reg_state;
@@ -496,14 +491,14 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
 }
 
-static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
+static bool ctx_single_port_submission(const struct intel_context *ce)
 {
 	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
-		i915_gem_context_force_single_submission(ctx));
+		i915_gem_context_force_single_submission(ce->gem_context));
 }
 
-static bool can_merge_ctx(const struct i915_gem_context *prev,
-			  const struct i915_gem_context *next)
+static bool can_merge_ctx(const struct intel_context *prev,
+			  const struct intel_context *next)
 {
 	if (prev != next)
 		return false;
@@ -682,8 +677,8 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 			 * second request, and so we never need to tell the
 			 * hardware about the first.
 			 */
-			if (last && !can_merge_ctx(rq->gem_context,
-						   last->gem_context)) {
+			if (last &&
+			    !can_merge_ctx(rq->hw_context, last->hw_context)) {
 				/*
 				 * If we are on the second port and cannot
 				 * combine this request with the last, then we
@@ -702,14 +697,14 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 				 * the same context (even though a different
 				 * request) to the second port.
 				 */
-				if (ctx_single_port_submission(last->gem_context) ||
-				    ctx_single_port_submission(rq->gem_context)) {
+				if (ctx_single_port_submission(last->hw_context) ||
+				    ctx_single_port_submission(rq->hw_context)) {
 					__list_del_many(&p->requests,
 							&rq->sched.link);
 					goto done;
 				}
 
-				GEM_BUG_ON(last->gem_context == rq->gem_context);
+				GEM_BUG_ON(last->hw_context == rq->hw_context);
 
 				if (submit)
 					port_assign(port, last);
@@ -1340,6 +1335,37 @@ static void execlists_schedule(struct i915_request *request,
 	spin_unlock_irq(&engine->timeline.lock);
 }
 
+static void execlists_context_destroy(struct intel_context *ce)
+{
+	GEM_BUG_ON(!ce->state);
+	GEM_BUG_ON(ce->pin_count);
+
+	intel_ring_free(ce->ring);
+	__i915_gem_object_release_unless_active(ce->state->obj);
+}
+
+static void __execlists_context_unpin(struct intel_context *ce)
+{
+	intel_ring_unpin(ce->ring);
+
+	ce->state->obj->pin_global--;
+	i915_gem_object_unpin_map(ce->state->obj);
+	i915_vma_unpin(ce->state);
+
+	i915_gem_context_put(ce->gem_context);
+}
+
+static void execlists_context_unpin(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->gem_context->i915->drm.struct_mutex);
+	GEM_BUG_ON(ce->pin_count == 0);
+
+	if (--ce->pin_count)
+		return;
+
+	__execlists_context_unpin(ce);
+}
+
 static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
 {
 	unsigned int flags;
@@ -1363,21 +1389,15 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
 	return i915_vma_pin(vma, 0, GEN8_LR_CONTEXT_ALIGN, flags);
 }
 
-static struct intel_ring *
-execlists_context_pin(struct intel_engine_cs *engine,
-		      struct i915_gem_context *ctx)
+static struct intel_context *
+__execlists_context_pin(struct intel_engine_cs *engine,
+			struct i915_gem_context *ctx,
+			struct intel_context *ce)
 {
-	struct intel_context *ce = to_intel_context(ctx, engine);
 	void *vaddr;
 	int ret;
 
-	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
-
-	if (likely(ce->pin_count++))
-		goto out;
-	GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
-
-	ret = execlists_context_deferred_alloc(ctx, engine);
+	ret = execlists_context_deferred_alloc(ctx, engine, ce);
 	if (ret)
 		goto err;
 	GEM_BUG_ON(!ce->state);
@@ -1396,7 +1416,7 @@ execlists_context_pin(struct intel_engine_cs *engine,
 	if (ret)
 		goto unpin_map;
 
-	intel_lr_context_descriptor_update(ctx, engine);
+	intel_lr_context_descriptor_update(ctx, engine, ce);
 
 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
 	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
@@ -1405,8 +1425,7 @@ execlists_context_pin(struct intel_engine_cs *engine,
 
 	ce->state->obj->pin_global++;
 	i915_gem_context_get(ctx);
-out:
-	return ce->ring;
+	return ce;
 
 unpin_map:
 	i915_gem_object_unpin_map(ce->state->obj);
@@ -1417,33 +1436,33 @@ execlists_context_pin(struct intel_engine_cs *engine,
 	return ERR_PTR(ret);
 }
 
-static void execlists_context_unpin(struct intel_engine_cs *engine,
-				    struct i915_gem_context *ctx)
+static const struct intel_context_ops execlists_context_ops = {
+	.unpin = execlists_context_unpin,
+	.destroy = execlists_context_destroy,
+};
+
+static struct intel_context *
+execlists_context_pin(struct intel_engine_cs *engine,
+		      struct i915_gem_context *ctx)
 {
 	struct intel_context *ce = to_intel_context(ctx, engine);
 
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
-	GEM_BUG_ON(ce->pin_count == 0);
 
-	if (--ce->pin_count)
-		return;
-
-	intel_ring_unpin(ce->ring);
+	if (likely(ce->pin_count++))
+		return ce;
+	GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
 
-	ce->state->obj->pin_global--;
-	i915_gem_object_unpin_map(ce->state->obj);
-	i915_vma_unpin(ce->state);
+	ce->ops = &execlists_context_ops;
 
-	i915_gem_context_put(ctx);
+	return __execlists_context_pin(engine, ctx, ce);
 }
 
 static int execlists_request_alloc(struct i915_request *request)
 {
-	struct intel_context *ce =
-		to_intel_context(request->gem_context, request->engine);
 	int ret;
 
-	GEM_BUG_ON(!ce->pin_count);
+	GEM_BUG_ON(!request->hw_context->pin_count);
 
 	/* Flush enough space to reduce the likelihood of waiting after
 	 * we start building the request - in which case we will just
@@ -1957,7 +1976,7 @@ static void execlists_reset(struct intel_engine_cs *engine,
 	 * future request will be after userspace has had the opportunity
 	 * to recreate its own state.
 	 */
-	regs = to_intel_context(request->gem_context, engine)->lrc_reg_state;
+	regs = request->hw_context->lrc_reg_state;
 	if (engine->default_state) {
 		void *defaults;
 
@@ -2328,8 +2347,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 	engine->reset.finish = execlists_reset_finish;
 
 	engine->context_pin = execlists_context_pin;
-	engine->context_unpin = execlists_context_unpin;
-
 	engine->request_alloc = execlists_request_alloc;
 
 	engine->emit_flush = gen8_emit_flush;
@@ -2564,7 +2581,7 @@ static void execlists_init_reg_state(u32 *regs,
 	struct drm_i915_private *dev_priv = engine->i915;
 	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: dev_priv->mm.aliasing_ppgtt;
 	u32 base = engine->mmio_base;
-	bool rcs = engine->id == RCS;
+	bool rcs = engine->class == RENDER_CLASS;
 
 	/* A context is actually a big batch buffer with several
 	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
@@ -2711,10 +2728,10 @@ populate_lr_context(struct i915_gem_context *ctx,
 }
 
 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
-					    struct intel_engine_cs *engine)
+					    struct intel_engine_cs *engine,
+					    struct intel_context *ce)
 {
 	struct drm_i915_gem_object *ctx_obj;
-	struct intel_context *ce = to_intel_context(ctx, engine);
 	struct i915_vma *vma;
 	uint32_t context_size;
 	struct intel_ring *ring;
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 4ec7d8dd13c8..1593194e930c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -104,11 +104,4 @@ struct i915_gem_context;
 
 void intel_lr_context_resume(struct drm_i915_private *dev_priv);
 
-static inline uint64_t
-intel_lr_context_descriptor(struct i915_gem_context *ctx,
-			    struct intel_engine_cs *engine)
-{
-	return to_intel_context(ctx, engine)->lrc_desc;
-}
-
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 53703012ec75..0c0c9f531e4e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -571,8 +571,7 @@ static void reset_ring(struct intel_engine_cs *engine,
 	 */
 	if (request) {
 		struct drm_i915_private *dev_priv = request->i915;
-		struct intel_context *ce =
-			to_intel_context(request->gem_context, engine);
+		struct intel_context *ce = request->hw_context;
 		struct i915_hw_ppgtt *ppgtt;
 
 		if (ce->state) {
@@ -1186,7 +1185,31 @@ intel_ring_free(struct intel_ring *ring)
 	kfree(ring);
 }
 
-static int context_pin(struct intel_context *ce)
+static void intel_ring_context_destroy(struct intel_context *ce)
+{
+	GEM_BUG_ON(ce->pin_count);
+
+	if (ce->state)
+		__i915_gem_object_release_unless_active(ce->state->obj);
+}
+
+static void intel_ring_context_unpin(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->gem_context->i915->drm.struct_mutex);
+	GEM_BUG_ON(ce->pin_count == 0);
+
+	if (--ce->pin_count)
+		return;
+
+	if (ce->state) {
+		ce->state->obj->pin_global--;
+		i915_vma_unpin(ce->state);
+	}
+
+	i915_gem_context_put(ce->gem_context);
+}
+
+static int __context_pin(struct intel_context *ce)
 {
 	struct i915_vma *vma = ce->state;
 	int ret;
@@ -1275,25 +1298,19 @@ alloc_context_vma(struct intel_engine_cs *engine)
 	return ERR_PTR(err);
 }
 
-static struct intel_ring *
-intel_ring_context_pin(struct intel_engine_cs *engine,
-		       struct i915_gem_context *ctx)
+static struct intel_context *
+__ring_context_pin(struct intel_engine_cs *engine,
+		   struct i915_gem_context *ctx,
+		   struct intel_context *ce)
 {
-	struct intel_context *ce = to_intel_context(ctx, engine);
-	int ret;
-
-	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
-
-	if (likely(ce->pin_count++))
-		goto out;
-	GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
+	int err;
 
 	if (!ce->state && engine->context_size) {
 		struct i915_vma *vma;
 
 		vma = alloc_context_vma(engine);
 		if (IS_ERR(vma)) {
-			ret = PTR_ERR(vma);
+			err = PTR_ERR(vma);
 			goto err;
 		}
 
@@ -1301,8 +1318,8 @@ intel_ring_context_pin(struct intel_engine_cs *engine,
 	}
 
 	if (ce->state) {
-		ret = context_pin(ce);
-		if (ret)
+		err = __context_pin(ce);
+		if (err)
 			goto err;
 
 		ce->state->obj->pin_global++;
@@ -1310,32 +1327,37 @@ intel_ring_context_pin(struct intel_engine_cs *engine,
 
 	i915_gem_context_get(ctx);
 
-out:
 	/* One ringbuffer to rule them all */
-	return engine->buffer;
+	GEM_BUG_ON(!engine->buffer);
+	ce->ring = engine->buffer;
+
+	return ce;
 
 err:
 	ce->pin_count = 0;
-	return ERR_PTR(ret);
+	return ERR_PTR(err);
 }
 
-static void intel_ring_context_unpin(struct intel_engine_cs *engine,
-				     struct i915_gem_context *ctx)
+static const struct intel_context_ops ring_context_ops = {
+	.unpin = intel_ring_context_unpin,
+	.destroy = intel_ring_context_destroy,
+};
+
+static struct intel_context *
+intel_ring_context_pin(struct intel_engine_cs *engine,
+		       struct i915_gem_context *ctx)
 {
 	struct intel_context *ce = to_intel_context(ctx, engine);
 
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
-	GEM_BUG_ON(ce->pin_count == 0);
 
-	if (--ce->pin_count)
-		return;
+	if (likely(ce->pin_count++))
+		return ce;
+	GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
 
-	if (ce->state) {
-		ce->state->obj->pin_global--;
-		i915_vma_unpin(ce->state);
-	}
+	ce->ops = &ring_context_ops;
 
-	i915_gem_context_put(ctx);
+	return __ring_context_pin(engine, ctx, ce);
 }
 
 static int intel_init_ring_buffer(struct intel_engine_cs *engine)
@@ -1346,10 +1368,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 
 	intel_engine_setup_common(engine);
 
-	err = intel_engine_init_common(engine);
-	if (err)
-		goto err;
-
 	timeline = i915_timeline_create(engine->i915, engine->name);
 	if (IS_ERR(timeline)) {
 		err = PTR_ERR(timeline);
@@ -1371,8 +1389,14 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 	GEM_BUG_ON(engine->buffer);
 	engine->buffer = ring;
 
+	err = intel_engine_init_common(engine);
+	if (err)
+		goto err_unpin;
+
 	return 0;
 
+err_unpin:
+	intel_ring_unpin(ring);
 err_ring:
 	intel_ring_free(ring);
 err:
@@ -1458,7 +1482,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 
 	*cs++ = MI_NOOP;
 	*cs++ = MI_SET_CONTEXT;
-	*cs++ = i915_ggtt_offset(to_intel_context(rq->gem_context, engine)->state) | flags;
+	*cs++ = i915_ggtt_offset(rq->hw_context->state) | flags;
 	/*
 	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
 	 * WaMiSetContext_Hang:snb,ivb,vlv
@@ -1549,7 +1573,7 @@ static int switch_context(struct i915_request *rq)
 		hw_flags = MI_FORCE_RESTORE;
 	}
 
-	if (to_intel_context(to_ctx, engine)->state &&
+	if (rq->hw_context->state &&
 	    (to_ctx != from_ctx || hw_flags & MI_FORCE_RESTORE)) {
 		GEM_BUG_ON(engine->id != RCS);
 
@@ -1597,7 +1621,7 @@ static int ring_request_alloc(struct i915_request *request)
 {
 	int ret;
 
-	GEM_BUG_ON(!to_intel_context(request->gem_context, request->engine)->pin_count);
+	GEM_BUG_ON(!request->hw_context->pin_count);
 
 	/* Flush enough space to reduce the likelihood of waiting after
 	 * we start building the request - in which case we will just
@@ -2028,8 +2052,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 	engine->reset.finish = reset_finish;
 
 	engine->context_pin = intel_ring_context_pin;
-	engine->context_unpin = intel_ring_context_unpin;
-
 	engine->request_alloc = ring_request_alloc;
 
 	engine->emit_breadcrumb = i9xx_emit_breadcrumb;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2b16185e36c4..20c4e13efc0d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -436,10 +436,9 @@ struct intel_engine_cs {
 
 	void		(*set_default_submission)(struct intel_engine_cs *engine);
 
-	struct intel_ring *(*context_pin)(struct intel_engine_cs *engine,
-					  struct i915_gem_context *ctx);
-	void		(*context_unpin)(struct intel_engine_cs *engine,
-					 struct i915_gem_context *ctx);
+	struct intel_context *(*context_pin)(struct intel_engine_cs *engine,
+					     struct i915_gem_context *ctx);
+
 	int		(*request_alloc)(struct i915_request *rq);
 	int		(*init_context)(struct i915_request *rq);
 
@@ -555,7 +554,7 @@ struct intel_engine_cs {
 	 * to the kernel context and trash it as the save may not happen
 	 * before the hardware is powered down.
 	 */
-	struct i915_gem_context *last_retired_context;
+	struct intel_context *last_retired_context;
 
 	/* We track the current MI_SET_CONTEXT in order to eliminate
 	 * redundant context switches. This presumes that requests are not
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index 501becc47c0c..8904f1ce64e3 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -30,6 +30,7 @@ mock_context(struct drm_i915_private *i915,
 	     const char *name)
 {
 	struct i915_gem_context *ctx;
+	unsigned int n;
 	int ret;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -43,6 +44,12 @@ mock_context(struct drm_i915_private *i915,
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
 	INIT_LIST_HEAD(&ctx->handles_list);
 
+	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) {
+		struct intel_context *ce = &ctx->__engine[n];
+
+		ce->gem_context = ctx;
+	}
+
 	ret = ida_simple_get(&i915->contexts.hw_ida,
 			     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
 	if (ret < 0)
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 26bf29d97007..33eddfc1f8ce 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -72,25 +72,37 @@ static void hw_delay_complete(struct timer_list *t)
 	spin_unlock(&engine->hw_lock);
 }
 
-static struct intel_ring *
-mock_context_pin(struct intel_engine_cs *engine,
-		 struct i915_gem_context *ctx)
+static void mock_context_unpin(struct intel_context *ce)
 {
-	struct intel_context *ce = to_intel_context(ctx, engine);
+	if (--ce->pin_count)
+		return;
 
-	if (!ce->pin_count++)
-		i915_gem_context_get(ctx);
+	i915_gem_context_put(ce->gem_context);
+}
 
-	return engine->buffer;
+static void mock_context_destroy(struct intel_context *ce)
+{
+	GEM_BUG_ON(ce->pin_count);
 }
 
-static void mock_context_unpin(struct intel_engine_cs *engine,
-			       struct i915_gem_context *ctx)
+static const struct intel_context_ops mock_context_ops = {
+	.unpin = mock_context_unpin,
+	.destroy = mock_context_destroy,
+};
+
+static struct intel_context *
+mock_context_pin(struct intel_engine_cs *engine,
+		 struct i915_gem_context *ctx)
 {
 	struct intel_context *ce = to_intel_context(ctx, engine);
 
-	if (!--ce->pin_count)
-		i915_gem_context_put(ctx);
+	if (!ce->pin_count++) {
+		i915_gem_context_get(ctx);
+		ce->ring = engine->buffer;
+		ce->ops = &mock_context_ops;
+	}
+
+	return ce;
 }
 
 static int mock_request_alloc(struct i915_request *request)
@@ -185,7 +197,6 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	engine->base.status_page.page_addr = (void *)(engine + 1);
 
 	engine->base.context_pin = mock_context_pin;
-	engine->base.context_unpin = mock_context_unpin;
 	engine->base.request_alloc = mock_request_alloc;
 	engine->base.emit_flush = mock_emit_flush;
 	engine->base.emit_breadcrumb = mock_emit_breadcrumb;
@@ -238,11 +249,13 @@ void mock_engine_free(struct intel_engine_cs *engine)
 {
 	struct mock_engine *mock =
 		container_of(engine, typeof(*mock), base);
+	struct intel_context *ce;
 
 	GEM_BUG_ON(timer_pending(&mock->hw_delay));
 
-	if (engine->last_retired_context)
-		intel_context_unpin(engine->last_retired_context, engine);
+	ce = fetch_and_zero(&engine->last_retired_context);
+	if (ce)
+		intel_context_unpin(ce);
 
 	mock_ring_free(engine->buffer);
 
-- 
2.17.0


* [PATCH 004/262] drm/i915: Pull the context->pin_count dec into the common intel_context_unpin
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
  2018-05-17  6:03 ` [PATCH 002/262] drm/i915: Move fiddling with engine->last_retired_context Chris Wilson
  2018-05-17  6:03 ` [PATCH 003/262] drm/i915: Store a pointer to intel_context in i915_request Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 005/262] drm/i915: Be irqsafe inside reset Chris Wilson
                   ` (52 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

As all backends implement the same pin_count mechanism and do a
dec-and-test as their first step, pull that dec-and-test into the
common intel_context_unpin(). As the helper is inlined, this also
pulls the test into the caller, eliminating the indirect call in the
usual steady-state case. The intel_context_pin() side is a little more
complicated, as it combines the lookup/alloc as well as pinning the
state, and so is left for a later date.
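
The resulting common helper, condensed from the i915_gem_context.h hunk
below with comments added:

	static inline void intel_context_unpin(struct intel_context *ce)
	{
		GEM_BUG_ON(!ce->pin_count);
		if (--ce->pin_count)
			return; /* still pinned by another user */

		/* final unpin: let the backend release its ring/state */
		GEM_BUG_ON(!ce->ops);
		ce->ops->unpin(ce);
	}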

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.h      |  4 ++++
 drivers/gpu/drm/i915/intel_lrc.c             | 13 +------------
 drivers/gpu/drm/i915/intel_ringbuffer.c      |  6 ------
 drivers/gpu/drm/i915/selftests/mock_engine.c |  3 ---
 4 files changed, 5 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 749a4ff566f5..c3262b4dd2ee 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -285,6 +285,10 @@ static inline void __intel_context_pin(struct intel_context *ce)
 
 static inline void intel_context_unpin(struct intel_context *ce)
 {
+	GEM_BUG_ON(!ce->pin_count);
+	if (--ce->pin_count)
+		return;
+
 	GEM_BUG_ON(!ce->ops);
 	ce->ops->unpin(ce);
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 960948617748..f3470b95d64e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1344,7 +1344,7 @@ static void execlists_context_destroy(struct intel_context *ce)
 	__i915_gem_object_release_unless_active(ce->state->obj);
 }
 
-static void __execlists_context_unpin(struct intel_context *ce)
+static void execlists_context_unpin(struct intel_context *ce)
 {
 	intel_ring_unpin(ce->ring);
 
@@ -1355,17 +1355,6 @@ static void __execlists_context_unpin(struct intel_context *ce)
 	i915_gem_context_put(ce->gem_context);
 }
 
-static void execlists_context_unpin(struct intel_context *ce)
-{
-	lockdep_assert_held(&ce->gem_context->i915->drm.struct_mutex);
-	GEM_BUG_ON(ce->pin_count == 0);
-
-	if (--ce->pin_count)
-		return;
-
-	__execlists_context_unpin(ce);
-}
-
 static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
 {
 	unsigned int flags;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 0c0c9f531e4e..001cf6bcb349 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1195,12 +1195,6 @@ static void intel_ring_context_destroy(struct intel_context *ce)
 
 static void intel_ring_context_unpin(struct intel_context *ce)
 {
-	lockdep_assert_held(&ce->gem_context->i915->drm.struct_mutex);
-	GEM_BUG_ON(ce->pin_count == 0);
-
-	if (--ce->pin_count)
-		return;
-
 	if (ce->state) {
 		ce->state->obj->pin_global--;
 		i915_vma_unpin(ce->state);
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 33eddfc1f8ce..f1ac7453053e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -74,9 +74,6 @@ static void hw_delay_complete(struct timer_list *t)
 
 static void mock_context_unpin(struct intel_context *ce)
 {
-	if (--ce->pin_count)
-		return;
-
 	i915_gem_context_put(ce->gem_context);
 }
 
-- 
2.17.0


* [PATCH 005/262] drm/i915: Be irqsafe inside reset
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (2 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 004/262] drm/i915: Pull the context->pin_count dec into the common intel_context_unpin Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 006/262] drm/i915: Make intel_engine_dump irqsafe Chris Wilson
                   ` (51 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

As we want to be able to call i915_reset_engine and co from a softirq or
timer context, we need to be irqsafe at all times. So we have to forgo
the simple spin_lock_irq for the full spin_lock_irqsave.
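
A condensed sketch of the conversion made in the hunk below: the plain
spin_unlock_irq() unconditionally re-enables interrupts, which is
unsafe if the caller entered with them disabled, whereas the irqsave
pair restores whatever irq state we started with.

	unsigned long flags;

	spin_lock_irqsave(&engine->timeline.lock, flags);
	/* ... rewind the engine to replay the incomplete request ... */
	spin_unlock_irqrestore(&engine->timeline.lock, flags);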

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 03874b50ada9..a3885adec78a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3137,15 +3137,17 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
 		 */
 		request = i915_gem_find_active_request(engine);
 		if (request) {
+			unsigned long flags;
+
 			i915_gem_context_mark_innocent(request->gem_context);
 			dma_fence_set_error(&request->fence, -EAGAIN);
 
 			/* Rewind the engine to replay the incomplete rq */
-			spin_lock_irq(&engine->timeline.lock);
+			spin_lock_irqsave(&engine->timeline.lock, flags);
 			request = list_prev_entry(request, link);
 			if (&request->link == &engine->timeline.requests)
 				request = NULL;
-			spin_unlock_irq(&engine->timeline.lock);
+			spin_unlock_irqrestore(&engine->timeline.lock, flags);
 		}
 	}
 
-- 
2.17.0


* [PATCH 006/262] drm/i915: Make intel_engine_dump irqsafe
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (3 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 005/262] drm/i915: Be irqsafe inside reset Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 007/262] drm/i915/execlists: Handle copying default context state for atomic reset Chris Wilson
                   ` (50 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

To be useful later, enable intel_engine_dump() to be called from irq
context (i.e. by saving and restoring the irq state rather than assuming
we enter with irqs enabled).
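
The locking shape this produces, condensed from the hunk below: the irq
state is taken once and both locks downgrade to plain spin_lock(), so
the dump may be entered with irqs on or off.

	local_irq_save(flags);
	spin_lock(&engine->timeline.lock);
	/* ... walk and print the timeline requests ... */
	spin_unlock(&engine->timeline.lock);

	spin_lock(&b->rb_lock);	/* b == &engine->breadcrumbs */
	/* ... print the waiters ... */
	spin_unlock(&b->rb_lock);
	local_irq_restore(flags);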

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_engine_cs.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 828b7377d0d0..333318b340e1 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1358,6 +1358,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	const struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
 	struct i915_request *rq, *last;
+	unsigned long flags;
 	struct rb_node *rb;
 	int count;
 
@@ -1424,7 +1425,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
 	}
 
-	spin_lock_irq(&engine->timeline.lock);
+	local_irq_save(flags);
+	spin_lock(&engine->timeline.lock);
 
 	last = NULL;
 	count = 0;
@@ -1466,16 +1468,17 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 		print_request(m, last, "\t\tQ ");
 	}
 
-	spin_unlock_irq(&engine->timeline.lock);
+	spin_unlock(&engine->timeline.lock);
 
-	spin_lock_irq(&b->rb_lock);
+	spin_lock(&b->rb_lock);
 	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
 		struct intel_wait *w = rb_entry(rb, typeof(*w), node);
 
 		drm_printf(m, "\t%s [%d] waiting for %x\n",
 			   w->tsk->comm, w->tsk->pid, w->seqno);
 	}
-	spin_unlock_irq(&b->rb_lock);
+	spin_unlock(&b->rb_lock);
+	local_irq_restore(flags);
 
 	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n",
 		   engine->irq_posted,
-- 
2.17.0


* [PATCH 007/262] drm/i915/execlists: Handle copying default context state for atomic reset
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (4 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 006/262] drm/i915: Make intel_engine_dump irqsafe Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 008/262] drm/i915: Allow init_breadcrumbs to be used from irq context Chris Wilson
                   ` (49 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

We want to be able to reset the GPU from inside a timer callback
(hardirq context). One step requires us to copy the default context
state over to the guilty context, which means we need to plan in advance
to have that object accessible from within an atomic context. The atomic
context prevents us from pinning the object or peeking into the
shmemfs backing store (all may sleep), so we choose to pin the
default_state into memory when the engine becomes active. This
compromise allows us to swap out the default state when idle, if
required.
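
The resulting lifetime, condensed from the intel_engine_cs.c hunks
below: pin the map while the engines are awake, drop it when they park.

	/* unpark: make the default state reachable from atomic context */
	map = i915_gem_object_pin_map(engine->default_state, I915_MAP_WB);
	if (!IS_ERR_OR_NULL(map))
		engine->pinned_default_state = map;

	/* park: engines are idle again, release the pin */
	if (engine->pinned_default_state) {
		i915_gem_object_unpin_map(engine->default_state);
		engine->pinned_default_state = NULL;
	}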

References: 5692251c254a ("drm/i915/lrc: Scrub the GPU state of the guilty hanging request")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_engine_cs.c  | 15 +++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.c        | 15 ++++-----------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 +
 3 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 333318b340e1..b1a1ca0758ce 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1082,6 +1082,11 @@ void intel_engines_park(struct drm_i915_private *i915)
 		if (engine->park)
 			engine->park(engine);
 
+		if (engine->pinned_default_state) {
+			i915_gem_object_unpin_map(engine->default_state);
+			engine->pinned_default_state = NULL;
+		}
+
 		i915_gem_batch_pool_fini(&engine->batch_pool);
 		engine->execlists.no_priolist = false;
 	}
@@ -1099,6 +1104,16 @@ void intel_engines_unpark(struct drm_i915_private *i915)
 	enum intel_engine_id id;
 
 	for_each_engine(engine, i915, id) {
+		void *map;
+
+		/* Pin the default state for fast resets from atomic context. */
+		map = NULL;
+		if (engine->default_state)
+			map = i915_gem_object_pin_map(engine->default_state,
+						      I915_MAP_WB);
+		if (!IS_ERR_OR_NULL(map))
+			engine->pinned_default_state = map;
+
 		if (engine->unpark)
 			engine->unpark(engine);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f3470b95d64e..49283b3d3ebb 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1966,17 +1966,10 @@ static void execlists_reset(struct intel_engine_cs *engine,
 	 * to recreate its own state.
 	 */
 	regs = request->hw_context->lrc_reg_state;
-	if (engine->default_state) {
-		void *defaults;
-
-		defaults = i915_gem_object_pin_map(engine->default_state,
-						   I915_MAP_WB);
-		if (!IS_ERR(defaults)) {
-			memcpy(regs, /* skip restoring the vanilla PPHWSP */
-			       defaults + LRC_STATE_PN * PAGE_SIZE,
-			       engine->context_size - PAGE_SIZE);
-			i915_gem_object_unpin_map(engine->default_state);
-		}
+	if (engine->pinned_default_state) {
+		memcpy(regs, /* skip restoring the vanilla PPHWSP */
+		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
+		       engine->context_size - PAGE_SIZE);
 	}
 	execlists_init_reg_state(regs,
 				 request->gem_context, engine, request->ring);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 20c4e13efc0d..acef385c4c80 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -342,6 +342,7 @@ struct intel_engine_cs {
 	struct i915_timeline timeline;
 
 	struct drm_i915_gem_object *default_state;
+	void *pinned_default_state;
 
 	atomic_t irq_count;
 	unsigned long irq_posted;
-- 
2.17.0


* [PATCH 008/262] drm/i915: Allow init_breadcrumbs to be used from irq context
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (5 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 007/262] drm/i915/execlists: Handle copying default context state for atomic reset Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 009/262] drm/i915/execlists: HWACK checking superseded checking port[0].count Chris Wilson
                   ` (48 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

In order to support engine reset from irq (timer) context, we need to be
able to re-initialise the breadcrumbs. So we need to promote the plain
spin_lock_irq to a safe spin_lock_irqsave.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 18e643df523e..86a987b8ac66 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -846,8 +846,9 @@ static void cancel_fake_irq(struct intel_engine_cs *engine)
 void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	unsigned long flags;
 
-	spin_lock_irq(&b->irq_lock);
+	spin_lock_irqsave(&b->irq_lock, flags);
 
 	/*
 	 * Leave the fake_irq timer enabled (if it is running), but clear the
@@ -871,7 +872,7 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
 	 */
 	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
 
-	spin_unlock_irq(&b->irq_lock);
+	spin_unlock_irqrestore(&b->irq_lock, flags);
 }
 
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
-- 
2.17.0


* [PATCH 009/262] drm/i915/execlists: HWACK checking superseded checking port[0].count
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (6 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 008/262] drm/i915: Allow init_breadcrumbs to be used from irq context Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 010/262] drm/i915: Remove USES_GUC_SUBMISSION() pointer chasing from gen8_cs_irq_handler Chris Wilson
                   ` (47 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

The HWACK bit more generically solves the problem of resubmitting the
ELSP while the hardware is still processing the current ELSP write. We no
longer need to check port[0].count itself.
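
The check that remains (as seen in the dequeue path elsewhere in this
series) is the more general gate: do not touch ELSP again until the HW
has acknowledged the previous write.

	/* Wait for the HW to ack the last ELSP write before resubmitting */
	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
		return false;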

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 49283b3d3ebb..857ab04452f0 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -608,8 +608,6 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 		GEM_BUG_ON(!execlists_is_active(execlists,
 						EXECLISTS_ACTIVE_USER));
 		GEM_BUG_ON(!port_count(&port[0]));
-		if (port_count(&port[0]) > 1)
-			return false;
 
 		/*
 		 * If we write to ELSP a second time before the HW has had
-- 
2.17.0


* [PATCH 010/262] drm/i915: Remove USES_GUC_SUBMISSION() pointer chasing from gen8_cs_irq_handler
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (7 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 009/262] drm/i915/execlists: HWACK checking superseded checking port[0].count Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 011/262] drm/i915/execlists: Double check rpm wakeref Chris Wilson
                   ` (46 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

Store whether or not we need to kick the guc's execlists emulation on
the engine itself, to avoid chasing the device info in the interrupt
handler. bloat-o-meter shows the saving (old size, new size, delta in
bytes):

gen8_cs_irq_handler                          512     428     -84
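
The interrupt-path test this enables, taken from the i915_irq.c hunk
below: a flag already cached on the engine replaces the pointer chase
through the device info.

	if (iir & GT_RENDER_USER_INTERRUPT) {
		/* engine->flags & I915_ENGINE_USES_GUC, set when enabled */
		if (intel_engine_uses_guc(engine))
			tasklet = true;

		notify_ring(engine);
	}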

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_irq.c             | 4 +++-
 drivers/gpu/drm/i915/intel_guc_submission.c | 1 +
 drivers/gpu/drm/i915/intel_lrc.c            | 1 +
 drivers/gpu/drm/i915/intel_ringbuffer.h     | 7 +++++++
 4 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index f9bc3aaa90d0..460878572515 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1472,8 +1472,10 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 	}
 
 	if (iir & GT_RENDER_USER_INTERRUPT) {
+		if (intel_engine_uses_guc(engine))
+			tasklet = true;
+
 		notify_ring(engine);
-		tasklet |= USES_GUC_SUBMISSION(engine->i915);
 	}
 
 	if (tasklet)
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 133367a17863..d9fcd5db4ea4 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -1312,6 +1312,7 @@ int intel_guc_submission_enable(struct intel_guc *guc)
 		engine->unpark = guc_submission_unpark;
 
 		engine->flags &= ~I915_ENGINE_SUPPORTS_STATS;
+		engine->flags |= I915_ENGINE_USES_GUC;
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 857ab04452f0..4928e9ad7826 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2305,6 +2305,7 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->park = NULL;
 	engine->unpark = NULL;
 
+	engine->flags &= ~I915_ENGINE_USES_GUC;
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
 	if (engine->i915->preempt_context)
 		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index acef385c4c80..4ad9c5842575 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -574,6 +574,7 @@ struct intel_engine_cs {
 #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
+#define I915_ENGINE_USES_GUC         BIT(3)
 	unsigned int flags;
 
 	/*
@@ -651,6 +652,12 @@ intel_engine_has_preemption(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
 }
 
+static inline bool
+intel_engine_uses_guc(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_USES_GUC;
+}
+
 static inline bool __execlists_need_preempt(int prio, int last)
 {
 	return prio > max(0, last);
-- 
2.17.0


* [PATCH 011/262] drm/i915/execlists: Double check rpm wakeref
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (8 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 010/262] drm/i915: Remove USES_GUC_SUBMISSION() pointer chasing from gen8_cs_irq_handler Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 012/262] drm/i915: After reset on sanitization, reset the engine backends Chris Wilson
                   ` (45 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

As we are splitting the processing of CSB events from the submission
of the ELSP, we also need to duplicate the check that we hold a device
wakeref for our hardware access, placing it at both of the now disjoint
locations.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 4928e9ad7826..6d3b03299b0c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -449,6 +449,16 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 	struct execlist_port *port = execlists->port;
 	unsigned int n;
 
+	/*
+	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
+	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
+	 * not be relinquished until the device is idle (see
+	 * i915_gem_idle_work_handler()). As a precaution, we make sure
+	 * that all ELSP are drained i.e. we have processed the CSB,
+	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
+	 */
+	GEM_BUG_ON(!engine->i915->gt.awake);
+
 	/*
 	 * ELSQ note: the submit queue is not cleared after being submitted
 	 * to the HW so we need to make sure we always clean it up. This is
@@ -959,6 +969,12 @@ static void process_csb(struct intel_engine_cs *engine)
 	struct drm_i915_private *i915 = engine->i915;
 	bool fw = false;
 
+	/*
+	 * We must never release our device wakeref until after we have
+	 * finished processing all potential interrupts from the hardware.
+	 */
+	GEM_BUG_ON(!engine->i915->gt.awake);
+
 	do {
 		/* The HWSP contains a (cacheable) mirror of the CSB */
 		const u32 *buf =
@@ -1139,16 +1155,6 @@ static void execlists_submission_tasklet(unsigned long data)
 		  engine->execlists.active,
 		  test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
 
-	/*
-	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
-	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
-	 * not be relinquished until the device is idle (see
-	 * i915_gem_idle_work_handler()). As a precaution, we make sure
-	 * that all ELSP are drained i.e. we have processed the CSB,
-	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
-	 */
-	GEM_BUG_ON(!engine->i915->gt.awake);
-
 	/*
 	 * Prefer doing test_and_clear_bit() as a two stage operation to avoid
 	 * imposing the cost of a locked atomic transaction when submitting a
-- 
2.17.0


* [PATCH 012/262] drm/i915: After reset on sanitization, reset the engine backends
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (9 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 011/262] drm/i915/execlists: Double check rpm wakeref Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 013/262] drm/i915/execlists: Reset the CSB head tracking on reset/sanitization Chris Wilson
                   ` (44 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

As we reset the GPU on suspend/resume, we also need to reset the
engine state tracking, so call into the engine backends. This is
especially important so that we can also sanitize the state tracking
across resume.
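
The core of the change, condensed from the i915_gem.c hunk below: after
a successful full-device reset, each engine backend gets a chance to
scrub its state tracking; passing NULL says there is no guilty request
to replay.

	if (!WARN_ON(intel_gpu_reset(i915, ALL_ENGINES))) {
		for_each_engine(engine, i915, id) {
			if (engine->reset.reset)
				engine->reset.reset(engine, NULL);
		}
	}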

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a3885adec78a..fa09837d0569 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4911,11 +4911,15 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915)
 
 void i915_gem_sanitize(struct drm_i915_private *i915)
 {
-	if (i915_terminally_wedged(&i915->gpu_error)) {
-		mutex_lock(&i915->drm.struct_mutex);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err;
+
+	mutex_lock(&i915->drm.struct_mutex);
+	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
+
+	if (i915_terminally_wedged(&i915->gpu_error))
 		i915_gem_unset_wedged(i915);
-		mutex_unlock(&i915->drm.struct_mutex);
-	}
 
 	/*
 	 * If we inherit context state from the BIOS or earlier occupants
@@ -4925,8 +4929,19 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 	 * it may impact the display and we are uncertain about the stability
 	 * of the reset, so this could be applied to even earlier gen.
 	 */
-	if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915))
-		WARN_ON(intel_gpu_reset(i915, ALL_ENGINES));
+	err = -ENODEV;
+	if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) {
+		if (!WARN_ON(intel_gpu_reset(i915, ALL_ENGINES))) {
+			for_each_engine(engine, i915, id) {
+				if (engine->reset.reset)
+					engine->reset.reset(engine, NULL);
+			}
+			err = 0;
+		}
+	}
+
+	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
+	mutex_unlock(&i915->drm.struct_mutex);
 }
 
 int i915_gem_suspend(struct drm_i915_private *dev_priv)
-- 
2.17.0


* [PATCH 013/262] drm/i915/execlists: Reset the CSB head tracking on reset/sanitization
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (10 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 012/262] drm/i915: After reset on sanitization, reset the engine backends Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 014/262] drm/i915/execlists: Pull submit after dequeue under timeline lock Chris Wilson
                   ` (43 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

We can avoid the mmio read of the CSB pointers after reset based on the
knowledge that the HW always starts writing at entry 0 in the CSB buffer.
We need to reset our CSB head tracking after GPU reset (and on
sanitization after resume) so that we are expecting to read from entry
0.
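
Why GEN8_CSB_ENTRIES - 1 works (sketch): the CSB is a ring and csb_head
records the last entry consumed, so pointing it at the final slot means
the next entry we advance to and read is slot 0, exactly where the HW
restarts writing after a reset.

	/* after reset: the first event we expect is CSB slot 0 */
	engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1;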

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6d3b03299b0c..14149d0912fb 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -981,22 +981,19 @@ static void process_csb(struct intel_engine_cs *engine)
 			&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
 		unsigned int head, tail;
 
-		if (unlikely(execlists->csb_use_mmio)) {
-			buf = (u32 * __force)
-				(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
-			execlists->csb_head = -1; /* force mmio read of CSB */
-		}
-
 		/* Clear before reading to catch new interrupts */
 		clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 		smp_mb__after_atomic();
 
-		if (unlikely(execlists->csb_head == -1)) { /* after a reset */
+		if (unlikely(execlists->csb_use_mmio)) {
 			if (!fw) {
 				intel_uncore_forcewake_get(i915, execlists->fw_domains);
 				fw = true;
 			}
 
+			buf = (u32 * __force)
+				(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+
 			head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
 			tail = GEN8_CSB_WRITE_PTR(head);
 			head = GEN8_CSB_READ_PTR(head);
@@ -1790,9 +1787,6 @@ static void enable_execlists(struct intel_engine_cs *engine)
 	I915_WRITE(RING_HWS_PGA(engine->mmio_base),
 		   engine->status_page.ggtt_offset);
 	POSTING_READ(RING_HWS_PGA(engine->mmio_base));
-
-	/* Following the reset, we need to reload the CSB read/write pointers */
-	engine->execlists.csb_head = -1;
 }
 
 static int gen8_init_common_ring(struct intel_engine_cs *engine)
@@ -1945,6 +1939,9 @@ static void execlists_reset(struct intel_engine_cs *engine,
 	__unwind_incomplete_requests(engine);
 	spin_unlock(&engine->timeline.lock);
 
+	/* Following the reset, we need to reload the CSB read/write pointers */
+	engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1;
+
 	local_irq_restore(flags);
 
 	/*
@@ -2436,6 +2433,8 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 			upper_32_bits(ce->lrc_desc);
 	}
 
+	engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1;
+
 	return 0;
 
 error:
-- 
2.17.0


* [PATCH 014/262] drm/i915/execlists: Pull submit after dequeue under timeline lock
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (11 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 013/262] drm/i915/execlists: Reset the CSB head tracking on reset/sanitization Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 015/262] drm/i915/execlists: Process one CSB interrupt at a time Chris Wilson
                   ` (42 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we will begin processing the CSB from inside the
interrupt handler. This means that updating the execlists->port[] will
no longer be locked by the tasklet but by the engine->timeline.lock
instead. Pull dequeue and submit under the same lock for protection.
(An alternative, future plan is to keep the in/out arrays separate for
concurrent processing and reduced lock coverage.)
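
The resulting call shape, condensed from the hunk below: the ELSP write
now happens while the timeline lock is still held.

	spin_lock_irqsave(&engine->timeline.lock, flags);
	__execlists_dequeue(engine); /* ends by calling execlists_submit_ports() */
	spin_unlock_irqrestore(&engine->timeline.lock, flags);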

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 32 ++++++++++++--------------------
 1 file changed, 12 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 14149d0912fb..6c050d553d4e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -572,7 +572,7 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
-static bool __execlists_dequeue(struct intel_engine_cs *engine)
+static void __execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
@@ -627,11 +627,11 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 		 * the HW to indicate that it has had a chance to respond.
 		 */
 		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
-			return false;
+			return;
 
 		if (need_preempt(engine, last, execlists->queue_priority)) {
 			inject_preempt_context(engine);
-			return false;
+			return;
 		}
 
 		/*
@@ -656,7 +656,7 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 		 * priorities of the ports haven't been switch.
 		 */
 		if (port_count(&port[1]))
-			return false;
+			return;
 
 		/*
 		 * WaIdleLiteRestore:bdw,skl
@@ -756,8 +756,10 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 		port != execlists->port ? rq_prio(last) : INT_MIN;
 
 	execlists->first = rb;
-	if (submit)
+	if (submit) {
 		port_assign(port, last);
+		execlists_submit_ports(engine);
+	}
 
 	/* We must always keep the beast fed if we have work piled up */
 	GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
@@ -766,24 +768,19 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 	if (last)
 		execlists_user_begin(execlists, execlists->port);
 
-	return submit;
+	/* If the engine is now idle, so should be the flag; and vice versa. */
+	GEM_BUG_ON(execlists_is_active(&engine->execlists,
+				       EXECLISTS_ACTIVE_USER) ==
+		   !port_isset(engine->execlists.port));
 }
 
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
-	struct intel_engine_execlists * const execlists = &engine->execlists;
 	unsigned long flags;
-	bool submit;
 
 	spin_lock_irqsave(&engine->timeline.lock, flags);
-	submit = __execlists_dequeue(engine);
+	__execlists_dequeue(engine);
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
-
-	if (submit)
-		execlists_submit_ports(engine);
-
-	GEM_BUG_ON(port_isset(execlists->port) &&
-		   !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
 }
 
 void
@@ -1162,11 +1159,6 @@ static void execlists_submission_tasklet(unsigned long data)
 
 	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
 		execlists_dequeue(engine);
-
-	/* If the engine is now idle, so should be the flag; and vice versa. */
-	GEM_BUG_ON(execlists_is_active(&engine->execlists,
-				       EXECLISTS_ACTIVE_USER) ==
-		   !port_isset(engine->execlists.port));
 }
 
 static void queue_request(struct intel_engine_cs *engine,
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [PATCH 015/262] drm/i915/execlists: Process one CSB interrupt at a time
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (12 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 014/262] drm/i915/execlists: Pull submit after dequeue under timeline lock Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 016/262] drm/i915/execlists: Unify CSB access pointers Chris Wilson
                   ` (41 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we will process the CSB events directly from the CS
interrupt handler, which is invoked once per interrupt. Hence, we no
longer need a loop until the has-interrupt bit is clear, and in the
meantime we can remove that small optimisation.
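
Schematically, the change is from

	do {
		/* latch head/tail, then drain the CSB entries */
	} while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));

to a single pass per invocation

	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
	/* latch head/tail once, then drain the CSB entries */

(a sketch of the control flow only; the full hunks are below).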

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 274 +++++++++++++++----------------
 1 file changed, 135 insertions(+), 139 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6c050d553d4e..0c7108249244 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -964,6 +964,11 @@ static void process_csb(struct intel_engine_cs *engine)
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
 	struct drm_i915_private *i915 = engine->i915;
+
+	/* The HWSP contains a (cacheable) mirror of the CSB */
+	const u32 *buf =
+		&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+	unsigned int head, tail;
 	bool fw = false;
 
 	/*
@@ -972,164 +977,155 @@ static void process_csb(struct intel_engine_cs *engine)
 	 */
 	GEM_BUG_ON(!engine->i915->gt.awake);
 
-	do {
-		/* The HWSP contains a (cacheable) mirror of the CSB */
-		const u32 *buf =
-			&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
-		unsigned int head, tail;
+	/* Clear before reading to catch new interrupts */
+	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+	smp_mb__after_atomic();
 
-		/* Clear before reading to catch new interrupts */
-		clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-		smp_mb__after_atomic();
+	if (unlikely(execlists->csb_use_mmio)) {
+		intel_uncore_forcewake_get(i915, execlists->fw_domains);
+		fw = true;
 
-		if (unlikely(execlists->csb_use_mmio)) {
-			if (!fw) {
-				intel_uncore_forcewake_get(i915, execlists->fw_domains);
-				fw = true;
-			}
+		buf = (u32 * __force)
+			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
 
-			buf = (u32 * __force)
-				(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+		head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+		tail = GEN8_CSB_WRITE_PTR(head);
+		head = GEN8_CSB_READ_PTR(head);
+		execlists->csb_head = head;
+	} else {
+		const int write_idx =
+			intel_hws_csb_write_index(i915) -
+			I915_HWS_CSB_BUF0_INDEX;
 
-			head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
-			tail = GEN8_CSB_WRITE_PTR(head);
-			head = GEN8_CSB_READ_PTR(head);
-			execlists->csb_head = head;
-		} else {
-			const int write_idx =
-				intel_hws_csb_write_index(i915) -
-				I915_HWS_CSB_BUF0_INDEX;
+		head = execlists->csb_head;
+		tail = READ_ONCE(buf[write_idx]);
+		rmb(); /* Hopefully paired with a wmb() in HW */
+	}
+	GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
+		  engine->name,
+		  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
+		  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
 
-			head = execlists->csb_head;
-			tail = READ_ONCE(buf[write_idx]);
-			rmb(); /* Hopefully paired with a wmb() in HW */
-		}
-		GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
-			  engine->name,
-			  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
-			  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
+	while (head != tail) {
+		struct i915_request *rq;
+		unsigned int status;
+		unsigned int count;
 
-		while (head != tail) {
-			struct i915_request *rq;
-			unsigned int status;
-			unsigned int count;
+		if (++head == GEN8_CSB_ENTRIES)
+			head = 0;
 
-			if (++head == GEN8_CSB_ENTRIES)
-				head = 0;
+		/*
+		 * We are flying near dragons again.
+		 *
+		 * We hold a reference to the request in execlist_port[]
+		 * but no more than that. We are operating in softirq
+		 * context and so cannot hold any mutex or sleep. That
+		 * prevents us stopping the requests we are processing
+		 * in port[] from being retired simultaneously (the
+		 * breadcrumb will be complete before we see the
+		 * context-switch). As we only hold the reference to the
+		 * request, any pointer chasing underneath the request
+		 * is subject to a potential use-after-free. Thus we
+		 * store all of the bookkeeping within port[] as
+		 * required, and avoid using unguarded pointers beneath
+		 * request itself. The same applies to the atomic
+		 * status notifier.
+		 */
 
-			/*
-			 * We are flying near dragons again.
-			 *
-			 * We hold a reference to the request in execlist_port[]
-			 * but no more than that. We are operating in softirq
-			 * context and so cannot hold any mutex or sleep. That
-			 * prevents us stopping the requests we are processing
-			 * in port[] from being retired simultaneously (the
-			 * breadcrumb will be complete before we see the
-			 * context-switch). As we only hold the reference to the
-			 * request, any pointer chasing underneath the request
-			 * is subject to a potential use-after-free. Thus we
-			 * store all of the bookkeeping within port[] as
-			 * required, and avoid using unguarded pointers beneath
-			 * request itself. The same applies to the atomic
-			 * status notifier.
-			 */
+		status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
+		GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
+			  engine->name, head,
+			  status, buf[2*head + 1],
+			  execlists->active);
+
+		if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+			      GEN8_CTX_STATUS_PREEMPTED))
+			execlists_set_active(execlists,
+					     EXECLISTS_ACTIVE_HWACK);
+		if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+			execlists_clear_active(execlists,
+					       EXECLISTS_ACTIVE_HWACK);
+
+		if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+			continue;
 
-			status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
-			GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
-				  engine->name, head,
-				  status, buf[2*head + 1],
-				  execlists->active);
-
-			if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-				      GEN8_CTX_STATUS_PREEMPTED))
-				execlists_set_active(execlists,
-						     EXECLISTS_ACTIVE_HWACK);
-			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
-				execlists_clear_active(execlists,
-						       EXECLISTS_ACTIVE_HWACK);
-
-			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-				continue;
+		/* We should never get a COMPLETED | IDLE_ACTIVE! */
+		GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
 
-			/* We should never get a COMPLETED | IDLE_ACTIVE! */
-			GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+		if (status & GEN8_CTX_STATUS_COMPLETE &&
+		    buf[2*head + 1] == execlists->preempt_complete_status) {
+			GEM_TRACE("%s preempt-idle\n", engine->name);
+			complete_preempt_context(execlists);
+			continue;
+		}
 
-			if (status & GEN8_CTX_STATUS_COMPLETE &&
-			    buf[2*head + 1] == execlists->preempt_complete_status) {
-				GEM_TRACE("%s preempt-idle\n", engine->name);
-				complete_preempt_context(execlists);
-				continue;
-			}
+		if (status & GEN8_CTX_STATUS_PREEMPTED &&
+		    execlists_is_active(execlists,
+					EXECLISTS_ACTIVE_PREEMPT))
+			continue;
 
-			if (status & GEN8_CTX_STATUS_PREEMPTED &&
-			    execlists_is_active(execlists,
-						EXECLISTS_ACTIVE_PREEMPT))
-				continue;
+		GEM_BUG_ON(!execlists_is_active(execlists,
+						EXECLISTS_ACTIVE_USER));
 
-			GEM_BUG_ON(!execlists_is_active(execlists,
-							EXECLISTS_ACTIVE_USER));
+		rq = port_unpack(port, &count);
+		GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
+			  engine->name,
+			  port->context_id, count,
+			  rq ? rq->global_seqno : 0,
+			  rq ? rq->fence.context : 0,
+			  rq ? rq->fence.seqno : 0,
+			  intel_engine_get_seqno(engine),
+			  rq ? rq_prio(rq) : 0);
+
+		/* Check the context/desc id for this event matches */
+		GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
+
+		GEM_BUG_ON(count == 0);
+		if (--count == 0) {
+			/*
+			 * On the final event corresponding to the
+			 * submission of this context, we expect either
+			 * an element-switch event or a completion
+			 * event (and on completion, the active-idle
+			 * marker). No more preemptions, lite-restore
+			 * or otherwise.
+			 */
+			GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
+			GEM_BUG_ON(port_isset(&port[1]) &&
+				   !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
+			GEM_BUG_ON(!port_isset(&port[1]) &&
+				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
 
-			rq = port_unpack(port, &count);
-			GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
-				  engine->name,
-				  port->context_id, count,
-				  rq ? rq->global_seqno : 0,
-				  rq ? rq->fence.context : 0,
-				  rq ? rq->fence.seqno : 0,
-				  intel_engine_get_seqno(engine),
-				  rq ? rq_prio(rq) : 0);
+			/*
+			 * We rely on the hardware being strongly
+			 * ordered, that the breadcrumb write is
+			 * coherent (visible from the CPU) before the
+			 * user interrupt and CSB is processed.
+			 */
+			GEM_BUG_ON(!i915_request_completed(rq));
 
-			/* Check the context/desc id for this event matches */
-			GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
+			execlists_context_schedule_out(rq,
+						       INTEL_CONTEXT_SCHEDULE_OUT);
+			i915_request_put(rq);
 
-			GEM_BUG_ON(count == 0);
-			if (--count == 0) {
-				/*
-				 * On the final event corresponding to the
-				 * submission of this context, we expect either
-				 * an element-switch event or a completion
-				 * event (and on completion, the active-idle
-				 * marker). No more preemptions, lite-restore
-				 * or otherwise.
-				 */
-				GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
-				GEM_BUG_ON(port_isset(&port[1]) &&
-					   !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
-				GEM_BUG_ON(!port_isset(&port[1]) &&
-					   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+			GEM_TRACE("%s completed ctx=%d\n",
+				  engine->name, port->context_id);
 
-				/*
-				 * We rely on the hardware being strongly
-				 * ordered, that the breadcrumb write is
-				 * coherent (visible from the CPU) before the
-				 * user interrupt and CSB is processed.
-				 */
-				GEM_BUG_ON(!i915_request_completed(rq));
-
-				execlists_context_schedule_out(rq,
-							       INTEL_CONTEXT_SCHEDULE_OUT);
-				i915_request_put(rq);
-
-				GEM_TRACE("%s completed ctx=%d\n",
-					  engine->name, port->context_id);
-
-				port = execlists_port_complete(execlists, port);
-				if (port_isset(port))
-					execlists_user_begin(execlists, port);
-				else
-					execlists_user_end(execlists);
-			} else {
-				port_set(port, port_pack(rq, count));
-			}
+			port = execlists_port_complete(execlists, port);
+			if (port_isset(port))
+				execlists_user_begin(execlists, port);
+			else
+				execlists_user_end(execlists);
+		} else {
+			port_set(port, port_pack(rq, count));
 		}
+	}
 
-		if (head != execlists->csb_head) {
-			execlists->csb_head = head;
-			writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
-			       i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
-		}
-	} while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
+	if (head != execlists->csb_head) {
+		execlists->csb_head = head;
+		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
+		       i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+	}
 
 	if (unlikely(fw))
 		intel_uncore_forcewake_put(i915, execlists->fw_domains);
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [PATCH 016/262] drm/i915/execlists: Unify CSB access pointers
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (13 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 015/262] drm/i915/execlists: Process one CSB interrupt at a time Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 017/262] drm/i915/execlists: Process the CSB directly from inside the irq handler Chris Wilson
                   ` (40 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

Following the removal of the last workarounds, the only CSB mmio access
is for the old vGPU interface. The mmio registers presented by vGPU do
not require forcewake and can be treated as ordinary volatile memory,
i.e. they behave just like the HWSP access, only at a different
location. We can therefore reduce the CSB access inside the irq handler
to a set of read/write/buffer pointers, treat the various paths
identically, and stop worrying about forcewake.
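
Schematically, the setup reduces to the following, where mmio() and
hwsp[] are shorthand for the register/base-address arithmetic (a sketch
only; see logical_ring_init() below for the real code):

	execlists->csb_read = mmio(RING_CONTEXT_STATUS_PTR); /* always mmio */
	if (csb_force_mmio(i915)) {		/* old vGPU interface */
		execlists->csb_status = mmio(RING_CONTEXT_STATUS_BUF_LO);
		execlists->csb_write = execlists->csb_read;
	} else {				/* normal HWSP shadow */
		execlists->csb_status = &hwsp[I915_HWS_CSB_BUF0_INDEX];
		execlists->csb_write = &hwsp[intel_hws_csb_write_index(i915)];
	}

after which process_csb() can dereference the three pointers without
caring which backing store they point into.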

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_engine_cs.c  |  12 ---
 drivers/gpu/drm/i915/intel_lrc.c        | 110 ++++++++++--------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  23 +++--
 3 files changed, 62 insertions(+), 83 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index b1a1ca0758ce..ad6508e991f3 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -25,7 +25,6 @@
 #include <drm/drm_print.h>
 
 #include "i915_drv.h"
-#include "i915_vgpu.h"
 #include "intel_ringbuffer.h"
 #include "intel_lrc.h"
 
@@ -456,21 +455,10 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
 	i915_gem_batch_pool_init(&engine->batch_pool, engine);
 }
 
-static bool csb_force_mmio(struct drm_i915_private *i915)
-{
-	/* Older GVT emulation depends upon intercepting CSB mmio */
-	if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915))
-		return true;
-
-	return false;
-}
-
 static void intel_engine_init_execlist(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 
-	execlists->csb_use_mmio = csb_force_mmio(engine->i915);
-
 	execlists->port_mask = 1;
 	BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
 	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 0c7108249244..6701778c7dc1 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -137,6 +137,7 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
+#include "i915_vgpu.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
 #include "intel_workarounds.h"
@@ -963,13 +964,8 @@ static void process_csb(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
-	struct drm_i915_private *i915 = engine->i915;
-
-	/* The HWSP contains a (cacheable) mirror of the CSB */
-	const u32 *buf =
-		&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+	const u32 * const buf = execlists->csb_status;
 	unsigned int head, tail;
-	bool fw = false;
 
 	/*
 	 * We must never release our device wakeref until after we have
@@ -981,32 +977,16 @@ static void process_csb(struct intel_engine_cs *engine)
 	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 	smp_mb__after_atomic();
 
-	if (unlikely(execlists->csb_use_mmio)) {
-		intel_uncore_forcewake_get(i915, execlists->fw_domains);
-		fw = true;
-
-		buf = (u32 * __force)
-			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
-
-		head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
-		tail = GEN8_CSB_WRITE_PTR(head);
-		head = GEN8_CSB_READ_PTR(head);
-		execlists->csb_head = head;
-	} else {
-		const int write_idx =
-			intel_hws_csb_write_index(i915) -
-			I915_HWS_CSB_BUF0_INDEX;
+	/* Note that csb_write, csb_status may be either in HWSP or mmio */
+	head = execlists->csb_head;
+	tail = READ_ONCE(*execlists->csb_write);
+	GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
+	if (unlikely(head == tail))
+		return;
 
-		head = execlists->csb_head;
-		tail = READ_ONCE(buf[write_idx]);
-		rmb(); /* Hopefully paired with a wmb() in HW */
-	}
-	GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
-		  engine->name,
-		  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
-		  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
+	rmb(); /* Hopefully paired with a wmb() in HW */
 
-	while (head != tail) {
+	do {
 		struct i915_request *rq;
 		unsigned int status;
 		unsigned int count;
@@ -1119,16 +1099,11 @@ static void process_csb(struct intel_engine_cs *engine)
 		} else {
 			port_set(port, port_pack(rq, count));
 		}
-	}
-
-	if (head != execlists->csb_head) {
-		execlists->csb_head = head;
-		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
-		       i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
-	}
+	} while (head != tail);
 
-	if (unlikely(fw))
-		intel_uncore_forcewake_put(i915, execlists->fw_domains);
+	writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
+	       execlists->csb_read);
+	execlists->csb_head = head;
 }
 
 /*
@@ -2365,28 +2340,11 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
 static void
 logical_ring_setup(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-	enum forcewake_domains fw_domains;
-
 	intel_engine_setup_common(engine);
 
 	/* Intentionally left blank. */
 	engine->buffer = NULL;
 
-	fw_domains = intel_uncore_forcewake_for_reg(dev_priv,
-						    RING_ELSP(engine),
-						    FW_REG_WRITE);
-
-	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
-						     RING_CONTEXT_STATUS_PTR(engine),
-						     FW_REG_READ | FW_REG_WRITE);
-
-	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
-						     RING_CONTEXT_STATUS_BUF_BASE(engine),
-						     FW_REG_READ);
-
-	engine->execlists.fw_domains = fw_domains;
-
 	tasklet_init(&engine->execlists.tasklet,
 		     execlists_submission_tasklet, (unsigned long)engine);
 
@@ -2394,34 +2352,56 @@ logical_ring_setup(struct intel_engine_cs *engine)
 	logical_ring_default_irqs(engine);
 }
 
+static bool csb_force_mmio(struct drm_i915_private *i915)
+{
+	/* Older GVT emulation depends upon intercepting CSB mmio */
+	return intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915);
+}
+
 static int logical_ring_init(struct intel_engine_cs *engine)
 {
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_engine_execlists * const execlists = &engine->execlists;
 	int ret;
 
 	ret = intel_engine_init_common(engine);
 	if (ret)
 		goto error;
 
-	if (HAS_LOGICAL_RING_ELSQ(engine->i915)) {
-		engine->execlists.submit_reg = engine->i915->regs +
+	if (HAS_LOGICAL_RING_ELSQ(i915)) {
+		execlists->submit_reg = i915->regs +
 			i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(engine));
-		engine->execlists.ctrl_reg = engine->i915->regs +
+		execlists->ctrl_reg = i915->regs +
 			i915_mmio_reg_offset(RING_EXECLIST_CONTROL(engine));
 	} else {
-		engine->execlists.submit_reg = engine->i915->regs +
+		execlists->submit_reg = i915->regs +
 			i915_mmio_reg_offset(RING_ELSP(engine));
 	}
 
-	engine->execlists.preempt_complete_status = ~0u;
-	if (engine->i915->preempt_context) {
+	execlists->preempt_complete_status = ~0u;
+	if (i915->preempt_context) {
 		struct intel_context *ce =
-			to_intel_context(engine->i915->preempt_context, engine);
+			to_intel_context(i915->preempt_context, engine);
 
-		engine->execlists.preempt_complete_status =
+		execlists->preempt_complete_status =
 			upper_32_bits(ce->lrc_desc);
 	}
 
-	engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1;
+	execlists->csb_head = GEN8_CSB_ENTRIES - 1;
+	execlists->csb_read =
+		i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
+	if (csb_force_mmio(i915)) {
+		execlists->csb_status =
+			i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0));
+
+		execlists->csb_write = execlists->csb_read;
+	} else {
+		execlists->csb_status =
+			&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+
+		execlists->csb_write =
+			&engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
+	}
 
 	return 0;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 4ad9c5842575..0a96088f522f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -299,19 +299,30 @@ struct intel_engine_execlists {
 	struct rb_node *first;
 
 	/**
-	 * @fw_domains: forcewake domains for irq tasklet
+	 * @csb_head: context status buffer head
 	 */
-	unsigned int fw_domains;
+	unsigned int csb_head;
 
 	/**
-	 * @csb_head: context status buffer head
+	 * @csb_read: control register for Context Switch buffer
+	 *
+	 * Note this register is always in mmio.
 	 */
-	unsigned int csb_head;
+	u32 *csb_read;
 
 	/**
-	 * @csb_use_mmio: access csb through mmio, instead of hwsp
+	 * @csb_write: control register for Context Switch buffer
+	 *
+	 * Note this register may be either mmio or HWSP shadow.
+	 */
+	u32 *csb_write;
+
+	/**
+	 * @csb_status: status array for Context Switch buffer
+	 *
+	 * Note these register may be either mmio or HWSP shadow.
 	 */
-	bool csb_use_mmio;
+	u32 *csb_status;
 
 	/**
 	 * @preempt_complete_status: expected CSB upon completing preemption
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [PATCH 017/262] drm/i915/execlists: Process the CSB directly from inside the irq handler
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (14 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 016/262] drm/i915/execlists: Unify CSB access pointers Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 018/262] drm/i915/execlists: Direct submission (avoid tasklet/ksoftirqd) Chris Wilson
                   ` (39 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

As we now only use the cached HWSP access to read the CSB buffer and no
longer use any forcewaked mmio, processing the CSB is fast and can be
done directly from inside the CS interrupt handler.

We have to rearrange the irq handler slightly, as we wish to preserve
single-threaded access to the event buffer and ELSP: we perform the
processing with the master interrupt still disabled to avoid concurrent
CS interrupts on other CPUs. We also have to ensure that no one else is
updating execlists->port[] simultaneously (i.e. execlists_dequeue), and
so wrap ourselves inside the engine->timeline.lock (although we may
lift this restriction in the future by separating the two phases to
operate on distinct port[]). Similarly, we must flush the interrupt
handler and disable further processing during a reset.
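
The resulting shape of the interrupt path (a sketch only; the hunks
below are authoritative):

	gen8_cs_irq_handler()			/* hardirq, master irq off */
	  -> intel_engine_handle_execlists_irq()
	       if (reset_in_progress(execlists))
	               return;		/* flushed by reset_prepare */
	       spin_lock(&engine->timeline.lock);
	       /* drain CSB, retire completed ports */
	       spin_unlock(&engine->timeline.lock);

Note the plain spin_lock(): we are already in hardirq context with the
master interrupt disabled, so no further irq exclusion is required
here.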

References: https://bugs.freedesktop.org/show_bug.cgi?id=106373
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.h         |  5 +++
 drivers/gpu/drm/i915/i915_irq.c         | 14 +++-----
 drivers/gpu/drm/i915/intel_engine_cs.c  |  8 ++---
 drivers/gpu/drm/i915/intel_lrc.c        | 45 +++++++++++--------------
 drivers/gpu/drm/i915/intel_lrc.h        |  1 +
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 -
 6 files changed, 32 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 5bf24cfc218c..5b098acec566 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -79,4 +79,9 @@ static inline void __tasklet_disable_sync_once(struct tasklet_struct *t)
 		tasklet_unlock_wait(t);
 }
 
+static inline bool __tasklet_is_enabled(const struct tasklet_struct *t)
+{
+	return likely(!atomic_read(&t->count));
+}
+
 #endif /* __I915_GEM_H__ */
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 460878572515..3f139ff64385 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1462,13 +1462,11 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
 static void
 gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 {
-	struct intel_engine_execlists * const execlists = &engine->execlists;
 	bool tasklet = false;
 
 	if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
-		if (READ_ONCE(engine->execlists.active))
-			tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST,
-						    &engine->irq_posted);
+		intel_engine_handle_execlists_irq(engine);
+		tasklet = true;
 	}
 
 	if (iir & GT_RENDER_USER_INTERRUPT) {
@@ -1479,7 +1477,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 	}
 
 	if (tasklet)
-		tasklet_hi_schedule(&execlists->tasklet);
+		tasklet_hi_schedule(&engine->execlists.tasklet);
 }
 
 static void gen8_gt_irq_ack(struct drm_i915_private *i915,
@@ -2165,6 +2163,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
 		I915_WRITE(VLV_IER, 0);
 
 		gen8_gt_irq_ack(dev_priv, master_ctl, gt_iir);
+		gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
 
 		if (iir & I915_DISPLAY_PORT_INTERRUPT)
 			hotplug_status = i9xx_hpd_irq_ack(dev_priv);
@@ -2189,8 +2188,6 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
 		I915_WRITE(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL);
 		POSTING_READ(GEN8_MASTER_IRQ);
 
-		gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
-
 		if (hotplug_status)
 			i9xx_hpd_irq_handler(dev_priv, hotplug_status);
 
@@ -2761,6 +2758,7 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
 
 	/* Find, clear, then process each source of interrupt */
 	gen8_gt_irq_ack(dev_priv, master_ctl, gt_iir);
+	gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
 
 	/* IRQs are synced during runtime_suspend, we don't require a wakeref */
 	if (master_ctl & ~GEN8_GT_IRQS) {
@@ -2771,8 +2769,6 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
 
 	I915_WRITE_FW(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL);
 
-	gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
-
 	return IRQ_HANDLED;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index ad6508e991f3..7165a3d21443 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1297,12 +1297,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
 		ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
 		read = GEN8_CSB_READ_PTR(ptr);
 		write = GEN8_CSB_WRITE_PTR(ptr);
-		drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s, tasklet queued? %s (%s)\n",
+		drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], tasklet queued? %s (%s)\n",
 			   read, execlists->csb_head,
 			   write,
 			   intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
-			   yesno(test_bit(ENGINE_IRQ_EXECLIST,
-					  &engine->irq_posted)),
 			   yesno(test_bit(TASKLET_STATE_SCHED,
 					  &engine->execlists.tasklet.state)),
 			   enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
@@ -1483,11 +1481,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	spin_unlock(&b->rb_lock);
 	local_irq_restore(flags);
 
-	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n",
+	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s)\n",
 		   engine->irq_posted,
 		   yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
-				  &engine->irq_posted)),
-		   yesno(test_bit(ENGINE_IRQ_EXECLIST,
 				  &engine->irq_posted)));
 
 	drm_printf(m, "HWSP:\n");
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6701778c7dc1..954eb3a71051 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -567,8 +567,10 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
 {
 	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
 
+	__unwind_incomplete_requests(container_of(execlists,
+						  typeof(struct intel_engine_cs),
+						  execlists));
 	execlists_cancel_port_requests(execlists);
-	execlists_unwind_incomplete_requests(execlists);
 
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
@@ -880,14 +882,6 @@ static void reset_irq(struct intel_engine_cs *engine)
 	synchronize_hardirq(engine->i915->drm.irq);
 
 	clear_gtiir(engine);
-
-	/*
-	 * The port is checked prior to scheduling a tasklet, but
-	 * just in case we have suspended the tasklet to do the
-	 * wedging make sure that when it wakes, it decides there
-	 * is no work to do by clearing the irq_posted bit.
-	 */
-	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 }
 
 static void execlists_cancel_requests(struct intel_engine_cs *engine)
@@ -960,7 +954,13 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	local_irq_restore(flags);
 }
 
-static void process_csb(struct intel_engine_cs *engine)
+static inline bool
+reset_in_progress(const struct intel_engine_execlists *execlists)
+{
+	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
+}
+
+void intel_engine_handle_execlists_irq(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
@@ -973,9 +973,8 @@ static void process_csb(struct intel_engine_cs *engine)
 	 */
 	GEM_BUG_ON(!engine->i915->gt.awake);
 
-	/* Clear before reading to catch new interrupts */
-	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-	smp_mb__after_atomic();
+	if (reset_in_progress(execlists))
+		return;
 
 	/* Note that csb_write, csb_status may be either in HWSP or mmio */
 	head = execlists->csb_head;
@@ -986,6 +985,8 @@ static void process_csb(struct intel_engine_cs *engine)
 
 	rmb(); /* Hopefully paired with a wmb() in HW */
 
+	spin_lock(&engine->timeline.lock);
+
 	do {
 		struct i915_request *rq;
 		unsigned int status;
@@ -1104,6 +1105,8 @@ static void process_csb(struct intel_engine_cs *engine)
 	writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
 	       execlists->csb_read);
 	execlists->csb_head = head;
+
+	spin_unlock(&engine->timeline.lock);
 }
 
 /*
@@ -1114,19 +1117,10 @@ static void execlists_submission_tasklet(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 
-	GEM_TRACE("%s awake?=%d, active=%x, irq-posted?=%d\n",
+	GEM_TRACE("%s awake?=%d, active=%x\n",
 		  engine->name,
 		  engine->i915->gt.awake,
-		  engine->execlists.active,
-		  test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
-
-	/*
-	 * Prefer doing test_and_clear_bit() as a two stage operation to avoid
-	 * imposing the cost of a locked atomic transaction when submitting a
-	 * new request (outside of the context-switch interrupt).
-	 */
-	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
-		process_csb(engine);
+		  engine->execlists.active);
 
 	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
 		execlists_dequeue(engine);
@@ -1836,8 +1830,7 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
 	 * and avoid blaming an innocent request if the stall was due to the
 	 * preemption itself.
 	 */
-	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
-		process_csb(engine);
+	synchronize_hardirq(engine->i915->drm.irq);
 
 	/*
 	 * The last active request can then be no later than the last request
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 1593194e930c..772ca3993e51 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -102,6 +102,7 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine);
 struct drm_i915_private;
 struct i915_gem_context;
 
+void intel_engine_handle_execlists_irq(struct intel_engine_cs *engine);
 void intel_lr_context_resume(struct drm_i915_private *dev_priv);
 
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 0a96088f522f..42a136810e15 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -358,7 +358,6 @@ struct intel_engine_cs {
 	atomic_t irq_count;
 	unsigned long irq_posted;
 #define ENGINE_IRQ_BREADCRUMB 0
-#define ENGINE_IRQ_EXECLIST 1
 
 	/* Rather than have every client wait upon all user interrupts,
 	 * with the herd waking after every interrupt and each doing the
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [PATCH 018/262] drm/i915/execlists: Direct submission (avoid tasklet/ksoftirqd)
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (15 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 017/262] drm/i915/execlists: Process the CSB directly from inside the irq handler Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 019/262] drm/i915: Combine gt irq ack/handlers Chris Wilson
                   ` (38 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

Back in commit 27af5eea54d1 ("drm/i915: Move execlists irq handler to a
bottom half"), we came to the conclusion that running our CSB processing
and ELSP submission from inside the irq handler was a bad idea. A really
bad idea as we could impose nearly 1s latency on other users of the
system, on average! Deferring our work to a tasklet allowed us to do the
processing with irqs enabled, reducing the impact to an average of about
50us.

We have since eradicated the use of forcewaked mmio from inside the CSB
processing and ELSP submission, bringing the impact down to around 5us
(on Kabylake); an order of magnitude better than our measurements 2
years ago on Broadwell and only about 2x worse on average than the
gem_syslatency on an unladen system.

Comparing the impact on the maximum latency observed over a 120s interval,
repeated several times (using gem_syslatency, similar to RT's cyclictest)
while the system is fully laden with i915 nops, we see that direct
submission definitely worsens the response but not to the same outlandish
degree as before.

x Unladen baseline
+ Using tasklet
* Direct submission

+------------------------------------------------------------------------+
|xx x          ++    +++ +                           *  * *   ** *** *  *|
||A|              |__AM__|                               |_____A_M___|   |
+------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x  10             5            18            10           9.3     3.6530049
+  10            72           120           108         102.9     15.758243
*  10           255           348           316         305.7      28.74814

And with a background load

+------------------------------------------------------------------------+
|x                          +           *              *                 |
|x                    +     + + + +  + +* * ** ++      * *   *          *|
|A                        |_______A_____|__|_______A___M______|          |
+------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x  10             4            11             9           8.5     2.1730675
+  10           633          1388           972           993     243.33744
*  10          1152          2109          1608        1488.3     314.80719
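
The heart of the change is in submit_queue() (a compressed sketch of
the hunk below):

	if (prio > execlists->queue_priority) {
		__update_queue(engine, prio);
		if (!intel_engine_uses_guc(engine))
			execlists_dequeue(engine);	/* direct submission */
		else
			tasklet_hi_schedule(&execlists->tasklet);
	}

i.e. for the execlists backend we write the ELSP from the submitting
context (under engine->timeline.lock) rather than bouncing through the
tasklet, while the guc backend keeps the tasklet.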

References: 27af5eea54d1 ("drm/i915: Move execlists irq handler to a bottom half")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c  | 11 ++------
 drivers/gpu/drm/i915/intel_lrc.c | 44 +++++++++++++++++---------------
 2 files changed, 26 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 3f139ff64385..8b61ebf5cb4a 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1462,22 +1462,15 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
 static void
 gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 {
-	bool tasklet = false;
-
-	if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
+	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
 		intel_engine_handle_execlists_irq(engine);
-		tasklet = true;
-	}
 
 	if (iir & GT_RENDER_USER_INTERRUPT) {
 		if (intel_engine_uses_guc(engine))
-			tasklet = true;
+			tasklet_hi_schedule(&engine->execlists.tasklet);
 
 		notify_ring(engine);
 	}
-
-	if (tasklet)
-		tasklet_hi_schedule(&engine->execlists.tasklet);
 }
 
 static void gen8_gt_irq_ack(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 954eb3a71051..37839d89e03a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -575,7 +575,7 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
-static void __execlists_dequeue(struct intel_engine_cs *engine)
+static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
@@ -587,7 +587,11 @@ static void __execlists_dequeue(struct intel_engine_cs *engine)
 
 	lockdep_assert_held(&engine->timeline.lock);
 
-	/* Hardware submission is through 2 ports. Conceptually each port
+	if (execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
+		return;
+
+	/*
+	 * Hardware submission is through 2 ports. Conceptually each port
 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
 	 * static for a context, and unique to each, so we only execute
 	 * requests belonging to a single context from each ring. RING_HEAD
@@ -777,15 +781,6 @@ static void __execlists_dequeue(struct intel_engine_cs *engine)
 		   !port_isset(engine->execlists.port));
 }
 
-static void execlists_dequeue(struct intel_engine_cs *engine)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&engine->timeline.lock, flags);
-	__execlists_dequeue(engine);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
-}
-
 void
 execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
 {
@@ -1106,6 +1101,7 @@ void intel_engine_handle_execlists_irq(struct intel_engine_cs *engine)
 	       execlists->csb_read);
 	execlists->csb_head = head;
 
+	execlists_dequeue(engine);
 	spin_unlock(&engine->timeline.lock);
 }
 
@@ -1122,8 +1118,9 @@ static void execlists_submission_tasklet(unsigned long data)
 		  engine->i915->gt.awake,
 		  engine->execlists.active);
 
-	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
-		execlists_dequeue(engine);
+	spin_lock_irq(&engine->timeline.lock);
+	execlists_dequeue(engine);
+	spin_unlock_irq(&engine->timeline.lock);
 }
 
 static void queue_request(struct intel_engine_cs *engine,
@@ -1134,16 +1131,20 @@ static void queue_request(struct intel_engine_cs *engine,
 		      &lookup_priolist(engine, prio)->requests);
 }
 
-static void __submit_queue(struct intel_engine_cs *engine, int prio)
+static void __update_queue(struct intel_engine_cs *engine, int prio)
 {
 	engine->execlists.queue_priority = prio;
-	tasklet_hi_schedule(&engine->execlists.tasklet);
 }
 
 static void submit_queue(struct intel_engine_cs *engine, int prio)
 {
-	if (prio > engine->execlists.queue_priority)
-		__submit_queue(engine, prio);
+	if (prio > engine->execlists.queue_priority) {
+		__update_queue(engine, prio);
+		if (!intel_engine_uses_guc(engine))
+			execlists_dequeue(engine);
+		else
+			tasklet_hi_schedule(&engine->execlists.tasklet);
+	}
 }
 
 static void execlists_submit_request(struct i915_request *request)
@@ -1155,11 +1156,12 @@ static void execlists_submit_request(struct i915_request *request)
 	spin_lock_irqsave(&engine->timeline.lock, flags);
 
 	queue_request(engine, &request->sched, rq_prio(request));
-	submit_queue(engine, rq_prio(request));
 
 	GEM_BUG_ON(!engine->execlists.first);
 	GEM_BUG_ON(list_empty(&request->sched.link));
 
+	submit_queue(engine, rq_prio(request));
+
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
@@ -1286,8 +1288,10 @@ static void execlists_schedule(struct i915_request *request,
 		}
 
 		if (prio > engine->execlists.queue_priority &&
-		    i915_sw_fence_done(&sched_to_request(node)->submit))
-			__submit_queue(engine, prio);
+		    i915_sw_fence_done(&sched_to_request(node)->submit)) {
+			__update_queue(engine, prio);
+			tasklet_hi_schedule(&engine->execlists.tasklet);
+		}
 	}
 
 	spin_unlock_irq(&engine->timeline.lock);
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [PATCH 019/262] drm/i915: Combine gt irq ack/handlers
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (16 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 018/262] drm/i915/execlists: Direct submission (avoid tasklet/ksoftirqd) Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 020/262] drm/i915/execlists: Force preemption via reset on timeout Chris Wilson
                   ` (37 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

Having abandoned the split approach of acking then handling the GT irqs
(sacrificed to use the interrupt handler to guarantee exclusive access
to the irq data), pull the two routines into one to let the compiler
eliminate the redundant storage.
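
Schematically, each GT bank now follows the fused pattern (a sketch of
one bank; the full conversion is below):

	if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
		iir = raw_reg_read(regs, GEN8_GT_IIR(0));
		if (likely(iir)) {
			raw_reg_write(regs, GEN8_GT_IIR(0), iir); /* ack */
			gen8_cs_irq_handler(i915->engine[RCS],
					    iir >> GEN8_RCS_IRQ_SHIFT);
			gen8_cs_irq_handler(i915->engine[BCS],
					    iir >> GEN8_BCS_IRQ_SHIFT);
		}
	}

so the iir value stays live across the ack and the dispatch, instead of
being staged in the gt_iir[4] array.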

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_irq.c | 87 ++++++++++++++-------------------
 1 file changed, 37 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 8b61ebf5cb4a..e6c4007ecfd4 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1473,10 +1473,10 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 	}
 }
 
-static void gen8_gt_irq_ack(struct drm_i915_private *i915,
-			    u32 master_ctl, u32 gt_iir[4])
+static void gen8_gt_irq_handler(struct drm_i915_private *i915, u32 master_ctl)
 {
 	void __iomem * const regs = i915->regs;
+	u32 iir;
 
 #define GEN8_GT_IRQS (GEN8_GT_RCS_IRQ | \
 		      GEN8_GT_BCS_IRQ | \
@@ -1487,58 +1487,49 @@ static void gen8_gt_irq_ack(struct drm_i915_private *i915,
 		      GEN8_GT_GUC_IRQ)
 
 	if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
-		gt_iir[0] = raw_reg_read(regs, GEN8_GT_IIR(0));
-		if (likely(gt_iir[0]))
-			raw_reg_write(regs, GEN8_GT_IIR(0), gt_iir[0]);
+		iir = raw_reg_read(regs, GEN8_GT_IIR(0));
+		if (likely(iir)) {
+			raw_reg_write(regs, GEN8_GT_IIR(0), iir);
+
+			gen8_cs_irq_handler(i915->engine[RCS],
+					    iir >> GEN8_RCS_IRQ_SHIFT);
+			gen8_cs_irq_handler(i915->engine[BCS],
+					    iir >> GEN8_BCS_IRQ_SHIFT);
+		}
 	}
 
 	if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) {
-		gt_iir[1] = raw_reg_read(regs, GEN8_GT_IIR(1));
-		if (likely(gt_iir[1]))
-			raw_reg_write(regs, GEN8_GT_IIR(1), gt_iir[1]);
-	}
-
-	if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) {
-		gt_iir[2] = raw_reg_read(regs, GEN8_GT_IIR(2));
-		if (likely(gt_iir[2] & (i915->pm_rps_events |
-					i915->pm_guc_events)))
-			raw_reg_write(regs, GEN8_GT_IIR(2),
-				      gt_iir[2] & (i915->pm_rps_events |
-						   i915->pm_guc_events));
+		iir = raw_reg_read(regs, GEN8_GT_IIR(1));
+		if (likely(iir)) {
+			raw_reg_write(regs, GEN8_GT_IIR(1), iir);
+
+			gen8_cs_irq_handler(i915->engine[VCS],
+					    iir >> GEN8_VCS1_IRQ_SHIFT);
+			gen8_cs_irq_handler(i915->engine[VCS2],
+					    iir >> GEN8_VCS2_IRQ_SHIFT);
+		}
 	}
 
 	if (master_ctl & GEN8_GT_VECS_IRQ) {
-		gt_iir[3] = raw_reg_read(regs, GEN8_GT_IIR(3));
-		if (likely(gt_iir[3]))
-			raw_reg_write(regs, GEN8_GT_IIR(3), gt_iir[3]);
-	}
-}
+		iir = raw_reg_read(regs, GEN8_GT_IIR(3));
+		if (likely(iir)) {
+			raw_reg_write(regs, GEN8_GT_IIR(3), iir);
 
-static void gen8_gt_irq_handler(struct drm_i915_private *i915,
-				u32 master_ctl, u32 gt_iir[4])
-{
-	if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
-		gen8_cs_irq_handler(i915->engine[RCS],
-				    gt_iir[0] >> GEN8_RCS_IRQ_SHIFT);
-		gen8_cs_irq_handler(i915->engine[BCS],
-				    gt_iir[0] >> GEN8_BCS_IRQ_SHIFT);
-	}
-
-	if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) {
-		gen8_cs_irq_handler(i915->engine[VCS],
-				    gt_iir[1] >> GEN8_VCS1_IRQ_SHIFT);
-		gen8_cs_irq_handler(i915->engine[VCS2],
-				    gt_iir[1] >> GEN8_VCS2_IRQ_SHIFT);
-	}
-
-	if (master_ctl & GEN8_GT_VECS_IRQ) {
-		gen8_cs_irq_handler(i915->engine[VECS],
-				    gt_iir[3] >> GEN8_VECS_IRQ_SHIFT);
+			gen8_cs_irq_handler(i915->engine[VECS],
+					    iir >> GEN8_VECS_IRQ_SHIFT);
+		}
 	}
 
 	if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) {
-		gen6_rps_irq_handler(i915, gt_iir[2]);
-		gen9_guc_irq_handler(i915, gt_iir[2]);
+		iir = raw_reg_read(regs, GEN8_GT_IIR(2));
+		if (likely(iir & (i915->pm_rps_events | i915->pm_guc_events))) {
+			raw_reg_write(regs, GEN8_GT_IIR(2),
+				      iir & (i915->pm_rps_events |
+					     i915->pm_guc_events));
+
+			gen6_rps_irq_handler(i915, iir);
+			gen9_guc_irq_handler(i915, iir);
+		}
 	}
 }
 
@@ -2127,7 +2118,6 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
 		u32 master_ctl, iir;
 		u32 pipe_stats[I915_MAX_PIPES] = {};
 		u32 hotplug_status = 0;
-		u32 gt_iir[4];
 		u32 ier = 0;
 
 		master_ctl = I915_READ(GEN8_MASTER_IRQ) & ~GEN8_MASTER_IRQ_CONTROL;
@@ -2155,8 +2145,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
 		ier = I915_READ(VLV_IER);
 		I915_WRITE(VLV_IER, 0);
 
-		gen8_gt_irq_ack(dev_priv, master_ctl, gt_iir);
-		gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
+		gen8_gt_irq_handler(dev_priv, master_ctl);
 
 		if (iir & I915_DISPLAY_PORT_INTERRUPT)
 			hotplug_status = i9xx_hpd_irq_ack(dev_priv);
@@ -2737,7 +2726,6 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
 {
 	struct drm_i915_private *dev_priv = to_i915(arg);
 	u32 master_ctl;
-	u32 gt_iir[4];
 
 	if (!intel_irqs_enabled(dev_priv))
 		return IRQ_NONE;
@@ -2750,8 +2738,7 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
 	I915_WRITE_FW(GEN8_MASTER_IRQ, 0);
 
 	/* Find, clear, then process each source of interrupt */
-	gen8_gt_irq_ack(dev_priv, master_ctl, gt_iir);
-	gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
+	gen8_gt_irq_handler(dev_priv, master_ctl);
 
 	/* IRQs are synced during runtime_suspend, we don't require a wakeref */
 	if (master_ctl & ~GEN8_GT_IRQS) {
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [PATCH 020/262] drm/i915/execlists: Force preemption via reset on timeout
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (17 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 019/262] drm/i915: Combine gt irq ack/handlers Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 021/262] drm/i915/execlists: Try preempt-reset from hardirq timer context Chris Wilson
                   ` (36 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

Install a timer when trying to preempt on behalf of an important
context, so that if the active context does not honour the preemption
request within the desired timeout, we reset the GPU to allow the
important context to run.

v2: Install the timer on scheduling the preempt request; long before we
even try to inject preemption into the ELSP, as the tasklet/injection
may itself be blocked.
v3: Update the guc to handle the preemption/tasklet timer.
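
The mechanism, in brief (a sketch of the flow implemented below):

	submit_queue(engine, prio, timeout)
	  -> __update_queue():
	       if (timeout &&
		   __execlists_need_preempt(prio, queue_priority))
		       hrtimer_start(&execlists->preempt_timer,
				     ns_to_ktime(timeout),
				     HRTIMER_MODE_REL);

	preempt_timeout()	/* hrtimer callback, hardirq */
	  -> queue_work(system_highpri_wq, &execlists->preempt_reset);

	preempt_reset()		/* process context */
	  -> i915_handle_error() if the preemption has still not completed

The PREEMPT_TIMEOUT flag is cleared again in dequeue once the
preemption completes, which disarms a pending timer's reset.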

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_engine_cs.c      |  4 +
 drivers/gpu/drm/i915/intel_guc_submission.c |  1 +
 drivers/gpu/drm/i915/intel_lrc.c            | 87 +++++++++++++++++++--
 drivers/gpu/drm/i915/intel_ringbuffer.h     |  8 +-
 drivers/gpu/drm/i915/selftests/intel_lrc.c  | 65 +++++++++++++++
 5 files changed, 157 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 7165a3d21443..3ec79bd06b83 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -466,6 +466,9 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
 	execlists->queue_priority = INT_MIN;
 	execlists->queue = RB_ROOT;
 	execlists->first = NULL;
+
+	hrtimer_init(&execlists->preempt_timer,
+		     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 }
 
 /**
@@ -1047,6 +1050,7 @@ void intel_engines_park(struct drm_i915_private *i915)
 
 	for_each_engine(engine, i915, id) {
 		/* Flush the residual irq tasklets first. */
+		hrtimer_cancel(&engine->execlists.preempt_timer);
 		intel_engine_disarm_breadcrumbs(engine);
 		tasklet_kill(&engine->execlists.tasklet);
 
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index d9fcd5db4ea4..48c4ae2be8f1 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -751,6 +751,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 			kmem_cache_free(engine->i915->priorities, p);
 	}
 done:
+	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT_TIMEOUT);
 	execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
 	execlists->first = rb;
 	if (submit)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 37839d89e03a..9b015ec236a3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -563,6 +563,52 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
 	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
+static enum hrtimer_restart preempt_timeout(struct hrtimer *hrtimer)
+{
+	struct intel_engine_execlists *execlists =
+		container_of(hrtimer, typeof(*execlists), preempt_timer);
+
+	GEM_TRACE("%s active=%x\n",
+		  container_of(execlists,
+			       struct intel_engine_cs,
+			       execlists)->name,
+		  execlists->active);
+
+	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT_TIMEOUT))
+		return HRTIMER_NORESTART;
+
+	if (GEM_SHOW_DEBUG()) {
+		struct intel_engine_cs *engine =
+			container_of(execlists, typeof(*engine), execlists);
+		struct drm_printer p = drm_debug_printer(__func__);
+
+		intel_engine_dump(engine, &p, "%s\n", engine->name);
+	}
+
+	queue_work(system_highpri_wq, &execlists->preempt_reset);
+
+	return HRTIMER_NORESTART;
+}
+
+static void preempt_reset(struct work_struct *work)
+{
+	struct intel_engine_execlists *execlists =
+		container_of(work, typeof(*execlists), preempt_reset);
+	struct intel_engine_cs *engine =
+		  container_of(execlists, struct intel_engine_cs, execlists);
+
+	GEM_TRACE("%s\n", engine->name);
+
+	tasklet_disable(&execlists->tasklet);
+
+	execlists->tasklet.func(execlists->tasklet.data);
+	if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT_TIMEOUT))
+		i915_handle_error(engine->i915, BIT(engine->id), 0,
+				  "preemption time out on %s", engine->name);
+
+	tasklet_enable(&execlists->tasklet);
+}
+
 static void complete_preempt_context(struct intel_engine_execlists *execlists)
 {
 	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
@@ -663,7 +709,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		 * priorities of the ports haven't been switch.
 		 */
 		if (port_count(&port[1]))
-			return;
+			goto clear_preempt_timeout;
 
 		/*
 		 * WaIdleLiteRestore:bdw,skl
@@ -771,6 +817,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	/* We must always keep the beast fed if we have work piled up */
 	GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
 
+clear_preempt_timeout:
+	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT_TIMEOUT);
+
 	/* Re-evaluate the executing context setup after each preemptive kick */
 	if (last)
 		execlists_user_begin(execlists, execlists->port);
@@ -1131,15 +1180,38 @@ static void queue_request(struct intel_engine_cs *engine,
 		      &lookup_priolist(engine, prio)->requests);
 }
 
-static void __update_queue(struct intel_engine_cs *engine, int prio)
+static void __update_queue(struct intel_engine_cs *engine,
+			   int prio, unsigned int timeout)
 {
+	struct intel_engine_execlists * const execlists = &engine->execlists;
+
+	GEM_TRACE("%s prio=%d (previous=%d)\n",
+		  engine->name, prio, execlists->queue_priority);
+
+	if (unlikely(execlists_is_active(execlists,
+					 EXECLISTS_ACTIVE_PREEMPT_TIMEOUT)))
+		hrtimer_cancel(&execlists->preempt_timer);
+
+	/* Set a timer to force preemption vs hostile userspace */
+	if (timeout &&
+	    __execlists_need_preempt(prio, execlists->queue_priority)) {
+		GEM_TRACE("%s preempt timeout=%uns\n", engine->name, timeout);
+
+		execlists_set_active(execlists,
+				     EXECLISTS_ACTIVE_PREEMPT_TIMEOUT);
+		hrtimer_start(&execlists->preempt_timer,
+			      ns_to_ktime(timeout),
+			      HRTIMER_MODE_REL);
+	}
+
 	engine->execlists.queue_priority = prio;
 }
 
-static void submit_queue(struct intel_engine_cs *engine, int prio)
+static void submit_queue(struct intel_engine_cs *engine,
+			 int prio, unsigned int timeout)
 {
 	if (prio > engine->execlists.queue_priority) {
-		__update_queue(engine, prio);
+		__update_queue(engine, prio, timeout);
 		if (!intel_engine_uses_guc(engine))
 			execlists_dequeue(engine);
 		else
@@ -1160,7 +1232,7 @@ static void execlists_submit_request(struct i915_request *request)
 	GEM_BUG_ON(!engine->execlists.first);
 	GEM_BUG_ON(list_empty(&request->sched.link));
 
-	submit_queue(engine, rq_prio(request));
+	submit_queue(engine, rq_prio(request), 0);
 
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
@@ -1289,7 +1361,7 @@ static void execlists_schedule(struct i915_request *request,
 
 		if (prio > engine->execlists.queue_priority &&
 		    i915_sw_fence_done(&sched_to_request(node)->submit)) {
-			__update_queue(engine, prio);
+			__update_queue(engine, prio, 0);
 			tasklet_hi_schedule(&engine->execlists.tasklet);
 		}
 	}
@@ -2345,6 +2417,9 @@ logical_ring_setup(struct intel_engine_cs *engine)
 	tasklet_init(&engine->execlists.tasklet,
 		     execlists_submission_tasklet, (unsigned long)engine);
 
+	INIT_WORK(&engine->execlists.preempt_reset, preempt_reset);
+	engine->execlists.preempt_timer.function = preempt_timeout;
+
 	logical_ring_default_vfuncs(engine);
 	logical_ring_default_irqs(engine);
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 42a136810e15..495cc2628fa9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -270,8 +270,9 @@ struct intel_engine_execlists {
 	 */
 	unsigned int active;
 #define EXECLISTS_ACTIVE_USER 0
-#define EXECLISTS_ACTIVE_PREEMPT 1
-#define EXECLISTS_ACTIVE_HWACK 2
+#define EXECLISTS_ACTIVE_HWACK 1
+#define EXECLISTS_ACTIVE_PREEMPT 2
+#define EXECLISTS_ACTIVE_PREEMPT_TIMEOUT 3
 
 	/**
 	 * @port_mask: number of execlist ports - 1
@@ -328,6 +329,9 @@ struct intel_engine_execlists {
 	 * @preempt_complete_status: expected CSB upon completing preemption
 	 */
 	u32 preempt_complete_status;
+
+	struct hrtimer preempt_timer;
+	struct work_struct preempt_reset;
 };
 
 #define INTEL_ENGINE_CS_MAX_NAME 8
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 68cb9126b3e1..7409c582b478 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -444,12 +444,77 @@ static int live_late_preempt(void *arg)
 	goto err_ctx_lo;
 }
 
+static void mark_preemption_hang(struct intel_engine_execlists *execlists)
+{
+	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
+	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT_TIMEOUT);
+}
+
+static int live_preempt_timeout(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	struct i915_gem_context *ctx;
+	enum intel_engine_id id;
+	struct spinner spin;
+	int err = -ENOMEM;
+
+	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	if (spinner_init(&spin, i915))
+		goto err_unlock;
+
+	ctx = kernel_context(i915);
+	if (!ctx)
+		goto err_spin;
+
+	for_each_engine(engine, i915, id) {
+		struct i915_request *rq;
+
+		rq = spinner_create_request(&spin, ctx, engine, MI_NOOP);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto err_ctx;
+		}
+
+		i915_request_add(rq);
+		if (!wait_for_spinner(&spin, rq)) {
+			i915_gem_set_wedged(i915);
+			err = -EIO;
+			goto err_ctx;
+		}
+
+		GEM_TRACE("%s triggering reset\n", engine->name);
+		mark_preemption_hang(&engine->execlists);
+		preempt_reset(&engine->execlists.preempt_reset);
+
+		if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+			err = -EIO;
+			goto err_ctx;
+		}
+	}
+
+	err = 0;
+err_ctx:
+	kernel_context_close(ctx);
+err_spin:
+	spinner_fini(&spin);
+err_unlock:
+	igt_flush_test(i915, I915_WAIT_LOCKED);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(live_sanitycheck),
 		SUBTEST(live_preempt),
 		SUBTEST(live_late_preempt),
+		SUBTEST(live_preempt_timeout),
 	};
 
 	if (!HAS_EXECLISTS(i915))
-- 
2.17.0


* [PATCH 021/262] drm/i915/execlists: Try preempt-reset from hardirq timer context
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (18 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 020/262] drm/i915/execlists: Force preemption via reset on timeout Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 022/262] drm/i915/preemption: Select timeout when scheduling Chris Wilson
                   ` (35 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

When circumstances allow, try resetting the engine directly from the
preemption timeout handler. As this runs in hardirq timer context, we
have to be careful both not to sleep and not to spin on anything we may
be interrupting (e.g. the submission tasklet).
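
A minimal sketch of that constraint, with hypothetical names (struct
engine, reset_work and do_engine_reset() stand in for the real i915
structures): the timer callback may only *try* the reset, and must
defer to a worker whenever the tasklet it interrupted is already held:

#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/workqueue.h>

static enum hrtimer_restart timeout_cb(struct hrtimer *hrtimer)
{
	struct engine *e = container_of(hrtimer, typeof(*e), timer);

	/*
	 * We may have interrupted the submission tasklet itself, so we
	 * must not spin waiting for it: tasklet_trylock() fails instead
	 * of spinning, and we then punt the reset to process context.
	 */
	if (!tasklet_trylock(&e->tasklet)) {
		queue_work(system_highpri_wq, &e->reset_work);
		return HRTIMER_NORESTART;
	}

	do_engine_reset(e); /* hypothetical; must not sleep here */
	tasklet_unlock(&e->tasklet);

	return HRTIMER_NORESTART;
}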

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Michał Winiarski <michal.winiarski@intel.com>
CC: Michel Thierry <michel.thierry@intel.com>
Cc: Jeff McGee <jeff.mcgee@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c           |  34 ++++++-
 drivers/gpu/drm/i915/selftests/intel_lrc.c | 111 +++++++++++++++++++++
 2 files changed, 144 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 9b015ec236a3..b6f369d06bb8 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -563,6 +563,37 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
 	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
+static int try_preempt_reset(struct intel_engine_execlists *execlists)
+{
+	struct tasklet_struct * const t = &execlists->tasklet;
+	int err = -EBUSY;
+
+	if (tasklet_trylock(t)) {
+		struct intel_engine_cs *engine =
+			container_of(execlists, typeof(*engine), execlists);
+		const unsigned int bit = I915_RESET_ENGINE + engine->id;
+		unsigned long *lock = &engine->i915->gpu_error.flags;
+
+		t->func(t->data);
+		if (!execlists_is_active(execlists,
+					 EXECLISTS_ACTIVE_PREEMPT_TIMEOUT)) {
+			/* Nothing to do; the tasklet was just delayed. */
+			err = 0;
+		} else if (!test_and_set_bit(bit, lock)) {
+			tasklet_disable_nosync(t);
+			err = i915_reset_engine(engine, "preemption time out");
+			tasklet_enable(t);
+
+			clear_bit(bit, lock);
+			wake_up_bit(lock, bit);
+		}
+
+		tasklet_unlock(t);
+	}
+
+	return err;
+}
+
 static enum hrtimer_restart preempt_timeout(struct hrtimer *hrtimer)
 {
 	struct intel_engine_execlists *execlists =
@@ -585,7 +616,8 @@ static enum hrtimer_restart preempt_timeout(struct hrtimer *hrtimer)
 		intel_engine_dump(engine, &p, "%s\n", engine->name);
 	}
 
-	queue_work(system_highpri_wq, &execlists->preempt_reset);
+	if (try_preempt_reset(execlists))
+		queue_work(system_highpri_wq, &execlists->preempt_reset);
 
 	return HRTIMER_NORESTART;
 }
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 7409c582b478..40c1f6951444 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -508,6 +508,116 @@ static int live_preempt_timeout(void *arg)
 	return err;
 }
 
+static void __softirq_begin(void)
+{
+	local_bh_disable();
+}
+
+static void __softirq_end(void)
+{
+	local_bh_enable();
+}
+
+static void __hardirq_begin(void)
+{
+	local_irq_disable();
+}
+
+static void __hardirq_end(void)
+{
+	local_irq_enable();
+}
+
+static int live_preempt_reset(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	struct i915_gem_context *ctx;
+	enum intel_engine_id id;
+	struct spinner spin;
+	int err = -ENOMEM;
+
+	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	if (spinner_init(&spin, i915))
+		goto err_unlock;
+
+	ctx = kernel_context(i915);
+	if (!ctx)
+		goto err_spin;
+
+	for_each_engine(engine, i915, id) {
+		static const struct {
+			const char *name;
+			void (*critical_section_begin)(void);
+			void (*critical_section_end)(void);
+		} phases[] = {
+			{ "softirq", __softirq_begin, __softirq_end },
+			{ "hardirq", __hardirq_begin, __hardirq_end },
+			{ }
+		};
+		struct tasklet_struct *t = &engine->execlists.tasklet;
+		const typeof(*phases) *p;
+
+		for (p = phases; p->name; p++) {
+			struct i915_request *rq;
+
+			rq = spinner_create_request(&spin, ctx, engine,
+						    MI_NOOP);
+			if (IS_ERR(rq)) {
+				err = PTR_ERR(rq);
+				goto err_ctx;
+			}
+
+			i915_request_add(rq);
+			if (!wait_for_spinner(&spin, rq)) {
+				i915_gem_set_wedged(i915);
+				err = -EIO;
+				goto err_ctx;
+			}
+
+			/* Flush to give try_preempt_reset a chance */
+			tasklet_schedule(t);
+			tasklet_kill(t);
+			GEM_BUG_ON(i915_request_completed(rq));
+
+			GEM_TRACE("%s triggering %s reset\n",
+				  engine->name, p->name);
+			p->critical_section_begin();
+
+			mark_preemption_hang(&engine->execlists);
+			err = try_preempt_reset(&engine->execlists);
+
+			p->critical_section_end();
+			if (err) {
+				pr_err("Preempt softirq reset failed on %s, tasklet state %lx\n",
+				       engine->name, t->state);
+				spinner_end(&spin);
+				i915_gem_set_wedged(i915);
+				goto err_ctx;
+			}
+
+			if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+				err = -EIO;
+				goto err_ctx;
+			}
+		}
+	}
+
+	err = 0;
+err_ctx:
+	kernel_context_close(ctx);
+err_spin:
+	spinner_fini(&spin);
+err_unlock:
+	igt_flush_test(i915, I915_WAIT_LOCKED);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
@@ -515,6 +625,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_preempt),
 		SUBTEST(live_late_preempt),
 		SUBTEST(live_preempt_timeout),
+		SUBTEST(live_preempt_reset),
 	};
 
 	if (!HAS_EXECLISTS(i915))
-- 
2.17.0

* [PATCH 022/262] drm/i915/preemption: Select timeout when scheduling
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (19 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 021/262] drm/i915/execlists: Try preempt-reset from hardirq timer context Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 023/262] drm/i915: Use a preemption timeout to enforce interactivity Chris Wilson
                   ` (34 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

The choice of preemption timeout is determined by the context from which
we trigger the preemption; as such, allow the caller to specify the
desired timeout.

Effectively the other choice would be to use the shortest timeout along
the dependency chain. However, given that we would have already
triggered preemption for the dependency chain, we can assume that no
preemption along that chain is more important than the current request,
ergo we need only consider the current timeout. Realising this, we can
then pass control of the preemption timeout to the caller for greater
flexibility.
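
For illustration, a caller-side sketch of the new interface (timeouts
are in nanoseconds; zero means never force the preemption; the values
are borrowed from the selftests below):

struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };

/* bump the priority, but be content with a natural context switch */
engine->schedule(rq, &attr, 0);

/* bump the priority, resetting any hog that ignores us for 10us */
engine->schedule(rq, &attr, 10 * 1000);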

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c            |   2 +-
 drivers/gpu/drm/i915/i915_request.c        |   2 +-
 drivers/gpu/drm/i915/intel_lrc.c           |   5 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h    |   6 +-
 drivers/gpu/drm/i915/selftests/intel_lrc.c | 110 ++++++++++++++++++++-
 5 files changed, 118 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fa09837d0569..0ec4c7a7c237 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -581,7 +581,7 @@ static void __fence_set_priority(struct dma_fence *fence,
 	local_bh_disable();
 	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
 	if (engine->schedule)
-		engine->schedule(rq, attr);
+		engine->schedule(rq, attr, 0);
 	rcu_read_unlock();
 	local_bh_enable(); /* kick the tasklets if queues were reprioritised */
 }
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index fc499bcbd105..c3893312cb9d 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1113,7 +1113,7 @@ void __i915_request_add(struct i915_request *request, bool flush_caches)
 	local_bh_disable();
 	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
 	if (engine->schedule)
-		engine->schedule(request, &request->gem_context->sched);
+		engine->schedule(request, &request->gem_context->sched, 0);
 	rcu_read_unlock();
 	i915_sw_fence_commit(&request->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b6f369d06bb8..c5795b9944b1 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1290,7 +1290,8 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
 }
 
 static void execlists_schedule(struct i915_request *request,
-			       const struct i915_sched_attr *attr)
+			       const struct i915_sched_attr *attr,
+			       unsigned int timeout)
 {
 	struct i915_priolist *uninitialized_var(pl);
 	struct intel_engine_cs *engine, *last;
@@ -1393,7 +1394,7 @@ static void execlists_schedule(struct i915_request *request,
 
 		if (prio > engine->execlists.queue_priority &&
 		    i915_sw_fence_done(&sched_to_request(node)->submit)) {
-			__update_queue(engine, prio, 0);
+			__update_queue(engine, prio, timeout);
 			tasklet_hi_schedule(&engine->execlists.tasklet);
 		}
 	}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 495cc2628fa9..769ac73004c4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -478,14 +478,16 @@ struct intel_engine_cs {
 	 */
 	void		(*submit_request)(struct i915_request *rq);
 
-	/* Call when the priority on a request has changed and it and its
+	/*
+	 * Call when the priority on a request has changed and it and its
 	 * dependencies may need rescheduling. Note the request itself may
 	 * not be ready to run!
 	 *
 	 * Called under the struct_mutex.
 	 */
 	void		(*schedule)(struct i915_request *request,
-				    const struct i915_sched_attr *attr);
+				    const struct i915_sched_attr *attr,
+				    unsigned int timeout);
 
 	/*
 	 * Cancel all requests on the hardware, or queued for execution.
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 40c1f6951444..7efa52e514ab 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -406,7 +406,7 @@ static int live_late_preempt(void *arg)
 		}
 
 		attr.priority = I915_PRIORITY_MAX;
-		engine->schedule(rq, &attr);
+		engine->schedule(rq, &attr, 0);
 
 		if (!wait_for_spinner(&spin_hi, rq)) {
 			pr_err("High priority context failed to preempt the low priority context\n");
@@ -618,6 +618,113 @@ static int live_preempt_reset(void *arg)
 	return err;
 }
 
+static int live_late_preempt_timeout(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct i915_gem_context *ctx_hi, *ctx_lo;
+	struct spinner spin_hi, spin_lo;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = -ENOMEM;
+
+	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	if (spinner_init(&spin_hi, i915))
+		goto err_unlock;
+
+	if (spinner_init(&spin_lo, i915))
+		goto err_spin_hi;
+
+	ctx_hi = kernel_context(i915);
+	if (!ctx_hi)
+		goto err_spin_lo;
+
+	ctx_lo = kernel_context(i915);
+	if (!ctx_lo)
+		goto err_ctx_hi;
+
+	for_each_engine(engine, i915, id) {
+		struct i915_request *rq;
+
+		rq = spinner_create_request(&spin_lo, ctx_lo, engine, MI_NOOP);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto err_ctx_lo;
+		}
+
+		i915_request_add(rq);
+		if (!wait_for_spinner(&spin_lo, rq)) {
+			pr_err("First context failed to start\n");
+			goto err_wedged;
+		}
+
+		rq = spinner_create_request(&spin_hi, ctx_hi, engine, MI_NOOP);
+		if (IS_ERR(rq)) {
+			spinner_end(&spin_lo);
+			err = PTR_ERR(rq);
+			goto err_ctx_lo;
+		}
+
+		i915_request_add(rq);
+		if (wait_for_spinner(&spin_hi, rq)) {
+			pr_err("Second context overtook first?\n");
+			goto err_wedged;
+		}
+
+		GEM_TRACE("%s rescheduling (no timeout)\n", engine->name);
+		engine->schedule(rq, &(struct i915_sched_attr){
+				 .priority = 1,
+				 }, 0);
+
+		if (wait_for_spinner(&spin_hi, rq)) {
+			pr_err("High priority context overtook first without an arbitration point?\n");
+			goto err_wedged;
+		}
+
+		GEM_TRACE("%s rescheduling (with timeout)\n", engine->name);
+		engine->schedule(rq, &(struct i915_sched_attr){
+				 .priority = 2,
+				 }, 10 * 1000 /* 10us */);
+
+		if (!wait_for_spinner(&spin_hi, rq)) {
+			pr_err("High priority context failed to force itself in front of the low priority context\n");
+			GEM_TRACE_DUMP();
+			goto err_wedged;
+		}
+
+		spinner_end(&spin_hi);
+		spinner_end(&spin_lo);
+		if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+			err = -EIO;
+			goto err_ctx_lo;
+		}
+	}
+
+	err = 0;
+err_ctx_lo:
+	kernel_context_close(ctx_lo);
+err_ctx_hi:
+	kernel_context_close(ctx_hi);
+err_spin_lo:
+	spinner_fini(&spin_lo);
+err_spin_hi:
+	spinner_fini(&spin_hi);
+err_unlock:
+	igt_flush_test(i915, I915_WAIT_LOCKED);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+
+err_wedged:
+	spinner_end(&spin_hi);
+	spinner_end(&spin_lo);
+	i915_gem_set_wedged(i915);
+	err = -EIO;
+	goto err_ctx_lo;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
@@ -626,6 +733,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_late_preempt),
 		SUBTEST(live_preempt_timeout),
 		SUBTEST(live_preempt_reset),
+		SUBTEST(live_late_preempt_timeout),
 	};
 
 	if (!HAS_EXECLISTS(i915))
-- 
2.17.0

* [PATCH 023/262] drm/i915: Use a preemption timeout to enforce interactivity
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (20 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 022/262] drm/i915/preemption: Select timeout when scheduling Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 024/262] drm/i915: Allow user control over preempt timeout on their important context Chris Wilson
                   ` (33 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

Use a liberal timeout of 100ms (matching I915_PREEMPTION_TIMEOUT_DISPLAY
below) to ensure that the rendering for an interactive pageflip is
started in a timely fashion, and that user interaction is not blocked by
GPU or CPU hogs. This comes at the cost of resetting whoever was
blocking the preemption, likely leading to that context/process being
banned from submitting future requests.
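
Condensed from the hunks below, the display path then bumps its
dependencies like so (a sketch of the call pattern, not a new API):

static void bump_for_display(struct drm_i915_gem_object *obj)
{
	const struct i915_sched_attr attr = {
		.priority = I915_PRIORITY_DISPLAY,
	};

	/* reschedule the dependency chain; reset any hog after 100ms */
	i915_gem_object_wait_priority(obj, 0, &attr,
				      I915_PREEMPTION_TIMEOUT_DISPLAY);
}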

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h      |  4 +++-
 drivers/gpu/drm/i915/i915_gem.c      | 19 +++++++++++--------
 drivers/gpu/drm/i915/intel_display.c | 17 ++++++++++++++++-
 3 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e33c380b43e3..56ffdd523893 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3177,8 +3177,10 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj,
 			 struct intel_rps_client *rps);
 int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 				  unsigned int flags,
-				  const struct i915_sched_attr *attr);
+				  const struct i915_sched_attr *attr,
+				  unsigned int timeout);
 #define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX
+#define I915_PREEMPTION_TIMEOUT_DISPLAY (100 * 1000 * 1000) /* 100 ms / 10Hz */
 
 int __must_check
 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0ec4c7a7c237..42d95f6490f8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -567,7 +567,8 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
 }
 
 static void __fence_set_priority(struct dma_fence *fence,
-				 const struct i915_sched_attr *attr)
+				 const struct i915_sched_attr *attr,
+				 unsigned int timeout)
 {
 	struct i915_request *rq;
 	struct intel_engine_cs *engine;
@@ -581,13 +582,14 @@ static void __fence_set_priority(struct dma_fence *fence,
 	local_bh_disable();
 	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
 	if (engine->schedule)
-		engine->schedule(rq, attr, 0);
+		engine->schedule(rq, attr, timeout);
 	rcu_read_unlock();
 	local_bh_enable(); /* kick the tasklets if queues were reprioritised */
 }
 
 static void fence_set_priority(struct dma_fence *fence,
-			       const struct i915_sched_attr *attr)
+			       const struct i915_sched_attr *attr,
+			       unsigned int timeout)
 {
 	/* Recurse once into a fence-array */
 	if (dma_fence_is_array(fence)) {
@@ -595,16 +597,17 @@ static void fence_set_priority(struct dma_fence *fence,
 		int i;
 
 		for (i = 0; i < array->num_fences; i++)
-			__fence_set_priority(array->fences[i], attr);
+			__fence_set_priority(array->fences[i], attr, timeout);
 	} else {
-		__fence_set_priority(fence, attr);
+		__fence_set_priority(fence, attr, timeout);
 	}
 }
 
 int
 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 			      unsigned int flags,
-			      const struct i915_sched_attr *attr)
+			      const struct i915_sched_attr *attr,
+			      unsigned int timeout)
 {
 	struct dma_fence *excl;
 
@@ -619,7 +622,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 			return ret;
 
 		for (i = 0; i < count; i++) {
-			fence_set_priority(shared[i], attr);
+			fence_set_priority(shared[i], attr, timeout);
 			dma_fence_put(shared[i]);
 		}
 
@@ -629,7 +632,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 	}
 
 	if (excl) {
-		fence_set_priority(excl, attr);
+		fence_set_priority(excl, attr, timeout);
 		dma_fence_put(excl);
 	}
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index ad588d564198..538d436db52a 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12879,7 +12879,8 @@ static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj)
 		.priority = I915_PRIORITY_DISPLAY,
 	};
 
-	i915_gem_object_wait_priority(obj, 0, &attr);
+	i915_gem_object_wait_priority(obj, 0,
+				      &attr, I915_PREEMPTION_TIMEOUT_DISPLAY);
 }
 
 /**
@@ -12958,6 +12959,20 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 
 	ret = intel_plane_pin_fb(to_intel_plane_state(new_state));
 
+	/*
+	 * Reschedule our dependencies, and ensure we run within a timeout.
+	 *
+	 * Note that if the timeout is exceeded, then whoever was running that
+	 * prevented us from acquiring the GPU is declared rogue and reset. An
+	 * unresponsive process will then be banned in order to preserve
+	 * interactivity. Since this can be seen as a bit heavy-handed, we
+	 * select a timeout for when the dropped frames start to become a
+	 * noticeable nuisance for the user (100 ms, i.e. preemption was
+	 * blocked for more than a few frames). Note, this is only a timeout
+	 * for a delay in preempting the current request in order to run our
+	 * dependency chain, our dependency chain may itself take a long time
+	 * to run to completion before we can present the framebuffer.
+	 */
 	fb_obj_bump_render_priority(obj);
 
 	mutex_unlock(&dev_priv->drm.struct_mutex);
-- 
2.17.0

* [PATCH 024/262] drm/i915: Allow user control over preempt timeout on their important context
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (21 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 023/262] drm/i915: Use a preemption timeout to enforce interactivity Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 025/262] drm/mm: Reject over-sized allocation requests early Chris Wilson
                   ` (32 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

One use case would be to couple this in via
EGL_NV_context_priority_realtime in userspace, providing some QoS
guarantees in conjunction with setting the highest priority.
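
As a userspace sketch, assuming this uapi lands as proposed (fd is an
open DRM device, ctx_id a previously created context; a non-zero
timeout requires CAP_SYS_ADMIN):

#include <xf86drm.h>
#include <drm/i915_drm.h>

struct drm_i915_gem_context_param p = {
	.ctx_id = ctx_id,
	.param = I915_CONTEXT_PARAM_PREEMPT_TIMEOUT,
	.value = 10 * 1000 * 1000, /* 10ms, given in nanoseconds */
};

/* expect -EPERM without CAP_SYS_ADMIN, -ENODEV without preemption */
if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p))
	perror("PREEMPT_TIMEOUT");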

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_context.c    | 22 ++++++
 drivers/gpu/drm/i915/i915_gem_context.h    | 13 ++++
 drivers/gpu/drm/i915/i915_request.c        |  7 +-
 drivers/gpu/drm/i915/intel_lrc.c           | 10 +--
 drivers/gpu/drm/i915/selftests/intel_lrc.c | 85 ++++++++++++++++++++++
 include/uapi/drm/i915_drm.h                | 12 +++
 6 files changed, 142 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index b69b18ef8120..9d51560ba9ad 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -758,6 +758,15 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_CONTEXT_PARAM_PRIORITY:
 		args->value = ctx->sched.priority;
 		break;
+	case I915_CONTEXT_PARAM_PREEMPT_TIMEOUT:
+		if (!(to_i915(dev)->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
+			ret = -ENODEV;
+		else if (args->size)
+			ret = -EINVAL;
+		else
+			args->value = ctx->preempt_timeout;
+		break;
+
 	default:
 		ret = -EINVAL;
 		break;
@@ -833,6 +842,19 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 		}
 		break;
 
+	case I915_CONTEXT_PARAM_PREEMPT_TIMEOUT:
+		if (args->size)
+			ret = -EINVAL;
+		else if (args->value > U32_MAX)
+			ret = -EINVAL;
+		else if (!(to_i915(dev)->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
+			ret = -ENODEV;
+		else if (args->value && !capable(CAP_SYS_ADMIN))
+			ret = -EPERM;
+		else
+			ctx->preempt_timeout = args->value;
+		break;
+
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index c3262b4dd2ee..b7d75c569f44 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -144,6 +144,19 @@ struct i915_gem_context {
 
 	struct i915_sched_attr sched;
 
+	/**
+	 * @preempt_timeout: QoS guarantee for the high priority context
+	 *
+	 * Some clients need a guarantee that they will start executing
+	 * within a certain window, even at the expense of others. This entails
+	 * that if a preemption request is not honoured by the active context
+	 * within the timeout, we will reset the GPU to evict the hog and
+	 * run the high priority context instead.
+	 *
+	 * Timeout is stored in nanoseconds.
+	 */
+	u32 preempt_timeout;
+
 	/** ggtt_offset_bias: placement restriction for context objects */
 	u32 ggtt_offset_bias;
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c3893312cb9d..6466d8f475f1 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1112,8 +1112,11 @@ void __i915_request_add(struct i915_request *request, bool flush_caches)
 	 */
 	local_bh_disable();
 	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
-	if (engine->schedule)
-		engine->schedule(request, &request->gem_context->sched, 0);
+	if (engine->schedule) {
+		engine->schedule(request,
+				 &request->gem_context->sched,
+				 request->gem_context->preempt_timeout);
+	}
 	rcu_read_unlock();
 	i915_sw_fence_commit(&request->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c5795b9944b1..6128f850194f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1251,20 +1251,20 @@ static void submit_queue(struct intel_engine_cs *engine,
 	}
 }
 
-static void execlists_submit_request(struct i915_request *request)
+static void execlists_submit_request(struct i915_request *rq)
 {
-	struct intel_engine_cs *engine = request->engine;
+	struct intel_engine_cs *engine = rq->engine;
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
 	spin_lock_irqsave(&engine->timeline.lock, flags);
 
-	queue_request(engine, &request->sched, rq_prio(request));
+	queue_request(engine, &rq->sched, rq_prio(rq));
 
 	GEM_BUG_ON(!engine->execlists.first);
-	GEM_BUG_ON(list_empty(&request->sched.link));
+	GEM_BUG_ON(list_empty(&rq->sched.link));
 
-	submit_queue(engine, rq_prio(request), 0);
+	submit_queue(engine, rq_prio(rq), rq->gem_context->preempt_timeout);
 
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 7efa52e514ab..b235f4444b2b 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -725,6 +725,90 @@ static int live_late_preempt_timeout(void *arg)
 	goto err_ctx_lo;
 }
 
+static int live_context_preempt_timeout(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct i915_gem_context *ctx_hi, *ctx_lo;
+	struct spinner spin_hi, spin_lo;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = -ENOMEM;
+
+	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	if (spinner_init(&spin_hi, i915))
+		goto err_unlock;
+
+	if (spinner_init(&spin_lo, i915))
+		goto err_spin_hi;
+
+	ctx_hi = kernel_context(i915);
+	if (!ctx_hi)
+		goto err_spin_lo;
+	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
+	ctx_hi->preempt_timeout = 50 * 1000; /* 50us */
+
+	ctx_lo = kernel_context(i915);
+	if (!ctx_lo)
+		goto err_ctx_hi;
+	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
+
+	for_each_engine(engine, i915, id) {
+		struct i915_request *rq;
+
+		rq = spinner_create_request(&spin_lo, ctx_lo, engine, MI_NOOP);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto err_ctx_lo;
+		}
+
+		i915_request_add(rq);
+		if (!wait_for_spinner(&spin_lo, rq)) {
+			i915_gem_set_wedged(i915);
+			err = -EIO;
+			goto err_ctx_lo;
+		}
+
+		rq = spinner_create_request(&spin_hi, ctx_hi, engine, MI_NOOP);
+		if (IS_ERR(rq)) {
+			spinner_end(&spin_lo);
+			err = PTR_ERR(rq);
+			goto err_ctx_lo;
+		}
+
+		i915_request_add(rq);
+		if (!wait_for_spinner(&spin_hi, rq)) {
+			i915_gem_set_wedged(i915);
+			err = -EIO;
+			goto err_ctx_lo;
+		}
+
+		spinner_end(&spin_hi);
+		spinner_end(&spin_lo);
+		if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+			err = -EIO;
+			goto err_ctx_lo;
+		}
+	}
+
+	err = 0;
+err_ctx_lo:
+	kernel_context_close(ctx_lo);
+err_ctx_hi:
+	kernel_context_close(ctx_hi);
+err_spin_lo:
+	spinner_fini(&spin_lo);
+err_spin_hi:
+	spinner_fini(&spin_hi);
+err_unlock:
+	igt_flush_test(i915, I915_WAIT_LOCKED);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
@@ -734,6 +818,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_preempt_timeout),
 		SUBTEST(live_preempt_reset),
 		SUBTEST(live_late_preempt_timeout),
+		SUBTEST(live_context_preempt_timeout),
 	};
 
 	if (!HAS_EXECLISTS(i915))
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7f5634ce8e88..853e0c7e0e85 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1456,6 +1456,18 @@ struct drm_i915_gem_context_param {
 #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
 #define   I915_CONTEXT_DEFAULT_PRIORITY		0
 #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
+
+/*
+ * I915_CONTEXT_PARAM_PREEMPT_TIMEOUT:
+ *
+ * Preemption timeout, given in nanoseconds.
+ *
+ * Only allowed for privileged clients (CAP_SYS_ADMIN), this property allows
+ * the preempting context to kick out a GPU hog using a GPU reset if they do
+ * not honour our preemption request in time.
+ */
+#define I915_CONTEXT_PARAM_PREEMPT_TIMEOUT	0x7
+
 	__u64 value;
 };
 
-- 
2.17.0

* [PATCH 025/262] drm/mm: Reject over-sized allocation requests early
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (22 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 024/262] drm/i915: Allow user control over preempt timeout on their important context Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 026/262] drm/mm: Add a search-by-address variant to only inspect a single hole Chris Wilson
                   ` (31 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

As we keep an rbtree of available holes sorted by their size, we can
very easily determine if there is any hole large enough to
satisfy the allocation request. This helps when dealing with a highly
fragmented address space and a request for a search by address.

To cache the largest size, we convert into the cached rbtree variant
which tracks the leftmost node for us. However, we currently sort in
ascending size order, so the leftmost node is the smallest; to make it
the largest hole, we need to invert our sorting.
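
With the inverted sort, the cached leftmost node is always the largest
hole, so rejecting an over-sized request becomes O(1); the check added
to drm_mm_insert_node_in_range() below is roughly equivalent to:

struct rb_node *rb = rb_first_cached(&mm->holes_size);

if (!rb || rb_entry(rb, struct drm_mm_node, rb_hole_size)->hole_size < size)
	return -ENOSPC;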

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/drm_mm.c | 82 ++++++++++++++++++++++++++++------------
 include/drm/drm_mm.h     |  2 +-
 2 files changed, 58 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 3166026a1874..7b4ad05fe1c0 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -239,6 +239,32 @@ static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node,
 #define HOLE_SIZE(NODE) ((NODE)->hole_size)
 #define HOLE_ADDR(NODE) (__drm_mm_hole_node_start(NODE))
 
+static u64 rb_to_hole_size(struct rb_node *rb)
+{
+	return rb_entry(rb, struct drm_mm_node, rb_hole_size)->hole_size;
+}
+
+static void insert_hole_size(struct rb_root_cached *root,
+			     struct drm_mm_node *node)
+{
+	struct rb_node **link = &root->rb_root.rb_node, *rb = NULL;
+	u64 x = node->hole_size;
+	bool first = true;
+
+	while (*link) {
+		rb = *link;
+		if (x > rb_to_hole_size(rb)) {
+			link = &rb->rb_left;
+		} else {
+			link = &rb->rb_right;
+			first = false;
+		}
+	}
+
+	rb_link_node(&node->rb_hole_size, rb, link);
+	rb_insert_color_cached(&node->rb_hole_size, root, first);
+}
+
 static void add_hole(struct drm_mm_node *node)
 {
 	struct drm_mm *mm = node->mm;
@@ -247,7 +273,7 @@ static void add_hole(struct drm_mm_node *node)
 		__drm_mm_hole_node_end(node) - __drm_mm_hole_node_start(node);
 	DRM_MM_BUG_ON(!drm_mm_hole_follows(node));
 
-	RB_INSERT(mm->holes_size, rb_hole_size, HOLE_SIZE);
+	insert_hole_size(&mm->holes_size, node);
 	RB_INSERT(mm->holes_addr, rb_hole_addr, HOLE_ADDR);
 
 	list_add(&node->hole_stack, &mm->hole_stack);
@@ -258,7 +284,7 @@ static void rm_hole(struct drm_mm_node *node)
 	DRM_MM_BUG_ON(!drm_mm_hole_follows(node));
 
 	list_del(&node->hole_stack);
-	rb_erase(&node->rb_hole_size, &node->mm->holes_size);
+	rb_erase_cached(&node->rb_hole_size, &node->mm->holes_size);
 	rb_erase(&node->rb_hole_addr, &node->mm->holes_addr);
 	node->hole_size = 0;
 
@@ -282,38 +308,39 @@ static inline u64 rb_hole_size(struct rb_node *rb)
 
 static struct drm_mm_node *best_hole(struct drm_mm *mm, u64 size)
 {
-	struct rb_node *best = NULL;
-	struct rb_node **link = &mm->holes_size.rb_node;
+	struct rb_node *rb = mm->holes_size.rb_root.rb_node;
+	struct drm_mm_node *best = NULL;
 
-	while (*link) {
-		struct rb_node *rb = *link;
+	do {
+		struct drm_mm_node *node =
+			rb_entry(rb, struct drm_mm_node, rb_hole_size);
 
-		if (size <= rb_hole_size(rb)) {
-			link = &rb->rb_left;
-			best = rb;
+		if (size <= node->hole_size) {
+			best = node;
+			rb = rb->rb_right;
 		} else {
-			link = &rb->rb_right;
+			rb = rb->rb_left;
 		}
-	}
+	} while (rb);
 
-	return rb_hole_size_to_node(best);
+	return best;
 }
 
 static struct drm_mm_node *find_hole(struct drm_mm *mm, u64 addr)
 {
+	struct rb_node *rb = mm->holes_addr.rb_node;
 	struct drm_mm_node *node = NULL;
-	struct rb_node **link = &mm->holes_addr.rb_node;
 
-	while (*link) {
+	while (rb) {
 		u64 hole_start;
 
-		node = rb_hole_addr_to_node(*link);
+		node = rb_hole_addr_to_node(rb);
 		hole_start = __drm_mm_hole_node_start(node);
 
 		if (addr < hole_start)
-			link = &node->rb_hole_addr.rb_left;
+			rb = node->rb_hole_addr.rb_left;
 		else if (addr > hole_start + node->hole_size)
-			link = &node->rb_hole_addr.rb_right;
+			rb = node->rb_hole_addr.rb_right;
 		else
 			break;
 	}
@@ -326,9 +353,6 @@ first_hole(struct drm_mm *mm,
 	   u64 start, u64 end, u64 size,
 	   enum drm_mm_insert_mode mode)
 {
-	if (RB_EMPTY_ROOT(&mm->holes_size))
-		return NULL;
-
 	switch (mode) {
 	default:
 	case DRM_MM_INSERT_BEST:
@@ -355,7 +379,7 @@ next_hole(struct drm_mm *mm,
 	switch (mode) {
 	default:
 	case DRM_MM_INSERT_BEST:
-		return rb_hole_size_to_node(rb_next(&node->rb_hole_size));
+		return rb_hole_size_to_node(rb_prev(&node->rb_hole_size));
 
 	case DRM_MM_INSERT_LOW:
 		return rb_hole_addr_to_node(rb_next(&node->rb_hole_addr));
@@ -426,6 +450,11 @@ int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node)
 }
 EXPORT_SYMBOL(drm_mm_reserve_node);
 
+static u64 rb_to_hole_size_or_zero(struct rb_node *rb)
+{
+	return rb ? rb_to_hole_size(rb) : 0;
+}
+
 /**
  * drm_mm_insert_node_in_range - ranged search for space and insert @node
  * @mm: drm_mm to allocate from
@@ -457,6 +486,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
 	if (unlikely(size == 0 || range_end - range_start < size))
 		return -ENOSPC;
 
+	if (rb_to_hole_size_or_zero(rb_first_cached(&mm->holes_size)) < size)
+		return -ENOSPC;
+
 	if (alignment <= 1)
 		alignment = 0;
 
@@ -587,9 +619,9 @@ void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new)
 
 	if (drm_mm_hole_follows(old)) {
 		list_replace(&old->hole_stack, &new->hole_stack);
-		rb_replace_node(&old->rb_hole_size,
-				&new->rb_hole_size,
-				&mm->holes_size);
+		rb_replace_node_cached(&old->rb_hole_size,
+				       &new->rb_hole_size,
+				       &mm->holes_size);
 		rb_replace_node(&old->rb_hole_addr,
 				&new->rb_hole_addr,
 				&mm->holes_addr);
@@ -885,7 +917,7 @@ void drm_mm_init(struct drm_mm *mm, u64 start, u64 size)
 
 	INIT_LIST_HEAD(&mm->hole_stack);
 	mm->interval_tree = RB_ROOT_CACHED;
-	mm->holes_size = RB_ROOT;
+	mm->holes_size = RB_ROOT_CACHED;
 	mm->holes_addr = RB_ROOT;
 
 	/* Clever trick to avoid a special case in the free hole tracking. */
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index 101f566ae43d..e3aa3bfd4860 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -173,7 +173,7 @@ struct drm_mm {
 	struct drm_mm_node head_node;
 	/* Keep an interval_tree for fast lookup of drm_mm_nodes by address. */
 	struct rb_root_cached interval_tree;
-	struct rb_root holes_size;
+	struct rb_root_cached holes_size;
 	struct rb_root holes_addr;
 
 	unsigned long scan_active;
-- 
2.17.0

* [PATCH 026/262] drm/mm: Add a search-by-address variant to only inspect a single hole
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (23 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 025/262] drm/mm: Reject over-sized allocation requests early Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 027/262] drm/i915: Limit searching for PIN_HIGH Chris Wilson
                   ` (30 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

Searching for an available hole by address is slow, as there is no
guarantee that a hole will be available and so we must walk over all
nodes in the rbtree before we determine the search was futile. In many
cases, the caller doesn't strictly care for the highest available hole
and was just opportunistically laying out the address space in a
preferred order. In such cases, the caller can accept any address and
would rather do so than do a slow walk.

To be able to mix search strategies, the caller wants to tell the drm_mm
how long to spend on the search. Without a good guide for what should be
the best split, start with a request to try once at most. That is, return
the top-most (or lowest) hole if it fulfils the alignment and size
requirements.
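
A usage sketch (mirroring how i915 adopts this two patches later):
probe only the topmost hole first, and fall back to the full best-fit
search only if that single inspection fails:

err = drm_mm_insert_node_in_range(mm, node, size, alignment, color,
				  start, end, DRM_MM_INSERT_HIGHEST);
if (err == -ENOSPC)
	err = drm_mm_insert_node_in_range(mm, node, size, alignment, color,
					  start, end, DRM_MM_INSERT_BEST);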

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/drm_mm.c | 9 +++++++--
 include/drm/drm_mm.h     | 4 ++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 7b4ad05fe1c0..b8e99e2a7c34 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -480,6 +480,7 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
 {
 	struct drm_mm_node *hole;
 	u64 remainder_mask;
+	bool once;
 
 	DRM_MM_BUG_ON(range_start >= range_end);
 
@@ -492,9 +493,13 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
 	if (alignment <= 1)
 		alignment = 0;
 
+	once = mode & DRM_MM_INSERT_END;
+	mode &= ~DRM_MM_INSERT_END;
+
 	remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
-	for (hole = first_hole(mm, range_start, range_end, size, mode); hole;
-	     hole = next_hole(mm, hole, mode)) {
+	for (hole = first_hole(mm, range_start, range_end, size, mode);
+	     hole;
+	     hole = once ? NULL : next_hole(mm, hole, mode)) {
 		u64 hole_start = __drm_mm_hole_node_start(hole);
 		u64 hole_end = hole_start + hole->hole_size;
 		u64 adj_start, adj_end;
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index e3aa3bfd4860..2a6351093d15 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -109,6 +109,10 @@ enum drm_mm_insert_mode {
 	 * Allocates the node from the bottom of the found hole.
 	 */
 	DRM_MM_INSERT_EVICT,
+
+	DRM_MM_INSERT_END = BIT(31),
+	DRM_MM_INSERT_HIGHEST = DRM_MM_INSERT_HIGH | DRM_MM_INSERT_END,
+	DRM_MM_INSERT_LOWEST = DRM_MM_INSERT_LOW | DRM_MM_INSERT_END,
 };
 
 /**
-- 
2.17.0

* [PATCH 027/262] drm/i915: Limit searching for PIN_HIGH
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (24 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 026/262] drm/mm: Add a search-by-address variant to only inspect a single hole Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 028/262] drm/i915: Pin the ring high Chris Wilson
                   ` (29 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

To no surprise (since we've flip-flopped over the use of PIN_HIGH a few
times), doing a search by address over a pathologically fragmented
address space is exceedingly slow. To protect ourselves from nearly
unbounded latency (think searching a million holes while under
struct_mutex), limit the search for the highest available hole and
fallback to best-fit if it fails.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 996ab2ad6c45..6a5b30107399 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3988,7 +3988,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 
 	mode = DRM_MM_INSERT_BEST;
 	if (flags & PIN_HIGH)
-		mode = DRM_MM_INSERT_HIGH;
+		mode = DRM_MM_INSERT_HIGHEST;
 	if (flags & PIN_MAPPABLE)
 		mode = DRM_MM_INSERT_LOW;
 
@@ -4008,6 +4008,15 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 	if (err != -ENOSPC)
 		return err;
 
+	if (mode & DRM_MM_INSERT_END) {
+		err = drm_mm_insert_node_in_range(&vm->mm, node,
+						  size, alignment, color,
+						  start, end,
+						  DRM_MM_INSERT_BEST);
+		if (err != -ENOSPC)
+			return err;
+	}
+
 	if (flags & PIN_NOEVICT)
 		return -ENOSPC;
 
-- 
2.17.0

* [PATCH 028/262] drm/i915: Pin the ring high
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (25 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 027/262] drm/i915: Limit searching for PIN_HIGH Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 029/262] drm/i915: Track the purgeable objects on a separate eviction list Chris Wilson
                   ` (28 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

If we can use an unmappable ring, try to pin it out of the mappable
aperture. This simple layout preference is to try and keep the mappable
aperture reserved and available to handle GGTT mmapping requests from
userspace without causing evictions and GPU stalls.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 001cf6bcb349..97b38bbb7ce2 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1049,6 +1049,8 @@ int intel_ring_pin(struct intel_ring *ring,
 		flags |= PIN_OFFSET_BIAS | offset_bias;
 	if (vma->obj->stolen)
 		flags |= PIN_MAPPABLE;
+	else
+		flags |= PIN_HIGH;
 
 	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
 		if (flags & PIN_MAPPABLE || map == I915_MAP_WC)
-- 
2.17.0

* [PATCH 029/262] drm/i915: Track the purgeable objects on a separate eviction list
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (26 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 028/262] drm/i915: Pin the ring high Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-18 11:36   ` Matthew Auld
  2018-05-17  6:03 ` [PATCH 030/262] drm/i915: Refactor unsetting obj->mm.pages Chris Wilson
                   ` (27 subsequent siblings)
  55 siblings, 1 reply; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

Currently the purgeable objects, I915_MADV_DONTNEED, are mixed into the
normal bound/unbound lists. Every shrinker pass starts with an attempt
to purge from this set of unneeded objects, which entails us doing a
walk over both lists looking for any candidates. If there are none (and
since we are shrinking, we can reasonably assume that the lists are
full!), this becomes a very slow, futile walk.

If we separate out the purgeable objects into their own list, this search then
becomes its own phase that is preferentially handled during shrinking.
Instead the cost becomes that we then need to filter the purgeable list
if we want to distinguish between bound and unbound objects.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          | 13 ++++---
 drivers/gpu/drm/i915/i915_gem.c          | 49 ++++++++++++++++++------
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 28 +++++++-------
 drivers/gpu/drm/i915/i915_vma.c          |  2 +-
 4 files changed, 60 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 56ffdd523893..1eeee043e164 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -926,6 +926,10 @@ struct i915_gem_mm {
 	 * not actually have any pages attached.
 	 */
 	struct list_head unbound_list;
+	/**
+	 * List of objects which are purgeable. May be active.
+	 */
+	struct list_head purge_list;
 
 	/** List of all objects in gtt_space, currently mmaped by userspace.
 	 * All objects within this list must also be on bound_list.
@@ -3310,11 +3314,10 @@ unsigned long i915_gem_shrink(struct drm_i915_private *i915,
 			      unsigned long target,
 			      unsigned long *nr_scanned,
 			      unsigned flags);
-#define I915_SHRINK_PURGEABLE 0x1
-#define I915_SHRINK_UNBOUND 0x2
-#define I915_SHRINK_BOUND 0x4
-#define I915_SHRINK_ACTIVE 0x8
-#define I915_SHRINK_VMAPS 0x10
+#define I915_SHRINK_UNBOUND BIT(0)
+#define I915_SHRINK_BOUND BIT(1)
+#define I915_SHRINK_ACTIVE BIT(2)
+#define I915_SHRINK_VMAPS BIT(3)
 unsigned long i915_gem_shrink_all(struct drm_i915_private *i915);
 void i915_gem_shrinker_register(struct drm_i915_private *i915);
 void i915_gem_shrinker_unregister(struct drm_i915_private *i915);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 42d95f6490f8..be3092a03722 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1672,8 +1672,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 
 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 {
-	struct drm_i915_private *i915;
-	struct list_head *list;
 	struct i915_vma *vma;
 
 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
@@ -1688,11 +1686,16 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
 	}
 
-	i915 = to_i915(obj->base.dev);
-	spin_lock(&i915->mm.obj_lock);
-	list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
-	list_move_tail(&obj->mm.link, list);
-	spin_unlock(&i915->mm.obj_lock);
+	if (obj->mm.madv == I915_MADV_WILLNEED) {
+		struct drm_i915_private *i915 = to_i915(obj->base.dev);
+		struct list_head *list;
+
+		spin_lock(&i915->mm.obj_lock);
+		list = obj->bind_count ?
+			&i915->mm.bound_list : &i915->mm.unbound_list;
+		list_move_tail(&obj->mm.link, list);
+		spin_unlock(&i915->mm.obj_lock);
+	}
 }
 
 /**
@@ -2531,7 +2534,7 @@ static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	sg_page_sizes = 0;
 	for (i = 0; i < page_count; i++) {
 		const unsigned int shrink[] = {
-			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
+			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
 			0,
 		}, *s = shrink;
 		gfp_t gfp = noreclaim;
@@ -4519,7 +4522,7 @@ int
 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct drm_i915_gem_madvise *args = data;
 	struct drm_i915_gem_object *obj;
 	int err;
@@ -4542,7 +4545,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 
 	if (i915_gem_object_has_pages(obj) &&
 	    i915_gem_object_is_tiled(obj) &&
-	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
+	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
 		if (obj->mm.madv == I915_MADV_WILLNEED) {
 			GEM_BUG_ON(!obj->mm.quirked);
 			__i915_gem_object_unpin_pages(obj);
@@ -4558,6 +4561,20 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 	if (obj->mm.madv != __I915_MADV_PURGED)
 		obj->mm.madv = args->madv;
 
+	if (i915_gem_object_has_pages(obj)) {
+		struct list_head *list;
+
+		spin_lock(&i915->mm.obj_lock);
+		if (obj->mm.madv != I915_MADV_WILLNEED)
+			list = &i915->mm.purge_list;
+		else if (obj->bind_count)
+			list = &i915->mm.bound_list;
+		else
+			list = &i915->mm.unbound_list;
+		list_move_tail(&obj->mm.link, list);
+		spin_unlock(&i915->mm.obj_lock);
+	}
+
 	/* if the object is no longer attached, discard its backing storage */
 	if (obj->mm.madv == I915_MADV_DONTNEED &&
 	    !i915_gem_object_has_pages(obj))
@@ -4876,9 +4893,18 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	if (obj->mm.quirked)
 		__i915_gem_object_unpin_pages(obj);
 
-	if (discard_backing_storage(obj))
+	if (discard_backing_storage(obj)) {
+		struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
 		obj->mm.madv = I915_MADV_DONTNEED;
 
+		if (i915_gem_object_has_pages(obj)) {
+			spin_lock(&i915->mm.obj_lock);
+			list_move_tail(&obj->mm.link, &i915->mm.purge_list);
+			spin_unlock(&i915->mm.obj_lock);
+		}
+	}
+
 	/*
 	 * Before we free the object, make sure any pure RCU-only
 	 * read-side critical sections are complete, e.g.
@@ -5527,6 +5553,7 @@ static void i915_gem_init__mm(struct drm_i915_private *i915)
 
 	init_llist_head(&i915->mm.free_list);
 
+	INIT_LIST_HEAD(&i915->mm.purge_list);
 	INIT_LIST_HEAD(&i915->mm.unbound_list);
 	INIT_LIST_HEAD(&i915->mm.bound_list);
 	INIT_LIST_HEAD(&i915->mm.fence_list);
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 5757fb7c4b5a..ea5a6e2d62d2 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -151,6 +151,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
 		struct list_head *list;
 		unsigned int bit;
 	} phases[] = {
+		{ &i915->mm.purge_list, 0 },
 		{ &i915->mm.unbound_list, I915_SHRINK_UNBOUND },
 		{ &i915->mm.bound_list, I915_SHRINK_BOUND },
 		{ NULL, 0 },
@@ -182,8 +183,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
 	 * device just to recover a little memory. If absolutely necessary,
 	 * we will force the wake during oom-notifier.
 	 */
-	if ((flags & I915_SHRINK_BOUND) &&
-	    !intel_runtime_pm_get_if_in_use(i915))
+	if (flags & I915_SHRINK_BOUND && !intel_runtime_pm_get_if_in_use(i915))
 		flags &= ~I915_SHRINK_BOUND;
 
 	/*
@@ -209,7 +209,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
 		struct list_head still_in_list;
 		struct drm_i915_gem_object *obj;
 
-		if ((flags & phase->bit) == 0)
+		if (phase->bit && (flags & phase->bit) == 0)
 			continue;
 
 		INIT_LIST_HEAD(&still_in_list);
@@ -228,10 +228,6 @@ i915_gem_shrink(struct drm_i915_private *i915,
 						       mm.link))) {
 			list_move_tail(&obj->mm.link, &still_in_list);
 
-			if (flags & I915_SHRINK_PURGEABLE &&
-			    obj->mm.madv != I915_MADV_DONTNEED)
-				continue;
-
 			if (flags & I915_SHRINK_VMAPS &&
 			    !is_vmalloc_addr(obj->mm.mapping))
 				continue;
@@ -241,6 +237,10 @@ i915_gem_shrink(struct drm_i915_private *i915,
 			     i915_gem_object_is_framebuffer(obj)))
 				continue;
 
+			if (!(flags & I915_SHRINK_BOUND) &&
+			    READ_ONCE(obj->bind_count))
+				continue;
+
 			if (!can_release_pages(obj))
 				continue;
 
@@ -325,6 +325,11 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 			count += obj->base.size >> PAGE_SHIFT;
 			num_objects++;
 		}
+	list_for_each_entry(obj, &i915->mm.purge_list, mm.link)
+		if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) {
+			count += obj->base.size >> PAGE_SHIFT;
+			num_objects++;
+		}
 	spin_unlock(&i915->mm.obj_lock);
 
 	/* Update our preferred vmscan batch size for the next pass.
@@ -361,14 +366,7 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 				sc->nr_to_scan,
 				&sc->nr_scanned,
 				I915_SHRINK_BOUND |
-				I915_SHRINK_UNBOUND |
-				I915_SHRINK_PURGEABLE);
-	if (sc->nr_scanned < sc->nr_to_scan)
-		freed += i915_gem_shrink(i915,
-					 sc->nr_to_scan - sc->nr_scanned,
-					 &sc->nr_scanned,
-					 I915_SHRINK_BOUND |
-					 I915_SHRINK_UNBOUND);
+				I915_SHRINK_UNBOUND);
 	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
 		intel_runtime_pm_get(i915);
 		freed += i915_gem_shrink(i915,
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 9324d476e0a7..96039c4ef434 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -624,7 +624,7 @@ i915_vma_remove(struct i915_vma *vma)
 	 * no more VMAs exist.
 	 */
 	spin_lock(&i915->mm.obj_lock);
-	if (--obj->bind_count == 0)
+	if (--obj->bind_count == 0 && obj->mm.madv == I915_MADV_WILLNEED)
 		list_move_tail(&obj->mm.link, &i915->mm.unbound_list);
 	spin_unlock(&i915->mm.obj_lock);
 
-- 
2.17.0

* [PATCH 030/262] drm/i915: Refactor unsetting obj->mm.pages
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (27 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 029/262] drm/i915: Track the purgeable objects on a separate eviction list Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-18 13:35   ` Matthew Auld
  2018-05-17  6:03 ` [PATCH 031/262] drm/i915: Report all objects with allocated pages to the shrinker Chris Wilson
                   ` (26 subsequent siblings)
  55 siblings, 1 reply; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

As i915_gem_object_attach_phys() wants to play dirty and mess around
with obj->mm.pages itself (replacing the shmemfs with a DMA allocation),
refactor the gubbins into i915_gem_object_unset_pages() so that we don't
have to duplicate all the secrets.
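
For illustration, the resulting call pattern is (a simplified sketch of
the diff below, not the literal code):

	/* Common teardown: detach the pages and hand them back. */
	pages = __i915_gem_object_unset_pages(obj);

	/* Regular release path: return them to the backing store. */
	if (!IS_ERR(pages))
		obj->ops->put_pages(obj, pages);

	/* Phys path instead keeps the sg_table and swaps in new ops. */
	obj->ops = &i915_gem_phys_ops;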

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 68 ++++++++++++++++++---------------
 1 file changed, 37 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index be3092a03722..4e480874563f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2410,29 +2410,15 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
 	rcu_read_unlock();
 }
 
-void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
-				 enum i915_mm_subclass subclass)
+static struct sg_table *
+__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct sg_table *pages;
 
-	if (i915_gem_object_has_pinned_pages(obj))
-		return;
-
-	GEM_BUG_ON(obj->bind_count);
-	if (!i915_gem_object_has_pages(obj))
-		return;
-
-	/* May be called by shrinker from within get_pages() (on another bo) */
-	mutex_lock_nested(&obj->mm.lock, subclass);
-	if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
-		goto unlock;
-
-	/* ->put_pages might need to allocate memory for the bit17 swizzle
-	 * array, hence protect them from being reaped by removing them from gtt
-	 * lists early. */
 	pages = fetch_and_zero(&obj->mm.pages);
-	GEM_BUG_ON(!pages);
+	if (!pages)
+		return NULL;
 
 	spin_lock(&i915->mm.obj_lock);
 	list_del(&obj->mm.link);
@@ -2451,12 +2437,37 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
 	}
 
 	__i915_gem_object_reset_page_iter(obj);
+	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
+
+	return pages;
+}
+
+void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
+				 enum i915_mm_subclass subclass)
+{
+	struct sg_table *pages;
+
+	if (i915_gem_object_has_pinned_pages(obj))
+		return;
 
+	GEM_BUG_ON(obj->bind_count);
+	if (!i915_gem_object_has_pages(obj))
+		return;
+
+	/* May be called by shrinker from within get_pages() (on another bo) */
+	mutex_lock_nested(&obj->mm.lock, subclass);
+	if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
+		goto unlock;
+
+	/*
+	 * ->put_pages might need to allocate memory for the bit17 swizzle
+	 * array, hence protect them from being reaped by removing them from gtt
+	 * lists early.
+	 */
+	pages = __i915_gem_object_unset_pages(obj);
 	if (!IS_ERR(pages))
 		obj->ops->put_pages(obj, pages);
 
-	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
-
 unlock:
 	mutex_unlock(&obj->mm.lock);
 }
@@ -6013,16 +6024,7 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
 		goto err_unlock;
 	}
 
-	pages = fetch_and_zero(&obj->mm.pages);
-	if (pages) {
-		struct drm_i915_private *i915 = to_i915(obj->base.dev);
-
-		__i915_gem_object_reset_page_iter(obj);
-
-		spin_lock(&i915->mm.obj_lock);
-		list_del(&obj->mm.link);
-		spin_unlock(&i915->mm.obj_lock);
-	}
+	pages = __i915_gem_object_unset_pages(obj);
 
 	obj->ops = &i915_gem_phys_ops;
 
@@ -6040,7 +6042,11 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
 
 err_xfer:
 	obj->ops = &i915_gem_object_ops;
-	obj->mm.pages = pages;
+	if (!IS_ERR(pages)) {
+		unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);
+
+		__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
+	}
 err_unlock:
 	mutex_unlock(&obj->mm.lock);
 	return err;
-- 
2.17.0

* [PATCH 031/262] drm/i915: Report all objects with allocated pages to the shrinker
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (28 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 030/262] drm/i915: Refactor unsetting obj->mm.pages Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-18 16:42   ` Matthew Auld
  2018-05-17  6:03 ` [PATCH 032/262] drm/i915: Disable preemption and sleeping while using the punit sideband Chris Wilson
                   ` (25 subsequent siblings)
  55 siblings, 1 reply; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

Currently, we try to report to the shrinker the precise number of
objects (pages) that are available to be reaped at this moment. This
requires searching all objects with allocated pages to see if they
fulfill the search criteria, and this count is performed quite
frequently. (The shrinker tries to free ~128 pages on each invocation,
before which we count all the objects; counting takes longer than
unbinding the objects!) If we take the pragmatic view that with
sufficient desire, all objects are eventually reapable (they become
inactive, no longer used as a framebuffer, etc.), we can simply return
the count of allocated pages maintained during get_pages/put_pages rather
than walk the lists every time.

The downside is that we may (slightly) over-report the number of
objects/pages we could shrink and so penalize ourselves by shrinking
more than required. This is mitigated by keeping the order in which we
shrink objects such that we avoid penalizing active and frequently used
objects, and if memory is so tight that we need to free them, we would
have had to do so anyway.
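
For illustration, a minimal sketch of the bookkeeping that replaces the
list walks (simplified from the diff below):

	/* Under mm.obj_lock, whenever pages are allocated or released: */
	i915->mm.object_count++;
	i915->mm.object_memory += obj->base.size;

	/* The shrinker's count callback then becomes O(1): */
	count = READ_ONCE(i915->mm.object_memory) >> PAGE_SHIFT;
	num_objects = READ_ONCE(i915->mm.object_count);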

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c      |  2 +-
 drivers/gpu/drm/i915/i915_drv.h          |  1 -
 drivers/gpu/drm/i915/i915_gem.c          | 27 ++++-------------------
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 28 +++++-------------------
 4 files changed, 11 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index ee8e2ff2c426..72d2238755db 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -434,7 +434,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 	if (ret)
 		return ret;
 
-	seq_printf(m, "%u objects, %llu bytes\n",
+	seq_printf(m, "%u active objects, %llu bytes\n",
 		   dev_priv->mm.object_count,
 		   dev_priv->mm.object_memory);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1eeee043e164..7fa727e62d6f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -986,7 +986,6 @@ struct i915_gem_mm {
 	uint32_t bit_6_swizzle_y;
 
 	/* accounting, useful for userland debugging */
-	spinlock_t object_stat_lock;
 	u64 object_memory;
 	u32 object_count;
 };
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4e480874563f..a5694b0a7e6a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -77,25 +77,6 @@ remove_mappable_node(struct drm_mm_node *node)
 	drm_mm_remove_node(node);
 }
 
-/* some bookkeeping */
-static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
-				  u64 size)
-{
-	spin_lock(&dev_priv->mm.object_stat_lock);
-	dev_priv->mm.object_count++;
-	dev_priv->mm.object_memory += size;
-	spin_unlock(&dev_priv->mm.object_stat_lock);
-}
-
-static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
-				     u64 size)
-{
-	spin_lock(&dev_priv->mm.object_stat_lock);
-	dev_priv->mm.object_count--;
-	dev_priv->mm.object_memory -= size;
-	spin_unlock(&dev_priv->mm.object_stat_lock);
-}
-
 static int
 i915_gem_wait_for_error(struct i915_gpu_error *error)
 {
@@ -2422,6 +2403,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
 
 	spin_lock(&i915->mm.obj_lock);
 	list_del(&obj->mm.link);
+	i915->mm.object_count--;
+	i915->mm.object_memory -= obj->base.size;
 	spin_unlock(&i915->mm.obj_lock);
 
 	if (obj->mm.mapping) {
@@ -2708,6 +2691,8 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 	GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
 
 	spin_lock(&i915->mm.obj_lock);
+	i915->mm.object_count++;
+	i915->mm.object_memory += obj->base.size;
 	list_add(&obj->mm.link, &i915->mm.unbound_list);
 	spin_unlock(&i915->mm.obj_lock);
 }
@@ -4628,8 +4613,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 	obj->mm.madv = I915_MADV_WILLNEED;
 	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
 	mutex_init(&obj->mm.get_page.lock);
-
-	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
 }
 
 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
@@ -4817,7 +4800,6 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 
 		reservation_object_fini(&obj->__builtin_resv);
 		drm_gem_object_release(&obj->base);
-		i915_gem_info_remove_obj(i915, obj->base.size);
 
 		kfree(obj->bit_17);
 		i915_gem_object_free(obj);
@@ -5558,7 +5540,6 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
 
 static void i915_gem_init__mm(struct drm_i915_private *i915)
 {
-	spin_lock_init(&i915->mm.object_stat_lock);
 	spin_lock_init(&i915->mm.obj_lock);
 	spin_lock_init(&i915->mm.free_lock);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index ea5a6e2d62d2..2bf08631d264 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -309,30 +309,14 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 {
 	struct drm_i915_private *i915 =
 		container_of(shrinker, struct drm_i915_private, mm.shrinker);
-	struct drm_i915_gem_object *obj;
-	unsigned long num_objects = 0;
-	unsigned long count = 0;
+	unsigned long num_objects;
+	unsigned long count;
 
-	spin_lock(&i915->mm.obj_lock);
-	list_for_each_entry(obj, &i915->mm.unbound_list, mm.link)
-		if (can_release_pages(obj)) {
-			count += obj->base.size >> PAGE_SHIFT;
-			num_objects++;
-		}
+	count = READ_ONCE(i915->mm.object_memory) >> PAGE_SHIFT;
+	num_objects = READ_ONCE(i915->mm.object_count);
 
-	list_for_each_entry(obj, &i915->mm.bound_list, mm.link)
-		if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) {
-			count += obj->base.size >> PAGE_SHIFT;
-			num_objects++;
-		}
-	list_for_each_entry(obj, &i915->mm.purge_list, mm.link)
-		if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) {
-			count += obj->base.size >> PAGE_SHIFT;
-			num_objects++;
-		}
-	spin_unlock(&i915->mm.obj_lock);
-
-	/* Update our preferred vmscan batch size for the next pass.
+	/*
+	 * Update our preferred vmscan batch size for the next pass.
 	 * Our rough guess for an effective batch size is roughly 2
 	 * available GEM objects worth of pages. That is we don't want
 	 * the shrinker to fire, until it is worth the cost of freeing an
-- 
2.17.0

* [PATCH 032/262] drm/i915: Disable preemption and sleeping while using the punit sideband
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (29 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 031/262] drm/i915: Report all objects with allocated pages to the shrinker Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 033/262] drm/i915: Lift acquiring the vlv punit magic to a common sb-get Chris Wilson
                   ` (24 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx; +Cc: Hans de Goede

While we talk to the punit over its sideband, we need to keep the cpu
from sleeping in order to prevent a potential machine hang.

Note that by itself, it appears that pm_qos_update_request (via
intel_idle) doesn't provide a sufficient barrier to ensure that all cores
are indeed awake (out of C-state) and that the package is awake. To do so,
we need to supplement the pm_qos with a manual ping on_each_cpu.

v2: Restrict the heavy-weight wakeup to just the ISOF_PORT_PUNIT, there
is insufficient evidence to implicate a wider problem atm. Similarly,
restrict the w/a to Valleyview, as Cherryview doesn't have an angry cadre
of users.

The working theory, courtesy of Ville and Hans, is that the issue lies
within the power delivery and so is likely to be unit and board specific,
occurring when the unit/fw requires extra power at the same time as the
cpu package is changing its own power state.
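
For illustration, the wakeup itself is tiny (a sketch of the diff below;
ping() is deliberately empty, as the IPI delivery is the point):

	static void ping(void *info)
	{
	}

	/* Forbid deep C-states for the duration of the sideband access. */
	pm_qos_update_request(&dev_priv->sb_qos, 0);
	/* And kick every core awake now via a cross-call. */
	on_each_cpu(ping, NULL, 1);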

References: https://bugzilla.kernel.org/show_bug.cgi?id=109051
References: https://bugs.freedesktop.org/show_bug.cgi?id=102657
References: https://bugzilla.kernel.org/show_bug.cgi?id=195255
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c       |  6 ++
 drivers/gpu/drm/i915/i915_drv.h       |  1 +
 drivers/gpu/drm/i915/intel_sideband.c | 89 +++++++++++++++++++++------
 3 files changed, 77 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 9c449b8d8eab..4a44f1a233e3 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -919,6 +919,9 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
 	spin_lock_init(&dev_priv->uncore.lock);
 
 	mutex_init(&dev_priv->sb_lock);
+	pm_qos_add_request(&dev_priv->sb_qos,
+			   PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE);
+
 	mutex_init(&dev_priv->modeset_restore_lock);
 	mutex_init(&dev_priv->av_mutex);
 	mutex_init(&dev_priv->wm.wm_mutex);
@@ -971,6 +974,9 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv)
 	i915_gem_cleanup_early(dev_priv);
 	i915_workqueues_cleanup(dev_priv);
 	i915_engines_cleanup(dev_priv);
+
+	pm_qos_remove_request(&dev_priv->sb_qos);
+	mutex_destroy(&dev_priv->sb_lock);
 }
 
 static int i915_mmio_setup(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7fa727e62d6f..d0a7b1deab98 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1650,6 +1650,7 @@ struct drm_i915_private {
 
 	/* Sideband mailbox protection */
 	struct mutex sb_lock;
+	struct pm_qos_request sb_qos;
 
 	/** Cached value of IMR to avoid reads in updating the bitfield */
 	union {
diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index 75c872bb8cc9..d56eda33734e 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -22,6 +22,8 @@
  *
  */
 
+#include <asm/iosf_mbi.h>
+
 #include "i915_drv.h"
 #include "intel_drv.h"
 
@@ -39,18 +41,48 @@
 /* Private register write, double-word addressing, non-posted */
 #define SB_CRWRDA_NP	0x07
 
-static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn,
-			   u32 port, u32 opcode, u32 addr, u32 *val)
+static void ping(void *info)
 {
-	u32 cmd, be = 0xf, bar = 0;
-	bool is_read = (opcode == SB_MRD_NP || opcode == SB_CRRDDA_NP);
+}
 
-	cmd = (devfn << IOSF_DEVFN_SHIFT) | (opcode << IOSF_OPCODE_SHIFT) |
-		(port << IOSF_PORT_SHIFT) | (be << IOSF_BYTE_ENABLES_SHIFT) |
-		(bar << IOSF_BAR_SHIFT);
+static void __vlv_punit_get(struct drm_i915_private *dev_priv)
+{
+	iosf_mbi_punit_acquire();
 
-	WARN_ON(!mutex_is_locked(&dev_priv->sb_lock));
+	/*
+	 * Prevent the cpu from sleeping while we use this sideband, otherwise
+	 * the punit may cause a machine hang. The issue appears to be isolated
+	 * with changing the power state of the CPU package while changing
+	 * the power state via the punit, and we have only observed it
+	 * reliably on 4-core Baytrail systems suggesting the issue is in the
+	 * power delivery mechanism and likely to be board/function
+	 * specific. Hence we presume the workaround needs only be applied
+	 * to the Valleyview P-unit and not all sideband communications.
+	 */
+	if (IS_VALLEYVIEW(dev_priv)) {
+		pm_qos_update_request(&dev_priv->sb_qos, 0);
+		on_each_cpu(ping, NULL, 1);
+	}
+}
+
+static void __vlv_punit_put(struct drm_i915_private *dev_priv)
+{
+	if (IS_VALLEYVIEW(dev_priv))
+		pm_qos_update_request(&dev_priv->sb_qos, PM_QOS_DEFAULT_VALUE);
 
+	iosf_mbi_punit_release();
+}
+
+static int vlv_sideband_rw(struct drm_i915_private *dev_priv,
+			   u32 devfn, u32 port, u32 opcode,
+			   u32 addr, u32 *val)
+{
+	const bool is_read = (opcode == SB_MRD_NP || opcode == SB_CRRDDA_NP);
+	int err;
+
+	lockdep_assert_held(&dev_priv->sb_lock);
+
+	/* Flush the previous comms, just in case it failed last time. */
 	if (intel_wait_for_register(dev_priv,
 				    VLV_IOSF_DOORBELL_REQ, IOSF_SB_BUSY, 0,
 				    5)) {
@@ -59,22 +91,33 @@ static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn,
 		return -EAGAIN;
 	}
 
-	I915_WRITE(VLV_IOSF_ADDR, addr);
-	I915_WRITE(VLV_IOSF_DATA, is_read ? 0 : *val);
-	I915_WRITE(VLV_IOSF_DOORBELL_REQ, cmd);
-
-	if (intel_wait_for_register(dev_priv,
-				    VLV_IOSF_DOORBELL_REQ, IOSF_SB_BUSY, 0,
-				    5)) {
+	preempt_disable();
+
+	I915_WRITE_FW(VLV_IOSF_ADDR, addr);
+	I915_WRITE_FW(VLV_IOSF_DATA, is_read ? 0 : *val);
+	I915_WRITE_FW(VLV_IOSF_DOORBELL_REQ,
+		      (devfn << IOSF_DEVFN_SHIFT) |
+		      (opcode << IOSF_OPCODE_SHIFT) |
+		      (port << IOSF_PORT_SHIFT) |
+		      (0xf << IOSF_BYTE_ENABLES_SHIFT) |
+		      (0 << IOSF_BAR_SHIFT) |
+		      IOSF_SB_BUSY);
+
+	if (__intel_wait_for_register_fw(dev_priv,
+					 VLV_IOSF_DOORBELL_REQ, IOSF_SB_BUSY, 0,
+					 10000, 0, NULL) == 0) {
+		if (is_read)
+			*val = I915_READ_FW(VLV_IOSF_DATA);
+		err = 0;
+	} else {
 		DRM_DEBUG_DRIVER("IOSF sideband finish wait (%s) timed out\n",
 				 is_read ? "read" : "write");
-		return -ETIMEDOUT;
+		err = -ETIMEDOUT;
 	}
 
-	if (is_read)
-		*val = I915_READ(VLV_IOSF_DATA);
+	preempt_enable();
 
-	return 0;
+	return err;
 }
 
 u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr)
@@ -84,8 +127,12 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr)
 	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
 
 	mutex_lock(&dev_priv->sb_lock);
+	__vlv_punit_get(dev_priv);
+
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
 			SB_CRRDDA_NP, addr, &val);
+
+	__vlv_punit_put(dev_priv);
 	mutex_unlock(&dev_priv->sb_lock);
 
 	return val;
@@ -98,8 +145,12 @@ int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val)
 	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
 
 	mutex_lock(&dev_priv->sb_lock);
+	__vlv_punit_get(dev_priv);
+
 	err = vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
 			      SB_CRWRDA_NP, addr, &val);
+
+	__vlv_punit_put(dev_priv);
 	mutex_unlock(&dev_priv->sb_lock);
 
 	return err;
-- 
2.17.0

* [PATCH 033/262] drm/i915: Lift acquiring the vlv punit magic to a common sb-get
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (30 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 032/262] drm/i915: Disable preemption and sleeping while using the punit sideband Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 034/262] drm/i915: Lift sideband locking for vlv_punit_(read|write) Chris Wilson
                   ` (23 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

As we now employ a very heavy pm_qos around the punit access, we want to
minimise the number of synchronous requests by performing one for the
whole punit sequence rather than around individual accesses. The
sideband lock is used for this, so push the pm_qos into the sideband
lock acquisition and release, moving it from the low-level punit rw
routine to the callers. As a first step, we move the punit magic into
the common sideband lock so that we can acquire a bunch of ports
simultaneously, and if need be extend the workaround protection later.
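
For illustration, a caller can now take several ports in one shot
(a sketch based on the vlv_set_cdclk() hunk below):

	vlv_iosf_sb_get(dev_priv,
			BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_BUNIT));

	/* ... a sequence of vlv_cck_*() / vlv_bunit_*() accesses ... */

	vlv_iosf_sb_put(dev_priv,
			BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_BUNIT));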

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h         |  45 +++++++++-
 drivers/gpu/drm/i915/intel_cdclk.c      |   6 +-
 drivers/gpu/drm/i915/intel_display.c    |  37 ++++----
 drivers/gpu/drm/i915/intel_dp.c         |   4 +-
 drivers/gpu/drm/i915/intel_dpio_phy.c   |  37 ++++----
 drivers/gpu/drm/i915/intel_dsi.c        |   8 +-
 drivers/gpu/drm/i915/intel_dsi_pll.c    |  14 +--
 drivers/gpu/drm/i915/intel_dsi_vbt.c    |   8 +-
 drivers/gpu/drm/i915/intel_hdmi.c       |   4 +-
 drivers/gpu/drm/i915/intel_pm.c         |   4 +-
 drivers/gpu/drm/i915/intel_runtime_pm.c |   8 +-
 drivers/gpu/drm/i915/intel_sideband.c   | 115 ++++++++++++++++++++----
 12 files changed, 208 insertions(+), 82 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d0a7b1deab98..2400ead8c4c9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3474,25 +3474,62 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
 		      u32 reply_mask, u32 reply, int timeout_base_ms);
 
 /* intel_sideband.c */
+
+enum {
+	VLV_IOSF_SB_BUNIT,
+	VLV_IOSF_SB_CCK,
+	VLV_IOSF_SB_CCU,
+	VLV_IOSF_SB_DPIO,
+	VLV_IOSF_SB_FLISDSI,
+	VLV_IOSF_SB_GPIO,
+	VLV_IOSF_SB_NC,
+	VLV_IOSF_SB_PUNIT,
+};
+
+void vlv_iosf_sb_get(struct drm_i915_private *dev_priv, unsigned long ports);
+u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg);
+void vlv_iosf_sb_write(struct drm_i915_private *dev_priv,
+		       u8 port, u32 reg, u32 val);
+void vlv_iosf_sb_put(struct drm_i915_private *dev_priv, unsigned long ports);
+
+void vlv_punit_get(struct drm_i915_private *dev_priv);
 u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr);
 int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val);
+void vlv_punit_put(struct drm_i915_private *dev_priv);
+
+void vlv_nc_get(struct drm_i915_private *dev_priv);
 u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr);
-u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg);
-void vlv_iosf_sb_write(struct drm_i915_private *dev_priv, u8 port, u32 reg, u32 val);
+void vlv_nc_put(struct drm_i915_private *dev_priv);
+
+void vlv_cck_get(struct drm_i915_private *dev_priv);
 u32 vlv_cck_read(struct drm_i915_private *dev_priv, u32 reg);
 void vlv_cck_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
+void vlv_cck_put(struct drm_i915_private *dev_priv);
+
+void vlv_ccu_get(struct drm_i915_private *dev_priv);
 u32 vlv_ccu_read(struct drm_i915_private *dev_priv, u32 reg);
 void vlv_ccu_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
+void vlv_ccu_put(struct drm_i915_private *dev_priv);
+
+void vlv_bunit_get(struct drm_i915_private *dev_priv);
 u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg);
 void vlv_bunit_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
+void vlv_bunit_put(struct drm_i915_private *dev_priv);
+
+void vlv_dpio_get(struct drm_i915_private *dev_priv);
 u32 vlv_dpio_read(struct drm_i915_private *dev_priv, enum pipe pipe, int reg);
 void vlv_dpio_write(struct drm_i915_private *dev_priv, enum pipe pipe, int reg, u32 val);
+void vlv_dpio_put(struct drm_i915_private *dev_priv);
+
+void vlv_flisdsi_get(struct drm_i915_private *dev_priv);
+u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg);
+void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
+void vlv_flisdsi_put(struct drm_i915_private *dev_priv);
+
 u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg,
 		   enum intel_sbi_destination destination);
 void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value,
 		     enum intel_sbi_destination destination);
-u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg);
-void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
 
 /* intel_dpio_phy.c */
 void bxt_port_to_phy_channel(struct drm_i915_private *dev_priv, enum port port,
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index 704ddb4d3ca7..cb43fd139a0e 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -552,7 +552,8 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 	}
 	mutex_unlock(&dev_priv->pcu_lock);
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_iosf_sb_get(dev_priv,
+			BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_BUNIT));
 
 	if (cdclk == 400000) {
 		u32 divider;
@@ -586,7 +587,8 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 		val |= 3000 / 250; /* 3.0 usec */
 	vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_iosf_sb_put(dev_priv,
+			BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_BUNIT));
 
 	intel_update_cdclk(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 538d436db52a..e6203bf4c899 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -181,10 +181,10 @@ int vlv_get_hpll_vco(struct drm_i915_private *dev_priv)
 	int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 };
 
 	/* Obtain SKU information */
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_cck_get(dev_priv);
 	hpll_freq = vlv_cck_read(dev_priv, CCK_FUSE_REG) &
 		CCK_FUSE_HPLL_FREQ_MASK;
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_cck_put(dev_priv);
 
 	return vco_freq[hpll_freq] * 1000;
 }
@@ -195,9 +195,9 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv,
 	u32 val;
 	int divider;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_cck_get(dev_priv);
 	val = vlv_cck_read(dev_priv, reg);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_cck_put(dev_priv);
 
 	divider = val & CCK_FREQUENCY_VALUES;
 
@@ -1121,9 +1121,9 @@ void assert_dsi_pll(struct drm_i915_private *dev_priv, bool state)
 	u32 val;
 	bool cur_state;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_cck_get(dev_priv);
 	val = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_cck_put(dev_priv);
 
 	cur_state = val & DSI_PLL_VCO_EN;
 	I915_STATE_WARN(cur_state != state,
@@ -1471,14 +1471,14 @@ static void _chv_enable_pll(struct intel_crtc *crtc,
 	enum dpio_channel port = vlv_pipe_to_channel(pipe);
 	u32 tmp;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* Enable back the 10bit clock to display controller */
 	tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port));
 	tmp |= DPIO_DCLKP_EN;
 	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port), tmp);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 
 	/*
 	 * Need to wait > 100ns between dclkp clock enable bit and PLL enable.
@@ -1663,14 +1663,14 @@ static void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe)
 	I915_WRITE(DPLL(pipe), val);
 	POSTING_READ(DPLL(pipe));
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* Disable 10bit clock to display controller */
 	val = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port));
 	val &= ~DPIO_DCLKP_EN;
 	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port), val);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
 
 void vlv_wait_port_ready(struct drm_i915_private *dev_priv,
@@ -6875,7 +6875,7 @@ static void vlv_prepare_pll(struct intel_crtc *crtc,
 	if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0)
 		return;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	bestn = pipe_config->dpll.n;
 	bestm1 = pipe_config->dpll.m1;
@@ -6952,7 +6952,8 @@ static void vlv_prepare_pll(struct intel_crtc *crtc,
 	vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW7(pipe), coreclk);
 
 	vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW11(pipe), 0x87871000);
-	mutex_unlock(&dev_priv->sb_lock);
+
+	vlv_dpio_put(dev_priv);
 }
 
 static void chv_prepare_pll(struct intel_crtc *crtc,
@@ -6985,7 +6986,7 @@ static void chv_prepare_pll(struct intel_crtc *crtc,
 	dpio_val = 0;
 	loopfilter = 0;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* p1 and p2 divider */
 	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW13(port),
@@ -7057,7 +7058,7 @@ static void chv_prepare_pll(struct intel_crtc *crtc,
 			vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port)) |
 			DPIO_AFC_RECAL);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
 
 /**
@@ -7659,9 +7660,9 @@ static void vlv_crtc_clock_get(struct intel_crtc *crtc,
 	if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0)
 		return;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 	mdiv = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW3(pipe));
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 
 	clock.m1 = (mdiv >> DPIO_M1DIV_SHIFT) & 7;
 	clock.m2 = mdiv & DPIO_M2DIV_MASK;
@@ -7761,13 +7762,13 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc,
 	if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0)
 		return;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 	cmn_dw13 = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW13(port));
 	pll_dw0 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW0(port));
 	pll_dw1 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW1(port));
 	pll_dw2 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW2(port));
 	pll_dw3 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW3(port));
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 
 	clock.m1 = (pll_dw1 & 0x7) == DPIO_CHV_M1_DIV_BY_2 ? 2 : 0;
 	clock.m2 = (pll_dw0 & 0xff) << 22;
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 2cc58596ff5a..94bad33db38a 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -2861,12 +2861,12 @@ static void chv_post_disable_dp(struct intel_encoder *encoder,
 
 	intel_dp_link_down(encoder, old_crtc_state);
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* Assert data lane reset */
 	chv_data_lane_soft_reset(encoder, old_crtc_state, true);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c
index 00b3ab656b06..79c449aabc7f 100644
--- a/drivers/gpu/drm/i915/intel_dpio_phy.c
+++ b/drivers/gpu/drm/i915/intel_dpio_phy.c
@@ -646,7 +646,7 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder,
 	u32 val;
 	int i;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* Clear calc init */
 	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch));
@@ -727,8 +727,7 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder,
 		vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val);
 	}
 
-	mutex_unlock(&dev_priv->sb_lock);
-
+	vlv_dpio_put(dev_priv);
 }
 
 void chv_data_lane_soft_reset(struct intel_encoder *encoder,
@@ -798,7 +797,7 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder,
 
 	chv_phy_powergate_lanes(encoder, true, lane_mask);
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* Assert data lane reset */
 	chv_data_lane_soft_reset(encoder, crtc_state, true);
@@ -853,7 +852,7 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder,
 		val |= CHV_CMN_USEDCLKCHANNEL;
 	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW19(ch), val);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
 
 void chv_phy_pre_encoder_enable(struct intel_encoder *encoder,
@@ -868,7 +867,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 	int data, i, stagger;
 	u32 val;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* allow hardware to manage TX FIFO reset source */
 	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW11(ch));
@@ -933,7 +932,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 	/* Deassert data lane reset */
 	chv_data_lane_soft_reset(encoder, crtc_state, false);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
 
 void chv_phy_release_cl2_override(struct intel_encoder *encoder)
@@ -954,7 +953,7 @@ void chv_phy_post_pll_disable(struct intel_encoder *encoder,
 	enum pipe pipe = to_intel_crtc(old_crtc_state->base.crtc)->pipe;
 	u32 val;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* disable left/right clock distribution */
 	if (pipe != PIPE_B) {
@@ -967,7 +966,7 @@ void chv_phy_post_pll_disable(struct intel_encoder *encoder,
 		vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW1_CH1, val);
 	}
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 
 	/*
 	 * Leave the power down bit cleared for at least one
@@ -991,7 +990,8 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder,
 	enum dpio_channel port = vlv_dport_to_channel(dport);
 	enum pipe pipe = intel_crtc->pipe;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
+
 	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), 0x00000000);
 	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW4(port), demph_reg_value);
 	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW2(port),
@@ -1004,7 +1004,8 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder,
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW11(port), 0x00030000);
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW9(port), preemph_reg_value);
 	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), DPIO_TX_OCALINIT_EN);
-	mutex_unlock(&dev_priv->sb_lock);
+
+	vlv_dpio_put(dev_priv);
 }
 
 void vlv_phy_pre_pll_enable(struct intel_encoder *encoder,
@@ -1017,7 +1018,8 @@ void vlv_phy_pre_pll_enable(struct intel_encoder *encoder,
 	enum pipe pipe = crtc->pipe;
 
 	/* Program Tx lane resets to default */
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
+
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port),
 			 DPIO_PCS_TX_LANE2_RESET |
 			 DPIO_PCS_TX_LANE1_RESET);
@@ -1031,7 +1033,8 @@ void vlv_phy_pre_pll_enable(struct intel_encoder *encoder,
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW12(port), 0x00750f00);
 	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW11(port), 0x00001500);
 	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW14(port), 0x40400000);
-	mutex_unlock(&dev_priv->sb_lock);
+
+	vlv_dpio_put(dev_priv);
 }
 
 void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder,
@@ -1045,7 +1048,7 @@ void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 	enum pipe pipe = crtc->pipe;
 	u32 val;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* Enable clock channels for this port */
 	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW8(port));
@@ -1061,7 +1064,7 @@ void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW14(port), 0x00760018);
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW23(port), 0x00400888);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
 
 void vlv_phy_reset_lanes(struct intel_encoder *encoder,
@@ -1073,8 +1076,8 @@ void vlv_phy_reset_lanes(struct intel_encoder *encoder,
 	enum dpio_channel port = vlv_dport_to_channel(dport);
 	enum pipe pipe = crtc->pipe;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port), 0x00000000);
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW1(port), 0x00e00060);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index 51a1d6868b1e..355aa8717af2 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -278,7 +278,7 @@ static int dpi_send_cmd(struct intel_dsi *intel_dsi, u32 cmd, bool hs,
 
 static void band_gap_reset(struct drm_i915_private *dev_priv)
 {
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_flisdsi_get(dev_priv);
 
 	vlv_flisdsi_write(dev_priv, 0x08, 0x0001);
 	vlv_flisdsi_write(dev_priv, 0x0F, 0x0005);
@@ -287,7 +287,7 @@ static void band_gap_reset(struct drm_i915_private *dev_priv)
 	vlv_flisdsi_write(dev_priv, 0x0F, 0x0000);
 	vlv_flisdsi_write(dev_priv, 0x08, 0x0000);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_flisdsi_put(dev_priv);
 }
 
 static inline bool is_vid_mode(struct intel_dsi *intel_dsi)
@@ -509,11 +509,11 @@ static void vlv_dsi_device_ready(struct intel_encoder *encoder)
 
 	DRM_DEBUG_KMS("\n");
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_flisdsi_get(dev_priv);
 	/* program rcomp for compliance, reduce from 50 ohms to 45 ohms
 	 * needed everytime after power gate */
 	vlv_flisdsi_write(dev_priv, 0x04, 0x0004);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_flisdsi_put(dev_priv);
 
 	/* bandgap reset is needed after everytime we do power gate */
 	band_gap_reset(dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c
index 2ff2ee7f3b78..b73336e7dcd2 100644
--- a/drivers/gpu/drm/i915/intel_dsi_pll.c
+++ b/drivers/gpu/drm/i915/intel_dsi_pll.c
@@ -149,7 +149,7 @@ static void vlv_enable_dsi_pll(struct intel_encoder *encoder,
 
 	DRM_DEBUG_KMS("\n");
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_cck_get(dev_priv);
 
 	vlv_cck_write(dev_priv, CCK_REG_DSI_PLL_CONTROL, 0);
 	vlv_cck_write(dev_priv, CCK_REG_DSI_PLL_DIVIDER, config->dsi_pll.div);
@@ -166,11 +166,11 @@ static void vlv_enable_dsi_pll(struct intel_encoder *encoder,
 	if (wait_for(vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL) &
 						DSI_PLL_LOCK, 20)) {
 
-		mutex_unlock(&dev_priv->sb_lock);
+		vlv_cck_put(dev_priv);
 		DRM_ERROR("DSI PLL lock failed\n");
 		return;
 	}
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_cck_put(dev_priv);
 
 	DRM_DEBUG_KMS("DSI PLL locked\n");
 }
@@ -182,14 +182,14 @@ static void vlv_disable_dsi_pll(struct intel_encoder *encoder)
 
 	DRM_DEBUG_KMS("\n");
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_cck_get(dev_priv);
 
 	tmp = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL);
 	tmp &= ~DSI_PLL_VCO_EN;
 	tmp |= DSI_PLL_LDO_GATE;
 	vlv_cck_write(dev_priv, CCK_REG_DSI_PLL_CONTROL, tmp);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_cck_put(dev_priv);
 }
 
 static bool bxt_dsi_pll_is_enabled(struct drm_i915_private *dev_priv)
@@ -274,10 +274,10 @@ static u32 vlv_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp,
 
 	DRM_DEBUG_KMS("\n");
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_cck_get(dev_priv);
 	pll_ctl = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL);
 	pll_div = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_DIVIDER);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_cck_put(dev_priv);
 
 	config->dsi_pll.ctrl = pll_ctl & ~DSI_PLL_LOCK;
 	config->dsi_pll.div = pll_div;
diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c b/drivers/gpu/drm/i915/intel_dsi_vbt.c
index 4d6ffa7b3e7b..515ab68f319c 100644
--- a/drivers/gpu/drm/i915/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c
@@ -234,7 +234,7 @@ static void vlv_exec_gpio(struct drm_i915_private *dev_priv,
 	pconf0 = VLV_GPIO_PCONF0(map->base_offset);
 	padval = VLV_GPIO_PAD_VAL(map->base_offset);
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_GPIO));
 	if (!map->init) {
 		/* FIXME: remove constant below */
 		vlv_iosf_sb_write(dev_priv, port, pconf0, 0x2000CC00);
@@ -243,7 +243,7 @@ static void vlv_exec_gpio(struct drm_i915_private *dev_priv,
 
 	tmp = 0x4 | value;
 	vlv_iosf_sb_write(dev_priv, port, padval, tmp);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_GPIO));
 }
 
 static void chv_exec_gpio(struct drm_i915_private *dev_priv,
@@ -289,12 +289,12 @@ static void chv_exec_gpio(struct drm_i915_private *dev_priv,
 	cfg0 = CHV_GPIO_PAD_CFG0(family_num, gpio_index);
 	cfg1 = CHV_GPIO_PAD_CFG1(family_num, gpio_index);
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_GPIO));
 	vlv_iosf_sb_write(dev_priv, port, cfg1, 0);
 	vlv_iosf_sb_write(dev_priv, port, cfg0,
 			  CHV_GPIO_GPIOEN | CHV_GPIO_GPIOCFG_GPO |
 			  CHV_GPIO_GPIOTXSTATE(value));
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_GPIO));
 }
 
 static void bxt_exec_gpio(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index ee929f31f7db..4257209c75f3 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -1995,12 +1995,12 @@ static void chv_hdmi_post_disable(struct intel_encoder *encoder,
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* Assert data lane reset */
 	chv_data_lane_soft_reset(encoder, old_crtc_state, true);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
 
 static void chv_hdmi_pre_enable(struct intel_encoder *encoder,
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index b85229e153c4..877830ea7b5e 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -7421,9 +7421,9 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
 
 	vlv_init_gpll_ref_freq(dev_priv);
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_cck_get(dev_priv);
 	val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_cck_put(dev_priv);
 
 	switch ((val >> 2) & 0x7) {
 	case 3:
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 53a6eaa9671a..78262cc5b483 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -1214,7 +1214,7 @@ static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv,
 				    1))
 		DRM_ERROR("Display PHY %d is not power up\n", phy);
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* Enable dynamic power down */
 	tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW28);
@@ -1237,7 +1237,7 @@ static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv,
 		vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW30, tmp);
 	}
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 
 	dev_priv->chv_phy_control |= PHY_COM_LANE_RESET_DEASSERT(phy);
 	I915_WRITE(DISPLAY_PHY_CONTROL, dev_priv->chv_phy_control);
@@ -1300,9 +1300,9 @@ static void assert_chv_phy_powergate(struct drm_i915_private *dev_priv, enum dpi
 	else
 		reg = _CHV_CMN_DW6_CH1;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 	val = vlv_dpio_read(dev_priv, pipe, reg);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 
 	/*
 	 * This assumes !override is only used when the port is disabled.
diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index d56eda33734e..3d7c5917b97c 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -73,6 +73,22 @@ static void __vlv_punit_put(struct drm_i915_private *dev_priv)
 	iosf_mbi_punit_release();
 }
 
+void vlv_iosf_sb_get(struct drm_i915_private *dev_priv, unsigned long ports)
+{
+	if (ports & BIT(VLV_IOSF_SB_PUNIT))
+		__vlv_punit_get(dev_priv);
+
+	mutex_lock(&dev_priv->sb_lock);
+}
+
+void vlv_iosf_sb_put(struct drm_i915_private *dev_priv, unsigned long ports)
+{
+	mutex_unlock(&dev_priv->sb_lock);
+
+	if (ports & BIT(VLV_IOSF_SB_PUNIT))
+		__vlv_punit_put(dev_priv);
+}
+
 static int vlv_sideband_rw(struct drm_i915_private *dev_priv,
 			   u32 devfn, u32 port, u32 opcode,
 			   u32 addr, u32 *val)
@@ -81,6 +97,8 @@ static int vlv_sideband_rw(struct drm_i915_private *dev_priv,
 	int err;
 
 	lockdep_assert_held(&dev_priv->sb_lock);
+	if (port == IOSF_PORT_PUNIT)
+		iosf_mbi_assert_punit_acquired();
 
 	/* Flush the previous comms, just in case it failed last time. */
 	if (intel_wait_for_register(dev_priv,
@@ -124,16 +142,14 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr)
 {
 	u32 val = 0;
 
-	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
+	lockdep_assert_held(&dev_priv->pcu_lock);
 
-	mutex_lock(&dev_priv->sb_lock);
-	__vlv_punit_get(dev_priv);
+	vlv_punit_get(dev_priv);
 
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
 			SB_CRRDDA_NP, addr, &val);
 
-	__vlv_punit_put(dev_priv);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_punit_put(dev_priv);
 
 	return val;
 }
@@ -142,20 +158,28 @@ int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val)
 {
 	int err;
 
-	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
+	lockdep_assert_held(&dev_priv->pcu_lock);
 
-	mutex_lock(&dev_priv->sb_lock);
-	__vlv_punit_get(dev_priv);
+	vlv_punit_get(dev_priv);
 
 	err = vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
 			      SB_CRWRDA_NP, addr, &val);
 
-	__vlv_punit_put(dev_priv);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_punit_put(dev_priv);
 
 	return err;
 }
 
+void vlv_punit_get(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_PUNIT));
+}
+
+void vlv_punit_put(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_PUNIT));
+}
+
 u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg)
 {
 	u32 val = 0;
@@ -172,20 +196,38 @@ void vlv_bunit_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
 			SB_CRWRDA_NP, reg, &val);
 }
 
+void vlv_bunit_get(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_BUNIT));
+}
+
+void vlv_bunit_put(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_BUNIT));
+}
+
 u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr)
 {
 	u32 val = 0;
 
-	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
-
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_nc_get(dev_priv);
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_NC,
 			SB_CRRDDA_NP, addr, &val);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_nc_put(dev_priv);
 
 	return val;
 }
 
+void vlv_nc_get(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_NC));
+}
+
+void vlv_nc_put(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_NC));
+}
+
 u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg)
 {
 	u32 val = 0;
@@ -215,6 +257,16 @@ void vlv_cck_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
 			SB_CRWRDA_NP, reg, &val);
 }
 
+void vlv_cck_get(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_CCK));
+}
+
+void vlv_cck_put(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_CCK));
+}
+
 u32 vlv_ccu_read(struct drm_i915_private *dev_priv, u32 reg)
 {
 	u32 val = 0;
@@ -229,6 +281,16 @@ void vlv_ccu_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
 			SB_CRWRDA_NP, reg, &val);
 }
 
+void vlv_ccu_get(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_CCU));
+}
+
+void vlv_ccu_put(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_CCU));
+}
+
 u32 vlv_dpio_read(struct drm_i915_private *dev_priv, enum pipe pipe, int reg)
 {
 	u32 val = 0;
@@ -252,12 +314,23 @@ void vlv_dpio_write(struct drm_i915_private *dev_priv, enum pipe pipe, int reg,
 			SB_MWR_NP, reg, &val);
 }
 
+void vlv_dpio_get(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_DPIO));
+}
+
+void vlv_dpio_put(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_DPIO));
+}
+
 /* SBI access */
 u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg,
 		   enum intel_sbi_destination destination)
 {
 	u32 value = 0;
-	WARN_ON(!mutex_is_locked(&dev_priv->sb_lock));
+
+	lockdep_assert_held(&dev_priv->sb_lock);
 
 	if (intel_wait_for_register(dev_priv,
 				    SBI_CTL_STAT, SBI_BUSY, 0,
@@ -297,7 +370,7 @@ void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value,
 {
 	u32 tmp;
 
-	WARN_ON(!mutex_is_locked(&dev_priv->sb_lock));
+	lockdep_assert_held(&dev_priv->sb_lock);
 
 	if (intel_wait_for_register(dev_priv,
 				    SBI_CTL_STAT, SBI_BUSY, 0,
@@ -344,3 +417,13 @@ void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
 	vlv_sideband_rw(dev_priv, DPIO_DEVFN, IOSF_PORT_FLISDSI, SB_CRWRDA_NP,
 			reg, &val);
 }
+
+void vlv_flisdsi_get(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_FLISDSI));
+}
+
+void vlv_flisdsi_put(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_FLISDSI));
+}
-- 
2.17.0

* [PATCH 034/262] drm/i915: Lift sideband locking for vlv_punit_(read|write)
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (31 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 033/262] drm/i915: Lift acquiring the vlv punit magic to a common sb-get Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 035/262] drm/i915: Reduce RPS update frequency on Valleyview/Cherryview Chris Wilson
                   ` (22 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

Lift the sideband acquisition for vlv_punit_read and vlv_punit_write
into their callers, so that we can lock the sideband once for a sequence
of operations, rather than perform the heavyweight acquisition on each
request.
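
For illustration, a read-modify-write over the punit now pays the
acquisition cost once for the whole sequence (a sketch based on the
chv_set_memory_pm5() hunk below):

	vlv_punit_get(dev_priv);

	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
	val |= DSP_MAXFIFO_PM5_ENABLE;
	vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);

	vlv_punit_put(dev_priv);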

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |  3 ++
 drivers/gpu/drm/i915/i915_sysfs.c       | 14 ++++----
 drivers/gpu/drm/i915/intel_cdclk.c      | 24 ++++++++++---
 drivers/gpu/drm/i915/intel_display.c    | 16 +++++----
 drivers/gpu/drm/i915/intel_pm.c         | 46 ++++++++++++++++++++-----
 drivers/gpu/drm/i915/intel_runtime_pm.c |  8 +++++
 drivers/gpu/drm/i915/intel_sideband.c   | 18 ++--------
 7 files changed, 86 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 72d2238755db..9eef7e1b6008 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1088,7 +1088,10 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 			   yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
 				  GEN6_RP_MEDIA_SW_MODE));
 
+		vlv_punit_get(dev_priv);
 		freq_sts = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
+		vlv_punit_put(dev_priv);
+
 		seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts);
 		seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq);
 
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index e5e6f6bb2b05..0519e00b3720 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -258,25 +258,25 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev,
 				    struct device_attribute *attr, char *buf)
 {
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-	int ret;
+	u32 freq;
 
 	intel_runtime_pm_get(dev_priv);
 
 	mutex_lock(&dev_priv->pcu_lock);
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
-		u32 freq;
+		vlv_punit_get(dev_priv);
 		freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
-		ret = intel_gpu_freq(dev_priv, (freq >> 8) & 0xff);
+		vlv_punit_put(dev_priv);
+
+		freq = (freq >> 8) & 0xff;
 	} else {
-		ret = intel_gpu_freq(dev_priv,
-				     intel_get_cagf(dev_priv,
-						    I915_READ(GEN6_RPSTAT1)));
+		freq = intel_get_cagf(dev_priv, I915_READ(GEN6_RPSTAT1));
 	}
 	mutex_unlock(&dev_priv->pcu_lock);
 
 	intel_runtime_pm_put(dev_priv);
 
-	return snprintf(buf, PAGE_SIZE, "%d\n", ret);
+	return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, freq));
 }
 
 static ssize_t gt_cur_freq_mhz_show(struct device *kdev,
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index cb43fd139a0e..ba8d2fb02a4c 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -461,13 +461,19 @@ static void vlv_get_cdclk(struct drm_i915_private *dev_priv,
 {
 	u32 val;
 
+	mutex_lock(&dev_priv->pcu_lock);
+	vlv_iosf_sb_get(dev_priv,
+			BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_PUNIT));
+
 	cdclk_state->vco = vlv_get_hpll_vco(dev_priv);
 	cdclk_state->cdclk = vlv_get_cck_clock(dev_priv, "cdclk",
 					       CCK_DISPLAY_CLOCK_CONTROL,
 					       cdclk_state->vco);
 
-	mutex_lock(&dev_priv->pcu_lock);
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
+
+	vlv_iosf_sb_put(dev_priv,
+			BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_PUNIT));
 	mutex_unlock(&dev_priv->pcu_lock);
 
 	if (IS_VALLEYVIEW(dev_priv))
@@ -540,6 +546,11 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 	 */
 	intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A);
 
+	vlv_iosf_sb_get(dev_priv,
+			BIT(VLV_IOSF_SB_CCK) |
+			BIT(VLV_IOSF_SB_BUNIT) |
+			BIT(VLV_IOSF_SB_PUNIT));
+
 	mutex_lock(&dev_priv->pcu_lock);
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
 	val &= ~DSPFREQGUAR_MASK;
@@ -552,9 +563,6 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 	}
 	mutex_unlock(&dev_priv->pcu_lock);
 
-	vlv_iosf_sb_get(dev_priv,
-			BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_BUNIT));
-
 	if (cdclk == 400000) {
 		u32 divider;
 
@@ -588,7 +596,9 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 	vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val);
 
 	vlv_iosf_sb_put(dev_priv,
-			BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_BUNIT));
+			BIT(VLV_IOSF_SB_CCK) |
+			BIT(VLV_IOSF_SB_BUNIT) |
+			BIT(VLV_IOSF_SB_PUNIT));
 
 	intel_update_cdclk(dev_priv);
 
@@ -623,6 +633,8 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv,
 	intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A);
 
 	mutex_lock(&dev_priv->pcu_lock);
+	vlv_punit_get(dev_priv);
+
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
 	val &= ~DSPFREQGUAR_MASK_CHV;
 	val |= (cmd << DSPFREQGUAR_SHIFT_CHV);
@@ -632,6 +644,8 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv,
 		     50)) {
 		DRM_ERROR("timed out waiting for CDclk change\n");
 	}
+
+	vlv_punit_put(dev_priv);
 	mutex_unlock(&dev_priv->pcu_lock);
 
 	intel_update_cdclk(dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index e6203bf4c899..35758bfbf173 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -181,10 +181,8 @@ int vlv_get_hpll_vco(struct drm_i915_private *dev_priv)
 	int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 };
 
 	/* Obtain SKU information */
-	vlv_cck_get(dev_priv);
 	hpll_freq = vlv_cck_read(dev_priv, CCK_FUSE_REG) &
 		CCK_FUSE_HPLL_FREQ_MASK;
-	vlv_cck_put(dev_priv);
 
 	return vco_freq[hpll_freq] * 1000;
 }
@@ -195,10 +193,7 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv,
 	u32 val;
 	int divider;
 
-	vlv_cck_get(dev_priv);
 	val = vlv_cck_read(dev_priv, reg);
-	vlv_cck_put(dev_priv);
-
 	divider = val & CCK_FREQUENCY_VALUES;
 
 	WARN((val & CCK_FREQUENCY_STATUS) !=
@@ -211,11 +206,18 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv,
 int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv,
 			   const char *name, u32 reg)
 {
+	int hpll;
+
+	vlv_cck_get(dev_priv);
+
 	if (dev_priv->hpll_freq == 0)
 		dev_priv->hpll_freq = vlv_get_hpll_vco(dev_priv);
 
-	return vlv_get_cck_clock(dev_priv, name, reg,
-				 dev_priv->hpll_freq);
+	hpll = vlv_get_cck_clock(dev_priv, name, reg, dev_priv->hpll_freq);
+
+	vlv_cck_put(dev_priv);
+
+	return hpll;
 }
 
 static void intel_update_czclk(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 877830ea7b5e..02cc7a6a34d6 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -311,6 +311,7 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
 	u32 val;
 
 	mutex_lock(&dev_priv->pcu_lock);
+	vlv_punit_get(dev_priv);
 
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
 	if (enable)
@@ -325,6 +326,7 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
 		      FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
 		DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");
 
+	vlv_punit_put(dev_priv);
 	mutex_unlock(&dev_priv->pcu_lock);
 }
 
@@ -333,6 +335,7 @@ static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
 	u32 val;
 
 	mutex_lock(&dev_priv->pcu_lock);
+	vlv_punit_get(dev_priv);
 
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
 	if (enable)
@@ -341,6 +344,7 @@ static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
 		val &= ~DSP_MAXFIFO_PM5_ENABLE;
 	vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);
 
+	vlv_punit_put(dev_priv);
 	mutex_unlock(&dev_priv->pcu_lock);
 }
 
@@ -5858,6 +5862,7 @@ void vlv_wm_get_hw_state(struct drm_device *dev)
 
 	if (IS_CHERRYVIEW(dev_priv)) {
 		mutex_lock(&dev_priv->pcu_lock);
+		vlv_punit_get(dev_priv);
 
 		val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
 		if (val & DSP_MAXFIFO_PM5_ENABLE)
@@ -5887,6 +5892,7 @@ void vlv_wm_get_hw_state(struct drm_device *dev)
 				wm->level = VLV_WM_LEVEL_DDR_DVFS;
 		}
 
+		vlv_punit_put(dev_priv);
 		mutex_unlock(&dev_priv->pcu_lock);
 	}
 
@@ -6434,7 +6440,9 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
 	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
 
 	if (val != dev_priv->gt_pm.rps.cur_freq) {
+		vlv_punit_get(dev_priv);
 		err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
+		vlv_punit_put(dev_priv);
 		if (err)
 			return err;
 
@@ -7373,6 +7381,11 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
 
 	valleyview_setup_pctx(dev_priv);
 
+	vlv_iosf_sb_get(dev_priv,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
+
 	vlv_init_gpll_ref_freq(dev_priv);
 
 	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
@@ -7410,6 +7423,11 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
 	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
 			 intel_gpu_freq(dev_priv, rps->min_freq),
 			 rps->min_freq);
+
+	vlv_iosf_sb_put(dev_priv,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
 }
 
 static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
@@ -7419,11 +7437,14 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
 
 	cherryview_setup_pctx(dev_priv);
 
+	vlv_iosf_sb_get(dev_priv,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
+
 	vlv_init_gpll_ref_freq(dev_priv);
 
-	vlv_cck_get(dev_priv);
 	val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
-	vlv_cck_put(dev_priv);
 
 	switch ((val >> 2) & 0x7) {
 	case 3:
@@ -7456,6 +7477,11 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
 			 intel_gpu_freq(dev_priv, rps->min_freq),
 			 rps->min_freq);
 
+	vlv_iosf_sb_put(dev_priv,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
+
 	WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
 		   rps->min_freq) & 1,
 		  "Odd GPU freq values\n");
@@ -7543,13 +7569,15 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
 		   GEN6_RP_DOWN_IDLE_AVG);
 
 	/* Setting Fixed Bias */
-	val = VLV_OVERRIDE_EN |
-		  VLV_SOC_TDP_EN |
-		  CHV_BIAS_CPU_50_SOC_50;
+	vlv_punit_get(dev_priv);
+
+	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
 	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
 
 	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
 
+	vlv_punit_put(dev_priv);
+
 	/* RPS code assumes GPLL is used */
 	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
 
@@ -7626,14 +7654,16 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
 		   GEN6_RP_UP_BUSY_AVG |
 		   GEN6_RP_DOWN_IDLE_CONT);
 
+	vlv_punit_get(dev_priv);
+
 	/* Setting Fixed Bias */
-	val = VLV_OVERRIDE_EN |
-		  VLV_SOC_TDP_EN |
-		  VLV_BIAS_CPU_125_SOC_875;
+	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
 	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
 
 	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
 
+	vlv_punit_put(dev_priv);
+
 	/* RPS code assumes GPLL is used */
 	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
 
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 78262cc5b483..0ad20c1bf4f5 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -831,6 +831,7 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv,
 			 PUNIT_PWRGT_PWR_GATE(power_well_id);
 
 	mutex_lock(&dev_priv->pcu_lock);
+	vlv_punit_get(dev_priv);
 
 #define COND \
 	((vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask) == state)
@@ -851,6 +852,7 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv,
 #undef COND
 
 out:
+	vlv_punit_put(dev_priv);
 	mutex_unlock(&dev_priv->pcu_lock);
 }
 
@@ -879,6 +881,7 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv,
 	ctrl = PUNIT_PWRGT_PWR_ON(power_well_id);
 
 	mutex_lock(&dev_priv->pcu_lock);
+	vlv_punit_get(dev_priv);
 
 	state = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask;
 	/*
@@ -897,6 +900,7 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv,
 	ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL) & mask;
 	WARN_ON(ctrl != state);
 
+	vlv_punit_put(dev_priv);
 	mutex_unlock(&dev_priv->pcu_lock);
 
 	return enabled;
@@ -1410,6 +1414,7 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv,
 	u32 state, ctrl;
 
 	mutex_lock(&dev_priv->pcu_lock);
+	vlv_punit_get(dev_priv);
 
 	state = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe);
 	/*
@@ -1426,6 +1431,7 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv,
 	ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSC_MASK(pipe);
 	WARN_ON(ctrl << 16 != state);
 
+	vlv_punit_put(dev_priv);
 	mutex_unlock(&dev_priv->pcu_lock);
 
 	return enabled;
@@ -1442,6 +1448,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv,
 	state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe);
 
 	mutex_lock(&dev_priv->pcu_lock);
+	vlv_punit_get(dev_priv);
 
 #define COND \
 	((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe)) == state)
@@ -1462,6 +1469,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv,
 #undef COND
 
 out:
+	vlv_punit_put(dev_priv);
 	mutex_unlock(&dev_priv->pcu_lock);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index 3d7c5917b97c..dc3b491b4d00 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -144,30 +144,18 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr)
 
 	lockdep_assert_held(&dev_priv->pcu_lock);
 
-	vlv_punit_get(dev_priv);
-
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
 			SB_CRRDDA_NP, addr, &val);
 
-	vlv_punit_put(dev_priv);
-
 	return val;
 }
 
 int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val)
 {
-	int err;
-
 	lockdep_assert_held(&dev_priv->pcu_lock);
 
-	vlv_punit_get(dev_priv);
-
-	err = vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
-			      SB_CRWRDA_NP, addr, &val);
-
-	vlv_punit_put(dev_priv);
-
-	return err;
+	return vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
+			       SB_CRWRDA_NP, addr, &val);
 }
 
 void vlv_punit_get(struct drm_i915_private *dev_priv)
@@ -210,10 +198,8 @@ u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr)
 {
 	u32 val = 0;
 
-	vlv_nc_get(dev_priv);
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_NC,
 			SB_CRRDDA_NP, addr, &val);
-	vlv_nc_put(dev_priv);
 
 	return val;
 }
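
A minimal sketch of the caller-side pattern this lifting establishes,
assuming the vlv_punit_get/put and vlv_punit_read helpers above
(example_read_dspfreq is a hypothetical name, not a function added by
the patch):

	static u32 example_read_dspfreq(struct drm_i915_private *dev_priv)
	{
		u32 val;

		vlv_punit_get(dev_priv);	/* force the punit awake, serialize access */
		val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
		vlv_punit_put(dev_priv);	/* release, let the punit sleep again */

		return val;
	}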
-- 
2.17.0


* [PATCH 035/262] drm/i915: Reduce RPS update frequency on Valleyview/Cherryview
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (32 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 034/262] drm/i915: Lift sideband locking for vlv_punit_(read|write) Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 036/262] Revert "drm/i915: Avoid tweaking evaluation thresholds on Baytrail v3" Chris Wilson
                   ` (21 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

Valleyview and Cherryview update the GPU frequency via the punit, which
is very expensive as we have to ensure the cores do not sleep during the
comms. If we perform frequent RPS evaluations, the frequent punit
requests cause measurable system overhead for little benefit, so
increase the evaluation intervals to reduce the number of times we try
to change frequency.
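
As a rough sketch of the effect (the base values below are hypothetical;
only the "<<= 2" scaling comes from this patch), quadrupling the
evaluation intervals cuts the worst-case punit request rate to a
quarter:

	unsigned int ei_up = 10000;	/* us: upclock evaluation interval (hypothetical) */
	unsigned int ei_down = 32000;	/* us: downclock evaluation interval (hypothetical) */

	ei_up <<= 2;			/* 10ms -> 40ms between evaluations */
	ei_down <<= 2;			/* 32ms -> 128ms between evaluations */
	/* worst case: ~100 punit requests/s before, ~25/s after */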

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_pm.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 02cc7a6a34d6..d4c2b67e44de 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6342,6 +6342,19 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
 		break;
 	}
 
+	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
+		/*
+		 * Baytrail and Braswell control the gpu frequency via the
+		 * punit, which is very slow and expensive to communicate with,
+		 * as we synchronously force the package to C0. If we try and
+		 * update the gpufreq too often we cause measurable system
+		 * load for little benefit (effectively stealing CPU time for
+		 * the GPU, negatively impacting overall throughput).
+		 */
+		ei_up <<= 2;
+		ei_down <<= 2;
+	}
+
 	/* When byt can survive without system hang with dynamic
 	 * sw freq adjustments, this restriction can be lifted.
 	 */
-- 
2.17.0


* [PATCH 036/262] Revert "drm/i915: Avoid tweaking evaluation thresholds on Baytrail v3"
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (33 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 035/262] drm/i915: Reduce RPS update frequency on Valleyview/Cherryview Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 037/262] drm/i915: Replace pcu_lock with sb_lock Chris Wilson
                   ` (20 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx; +Cc: Len Brown, Jani Nikula, Daniel Vetter, fritsch

With the vlv sideband fixed to avoid sleeping while we talk to the
punit, the system should be much more stable and be able to utilise the
punit without risk.

This reverts commit 6067a27d1f01 ("drm/i915: Avoid tweaking evaluation
thresholds on Baytrail v3")

References: 6067a27d1f01 ("drm/i915: Avoid tweaking evaluation thresholds on Baytrail v3")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: fritsch@xbmc.org
---
 drivers/gpu/drm/i915/intel_pm.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index d4c2b67e44de..ac8018bffd2a 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6355,12 +6355,6 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
 		ei_down <<= 2;
 	}
 
-	/* When byt can survive without system hang with dynamic
-	 * sw freq adjustments, this restriction can be lifted.
-	 */
-	if (IS_VALLEYVIEW(dev_priv))
-		goto skip_hw_write;
-
 	I915_WRITE(GEN6_RP_UP_EI,
 		   GT_INTERVAL_FROM_US(dev_priv, ei_up));
 	I915_WRITE(GEN6_RP_UP_THRESHOLD,
@@ -6381,7 +6375,6 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
 		   GEN6_RP_UP_BUSY_AVG |
 		   GEN6_RP_DOWN_IDLE_AVG);
 
-skip_hw_write:
 	rps->power = new_power;
 	rps->up_threshold = threshold_up;
 	rps->down_threshold = threshold_down;
-- 
2.17.0


* [PATCH 037/262] drm/i915: Replace pcu_lock with sb_lock
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (34 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 036/262] Revert "drm/i915: Avoid tweaking evaluation thresholds on Baytrail v3" Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 038/262] drm/i915: Separate sideband declarations to intel_sideband.h Chris Wilson
                   ` (19 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

We now have two locks for sideband access: the general one, sb_lock,
covering sideband access across all generations, and a specific one
covering sideband access via the punit on vlv/chv. After lifting the
sb_lock around the punit into the callers, the pcu_lock is now redundant
and can be separated from its other use of regulating RPS (essentially
giving RPS a lock all of its own).
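
A sketch of what this means for the callers, assuming rps->lock guards
only the RPS bookkeeping while sb_lock is taken inside the pcode
accessors (or around the punit section by the caller):

	/* before: one lock for RPS state and PCU/sideband comms */
	mutex_lock(&dev_priv->pcu_lock);
	intel_set_rps(dev_priv, val);
	mutex_unlock(&dev_priv->pcu_lock);

	/* after: RPS state has its own mutex; the sideband transaction
	 * is serialized by sb_lock inside sandybridge_pcode_*() or by
	 * vlv_punit_get/put
	 */
	mutex_lock(&rps->lock);
	intel_set_rps(dev_priv, val);
	mutex_unlock(&rps->lock);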

v2: Extract a couple of minor bug fixes.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |  23 +----
 drivers/gpu/drm/i915/i915_drv.h         |  10 +-
 drivers/gpu/drm/i915/i915_irq.c         |   4 +-
 drivers/gpu/drm/i915/i915_sysfs.c       |  32 +++---
 drivers/gpu/drm/i915/intel_cdclk.c      |  28 ------
 drivers/gpu/drm/i915/intel_display.c    |   6 --
 drivers/gpu/drm/i915/intel_hdcp.c       |   2 -
 drivers/gpu/drm/i915/intel_pm.c         | 127 ++++++++++++------------
 drivers/gpu/drm/i915/intel_runtime_pm.c |   8 --
 drivers/gpu/drm/i915/intel_sideband.c   |   4 -
 10 files changed, 81 insertions(+), 163 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 9eef7e1b6008..3ded47279919 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1077,8 +1077,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 	} else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
 		u32 rpmodectl, freq_sts;
 
-		mutex_lock(&dev_priv->pcu_lock);
-
 		rpmodectl = I915_READ(GEN6_RP_CONTROL);
 		seq_printf(m, "Video Turbo Mode: %s\n",
 			   yesno(rpmodectl & GEN6_RP_MEDIA_TURBO));
@@ -1113,7 +1111,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 		seq_printf(m,
 			   "efficient (RPe) frequency: %d MHz\n",
 			   intel_gpu_freq(dev_priv, rps->efficient_freq));
-		mutex_unlock(&dev_priv->pcu_lock);
 	} else if (INTEL_GEN(dev_priv) >= 6) {
 		u32 rp_state_limits;
 		u32 gt_perf_status;
@@ -1527,12 +1524,9 @@ static int gen6_drpc_info(struct seq_file *m)
 		gen9_powergate_status = I915_READ(GEN9_PWRGT_DOMAIN_STATUS);
 	}
 
-	if (INTEL_GEN(dev_priv) <= 7) {
-		mutex_lock(&dev_priv->pcu_lock);
+	if (INTEL_GEN(dev_priv) <= 7)
 		sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS,
 				       &rc6vids);
-		mutex_unlock(&dev_priv->pcu_lock);
-	}
 
 	seq_printf(m, "RC1e Enabled: %s\n",
 		   yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE));
@@ -1803,17 +1797,10 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 	unsigned int max_gpu_freq, min_gpu_freq;
 	int gpu_freq, ia_freq;
-	int ret;
 
 	if (!HAS_LLC(dev_priv))
 		return -ENODEV;
 
-	intel_runtime_pm_get(dev_priv);
-
-	ret = mutex_lock_interruptible(&dev_priv->pcu_lock);
-	if (ret)
-		goto out;
-
 	min_gpu_freq = rps->min_freq;
 	max_gpu_freq = rps->max_freq;
 	if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
@@ -1824,6 +1811,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
 
 	seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n");
 
+	intel_runtime_pm_get(dev_priv);
 	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
 		ia_freq = gpu_freq;
 		sandybridge_pcode_read(dev_priv,
@@ -1837,12 +1825,9 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
 			   ((ia_freq >> 0) & 0xff) * 100,
 			   ((ia_freq >> 8) & 0xff) * 100);
 	}
-
-	mutex_unlock(&dev_priv->pcu_lock);
-
-out:
 	intel_runtime_pm_put(dev_priv);
-	return ret;
+
+	return 0;
 }
 
 static int i915_opregion(struct seq_file *m, void *unused)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2400ead8c4c9..c9cc94f308fb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -741,6 +741,8 @@ struct intel_rps_ei {
 };
 
 struct intel_rps {
+	struct mutex lock;
+
 	/*
 	 * work, interrupts_enabled and pm_iir are protected by
 	 * dev_priv->irq_lock
@@ -1797,14 +1799,6 @@ struct drm_i915_private {
 	/* Cannot be determined by PCIID. You must always read a register. */
 	u32 edram_cap;
 
-	/*
-	 * Protects RPS/RC6 register access and PCU communication.
-	 * Must be taken after struct_mutex if nested. Note that
-	 * this lock may be held for long periods of time when
-	 * talking to hw - so only take it when talking to hw!
-	 */
-	struct mutex pcu_lock;
-
 	/* gen6+ GT PM state */
 	struct intel_gen6_power_mgmt gt_pm;
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index e6c4007ecfd4..fe93e6731062 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1265,7 +1265,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
 	if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost)
 		goto out;
 
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&rps->lock);
 
 	pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
 
@@ -1319,7 +1319,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
 		rps->last_adj = 0;
 	}
 
-	mutex_unlock(&dev_priv->pcu_lock);
+	mutex_unlock(&rps->lock);
 
 out:
 	/* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 0519e00b3720..c98375ba79b4 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -262,7 +262,6 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev,
 
 	intel_runtime_pm_get(dev_priv);
 
-	mutex_lock(&dev_priv->pcu_lock);
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
 		vlv_punit_get(dev_priv);
 		freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
@@ -272,7 +271,6 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev,
 	} else {
 		freq = intel_get_cagf(dev_priv, I915_READ(GEN6_RPSTAT1));
 	}
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	intel_runtime_pm_put(dev_priv);
 
@@ -317,12 +315,12 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
 	if (val < rps->min_freq || val > rps->max_freq)
 		return -EINVAL;
 
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&rps->lock);
 	if (val != rps->boost_freq) {
 		rps->boost_freq = val;
 		boost = atomic_read(&rps->num_waiters);
 	}
-	mutex_unlock(&dev_priv->pcu_lock);
+	mutex_unlock(&rps->lock);
 	if (boost)
 		schedule_work(&rps->work);
 
@@ -362,17 +360,14 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 		return ret;
 
 	intel_runtime_pm_get(dev_priv);
-
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&rps->lock);
 
 	val = intel_freq_opcode(dev_priv, val);
-
 	if (val < rps->min_freq ||
 	    val > rps->max_freq ||
 	    val < rps->min_freq_softlimit) {
-		mutex_unlock(&dev_priv->pcu_lock);
-		intel_runtime_pm_put(dev_priv);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto unlock;
 	}
 
 	if (val > rps->rp0_freq)
@@ -390,8 +385,8 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 	 * frequency request may be unchanged. */
 	ret = intel_set_rps(dev_priv, val);
 
-	mutex_unlock(&dev_priv->pcu_lock);
-
+unlock:
+	mutex_unlock(&rps->lock);
 	intel_runtime_pm_put(dev_priv);
 
 	return ret ?: count;
@@ -420,17 +415,14 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 		return ret;
 
 	intel_runtime_pm_get(dev_priv);
-
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&rps->lock);
 
 	val = intel_freq_opcode(dev_priv, val);
-
 	if (val < rps->min_freq ||
 	    val > rps->max_freq ||
 	    val > rps->max_freq_softlimit) {
-		mutex_unlock(&dev_priv->pcu_lock);
-		intel_runtime_pm_put(dev_priv);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto unlock;
 	}
 
 	rps->min_freq_softlimit = val;
@@ -444,8 +436,8 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 	 * frequency request may be unchanged. */
 	ret = intel_set_rps(dev_priv, val);
 
-	mutex_unlock(&dev_priv->pcu_lock);
-
+unlock:
+	mutex_unlock(&rps->lock);
 	intel_runtime_pm_put(dev_priv);
 
 	return ret ?: count;
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index ba8d2fb02a4c..67f5a17e1331 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -461,7 +461,6 @@ static void vlv_get_cdclk(struct drm_i915_private *dev_priv,
 {
 	u32 val;
 
-	mutex_lock(&dev_priv->pcu_lock);
 	vlv_iosf_sb_get(dev_priv,
 			BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_PUNIT));
 
@@ -474,7 +473,6 @@ static void vlv_get_cdclk(struct drm_i915_private *dev_priv,
 
 	vlv_iosf_sb_put(dev_priv,
 			BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_PUNIT));
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	if (IS_VALLEYVIEW(dev_priv))
 		cdclk_state->voltage_level = (val & DSPFREQGUAR_MASK) >>
@@ -551,7 +549,6 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 			BIT(VLV_IOSF_SB_BUNIT) |
 			BIT(VLV_IOSF_SB_PUNIT));
 
-	mutex_lock(&dev_priv->pcu_lock);
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
 	val &= ~DSPFREQGUAR_MASK;
 	val |= (cmd << DSPFREQGUAR_SHIFT);
@@ -561,7 +558,6 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 		     50)) {
 		DRM_ERROR("timed out waiting for CDclk change\n");
 	}
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	if (cdclk == 400000) {
 		u32 divider;
@@ -632,7 +628,6 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv,
 	 */
 	intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A);
 
-	mutex_lock(&dev_priv->pcu_lock);
 	vlv_punit_get(dev_priv);
 
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
@@ -646,7 +641,6 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv,
 	}
 
 	vlv_punit_put(dev_priv);
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	intel_update_cdclk(dev_priv);
 
@@ -724,10 +718,8 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv,
 		 "trying to change cdclk frequency with cdclk not enabled\n"))
 		return;
 
-	mutex_lock(&dev_priv->pcu_lock);
 	ret = sandybridge_pcode_write(dev_priv,
 				      BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0);
-	mutex_unlock(&dev_priv->pcu_lock);
 	if (ret) {
 		DRM_ERROR("failed to inform pcode about cdclk change\n");
 		return;
@@ -776,10 +768,8 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv,
 			LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1))
 		DRM_ERROR("Switching back to LCPLL failed\n");
 
-	mutex_lock(&dev_priv->pcu_lock);
 	sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ,
 				cdclk_state->voltage_level);
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1);
 
@@ -1007,12 +997,10 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv,
 	u32 freq_select, cdclk_ctl;
 	int ret;
 
-	mutex_lock(&dev_priv->pcu_lock);
 	ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL,
 				SKL_CDCLK_PREPARE_FOR_CHANGE,
 				SKL_CDCLK_READY_FOR_CHANGE,
 				SKL_CDCLK_READY_FOR_CHANGE, 3);
-	mutex_unlock(&dev_priv->pcu_lock);
 	if (ret) {
 		DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n",
 			  ret);
@@ -1076,10 +1064,8 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv,
 	POSTING_READ(CDCLK_CTL);
 
 	/* inform PCU of the change */
-	mutex_lock(&dev_priv->pcu_lock);
 	sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL,
 				cdclk_state->voltage_level);
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	intel_update_cdclk(dev_priv);
 }
@@ -1391,12 +1377,9 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv,
 	 * requires us to wait up to 150usec, but that leads to timeouts;
 	 * the 2ms used here is based on experiment.
 	 */
-	mutex_lock(&dev_priv->pcu_lock);
 	ret = sandybridge_pcode_write_timeout(dev_priv,
 					      HSW_PCODE_DE_WRITE_FREQ_REQ,
 					      0x80000000, 150, 2);
-	mutex_unlock(&dev_priv->pcu_lock);
-
 	if (ret) {
 		DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n",
 			  ret, cdclk);
@@ -1424,7 +1407,6 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv,
 		val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE;
 	I915_WRITE(CDCLK_CTL, val);
 
-	mutex_lock(&dev_priv->pcu_lock);
 	/*
 	 * The timeout isn't specified, the 2ms used here is based on
 	 * experiment.
@@ -1434,8 +1416,6 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv,
 	ret = sandybridge_pcode_write_timeout(dev_priv,
 					      HSW_PCODE_DE_WRITE_FREQ_REQ,
 					      cdclk_state->voltage_level, 150, 2);
-	mutex_unlock(&dev_priv->pcu_lock);
-
 	if (ret) {
 		DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n",
 			  ret, cdclk);
@@ -1673,12 +1653,10 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv,
 	u32 val, divider;
 	int ret;
 
-	mutex_lock(&dev_priv->pcu_lock);
 	ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL,
 				SKL_CDCLK_PREPARE_FOR_CHANGE,
 				SKL_CDCLK_READY_FOR_CHANGE,
 				SKL_CDCLK_READY_FOR_CHANGE, 3);
-	mutex_unlock(&dev_priv->pcu_lock);
 	if (ret) {
 		DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n",
 			  ret);
@@ -1715,10 +1693,8 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv,
 	I915_WRITE(CDCLK_CTL, val);
 
 	/* inform PCU of the change */
-	mutex_lock(&dev_priv->pcu_lock);
 	sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL,
 				cdclk_state->voltage_level);
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	intel_update_cdclk(dev_priv);
 
@@ -1854,12 +1830,10 @@ static void icl_set_cdclk(struct drm_i915_private *dev_priv,
 	unsigned int vco = cdclk_state->vco;
 	int ret;
 
-	mutex_lock(&dev_priv->pcu_lock);
 	ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL,
 				SKL_CDCLK_PREPARE_FOR_CHANGE,
 				SKL_CDCLK_READY_FOR_CHANGE,
 				SKL_CDCLK_READY_FOR_CHANGE, 3);
-	mutex_unlock(&dev_priv->pcu_lock);
 	if (ret) {
 		DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n",
 			  ret);
@@ -1876,10 +1850,8 @@ static void icl_set_cdclk(struct drm_i915_private *dev_priv,
 	I915_WRITE(CDCLK_CTL, ICL_CDCLK_CD2X_PIPE_NONE |
 			      skl_cdclk_decimal(cdclk));
 
-	mutex_lock(&dev_priv->pcu_lock);
 	/* TODO: add proper DVFS support. */
 	sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, 2);
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	intel_update_cdclk(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 35758bfbf173..7d88da0917d7 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -5033,10 +5033,8 @@ void hsw_enable_ips(const struct intel_crtc_state *crtc_state)
 	WARN_ON(!(crtc_state->active_planes & ~BIT(PLANE_CURSOR)));
 
 	if (IS_BROADWELL(dev_priv)) {
-		mutex_lock(&dev_priv->pcu_lock);
 		WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL,
 						IPS_ENABLE | IPS_PCODE_CONTROL));
-		mutex_unlock(&dev_priv->pcu_lock);
 		/* Quoting Art Runyan: "its not safe to expect any particular
 		 * value in IPS_CTL bit 31 after enabling IPS through the
 		 * mailbox." Moreover, the mailbox may return a bogus state,
@@ -5066,9 +5064,7 @@ void hsw_disable_ips(const struct intel_crtc_state *crtc_state)
 		return;
 
 	if (IS_BROADWELL(dev_priv)) {
-		mutex_lock(&dev_priv->pcu_lock);
 		WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0));
-		mutex_unlock(&dev_priv->pcu_lock);
 		/* wait for pcode to finish disabling IPS, which may take up to 42ms */
 		if (intel_wait_for_register(dev_priv,
 					    IPS_CTL, IPS_ENABLE, 0,
@@ -8973,11 +8969,9 @@ static uint32_t hsw_read_dcomp(struct drm_i915_private *dev_priv)
 static void hsw_write_dcomp(struct drm_i915_private *dev_priv, uint32_t val)
 {
 	if (IS_HASWELL(dev_priv)) {
-		mutex_lock(&dev_priv->pcu_lock);
 		if (sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_D_COMP,
 					    val))
 			DRM_DEBUG_KMS("Failed to write to D_COMP\n");
-		mutex_unlock(&dev_priv->pcu_lock);
 	} else {
 		I915_WRITE(D_COMP_BDW, val);
 		POSTING_READ(D_COMP_BDW);
diff --git a/drivers/gpu/drm/i915/intel_hdcp.c b/drivers/gpu/drm/i915/intel_hdcp.c
index 2db5da550a1c..6fa39d4521ca 100644
--- a/drivers/gpu/drm/i915/intel_hdcp.c
+++ b/drivers/gpu/drm/i915/intel_hdcp.c
@@ -105,10 +105,8 @@ static int intel_hdcp_load_keys(struct drm_i915_private *dev_priv)
 	 * differ in the key load trigger process from other platforms.
 	 */
 	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
-		mutex_lock(&dev_priv->pcu_lock);
 		ret = sandybridge_pcode_write(dev_priv,
 					      SKL_PCODE_LOAD_HDCP_KEYS, 1);
-		mutex_unlock(&dev_priv->pcu_lock);
 		if (ret) {
 			DRM_ERROR("Failed to initiate HDCP key load (%d)\n",
 			          ret);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index ac8018bffd2a..a128e2846f8f 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -310,7 +310,6 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
 {
 	u32 val;
 
-	mutex_lock(&dev_priv->pcu_lock);
 	vlv_punit_get(dev_priv);
 
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
@@ -327,14 +326,12 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
 		DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");
 
 	vlv_punit_put(dev_priv);
-	mutex_unlock(&dev_priv->pcu_lock);
 }
 
 static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
 {
 	u32 val;
 
-	mutex_lock(&dev_priv->pcu_lock);
 	vlv_punit_get(dev_priv);
 
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
@@ -345,7 +342,6 @@ static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
 	vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);
 
 	vlv_punit_put(dev_priv);
-	mutex_unlock(&dev_priv->pcu_lock);
 }
 
 #define FW_WM(value, plane) \
@@ -2810,11 +2806,9 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
 
 		/* read the first set of memory latencies[0:3] */
 		val = 0; /* data0 to be programmed to 0 for first set */
-		mutex_lock(&dev_priv->pcu_lock);
 		ret = sandybridge_pcode_read(dev_priv,
 					     GEN9_PCODE_READ_MEM_LATENCY,
 					     &val);
-		mutex_unlock(&dev_priv->pcu_lock);
 
 		if (ret) {
 			DRM_ERROR("SKL Mailbox read error = %d\n", ret);
@@ -2831,11 +2825,9 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
 
 		/* read the second set of memory latencies[4:7] */
 		val = 1; /* data0 to be programmed to 1 for second set */
-		mutex_lock(&dev_priv->pcu_lock);
 		ret = sandybridge_pcode_read(dev_priv,
 					     GEN9_PCODE_READ_MEM_LATENCY,
 					     &val);
-		mutex_unlock(&dev_priv->pcu_lock);
 		if (ret) {
 			DRM_ERROR("SKL Mailbox read error = %d\n", ret);
 			return;
@@ -3639,13 +3631,10 @@ intel_enable_sagv(struct drm_i915_private *dev_priv)
 		return 0;
 
 	DRM_DEBUG_KMS("Enabling the SAGV\n");
-	mutex_lock(&dev_priv->pcu_lock);
-
 	ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
 				      GEN9_SAGV_ENABLE);
 
 	/* We don't need to wait for the SAGV when enabling */
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	/*
 	 * Some skl systems, pre-release machines in particular,
@@ -3676,15 +3665,11 @@ intel_disable_sagv(struct drm_i915_private *dev_priv)
 		return 0;
 
 	DRM_DEBUG_KMS("Disabling the SAGV\n");
-	mutex_lock(&dev_priv->pcu_lock);
-
 	/* bspec says to keep retrying for at least 1 ms */
 	ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL,
 				GEN9_SAGV_DISABLE,
 				GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED,
 				1);
-	mutex_unlock(&dev_priv->pcu_lock);
-
 	/*
 	 * Some skl systems, pre-release machines in particular,
 	 * don't actually have an SAGV.
@@ -5861,7 +5846,6 @@ void vlv_wm_get_hw_state(struct drm_device *dev)
 	wm->level = VLV_WM_LEVEL_PM2;
 
 	if (IS_CHERRYVIEW(dev_priv)) {
-		mutex_lock(&dev_priv->pcu_lock);
 		vlv_punit_get(dev_priv);
 
 		val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
@@ -5893,7 +5877,6 @@ void vlv_wm_get_hw_state(struct drm_device *dev)
 		}
 
 		vlv_punit_put(dev_priv);
-		mutex_unlock(&dev_priv->pcu_lock);
 	}
 
 	for_each_intel_crtc(dev, crtc) {
@@ -6501,7 +6484,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&rps->lock);
 	if (rps->enabled) {
 		u8 freq;
 
@@ -6524,7 +6507,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
 					rps->max_freq_softlimit)))
 			DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
 	}
-	mutex_unlock(&dev_priv->pcu_lock);
+	mutex_unlock(&rps->lock);
 }
 
 void gen6_rps_idle(struct drm_i915_private *dev_priv)
@@ -6538,7 +6521,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
 	 */
 	gen6_disable_rps_interrupts(dev_priv);
 
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&rps->lock);
 	if (rps->enabled) {
 		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 			vlv_set_rps_idle(dev_priv);
@@ -6548,7 +6531,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
 		I915_WRITE(GEN6_PMINTRMSK,
 			   gen6_sanitize_rps_pm_mask(dev_priv, ~0));
 	}
-	mutex_unlock(&dev_priv->pcu_lock);
+	mutex_unlock(&rps->lock);
 }
 
 void gen6_rps_boost(struct i915_request *rq,
@@ -6589,7 +6572,7 @@ int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 	int err;
 
-	lockdep_assert_held(&dev_priv->pcu_lock);
+	lockdep_assert_held(&rps->lock);
 	GEM_BUG_ON(val > rps->max_freq);
 	GEM_BUG_ON(val < rps->min_freq);
 
@@ -7088,7 +7071,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
 	unsigned int max_gpu_freq, min_gpu_freq;
 	struct cpufreq_policy *policy;
 
-	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
+	lockdep_assert_held(&rps->lock);
 
 	if (rps->max_freq <= rps->min_freq)
 		return;
@@ -8167,7 +8150,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
 		intel_runtime_pm_get(dev_priv);
 	}
 
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&rps->lock);
 
 	/* Initialize RPS limits (for userspace) */
 	if (IS_CHERRYVIEW(dev_priv))
@@ -8207,7 +8190,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
 	/* Finally allow us to boost to max by default */
 	rps->boost_freq = rps->max_freq;
 
-	mutex_unlock(&dev_priv->pcu_lock);
+	mutex_unlock(&rps->lock);
 }
 
 void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
@@ -8249,7 +8232,7 @@ void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
 
 static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
 {
-	lockdep_assert_held(&i915->pcu_lock);
+	lockdep_assert_held(&i915->gt_pm.rps.lock);
 
 	if (!i915->gt_pm.llc_pstate.enabled)
 		return;
@@ -8261,7 +8244,7 @@ static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
 
 static void intel_disable_rc6(struct drm_i915_private *dev_priv)
 {
-	lockdep_assert_held(&dev_priv->pcu_lock);
+	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
 
 	if (!dev_priv->gt_pm.rc6.enabled)
 		return;
@@ -8280,7 +8263,7 @@ static void intel_disable_rc6(struct drm_i915_private *dev_priv)
 
 static void intel_disable_rps(struct drm_i915_private *dev_priv)
 {
-	lockdep_assert_held(&dev_priv->pcu_lock);
+	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
 
 	if (!dev_priv->gt_pm.rps.enabled)
 		return;
@@ -8301,19 +8284,19 @@ static void intel_disable_rps(struct drm_i915_private *dev_priv)
 
 void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
 {
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&dev_priv->gt_pm.rps.lock);
 
 	intel_disable_rc6(dev_priv);
 	intel_disable_rps(dev_priv);
 	if (HAS_LLC(dev_priv))
 		intel_disable_llc_pstate(dev_priv);
 
-	mutex_unlock(&dev_priv->pcu_lock);
+	mutex_unlock(&dev_priv->gt_pm.rps.lock);
 }
 
 static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
 {
-	lockdep_assert_held(&i915->pcu_lock);
+	lockdep_assert_held(&i915->gt_pm.rps.lock);
 
 	if (i915->gt_pm.llc_pstate.enabled)
 		return;
@@ -8325,7 +8308,7 @@ static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
 
 static void intel_enable_rc6(struct drm_i915_private *dev_priv)
 {
-	lockdep_assert_held(&dev_priv->pcu_lock);
+	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
 
 	if (dev_priv->gt_pm.rc6.enabled)
 		return;
@@ -8348,7 +8331,7 @@ static void intel_enable_rps(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
-	lockdep_assert_held(&dev_priv->pcu_lock);
+	lockdep_assert_held(&rps->lock);
 
 	if (rps->enabled)
 		return;
@@ -8383,7 +8366,7 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
 	if (intel_vgpu_active(dev_priv))
 		return;
 
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&dev_priv->gt_pm.rps.lock);
 
 	if (HAS_RC6(dev_priv))
 		intel_enable_rc6(dev_priv);
@@ -8391,7 +8374,7 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
 	if (HAS_LLC(dev_priv))
 		intel_enable_llc_pstate(dev_priv);
 
-	mutex_unlock(&dev_priv->pcu_lock);
+	mutex_unlock(&dev_priv->gt_pm.rps.lock);
 }
 
 static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -9405,22 +9388,19 @@ static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv)
 	}
 }
 
-int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
+static int __sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
 {
 	int status;
 
-	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
+	lockdep_assert_held(&dev_priv->sb_lock);
 
 	/* GEN6_PCODE_* are outside of the forcewake domain, we can
 	 * use te fw I915_READ variants to reduce the amount of work
 	 * required when reading/writing.
 	 */
 
-	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
-		DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps\n",
-				 mbox, __builtin_return_address(0));
+	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY)
 		return -EAGAIN;
-	}
 
 	I915_WRITE_FW(GEN6_PCODE_DATA, *val);
 	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
@@ -9428,11 +9408,8 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val
 
 	if (__intel_wait_for_register_fw(dev_priv,
 					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
-					 500, 0, NULL)) {
-		DRM_ERROR("timeout waiting for pcode read (from mbox %x) to finish for %ps\n",
-			  mbox, __builtin_return_address(0));
+					 500, 0, NULL))
 		return -ETIMEDOUT;
-	}
 
 	*val = I915_READ_FW(GEN6_PCODE_DATA);
 	I915_WRITE_FW(GEN6_PCODE_DATA, 0);
@@ -9442,33 +9419,39 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val
 	else
 		status = gen6_check_mailbox_status(dev_priv);
 
+	return status;
+}
+
+int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
+{
+	int status;
+
+	mutex_lock(&dev_priv->sb_lock);
+	status = __sandybridge_pcode_read(dev_priv, mbox, val);
+	mutex_unlock(&dev_priv->sb_lock);
+
 	if (status) {
 		DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
 				 mbox, __builtin_return_address(0), status);
-		return status;
 	}
 
-	return 0;
+	return status;
 }
 
-int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
-				    u32 mbox, u32 val,
-				    int fast_timeout_us, int slow_timeout_ms)
+static int __sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
+					     u32 mbox, u32 val,
+					     int fast_timeout_us,
+					     int slow_timeout_ms)
 {
 	int status;
 
-	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
-
 	/* GEN6_PCODE_* are outside of the forcewake domain, we can
 	 * use te fw I915_READ variants to reduce the amount of work
 	 * required when reading/writing.
 	 */
 
-	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
-		DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps\n",
-				 val, mbox, __builtin_return_address(0));
+	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY)
 		return -EAGAIN;
-	}
 
 	I915_WRITE_FW(GEN6_PCODE_DATA, val);
 	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
@@ -9477,11 +9460,8 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
 	if (__intel_wait_for_register_fw(dev_priv,
 					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
 					 fast_timeout_us, slow_timeout_ms,
-					 NULL)) {
-		DRM_ERROR("timeout waiting for pcode write of 0x%08x to mbox %x to finish for %ps\n",
-			  val, mbox, __builtin_return_address(0));
+					 NULL))
 		return -ETIMEDOUT;
-	}
 
 	I915_WRITE_FW(GEN6_PCODE_DATA, 0);
 
@@ -9490,13 +9470,28 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
 	else
 		status = gen6_check_mailbox_status(dev_priv);
 
+	return status;
+}
+
+int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
+				    u32 mbox, u32 val,
+				    int fast_timeout_us,
+				    int slow_timeout_ms)
+{
+	int status;
+
+	mutex_lock(&dev_priv->sb_lock);
+	status = __sandybridge_pcode_write_timeout(dev_priv, mbox, val,
+						   fast_timeout_us,
+						   slow_timeout_ms);
+	mutex_unlock(&dev_priv->sb_lock);
+
 	if (status) {
 		DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
 				 val, mbox, __builtin_return_address(0), status);
-		return status;
 	}
 
-	return 0;
+	return status;
 }
 
 static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
@@ -9505,7 +9500,7 @@ static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
 {
 	u32 val = request;
 
-	*status = sandybridge_pcode_read(dev_priv, mbox, &val);
+	*status = __sandybridge_pcode_read(dev_priv, mbox, &val);
 
 	return *status || ((val & reply_mask) == reply);
 }
@@ -9535,7 +9530,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
 	u32 status;
 	int ret;
 
-	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
+	mutex_lock(&dev_priv->sb_lock);
 
 #define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \
 				   &status)
@@ -9571,6 +9566,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
 	preempt_enable();
 
 out:
+	mutex_unlock(&dev_priv->sb_lock);
 	return ret ? ret : status;
 #undef COND
 }
@@ -9640,8 +9636,7 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
 
 void intel_pm_setup(struct drm_i915_private *dev_priv)
 {
-	mutex_init(&dev_priv->pcu_lock);
-
+	mutex_init(&dev_priv->gt_pm.rps.lock);
 	atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);
 
 	dev_priv->runtime_pm.suspended = false;
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 0ad20c1bf4f5..b1a7ecb6ab57 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -830,7 +830,6 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv,
 	state = enable ? PUNIT_PWRGT_PWR_ON(power_well_id) :
 			 PUNIT_PWRGT_PWR_GATE(power_well_id);
 
-	mutex_lock(&dev_priv->pcu_lock);
 	vlv_punit_get(dev_priv);
 
 #define COND \
@@ -853,7 +852,6 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv,
 
 out:
 	vlv_punit_put(dev_priv);
-	mutex_unlock(&dev_priv->pcu_lock);
 }
 
 static void vlv_power_well_enable(struct drm_i915_private *dev_priv,
@@ -880,7 +878,6 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv,
 	mask = PUNIT_PWRGT_MASK(power_well_id);
 	ctrl = PUNIT_PWRGT_PWR_ON(power_well_id);
 
-	mutex_lock(&dev_priv->pcu_lock);
 	vlv_punit_get(dev_priv);
 
 	state = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask;
@@ -901,7 +898,6 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv,
 	WARN_ON(ctrl != state);
 
 	vlv_punit_put(dev_priv);
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	return enabled;
 }
@@ -1413,7 +1409,6 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv,
 	bool enabled;
 	u32 state, ctrl;
 
-	mutex_lock(&dev_priv->pcu_lock);
 	vlv_punit_get(dev_priv);
 
 	state = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe);
@@ -1432,7 +1427,6 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv,
 	WARN_ON(ctrl << 16 != state);
 
 	vlv_punit_put(dev_priv);
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	return enabled;
 }
@@ -1447,7 +1441,6 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv,
 
 	state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe);
 
-	mutex_lock(&dev_priv->pcu_lock);
 	vlv_punit_get(dev_priv);
 
 #define COND \
@@ -1470,7 +1463,6 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv,
 
 out:
 	vlv_punit_put(dev_priv);
-	mutex_unlock(&dev_priv->pcu_lock);
 }
 
 static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index dc3b491b4d00..2d4e48e9e1d5 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -142,8 +142,6 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr)
 {
 	u32 val = 0;
 
-	lockdep_assert_held(&dev_priv->pcu_lock);
-
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
 			SB_CRRDDA_NP, addr, &val);
 
@@ -152,8 +150,6 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr)
 
 int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val)
 {
-	lockdep_assert_held(&dev_priv->pcu_lock);
-
 	return vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
 			       SB_CRWRDA_NP, addr, &val);
 }
-- 
2.17.0


* [PATCH 038/262] drm/i915: Separate sideband declarations to intel_sideband.h
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (35 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 037/262] drm/i915: Replace pcu_lock with sb_lock Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 039/262] drm/i915: Merge sbi read/write into a single accessor Chris Wilson
                   ` (18 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

Split the sideband declarations out of the ginormous i915_drv.h

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |  2 +
 drivers/gpu/drm/i915/i915_drv.h         | 63 ---------------------
 drivers/gpu/drm/i915/i915_sysfs.c       |  2 +
 drivers/gpu/drm/i915/intel_cdclk.c      |  1 +
 drivers/gpu/drm/i915/intel_display.c    | 19 ++++---
 drivers/gpu/drm/i915/intel_dp.c         |  6 +-
 drivers/gpu/drm/i915/intel_dpio_phy.c   |  1 +
 drivers/gpu/drm/i915/intel_dsi.c        |  7 ++-
 drivers/gpu/drm/i915/intel_dsi_pll.c    |  4 +-
 drivers/gpu/drm/i915/intel_dsi_vbt.c    | 11 +++-
 drivers/gpu/drm/i915/intel_hdmi.c       |  5 +-
 drivers/gpu/drm/i915/intel_pm.c         |  7 ++-
 drivers/gpu/drm/i915/intel_runtime_pm.c |  1 +
 drivers/gpu/drm/i915/intel_sideband.c   |  2 +
 drivers/gpu/drm/i915/intel_sideband.h   | 73 +++++++++++++++++++++++++
 15 files changed, 123 insertions(+), 81 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/intel_sideband.h

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 3ded47279919..736b0da204e9 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -29,8 +29,10 @@
 #include <linux/debugfs.h>
 #include <linux/sort.h>
 #include <linux/sched/mm.h>
+
 #include "intel_drv.h"
 #include "intel_guc_submission.h"
+#include "intel_sideband.h"
 
 static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node)
 {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c9cc94f308fb..f81ee2151488 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -635,11 +635,6 @@ enum intel_pch {
 	PCH_NOP,
 };
 
-enum intel_sbi_destination {
-	SBI_ICLK,
-	SBI_MPHY,
-};
-
 #define QUIRK_LVDS_SSC_DISABLE (1<<1)
 #define QUIRK_INVERT_BRIGHTNESS (1<<2)
 #define QUIRK_BACKLIGHT_PRESENT (1<<3)
@@ -3467,64 +3462,6 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, u32 mbox,
 int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
 		      u32 reply_mask, u32 reply, int timeout_base_ms);
 
-/* intel_sideband.c */
-
-enum {
-	VLV_IOSF_SB_BUNIT,
-	VLV_IOSF_SB_CCK,
-	VLV_IOSF_SB_CCU,
-	VLV_IOSF_SB_DPIO,
-	VLV_IOSF_SB_FLISDSI,
-	VLV_IOSF_SB_GPIO,
-	VLV_IOSF_SB_NC,
-	VLV_IOSF_SB_PUNIT,
-};
-
-void vlv_iosf_sb_get(struct drm_i915_private *dev_priv, unsigned long ports);
-u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg);
-void vlv_iosf_sb_write(struct drm_i915_private *dev_priv,
-		       u8 port, u32 reg, u32 val);
-void vlv_iosf_sb_put(struct drm_i915_private *dev_priv, unsigned long ports);
-
-void vlv_punit_get(struct drm_i915_private *dev_priv);
-u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr);
-int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val);
-void vlv_punit_put(struct drm_i915_private *dev_priv);
-
-void vlv_nc_get(struct drm_i915_private *dev_priv);
-u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr);
-void vlv_nc_put(struct drm_i915_private *dev_priv);
-
-void vlv_cck_get(struct drm_i915_private *dev_priv);
-u32 vlv_cck_read(struct drm_i915_private *dev_priv, u32 reg);
-void vlv_cck_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
-void vlv_cck_put(struct drm_i915_private *dev_priv);
-
-void vlv_ccu_get(struct drm_i915_private *dev_priv);
-u32 vlv_ccu_read(struct drm_i915_private *dev_priv, u32 reg);
-void vlv_ccu_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
-void vlv_ccu_put(struct drm_i915_private *dev_priv);
-
-void vlv_bunit_get(struct drm_i915_private *dev_priv);
-u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg);
-void vlv_bunit_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
-void vlv_bunit_put(struct drm_i915_private *dev_priv);
-
-void vlv_dpio_get(struct drm_i915_private *dev_priv);
-u32 vlv_dpio_read(struct drm_i915_private *dev_priv, enum pipe pipe, int reg);
-void vlv_dpio_write(struct drm_i915_private *dev_priv, enum pipe pipe, int reg, u32 val);
-void vlv_dpio_put(struct drm_i915_private *dev_priv);
-
-void vlv_flisdsi_get(struct drm_i915_private *dev_priv);
-u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg);
-void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
-void vlv_flisdsi_put(struct drm_i915_private *dev_priv);
-
-u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg,
-		   enum intel_sbi_destination destination);
-void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value,
-		     enum intel_sbi_destination destination);
-
 /* intel_dpio_phy.c */
 void bxt_port_to_phy_channel(struct drm_i915_private *dev_priv, enum port port,
 			     enum dpio_phy *phy, enum dpio_channel *ch);
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index c98375ba79b4..55554697133b 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -29,7 +29,9 @@
 #include <linux/module.h>
 #include <linux/stat.h>
 #include <linux/sysfs.h>
+
 #include "intel_drv.h"
+#include "intel_sideband.h"
 #include "i915_drv.h"
 
 static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev)
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index 67f5a17e1331..6aa8df1ee50d 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -22,6 +22,7 @@
  */
 
 #include "intel_drv.h"
+#include "intel_sideband.h"
 
 /**
  * DOC: CDCLK / RAWCLK
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 7d88da0917d7..fa2d4ea13a3a 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -31,23 +31,26 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/vgaarb.h>
+#include <linux/dma_remapping.h>
+#include <linux/reservation.h>
+
 #include <drm/drm_edid.h>
 #include <drm/drmP.h>
-#include "intel_drv.h"
-#include "intel_frontbuffer.h"
 #include <drm/i915_drm.h>
-#include "i915_drv.h"
-#include "i915_gem_clflush.h"
-#include "intel_dsi.h"
-#include "i915_trace.h"
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_dp_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_plane_helper.h>
 #include <drm/drm_rect.h>
-#include <linux/dma_remapping.h>
-#include <linux/reservation.h>
+
+#include "i915_drv.h"
+#include "i915_gem_clflush.h"
+#include "i915_trace.h"
+#include "intel_dsi.h"
+#include "intel_drv.h"
+#include "intel_frontbuffer.h"
+#include "intel_sideband.h"
 
 /* Primary plane formats for gen <= 3 */
 static const uint32_t i8xx_primary_formats[] = {
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 94bad33db38a..84f91f8ab20e 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -31,7 +31,9 @@
 #include <linux/types.h>
 #include <linux/notifier.h>
 #include <linux/reboot.h>
+
 #include <asm/byteorder.h>
+
 #include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
@@ -39,9 +41,11 @@
 #include <drm/drm_dp_helper.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_hdcp.h>
-#include "intel_drv.h"
 #include <drm/i915_drm.h>
+
 #include "i915_drv.h"
+#include "intel_drv.h"
+#include "intel_sideband.h"
 
 #define DP_DPRX_ESI_LEN 14
 
diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c
index 79c449aabc7f..f2f2a953a2d3 100644
--- a/drivers/gpu/drm/i915/intel_dpio_phy.c
+++ b/drivers/gpu/drm/i915/intel_dpio_phy.c
@@ -22,6 +22,7 @@
  */
 
 #include "intel_drv.h"
+#include "intel_sideband.h"
 
 /**
  * DOC: DPIO
diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index 355aa8717af2..626c6791d018 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -23,17 +23,20 @@
  * Author: Jani Nikula <jani.nikula@intel.com>
  */
 
+#include <linux/slab.h>
+#include <linux/gpio/consumer.h>
+
 #include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
 #include <drm/i915_drm.h>
 #include <drm/drm_mipi_dsi.h>
-#include <linux/slab.h>
-#include <linux/gpio/consumer.h>
+
 #include "i915_drv.h"
 #include "intel_drv.h"
 #include "intel_dsi.h"
+#include "intel_sideband.h"
 
 /* return pixels in terms of txbyteclkhs */
 static u16 txbyteclkhs(u16 pixels, int bpp, int lane_count,
diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c
index b73336e7dcd2..ebb3dba75d06 100644
--- a/drivers/gpu/drm/i915/intel_dsi_pll.c
+++ b/drivers/gpu/drm/i915/intel_dsi_pll.c
@@ -26,9 +26,11 @@
  */
 
 #include <linux/kernel.h>
-#include "intel_drv.h"
+
 #include "i915_drv.h"
+#include "intel_drv.h"
 #include "intel_dsi.h"
+#include "intel_sideband.h"
 
 static const u16 lfsr_converts[] = {
 	426, 469, 234, 373, 442, 221, 110, 311, 411,		/* 62 - 70 */
diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c b/drivers/gpu/drm/i915/intel_dsi_vbt.c
index 515ab68f319c..a97dcd653696 100644
--- a/drivers/gpu/drm/i915/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c
@@ -24,18 +24,23 @@
  *
  */
 
+#include <linux/gpio/consumer.h>
+#include <linux/slab.h>
+
+#include <asm/intel-mid.h>
+
 #include <drm/drmP.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
 #include <drm/i915_drm.h>
-#include <linux/gpio/consumer.h>
-#include <linux/slab.h>
+
 #include <video/mipi_display.h>
-#include <asm/intel-mid.h>
 #include <video/mipi_display.h>
+
 #include "i915_drv.h"
 #include "intel_drv.h"
 #include "intel_dsi.h"
+#include "intel_sideband.h"
 
 #define MIPI_TRANSFER_MODE_SHIFT	0
 #define MIPI_VIRTUAL_CHANNEL_SHIFT	1
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 4257209c75f3..0a3ecebdbebd 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -30,16 +30,19 @@
 #include <linux/slab.h>
 #include <linux/delay.h>
 #include <linux/hdmi.h>
+
 #include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_hdcp.h>
 #include <drm/drm_scdc_helper.h>
-#include "intel_drv.h"
 #include <drm/i915_drm.h>
 #include <drm/intel_lpe_audio.h>
+
 #include "i915_drv.h"
+#include "intel_drv.h"
+#include "intel_sideband.h"
 
 static struct drm_device *intel_hdmi_to_dev(struct intel_hdmi *intel_hdmi)
 {
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index a128e2846f8f..3b8b4b43dc49 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -26,12 +26,15 @@
  */
 
 #include <linux/cpufreq.h>
+#include <linux/module.h>
+
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_plane_helper.h>
+
 #include "i915_drv.h"
 #include "intel_drv.h"
+#include "intel_sideband.h"
 #include "../../../platform/x86/intel_ips.h"
-#include <linux/module.h>
-#include <drm/drm_atomic_helper.h>
 
 /**
  * DOC: RC6
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index b1a7ecb6ab57..1793a3273682 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -31,6 +31,7 @@
 
 #include "i915_drv.h"
 #include "intel_drv.h"
+#include "intel_sideband.h"
 
 /**
  * DOC: runtime pm
diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index 2d4e48e9e1d5..87e34787939b 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -24,6 +24,8 @@
 
 #include <asm/iosf_mbi.h>
 
+#include "intel_sideband.h"
+
 #include "i915_drv.h"
 #include "intel_drv.h"
 
diff --git a/drivers/gpu/drm/i915/intel_sideband.h b/drivers/gpu/drm/i915/intel_sideband.h
new file mode 100644
index 000000000000..7204648eb9d4
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_sideband.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef _INTEL_SIDEBAND_H_
+#define _INTEL_SIDEBAND_H_
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+enum pipe;
+
+enum {
+	VLV_IOSF_SB_BUNIT,
+	VLV_IOSF_SB_CCK,
+	VLV_IOSF_SB_CCU,
+	VLV_IOSF_SB_DPIO,
+	VLV_IOSF_SB_FLISDSI,
+	VLV_IOSF_SB_GPIO,
+	VLV_IOSF_SB_NC,
+	VLV_IOSF_SB_PUNIT,
+};
+
+void vlv_iosf_sb_get(struct drm_i915_private *i915, unsigned long ports);
+u32 vlv_iosf_sb_read(struct drm_i915_private *i915, u8 port, u32 reg);
+void vlv_iosf_sb_write(struct drm_i915_private *i915,
+		       u8 port, u32 reg, u32 val);
+void vlv_iosf_sb_put(struct drm_i915_private *i915, unsigned long ports);
+
+void vlv_punit_get(struct drm_i915_private *i915);
+u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr);
+int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val);
+void vlv_punit_put(struct drm_i915_private *i915);
+
+void vlv_nc_get(struct drm_i915_private *i915);
+u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr);
+void vlv_nc_put(struct drm_i915_private *i915);
+
+void vlv_cck_get(struct drm_i915_private *i915);
+u32 vlv_cck_read(struct drm_i915_private *i915, u32 reg);
+void vlv_cck_write(struct drm_i915_private *i915, u32 reg, u32 val);
+void vlv_cck_put(struct drm_i915_private *i915);
+
+void vlv_ccu_get(struct drm_i915_private *i915);
+u32 vlv_ccu_read(struct drm_i915_private *i915, u32 reg);
+void vlv_ccu_write(struct drm_i915_private *i915, u32 reg, u32 val);
+void vlv_ccu_put(struct drm_i915_private *i915);
+
+void vlv_bunit_get(struct drm_i915_private *i915);
+u32 vlv_bunit_read(struct drm_i915_private *i915, u32 reg);
+void vlv_bunit_write(struct drm_i915_private *i915, u32 reg, u32 val);
+void vlv_bunit_put(struct drm_i915_private *i915);
+
+void vlv_dpio_get(struct drm_i915_private *i915);
+u32 vlv_dpio_read(struct drm_i915_private *i915, enum pipe pipe, int reg);
+void vlv_dpio_write(struct drm_i915_private *i915,
+		    enum pipe pipe, int reg, u32 val);
+void vlv_dpio_put(struct drm_i915_private *i915);
+
+void vlv_flisdsi_get(struct drm_i915_private *i915);
+u32 vlv_flisdsi_read(struct drm_i915_private *i915, u32 reg);
+void vlv_flisdsi_write(struct drm_i915_private *i915, u32 reg, u32 val);
+void vlv_flisdsi_put(struct drm_i915_private *i915);
+
+enum intel_sbi_destination {
+	SBI_ICLK,
+	SBI_MPHY,
+};
+
+u32 intel_sbi_read(struct drm_i915_private *i915, u16 reg,
+		   enum intel_sbi_destination destination);
+void intel_sbi_write(struct drm_i915_private *i915, u16 reg, u32 value,
+		     enum intel_sbi_destination destination);
+
+#endif /* _INTEL_SIDEBAND_H */
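
The declarations above all follow the same get/read-modify-write/put
bracketing. A minimal caller-side sketch of that pattern (the Punit
register and bit are taken from the existing CHV maxfifo code, shown
here purely to illustrate the bracketing, not as part of this patch):

	u32 val;

	vlv_punit_get(i915);
	val = vlv_punit_read(i915, PUNIT_REG_DSPSSPM);
	val &= ~DSP_MAXFIFO_PM5_ENABLE;
	vlv_punit_write(i915, PUNIT_REG_DSPSSPM, val);
	vlv_punit_put(i915);
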
-- 
2.17.0


* [PATCH 039/262] drm/i915: Merge sbi read/write into a single accessor
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (36 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 038/262] drm/i915: Separate sideband declarations to intel_sideband.h Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 040/262] drm/i915: Merge sandybridge_pcode_(read|write) Chris Wilson
                   ` (17 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

Since intel_sideband_read and intel_sideband_write differ by only a
couple of lines (depending on whether we feed the value in or out),
merge the two into a single common accessor.

v2: Restore vlv_flisdsi_read() lost during rebasing.
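
Callers are unaffected by the merge; under sb_lock the wrappers read as
before. A minimal sketch, borrowing the register names from the existing
iclkip code purely as an illustration:

	u32 tmp;

	mutex_lock(&dev_priv->sb_lock);
	tmp = intel_sbi_read(dev_priv, SBI_SSCCTL6, SBI_ICLK);
	tmp |= SBI_SSCCTL_DISABLE;
	intel_sbi_write(dev_priv, SBI_SSCCTL6, tmp, SBI_ICLK);
	mutex_unlock(&dev_priv->sb_lock);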

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_sideband.c | 91 +++++++++++----------------
 1 file changed, 37 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index 87e34787939b..427c8c36f0c5 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -309,90 +309,73 @@ void vlv_dpio_put(struct drm_i915_private *dev_priv)
 }
 
 /* SBI access */
-u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg,
-		   enum intel_sbi_destination destination)
+static int intel_sbi_rw(struct drm_i915_private *dev_priv, u16 reg,
+			enum intel_sbi_destination destination,
+			u32 *val, bool is_read)
 {
-	u32 value = 0;
+	u32 cmd;
 
 	lockdep_assert_held(&dev_priv->sb_lock);
 
-	if (intel_wait_for_register(dev_priv,
-				    SBI_CTL_STAT, SBI_BUSY, 0,
-				    100)) {
+	if (intel_wait_for_register_fw(dev_priv,
+				       SBI_CTL_STAT, SBI_BUSY, 0,
+				       100)) {
 		DRM_ERROR("timeout waiting for SBI to become ready\n");
-		return 0;
+		return -EBUSY;
 	}
 
-	I915_WRITE(SBI_ADDR, (reg << 16));
-	I915_WRITE(SBI_DATA, 0);
+	I915_WRITE_FW(SBI_ADDR, (u32)reg << 16);
+	I915_WRITE_FW(SBI_DATA, is_read ? 0 : *val);
 
 	if (destination == SBI_ICLK)
-		value = SBI_CTL_DEST_ICLK | SBI_CTL_OP_CRRD;
+		cmd = SBI_CTL_DEST_ICLK | SBI_CTL_OP_CRRD;
 	else
-		value = SBI_CTL_DEST_MPHY | SBI_CTL_OP_IORD;
-	I915_WRITE(SBI_CTL_STAT, value | SBI_BUSY);
+		cmd = SBI_CTL_DEST_MPHY | SBI_CTL_OP_IORD;
+	if (!is_read)
+		cmd |= BIT(8);
+	I915_WRITE_FW(SBI_CTL_STAT, cmd | SBI_BUSY);
 
-	if (intel_wait_for_register(dev_priv,
-				    SBI_CTL_STAT,
-				    SBI_BUSY,
-				    0,
-				    100)) {
+	if (__intel_wait_for_register_fw(dev_priv,
+					 SBI_CTL_STAT, SBI_BUSY, 0,
+					 100, 100, &cmd)) {
 		DRM_ERROR("timeout waiting for SBI to complete read\n");
-		return 0;
+		return -ETIMEDOUT;
 	}
 
-	if (I915_READ(SBI_CTL_STAT) & SBI_RESPONSE_FAIL) {
+	if (cmd & SBI_RESPONSE_FAIL) {
 		DRM_ERROR("error during SBI read of reg %x\n", reg);
-		return 0;
+		return -ENXIO;
 	}
 
-	return I915_READ(SBI_DATA);
+	if (is_read)
+		*val = I915_READ_FW(SBI_DATA);
+
+	return 0;
 }
 
-void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value,
-		     enum intel_sbi_destination destination)
+u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg,
+		   enum intel_sbi_destination destination)
 {
-	u32 tmp;
-
-	lockdep_assert_held(&dev_priv->sb_lock);
-
-	if (intel_wait_for_register(dev_priv,
-				    SBI_CTL_STAT, SBI_BUSY, 0,
-				    100)) {
-		DRM_ERROR("timeout waiting for SBI to become ready\n");
-		return;
-	}
+	u32 result = 0;
 
-	I915_WRITE(SBI_ADDR, (reg << 16));
-	I915_WRITE(SBI_DATA, value);
+	intel_sbi_rw(dev_priv, reg, destination, &result, true);
 
-	if (destination == SBI_ICLK)
-		tmp = SBI_CTL_DEST_ICLK | SBI_CTL_OP_CRWR;
-	else
-		tmp = SBI_CTL_DEST_MPHY | SBI_CTL_OP_IOWR;
-	I915_WRITE(SBI_CTL_STAT, SBI_BUSY | tmp);
-
-	if (intel_wait_for_register(dev_priv,
-				    SBI_CTL_STAT,
-				    SBI_BUSY,
-				    0,
-				    100)) {
-		DRM_ERROR("timeout waiting for SBI to complete write\n");
-		return;
-	}
+	return result;
+}
 
-	if (I915_READ(SBI_CTL_STAT) & SBI_RESPONSE_FAIL) {
-		DRM_ERROR("error during SBI write of %x to reg %x\n",
-			  value, reg);
-		return;
-	}
+void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value,
+		     enum intel_sbi_destination destination)
+{
+	intel_sbi_rw(dev_priv, reg, destination, &value, false);
 }
 
 u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg)
 {
 	u32 val = 0;
+
 	vlv_sideband_rw(dev_priv, DPIO_DEVFN, IOSF_PORT_FLISDSI, SB_CRRDDA_NP,
 			reg, &val);
+
 	return val;
 }
 
-- 
2.17.0


* [PATCH 040/262] drm/i915: Merge sandybridge_pcode_(read|write)
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (37 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 039/262] drm/i915: Merge sbi read/write into a single accessor Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 041/262] drm/i915: Move sandybridge pcode access to intel_sideband.c Chris Wilson
                   ` (16 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

These routines are identical except in the nature of the value parameter.
For writes it is a pure in-param, but for a read, we need an out-param.
Since they differ in a single line, merge the two routines into one.
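
The shape of the merge is the usual in/out-parameter trick: the worker
always takes the value by pointer, and a direction flag tells it whether
to copy the result back out. A self-contained sketch of just that
pattern, with generic names rather than the driver code itself:

	static int mbox_rw(u32 mbox, u32 *val, bool is_read)
	{
		/* feed *val into the mailbox and run the command ... */
		if (is_read)
			*val = 0; /* ... then pull the reply back out */
		return 0;
	}

	static int mbox_read(u32 mbox, u32 *val)
	{
		return mbox_rw(mbox, val, true);
	}

	static int mbox_write(u32 mbox, u32 val)
	{
		return mbox_rw(mbox, &val, false); /* pure in-param */
	}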

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/intel_pm.c | 114 +++++++++++---------------------
 1 file changed, 40 insertions(+), 74 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 3b8b4b43dc49..cfd5ac1876a2 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -9346,12 +9346,10 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
 	}
 }
 
-static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
+static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv,
+					    u32 mbox)
 {
-	uint32_t flags =
-		I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
-
-	switch (flags) {
+	switch (mbox & GEN6_PCODE_ERROR_MASK) {
 	case GEN6_PCODE_SUCCESS:
 		return 0;
 	case GEN6_PCODE_UNIMPLEMENTED_CMD:
@@ -9364,17 +9362,15 @@ static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
 	case GEN6_PCODE_TIMEOUT:
 		return -ETIMEDOUT;
 	default:
-		MISSING_CASE(flags);
+		MISSING_CASE(mbox & GEN6_PCODE_ERROR_MASK);
 		return 0;
 	}
 }
 
-static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv)
+static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv,
+					    u32 mbox)
 {
-	uint32_t flags =
-		I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
-
-	switch (flags) {
+	switch (mbox & GEN6_PCODE_ERROR_MASK) {
 	case GEN6_PCODE_SUCCESS:
 		return 0;
 	case GEN6_PCODE_ILLEGAL_CMD:
@@ -9386,18 +9382,21 @@ static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv)
 	case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
 		return -EOVERFLOW;
 	default:
-		MISSING_CASE(flags);
+		MISSING_CASE(mbox & GEN6_PCODE_ERROR_MASK);
 		return 0;
 	}
 }
 
-static int __sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
+static int __sandybridge_pcode_rw(struct drm_i915_private *dev_priv,
+				  u32 mbox, u32 *val,
+				  int fast_timeout_us,
+				  int slow_timeout_ms,
+				  bool is_read)
 {
-	int status;
-
 	lockdep_assert_held(&dev_priv->sb_lock);
 
-	/* GEN6_PCODE_* are outside of the forcewake domain, we can
+	/*
+	 * GEN6_PCODE_* are outside of the forcewake domain, we can
 	 * use the fw I915_READ variants to reduce the amount of work
 	 * required when reading/writing.
 	 */
@@ -9411,69 +9410,36 @@ static int __sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox,
 
 	if (__intel_wait_for_register_fw(dev_priv,
 					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
-					 500, 0, NULL))
+					 fast_timeout_us,
+					 slow_timeout_ms,
+					 &mbox))
 		return -ETIMEDOUT;
 
-	*val = I915_READ_FW(GEN6_PCODE_DATA);
-	I915_WRITE_FW(GEN6_PCODE_DATA, 0);
+	if (is_read)
+		*val = I915_READ_FW(GEN6_PCODE_DATA);
 
 	if (INTEL_GEN(dev_priv) > 6)
-		status = gen7_check_mailbox_status(dev_priv);
+		return gen7_check_mailbox_status(dev_priv, mbox);
 	else
-		status = gen6_check_mailbox_status(dev_priv);
-
-	return status;
+		return gen6_check_mailbox_status(dev_priv, mbox);
 }
 
 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
 {
-	int status;
+	int err;
 
 	mutex_lock(&dev_priv->sb_lock);
-	status = __sandybridge_pcode_read(dev_priv, mbox, val);
+	err = __sandybridge_pcode_rw(dev_priv, mbox, val,
+				     500, 0,
+				     true);
 	mutex_unlock(&dev_priv->sb_lock);
 
-	if (status) {
+	if (err) {
 		DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
-				 mbox, __builtin_return_address(0), status);
+				 mbox, __builtin_return_address(0), err);
 	}
 
-	return status;
-}
-
-static int __sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
-					     u32 mbox, u32 val,
-					     int fast_timeout_us,
-					     int slow_timeout_ms)
-{
-	int status;
-
-	/* GEN6_PCODE_* are outside of the forcewake domain, we can
-	 * use te fw I915_READ variants to reduce the amount of work
-	 * required when reading/writing.
-	 */
-
-	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY)
-		return -EAGAIN;
-
-	I915_WRITE_FW(GEN6_PCODE_DATA, val);
-	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
-	I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
-
-	if (__intel_wait_for_register_fw(dev_priv,
-					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
-					 fast_timeout_us, slow_timeout_ms,
-					 NULL))
-		return -ETIMEDOUT;
-
-	I915_WRITE_FW(GEN6_PCODE_DATA, 0);
-
-	if (INTEL_GEN(dev_priv) > 6)
-		status = gen7_check_mailbox_status(dev_priv);
-	else
-		status = gen6_check_mailbox_status(dev_priv);
-
-	return status;
+	return err;
 }
 
 int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
@@ -9481,31 +9447,31 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
 				    int fast_timeout_us,
 				    int slow_timeout_ms)
 {
-	int status;
+	int err;
 
 	mutex_lock(&dev_priv->sb_lock);
-	status = __sandybridge_pcode_write_timeout(dev_priv, mbox, val,
-						   fast_timeout_us,
-						   slow_timeout_ms);
+	err = __sandybridge_pcode_rw(dev_priv, mbox, &val,
+				     fast_timeout_us, slow_timeout_ms,
+				     false);
 	mutex_unlock(&dev_priv->sb_lock);
 
-	if (status) {
+	if (err) {
 		DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
-				 val, mbox, __builtin_return_address(0), status);
+				 val, mbox, __builtin_return_address(0), err);
 	}
 
-	return status;
+	return err;
 }
 
 static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
 				  u32 request, u32 reply_mask, u32 reply,
 				  u32 *status)
 {
-	u32 val = request;
-
-	*status = __sandybridge_pcode_read(dev_priv, mbox, &val);
+	*status = __sandybridge_pcode_rw(dev_priv, mbox, &request,
+					 500, 0,
+					 true);
 
-	return *status || ((val & reply_mask) == reply);
+	return *status || ((request & reply_mask) == reply);
 }
 
 /**
-- 
2.17.0


* [PATCH 041/262] drm/i915: Move sandybridge pcode access to intel_sideband.c
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (38 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 040/262] drm/i915: Merge sandybridge_pcode_(read|write) Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 042/262] drm/i915: Mark up Ironlake ips with rpm wakerefs Chris Wilson
                   ` (15 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

sandybridge_pcode is another sideband interface, so move it to its new home.
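
Callers now include intel_sideband.h for these helpers rather than
picking them up via i915_drv.h. For example, the existing SKL cdclk path
issues its pcode handshake roughly like so (shown only as an
illustration of the moved interface):

	#include "intel_sideband.h"

	ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL,
				SKL_CDCLK_PREPARE_FOR_CHANGE,
				SKL_CDCLK_READY_FOR_CHANGE,
				SKL_CDCLK_READY_FOR_CHANGE, 3);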

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h       |  10 --
 drivers/gpu/drm/i915/intel_hdcp.c     |   3 +-
 drivers/gpu/drm/i915/intel_pm.c       | 194 --------------------------
 drivers/gpu/drm/i915/intel_sideband.c | 194 ++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_sideband.h |  10 ++
 5 files changed, 206 insertions(+), 205 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f81ee2151488..82360c96308a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3452,16 +3452,6 @@ intel_display_capture_error_state(struct drm_i915_private *dev_priv);
 extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e,
 					    struct intel_display_error_state *error);
 
-int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val);
-int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, u32 mbox,
-				    u32 val, int fast_timeout_us,
-				    int slow_timeout_ms);
-#define sandybridge_pcode_write(dev_priv, mbox, val)	\
-	sandybridge_pcode_write_timeout(dev_priv, mbox, val, 500, 0)
-
-int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
-		      u32 reply_mask, u32 reply, int timeout_base_ms);
-
 /* intel_dpio_phy.c */
 void bxt_port_to_phy_channel(struct drm_i915_private *dev_priv, enum port port,
 			     enum dpio_phy *phy, enum dpio_channel *ch);
diff --git a/drivers/gpu/drm/i915/intel_hdcp.c b/drivers/gpu/drm/i915/intel_hdcp.c
index 6fa39d4521ca..62215adcdb1a 100644
--- a/drivers/gpu/drm/i915/intel_hdcp.c
+++ b/drivers/gpu/drm/i915/intel_hdcp.c
@@ -11,8 +11,9 @@
 #include <linux/i2c.h>
 #include <linux/random.h>
 
-#include "intel_drv.h"
 #include "i915_reg.h"
+#include "intel_drv.h"
+#include "intel_sideband.h"
 
 #define KEY_LOAD_TRIES	5
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index cfd5ac1876a2..1b048bb1fea3 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -9346,200 +9346,6 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
 	}
 }
 
-static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv,
-					    u32 mbox)
-{
-	switch (mbox & GEN6_PCODE_ERROR_MASK) {
-	case GEN6_PCODE_SUCCESS:
-		return 0;
-	case GEN6_PCODE_UNIMPLEMENTED_CMD:
-		return -ENODEV;
-	case GEN6_PCODE_ILLEGAL_CMD:
-		return -ENXIO;
-	case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
-	case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
-		return -EOVERFLOW;
-	case GEN6_PCODE_TIMEOUT:
-		return -ETIMEDOUT;
-	default:
-		MISSING_CASE(mbox & GEN6_PCODE_ERROR_MASK);
-		return 0;
-	}
-}
-
-static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv,
-					    u32 mbox)
-{
-	switch (mbox & GEN6_PCODE_ERROR_MASK) {
-	case GEN6_PCODE_SUCCESS:
-		return 0;
-	case GEN6_PCODE_ILLEGAL_CMD:
-		return -ENXIO;
-	case GEN7_PCODE_TIMEOUT:
-		return -ETIMEDOUT;
-	case GEN7_PCODE_ILLEGAL_DATA:
-		return -EINVAL;
-	case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
-		return -EOVERFLOW;
-	default:
-		MISSING_CASE(mbox & GEN6_PCODE_ERROR_MASK);
-		return 0;
-	}
-}
-
-static int __sandybridge_pcode_rw(struct drm_i915_private *dev_priv,
-				  u32 mbox, u32 *val,
-				  int fast_timeout_us,
-				  int slow_timeout_ms,
-				  bool is_read)
-{
-	lockdep_assert_held(&dev_priv->sb_lock);
-
-	/*
-	 * GEN6_PCODE_* are outside of the forcewake domain, we can
-	 * use the fw I915_READ variants to reduce the amount of work
-	 * required when reading/writing.
-	 */
-
-	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY)
-		return -EAGAIN;
-
-	I915_WRITE_FW(GEN6_PCODE_DATA, *val);
-	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
-	I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
-
-	if (__intel_wait_for_register_fw(dev_priv,
-					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
-					 fast_timeout_us,
-					 slow_timeout_ms,
-					 &mbox))
-		return -ETIMEDOUT;
-
-	if (is_read)
-		*val = I915_READ_FW(GEN6_PCODE_DATA);
-
-	if (INTEL_GEN(dev_priv) > 6)
-		return gen7_check_mailbox_status(dev_priv, mbox);
-	else
-		return gen6_check_mailbox_status(dev_priv, mbox);
-}
-
-int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
-{
-	int err;
-
-	mutex_lock(&dev_priv->sb_lock);
-	err = __sandybridge_pcode_rw(dev_priv, mbox, val,
-				     500, 0,
-				     true);
-	mutex_unlock(&dev_priv->sb_lock);
-
-	if (err) {
-		DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
-				 mbox, __builtin_return_address(0), err);
-	}
-
-	return err;
-}
-
-int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
-				    u32 mbox, u32 val,
-				    int fast_timeout_us,
-				    int slow_timeout_ms)
-{
-	int err;
-
-	mutex_lock(&dev_priv->sb_lock);
-	err = __sandybridge_pcode_rw(dev_priv, mbox, &val,
-				     fast_timeout_us, slow_timeout_ms,
-				     false);
-	mutex_unlock(&dev_priv->sb_lock);
-
-	if (err) {
-		DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
-				 val, mbox, __builtin_return_address(0), err);
-	}
-
-	return err;
-}
-
-static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
-				  u32 request, u32 reply_mask, u32 reply,
-				  u32 *status)
-{
-	*status = __sandybridge_pcode_rw(dev_priv, mbox, &request,
-					 500, 0,
-					 true);
-
-	return *status || ((request & reply_mask) == reply);
-}
-
-/**
- * skl_pcode_request - send PCODE request until acknowledgment
- * @dev_priv: device private
- * @mbox: PCODE mailbox ID the request is targeted for
- * @request: request ID
- * @reply_mask: mask used to check for request acknowledgment
- * @reply: value used to check for request acknowledgment
- * @timeout_base_ms: timeout for polling with preemption enabled
- *
- * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
- * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
- * The request is acknowledged once the PCODE reply dword equals @reply after
- * applying @reply_mask. Polling is first attempted with preemption enabled
- * for @timeout_base_ms and if this times out for another 50 ms with
- * preemption disabled.
- *
- * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
- * other error as reported by PCODE.
- */
-int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
-		      u32 reply_mask, u32 reply, int timeout_base_ms)
-{
-	u32 status;
-	int ret;
-
-	mutex_lock(&dev_priv->sb_lock);
-
-#define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \
-				   &status)
-
-	/*
-	 * Prime the PCODE by doing a request first. Normally it guarantees
-	 * that a subsequent request, at most @timeout_base_ms later, succeeds.
-	 * _wait_for() doesn't guarantee when its passed condition is evaluated
-	 * first, so send the first request explicitly.
-	 */
-	if (COND) {
-		ret = 0;
-		goto out;
-	}
-	ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10);
-	if (!ret)
-		goto out;
-
-	/*
-	 * The above can time out if the number of requests was low (2 in the
-	 * worst case) _and_ PCODE was busy for some reason even after a
-	 * (queued) request and @timeout_base_ms delay. As a workaround retry
-	 * the poll with preemption disabled to maximize the number of
-	 * requests. Increase the timeout from @timeout_base_ms to 50ms to
-	 * account for interrupts that could reduce the number of these
-	 * requests, and for any quirks of the PCODE firmware that delays
-	 * the request completion.
-	 */
-	DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
-	WARN_ON_ONCE(timeout_base_ms > 3);
-	preempt_disable();
-	ret = wait_for_atomic(COND, 50);
-	preempt_enable();
-
-out:
-	mutex_unlock(&dev_priv->sb_lock);
-	return ret ? ret : status;
-#undef COND
-}
-
 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index 427c8c36f0c5..eb2850736e39 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -394,3 +394,197 @@ void vlv_flisdsi_put(struct drm_i915_private *dev_priv)
 {
 	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_FLISDSI));
 }
+
+static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv,
+					    u32 mbox)
+{
+	switch (mbox & GEN6_PCODE_ERROR_MASK) {
+	case GEN6_PCODE_SUCCESS:
+		return 0;
+	case GEN6_PCODE_UNIMPLEMENTED_CMD:
+		return -ENODEV;
+	case GEN6_PCODE_ILLEGAL_CMD:
+		return -ENXIO;
+	case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
+	case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
+		return -EOVERFLOW;
+	case GEN6_PCODE_TIMEOUT:
+		return -ETIMEDOUT;
+	default:
+		MISSING_CASE(mbox & GEN6_PCODE_ERROR_MASK);
+		return 0;
+	}
+}
+
+static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv,
+					    u32 mbox)
+{
+	switch (mbox & GEN6_PCODE_ERROR_MASK) {
+	case GEN6_PCODE_SUCCESS:
+		return 0;
+	case GEN6_PCODE_ILLEGAL_CMD:
+		return -ENXIO;
+	case GEN7_PCODE_TIMEOUT:
+		return -ETIMEDOUT;
+	case GEN7_PCODE_ILLEGAL_DATA:
+		return -EINVAL;
+	case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
+		return -EOVERFLOW;
+	default:
+		MISSING_CASE(mbox & GEN6_PCODE_ERROR_MASK);
+		return 0;
+	}
+}
+
+static int __sandybridge_pcode_rw(struct drm_i915_private *dev_priv,
+				  u32 mbox, u32 *val,
+				  int fast_timeout_us,
+				  int slow_timeout_ms,
+				  bool is_read)
+{
+	lockdep_assert_held(&dev_priv->sb_lock);
+
+	/*
+	 * GEN6_PCODE_* are outside of the forcewake domain, we can
+	 * use the fw I915_READ variants to reduce the amount of work
+	 * required when reading/writing.
+	 */
+
+	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY)
+		return -EAGAIN;
+
+	I915_WRITE_FW(GEN6_PCODE_DATA, *val);
+	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
+	I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
+
+	if (__intel_wait_for_register_fw(dev_priv,
+					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
+					 fast_timeout_us,
+					 slow_timeout_ms,
+					 &mbox))
+		return -ETIMEDOUT;
+
+	if (is_read)
+		*val = I915_READ_FW(GEN6_PCODE_DATA);
+
+	if (INTEL_GEN(dev_priv) > 6)
+		return gen7_check_mailbox_status(dev_priv, mbox);
+	else
+		return gen6_check_mailbox_status(dev_priv, mbox);
+}
+
+int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
+{
+	int err;
+
+	mutex_lock(&dev_priv->sb_lock);
+	err = __sandybridge_pcode_rw(dev_priv, mbox, val,
+				     500, 0,
+				     true);
+	mutex_unlock(&dev_priv->sb_lock);
+
+	if (err) {
+		DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
+				 mbox, __builtin_return_address(0), err);
+	}
+
+	return err;
+}
+
+int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
+				    u32 mbox, u32 val,
+				    int fast_timeout_us,
+				    int slow_timeout_ms)
+{
+	int err;
+
+	mutex_lock(&dev_priv->sb_lock);
+	err = __sandybridge_pcode_rw(dev_priv, mbox, &val,
+				     fast_timeout_us, slow_timeout_ms,
+				     false);
+	mutex_unlock(&dev_priv->sb_lock);
+
+	if (err) {
+		DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
+				 val, mbox, __builtin_return_address(0), err);
+	}
+
+	return err;
+}
+
+static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
+				  u32 request, u32 reply_mask, u32 reply,
+				  u32 *status)
+{
+	*status = __sandybridge_pcode_rw(dev_priv, mbox, &request,
+					 500, 0,
+					 true);
+
+	return *status || ((request & reply_mask) == reply);
+}
+
+/**
+ * skl_pcode_request - send PCODE request until acknowledgment
+ * @dev_priv: device private
+ * @mbox: PCODE mailbox ID the request is targeted for
+ * @request: request ID
+ * @reply_mask: mask used to check for request acknowledgment
+ * @reply: value used to check for request acknowledgment
+ * @timeout_base_ms: timeout for polling with preemption enabled
+ *
+ * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
+ * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
+ * The request is acknowledged once the PCODE reply dword equals @reply after
+ * applying @reply_mask. Polling is first attempted with preemption enabled
+ * for @timeout_base_ms and if this times out for another 50 ms with
+ * preemption disabled.
+ *
+ * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
+ * other error as reported by PCODE.
+ */
+int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
+		      u32 reply_mask, u32 reply, int timeout_base_ms)
+{
+	u32 status;
+	int ret;
+
+	mutex_lock(&dev_priv->sb_lock);
+
+#define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \
+				   &status)
+
+	/*
+	 * Prime the PCODE by doing a request first. Normally it guarantees
+	 * that a subsequent request, at most @timeout_base_ms later, succeeds.
+	 * _wait_for() doesn't guarantee when its passed condition is evaluated
+	 * first, so send the first request explicitly.
+	 */
+	if (COND) {
+		ret = 0;
+		goto out;
+	}
+	ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10);
+	if (!ret)
+		goto out;
+
+	/*
+	 * The above can time out if the number of requests was low (2 in the
+	 * worst case) _and_ PCODE was busy for some reason even after a
+	 * (queued) request and @timeout_base_ms delay. As a workaround retry
+	 * the poll with preemption disabled to maximize the number of
+	 * requests. Increase the timeout from @timeout_base_ms to 50ms to
+	 * account for interrupts that could reduce the number of these
+	 * requests, and for any quirks of the PCODE firmware that delays
+	 * the request completion.
+	 */
+	DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
+	WARN_ON_ONCE(timeout_base_ms > 3);
+	preempt_disable();
+	ret = wait_for_atomic(COND, 50);
+	preempt_enable();
+
+out:
+	mutex_unlock(&dev_priv->sb_lock);
+	return ret ? ret : status;
+#undef COND
+}
diff --git a/drivers/gpu/drm/i915/intel_sideband.h b/drivers/gpu/drm/i915/intel_sideband.h
index 7204648eb9d4..16a708d1c1d8 100644
--- a/drivers/gpu/drm/i915/intel_sideband.h
+++ b/drivers/gpu/drm/i915/intel_sideband.h
@@ -70,4 +70,14 @@ u32 intel_sbi_read(struct drm_i915_private *i915, u16 reg,
 void intel_sbi_write(struct drm_i915_private *i915, u16 reg, u32 value,
 		     enum intel_sbi_destination destination);
 
+int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox, u32 *val);
+int sandybridge_pcode_write_timeout(struct drm_i915_private *i915, u32 mbox,
+				    u32 val, int fast_timeout_us,
+				    int slow_timeout_ms);
+#define sandybridge_pcode_write(i915, mbox, val)	\
+	sandybridge_pcode_write_timeout(i915, mbox, val, 500, 0)
+
+int skl_pcode_request(struct drm_i915_private *i915, u32 mbox, u32 request,
+		      u32 reply_mask, u32 reply, int timeout_base_ms);
+
 #endif /* _INTEL_SIDEBAND_H */
-- 
2.17.0


* [PATCH 042/262] drm/i915: Mark up Ironlake ips with rpm wakerefs
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (39 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 041/262] drm/i915: Move sandybridge pcode access to intel_sideband.c Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:03 ` [PATCH 043/262] drm/i915: Record logical context support in driver caps Chris Wilson
                   ` (14 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

Currently Ironlake operates under the assumption that rpm is always awake
(and its error checking is disabled). As such, we have missed a few places
where we access registers without taking the rpm wakeref and thus trigger
warnings, intel_ips being one culprit.

As this involved adding a potentially sleeping rpm_get, we have to
rearrange the spinlocks slightly and so switch to acquiring a device-ref
under the spinlock rather than holding the spinlock for the whole
operation. To be consistent, we apply the same pattern across the
intel_ips interface even though this adds a few more atomic operations
than necessary in a few cases.

v2: Sagar noted the mb around setting mch_dev was overkill as we only
need ordering there, and that i915_emon_status was still using
struct_mutex for no reason, but lacked rpm.
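
The lookup at the heart of the change is the standard RCU-protected weak
reference. A minimal sketch of the idiom, with generic names rather than
the driver code itself:

	/* Reader: take a strong reference only if the object is alive. */
	rcu_read_lock();
	obj = rcu_dereference(global_ptr);
	if (obj && !kref_get_unless_zero(&obj->ref))
		obj = NULL;	/* raced against the final put */
	rcu_read_unlock();

	/* Writer (unload): unpublish, then wait out any readers. */
	rcu_assign_pointer(global_ptr, NULL);
	synchronize_rcu();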

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c |  19 ++--
 drivers/gpu/drm/i915/i915_drv.c     |   3 +
 drivers/gpu/drm/i915/intel_pm.c     | 138 ++++++++++++++--------------
 3 files changed, 81 insertions(+), 79 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 736b0da204e9..8dd12798cca0 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1768,22 +1768,19 @@ static int i915_sr_status(struct seq_file *m, void *unused)
 
 static int i915_emon_status(struct seq_file *m, void *unused)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
+	struct drm_i915_private *i915 = node_to_i915(m->private);
 	unsigned long temp, chipset, gfx;
-	int ret;
 
-	if (!IS_GEN5(dev_priv))
+	if (!IS_GEN5(i915))
 		return -ENODEV;
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
+	intel_runtime_pm_get(i915);
 
-	temp = i915_mch_val(dev_priv);
-	chipset = i915_chipset_val(dev_priv);
-	gfx = i915_gfx_val(dev_priv);
-	mutex_unlock(&dev->struct_mutex);
+	temp = i915_mch_val(i915);
+	chipset = i915_chipset_val(i915);
+	gfx = i915_gfx_val(i915);
+
+	intel_runtime_pm_put(i915);
 
 	seq_printf(m, "GMCH temp: %ld\n", temp);
 	seq_printf(m, "Chipset power: %ld\n", chipset);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 4a44f1a233e3..89327339a73d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1449,6 +1449,9 @@ void i915_driver_unload(struct drm_device *dev)
 
 	i915_driver_unregister(dev_priv);
 
+	/* Flush any external code that still may be under the RCU lock */
+	synchronize_rcu();
+
 	if (i915_gem_suspend(dev_priv))
 		DRM_ERROR("failed to idle hardware; continuing to unload!\n");
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 1b048bb1fea3..f94fc65c5149 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6107,10 +6107,6 @@ void intel_init_ipc(struct drm_i915_private *dev_priv)
  */
 DEFINE_SPINLOCK(mchdev_lock);
 
-/* Global for IPS driver to get at the current i915 device. Protected by
- * mchdev_lock. */
-static struct drm_i915_private *i915_mch_dev;
-
 bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
 {
 	u16 rgvswctl;
@@ -7757,11 +7753,13 @@ unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
 	if (!IS_GEN5(dev_priv))
 		return 0;
 
+	intel_runtime_pm_get(dev_priv);
 	spin_lock_irq(&mchdev_lock);
 
 	val = __i915_chipset_val(dev_priv);
 
 	spin_unlock_irq(&mchdev_lock);
+	intel_runtime_pm_put(dev_priv);
 
 	return val;
 }
@@ -7841,11 +7839,13 @@ void i915_update_gfx_val(struct drm_i915_private *dev_priv)
 	if (!IS_GEN5(dev_priv))
 		return;
 
+	intel_runtime_pm_get(dev_priv);
 	spin_lock_irq(&mchdev_lock);
 
 	__i915_update_gfx_val(dev_priv);
 
 	spin_unlock_irq(&mchdev_lock);
+	intel_runtime_pm_put(dev_priv);
 }
 
 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
@@ -7892,15 +7892,32 @@ unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
 	if (!IS_GEN5(dev_priv))
 		return 0;
 
+	intel_runtime_pm_get(dev_priv);
 	spin_lock_irq(&mchdev_lock);
 
 	val = __i915_gfx_val(dev_priv);
 
 	spin_unlock_irq(&mchdev_lock);
+	intel_runtime_pm_put(dev_priv);
 
 	return val;
 }
 
+static struct drm_i915_private *i915_mch_dev;
+
+static struct drm_i915_private *mchdev_get(void)
+{
+	struct drm_i915_private *i915;
+
+	rcu_read_lock();
+	i915 = i915_mch_dev;
+	if (!i915 || !kref_get_unless_zero(&i915->drm.ref))
+		i915 = NULL;
+	rcu_read_unlock();
+
+	return i915;
+}
+
 /**
  * i915_read_mch_val - return value for IPS use
  *
@@ -7909,23 +7926,22 @@ unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
  */
 unsigned long i915_read_mch_val(void)
 {
-	struct drm_i915_private *dev_priv;
-	unsigned long chipset_val, graphics_val, ret = 0;
-
-	spin_lock_irq(&mchdev_lock);
-	if (!i915_mch_dev)
-		goto out_unlock;
-	dev_priv = i915_mch_dev;
-
-	chipset_val = __i915_chipset_val(dev_priv);
-	graphics_val = __i915_gfx_val(dev_priv);
+	struct drm_i915_private *i915;
+	unsigned long chipset_val, graphics_val;
 
-	ret = chipset_val + graphics_val;
+	i915 = mchdev_get();
+	if (!i915)
+		return 0;
 
-out_unlock:
+	intel_runtime_pm_get(i915);
+	spin_lock_irq(&mchdev_lock);
+	chipset_val = __i915_chipset_val(i915);
+	graphics_val = __i915_gfx_val(i915);
 	spin_unlock_irq(&mchdev_lock);
+	intel_runtime_pm_put(i915);
 
-	return ret;
+	drm_dev_put(&i915->drm);
+	return chipset_val + graphics_val;
 }
 EXPORT_SYMBOL_GPL(i915_read_mch_val);
 
@@ -7936,23 +7952,19 @@ EXPORT_SYMBOL_GPL(i915_read_mch_val);
  */
 bool i915_gpu_raise(void)
 {
-	struct drm_i915_private *dev_priv;
-	bool ret = true;
-
-	spin_lock_irq(&mchdev_lock);
-	if (!i915_mch_dev) {
-		ret = false;
-		goto out_unlock;
-	}
-	dev_priv = i915_mch_dev;
+	struct drm_i915_private *i915;
 
-	if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
-		dev_priv->ips.max_delay--;
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
 
-out_unlock:
+	spin_lock_irq(&mchdev_lock);
+	if (i915->ips.max_delay > i915->ips.fmax)
+		i915->ips.max_delay--;
 	spin_unlock_irq(&mchdev_lock);
 
-	return ret;
+	drm_dev_put(&i915->drm);
+	return true;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_raise);
 
@@ -7964,23 +7976,19 @@ EXPORT_SYMBOL_GPL(i915_gpu_raise);
  */
 bool i915_gpu_lower(void)
 {
-	struct drm_i915_private *dev_priv;
-	bool ret = true;
+	struct drm_i915_private *i915;
 
-	spin_lock_irq(&mchdev_lock);
-	if (!i915_mch_dev) {
-		ret = false;
-		goto out_unlock;
-	}
-	dev_priv = i915_mch_dev;
-
-	if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
-		dev_priv->ips.max_delay++;
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
 
-out_unlock:
+	spin_lock_irq(&mchdev_lock);
+	if (i915->ips.max_delay < i915->ips.min_delay)
+		i915->ips.max_delay++;
 	spin_unlock_irq(&mchdev_lock);
 
-	return ret;
+	drm_dev_put(&i915->drm);
+	return true;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_lower);
 
@@ -7991,13 +7999,16 @@ EXPORT_SYMBOL_GPL(i915_gpu_lower);
  */
 bool i915_gpu_busy(void)
 {
-	bool ret = false;
+	struct drm_i915_private *i915;
+	bool ret;
 
-	spin_lock_irq(&mchdev_lock);
-	if (i915_mch_dev)
-		ret = i915_mch_dev->gt.awake;
-	spin_unlock_irq(&mchdev_lock);
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
+
+	ret = i915->gt.awake;
 
+	drm_dev_put(&i915->drm);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_busy);
@@ -8010,24 +8021,19 @@ EXPORT_SYMBOL_GPL(i915_gpu_busy);
  */
 bool i915_gpu_turbo_disable(void)
 {
-	struct drm_i915_private *dev_priv;
-	bool ret = true;
-
-	spin_lock_irq(&mchdev_lock);
-	if (!i915_mch_dev) {
-		ret = false;
-		goto out_unlock;
-	}
-	dev_priv = i915_mch_dev;
-
-	dev_priv->ips.max_delay = dev_priv->ips.fstart;
+	struct drm_i915_private *i915;
+	bool ret;
 
-	if (!ironlake_set_drps(dev_priv, dev_priv->ips.fstart))
-		ret = false;
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
 
-out_unlock:
+	spin_lock_irq(&mchdev_lock);
+	i915->ips.max_delay = i915->ips.fstart;
+	ret = ironlake_set_drps(i915, i915->ips.fstart);
 	spin_unlock_irq(&mchdev_lock);
 
+	drm_dev_put(&i915->drm);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
@@ -8056,18 +8062,14 @@ void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
 {
 	/* We only register the i915 ips part with intel-ips once everything is
 	 * set up, to avoid intel-ips sneaking in and reading bogus values. */
-	spin_lock_irq(&mchdev_lock);
-	i915_mch_dev = dev_priv;
-	spin_unlock_irq(&mchdev_lock);
+	rcu_assign_pointer(i915_mch_dev, dev_priv);
 
 	ips_ping_for_i915_load();
 }
 
 void intel_gpu_ips_teardown(void)
 {
-	spin_lock_irq(&mchdev_lock);
-	i915_mch_dev = NULL;
-	spin_unlock_irq(&mchdev_lock);
+	rcu_assign_pointer(i915_mch_dev, NULL);
 }
 
 static void intel_init_emon(struct drm_i915_private *dev_priv)
-- 
2.17.0


* [PATCH 043/262] drm/i915: Record logical context support in driver caps
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (40 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 042/262] drm/i915: Mark up Ironlake ips with rpm wakerefs Chris Wilson
@ 2018-05-17  6:03 ` Chris Wilson
  2018-05-17  6:04 ` [PATCH 044/262] drm/i915: Generalize i915_gem_sanitize() to reset contexts Chris Wilson
                   ` (13 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:03 UTC (permalink / raw)
  To: intel-gfx

Avoid looking at the magical engines[RCS] to decide if the HW and driver
support logical contexts, and instead record that knowledge during
initialisation.
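
Distilled from the diff below: the capability is recorded once at engine
setup and then queried through the caps struct, so no query site needs to
poke at the engines again.

	/* engine setup */
	if (engine->context_size)
		DRIVER_CAPS(dev_priv)->has_contexts = true;

	/* any later query site */
	if (!DRIVER_CAPS(dev_priv)->has_contexts)
		return -ENODEV;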

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h          | 1 +
 drivers/gpu/drm/i915/i915_gem_context.c  | 5 ++---
 drivers/gpu/drm/i915/intel_device_info.c | 1 +
 drivers/gpu/drm/i915/intel_device_info.h | 1 +
 drivers/gpu/drm/i915/intel_engine_cs.c   | 2 ++
 5 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 82360c96308a..c66e439f27d6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2300,6 +2300,7 @@ intel_info(const struct drm_i915_private *dev_priv)
 }
 
 #define INTEL_INFO(dev_priv)	intel_info((dev_priv))
+#define DRIVER_CAPS(dev_priv)	(&(dev_priv)->caps)
 
 #define INTEL_GEN(dev_priv)	((dev_priv)->info.gen)
 #define INTEL_DEVID(dev_priv)	((dev_priv)->info.device_id)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 9d51560ba9ad..43fdbb906267 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -503,8 +503,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
 	}
 
 	DRM_DEBUG_DRIVER("%s context support initialized\n",
-			 dev_priv->engine[RCS]->context_size ? "logical" :
-			 "fake");
+			 DRIVER_CAPS(dev_priv)->has_contexts ? "logical" : "fake");
 	return 0;
 }
 
@@ -660,7 +659,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	struct i915_gem_context *ctx;
 	int ret;
 
-	if (!dev_priv->engine[RCS]->context_size)
+	if (!DRIVER_CAPS(dev_priv)->has_contexts)
 		return -ENODEV;
 
 	if (args->pad != 0)
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 0fd13df424cf..f1395ddcdda0 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -858,6 +858,7 @@ void intel_device_info_runtime_init(struct intel_device_info *info)
 void intel_driver_caps_print(const struct intel_driver_caps *caps,
 			     struct drm_printer *p)
 {
+	drm_printf(p, "Has contexts? %s\n", yesno(caps->has_contexts));
 	drm_printf(p, "scheduler: %x\n", caps->scheduler);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 933e31669557..3f9881c548ef 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -186,6 +186,7 @@ struct intel_device_info {
 
 struct intel_driver_caps {
 	unsigned int scheduler;
+	bool has_contexts:1;
 };
 
 static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 3ec79bd06b83..1a9c26d0da55 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -301,6 +301,8 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 							   engine->class);
 	if (WARN_ON(engine->context_size > BIT(20)))
 		engine->context_size = 0;
+	if (engine->context_size)
+		DRIVER_CAPS(dev_priv)->has_contexts = true;
 
 	/* Nothing to do here, execute in order of dependencies */
 	engine->schedule = NULL;
-- 
2.17.0


* [PATCH 044/262] drm/i915: Generalize i915_gem_sanitize() to reset contexts
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (41 preceding siblings ...)
  2018-05-17  6:03 ` [PATCH 043/262] drm/i915: Record logical context support in driver caps Chris Wilson
@ 2018-05-17  6:04 ` Chris Wilson
  2018-05-17  6:04 ` [PATCH 045/262] drm/i915: Enable render context support for Ironlake (gen5) Chris Wilson
                   ` (12 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:04 UTC (permalink / raw)
  To: intel-gfx

While we believe that we need to always reset the GPU to scrub the state
on transition to/from the driver, it is essential when we enable contexts.
Generalize the gen test to check for context support instead.

References: d2b4b97933f5 ("drm/i915: Record the default hw state after reset upon load")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a5694b0a7e6a..c451d61eeca1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4952,7 +4952,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 	 * of the reset, so this could be applied to even earlier gen.
 	 */
 	err = -ENODEV;
-	if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) {
+	if (DRIVER_CAPS(i915)->has_contexts && intel_has_gpu_reset(i915)) {
 		if (!WARN_ON(intel_gpu_reset(i915, ALL_ENGINES))) {
 			for_each_engine(engine, i915, id) {
 				if (engine->reset.reset)
-- 
2.17.0


* [PATCH 045/262] drm/i915: Enable render context support for Ironlake (gen5)
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (42 preceding siblings ...)
  2018-05-17  6:04 ` [PATCH 044/262] drm/i915: Generalize i915_gem_sanitize() to reset contexts Chris Wilson
@ 2018-05-17  6:04 ` Chris Wilson
  2018-05-17  6:04 ` [PATCH 046/262] drm/i915: Enable render context support for gen4 (Broadwater to Cantiga) Chris Wilson
                   ` (11 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: Kenneth Graunke

Ironlake does support saving and reloading context-specific registers
between contexts, providing isolation of the basic GPU state (as
programmable by userspace). This allows userspace to assume that the
GPU retains its state from one batch to the next, minimising the
amount of state it needs to reload, or manually save and restore.

v2: Fix off-by-one in reading CXT_SIZE, and add a comment that the
CXT_SIZE and context-layout do not match in bspec, but the difference is
irrelevant as we overallocate the full page anyway (Ville).
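
As a worked example of the sizing below (hypothetical register value,
for illustration only): if CXT_SIZE reads back 0x21, then
cxt_size = 0x21 + 1 = 34 units of 64 bytes, i.e. 2176 bytes, which
round_up(2176, PAGE_SIZE) turns into a single 4KiB page.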

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_engine_cs.c  | 16 ++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.c | 13 +++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 1a9c26d0da55..9b55b7800ad0 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -219,6 +219,22 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
 			return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
 					PAGE_SIZE);
 		case 5:
+			/*
+			 * There is a discrepancy here between the size reported
+			 * by the register and the size of the context layout
+			 * in the docs. Both are described as authoritative!
+			 *
+			 * The discrepancy is on the order of a few cachelines,
+			 * but the total is under one page (4k), which is our
+			 * minimum allocation anyway so it should all come
+			 * out in the wash.
+			 */
+			cxt_size = I915_READ(CXT_SIZE) + 1;
+			DRM_DEBUG_DRIVER("gen%d CXT_SIZE = %d bytes [0x%08x]\n",
+					 INTEL_GEN(dev_priv),
+					 cxt_size * 64,
+					 cxt_size - 1);
+			return round_up(cxt_size * 64, PAGE_SIZE);
 		case 4:
 		case 3:
 		case 2:
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 97b38bbb7ce2..47591758069a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1447,11 +1447,14 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 		/* These flags are for resource streamer on HSW+ */
 		flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
 	else
+		/* We need to save the extended state for powersaving modes */
 		flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;
 
 	len = 4;
 	if (IS_GEN7(i915))
 		len += 2 + (num_rings ? 4*num_rings + 6 : 0);
+	if (IS_GEN5(i915))
+		len += 2;
 
 	cs = intel_ring_begin(rq, len);
 	if (IS_ERR(cs))
@@ -1474,6 +1477,14 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 						GEN6_PSMI_SLEEP_MSG_DISABLE);
 			}
 		}
+	} else if (IS_GEN5(i915)) {
+		/*
+		 * This w/a is only listed for pre-production ilk a/b steppings,
+		 * but is also mentioned for programming the powerctx. To be
+		 * safe, just apply the workaround; we do not use SyncFlush so
+		 * this should never take effect and so be a no-op!
+		 */
+		*cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN;
 	}
 
 	*cs++ = MI_NOOP;
@@ -1508,6 +1519,8 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 			*cs++ = MI_NOOP;
 		}
 		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	} else if (IS_GEN5(i915)) {
+		*cs++ = MI_SUSPEND_FLUSH;
 	}
 
 	intel_ring_advance(rq, cs);
-- 
2.17.0


* [PATCH 046/262] drm/i915: Enable render context support for gen4 (Broadwater to Cantiga)
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (43 preceding siblings ...)
  2018-05-17  6:04 ` [PATCH 045/262] drm/i915: Enable render context support for Ironlake (gen5) Chris Wilson
@ 2018-05-17  6:04 ` Chris Wilson
  2018-05-17  6:04 ` [PATCH 047/262] drm/i915: Split GT powermanagement functions to intel_gt_pm.c Chris Wilson
                   ` (10 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: Kenneth Graunke

Broadwater and the rest of gen4 do support saving and reloading
context-specific registers between contexts, providing isolation of the
basic GPU state (as programmable by userspace). This allows userspace to
assume that the GPU retains its state from one batch to the next,
minimising the amount of state it needs to reload and manually save
across batches.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Kenneth Graunke <kenneth@whitecape.org>
---
 drivers/gpu/drm/i915/intel_engine_cs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 9b55b7800ad0..8c33a907eddc 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -219,6 +219,7 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
 			return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
 					PAGE_SIZE);
 		case 5:
+		case 4:
 			/*
 			 * There is a discrepancy here between the size reported
 			 * by the register and the size of the context layout
@@ -235,7 +236,6 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
 					 cxt_size * 64,
 					 cxt_size - 1);
 			return round_up(cxt_size * 64, PAGE_SIZE);
-		case 4:
 		case 3:
 		case 2:
 		/* For the special day when i810 gets merged. */
-- 
2.17.0


* [PATCH 047/262] drm/i915: Split GT powermanagement functions to intel_gt_pm.c
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (44 preceding siblings ...)
  2018-05-17  6:04 ` [PATCH 046/262] drm/i915: Enable render context support for gen4 (Broadwater to Cantiga) Chris Wilson
@ 2018-05-17  6:04 ` Chris Wilson
  2018-05-17  6:04 ` [PATCH 048/262] drm/i915: Move rps worker " Chris Wilson
                   ` (9 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:04 UTC (permalink / raw)
  To: intel-gfx

intel_pm.c has grown to several thousand lines of loosely connected code
handling various power management tasks. Split out the GT portion (IPS,
RPS and RC6) into its own file for easier maintenance.

v2: Move struct intel_gt_pm to intel_gt_pm.h

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
---
 drivers/gpu/drm/i915/Makefile        |    1 +
 drivers/gpu/drm/i915/i915_debugfs.c  |    1 +
 drivers/gpu/drm/i915/i915_drv.c      |    5 +
 drivers/gpu/drm/i915/i915_drv.h      |   78 +-
 drivers/gpu/drm/i915/i915_gem.c      |   21 +-
 drivers/gpu/drm/i915/i915_irq.c      |    2 +
 drivers/gpu/drm/i915/i915_pmu.c      |    1 +
 drivers/gpu/drm/i915/i915_request.c  |    1 +
 drivers/gpu/drm/i915/i915_sysfs.c    |    1 +
 drivers/gpu/drm/i915/intel_display.c |    1 +
 drivers/gpu/drm/i915/intel_drv.h     |   12 -
 drivers/gpu/drm/i915/intel_gt_pm.c   | 2424 ++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_gt_pm.h   |  105 +
 drivers/gpu/drm/i915/intel_pm.c      | 2785 ++------------------------
 drivers/gpu/drm/i915/intel_uncore.c  |    2 -
 15 files changed, 2765 insertions(+), 2675 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/intel_gt_pm.c
 create mode 100644 drivers/gpu/drm/i915/intel_gt_pm.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 4c6adae23e18..3bce92cd4445 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -46,6 +46,7 @@ i915-y := i915_drv.o \
 	  i915_sysfs.o \
 	  intel_csr.o \
 	  intel_device_info.o \
+	  intel_gt_pm.o \
 	  intel_pm.o \
 	  intel_runtime_pm.o \
 	  intel_workarounds.o
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 8dd12798cca0..75c89014033c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -31,6 +31,7 @@
 #include <linux/sched/mm.h>
 
 #include "intel_drv.h"
+#include "intel_gt_pm.h"
 #include "intel_guc_submission.h"
 #include "intel_sideband.h"
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 89327339a73d..5c6fe51eb45a 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -52,6 +52,7 @@
 #include "i915_query.h"
 #include "i915_vgpu.h"
 #include "intel_drv.h"
+#include "intel_gt_pm.h"
 #include "intel_uc.h"
 
 static struct drm_driver driver;
@@ -1072,6 +1073,7 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv)
  */
 static void i915_driver_cleanup_mmio(struct drm_i915_private *dev_priv)
 {
+	intel_sanitize_gt_powersave(dev_priv);
 	intel_uncore_fini(dev_priv);
 	i915_mmio_cleanup(dev_priv);
 	pci_dev_put(dev_priv->bridge_dev);
@@ -1179,6 +1181,9 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 
 	intel_uncore_sanitize(dev_priv);
 
+	/* BIOS often leaves RC6 enabled, but disable it for hw init */
+	intel_sanitize_gt_powersave(dev_priv);
+
 	intel_opregion_setup(dev_priv);
 
 	i915_gem_load_init_fences(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c66e439f27d6..e11b8c7dbf4f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -60,6 +60,7 @@
 #include "intel_device_info.h"
 #include "intel_display.h"
 #include "intel_dpll_mgr.h"
+#include "intel_gt_pm.h"
 #include "intel_lrc.h"
 #include "intel_opregion.h"
 #include "intel_ringbuffer.h"
@@ -729,78 +730,6 @@ struct vlv_s0ix_state {
 	u32 clock_gate_dis2;
 };
 
-struct intel_rps_ei {
-	ktime_t ktime;
-	u32 render_c0;
-	u32 media_c0;
-};
-
-struct intel_rps {
-	struct mutex lock;
-
-	/*
-	 * work, interrupts_enabled and pm_iir are protected by
-	 * dev_priv->irq_lock
-	 */
-	struct work_struct work;
-	bool interrupts_enabled;
-	u32 pm_iir;
-
-	/* PM interrupt bits that should never be masked */
-	u32 pm_intrmsk_mbz;
-
-	/* Frequencies are stored in potentially platform dependent multiples.
-	 * In other words, *_freq needs to be multiplied by X to be interesting.
-	 * Soft limits are those which are used for the dynamic reclocking done
-	 * by the driver (raise frequencies under heavy loads, and lower for
-	 * lighter loads). Hard limits are those imposed by the hardware.
-	 *
-	 * A distinction is made for overclocking, which is never enabled by
-	 * default, and is considered to be above the hard limit if it's
-	 * possible at all.
-	 */
-	u8 cur_freq;		/* Current frequency (cached, may not == HW) */
-	u8 min_freq_softlimit;	/* Minimum frequency permitted by the driver */
-	u8 max_freq_softlimit;	/* Max frequency permitted by the driver */
-	u8 max_freq;		/* Maximum frequency, RP0 if not overclocking */
-	u8 min_freq;		/* AKA RPn. Minimum frequency */
-	u8 boost_freq;		/* Frequency to request when wait boosting */
-	u8 idle_freq;		/* Frequency to request when we are idle */
-	u8 efficient_freq;	/* AKA RPe. Pre-determined balanced frequency */
-	u8 rp1_freq;		/* "less than" RP0 power/freqency */
-	u8 rp0_freq;		/* Non-overclocked max frequency. */
-	u16 gpll_ref_freq;	/* vlv/chv GPLL reference frequency */
-
-	u8 up_threshold; /* Current %busy required to uplock */
-	u8 down_threshold; /* Current %busy required to downclock */
-
-	int last_adj;
-	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
-
-	bool enabled;
-	atomic_t num_waiters;
-	atomic_t boosts;
-
-	/* manual wa residency calculations */
-	struct intel_rps_ei ei;
-};
-
-struct intel_rc6 {
-	bool enabled;
-	u64 prev_hw_residency[4];
-	u64 cur_residency[4];
-};
-
-struct intel_llc_pstate {
-	bool enabled;
-};
-
-struct intel_gen6_power_mgmt {
-	struct intel_rps rps;
-	struct intel_rc6 rc6;
-	struct intel_llc_pstate llc_pstate;
-};
-
 /* defined intel_pm.c */
 extern spinlock_t mchdev_lock;
 
@@ -1795,7 +1724,7 @@ struct drm_i915_private {
 	u32 edram_cap;
 
 	/* gen6+ GT PM state */
-	struct intel_gen6_power_mgmt gt_pm;
+	struct intel_gt_pm gt_pm;
 
 	/* ilk-only ips/rps state. Everything in here is protected by the global
 	 * mchdev_lock in intel_pm.c */
@@ -2304,6 +2233,7 @@ intel_info(const struct drm_i915_private *dev_priv)
 
 #define INTEL_GEN(dev_priv)	((dev_priv)->info.gen)
 #define INTEL_DEVID(dev_priv)	((dev_priv)->info.device_id)
+#define INTEL_SSEU(dev_priv)	(&INTEL_INFO(dev_priv)->sseu)
 
 #define REVID_FOREVER		0xff
 #define INTEL_REVID(dev_priv)	((dev_priv)->drm.pdev->revision)
@@ -3494,8 +3424,6 @@ void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 void vlv_phy_reset_lanes(struct intel_encoder *encoder,
 			 const struct intel_crtc_state *old_crtc_state);
 
-int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
-int intel_freq_opcode(struct drm_i915_private *dev_priv, int val);
 u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv,
 			   const i915_reg_t reg);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c451d61eeca1..b006d0e71a66 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -28,15 +28,7 @@
 #include <drm/drmP.h>
 #include <drm/drm_vma_manager.h>
 #include <drm/i915_drm.h>
-#include "i915_drv.h"
-#include "i915_gem_clflush.h"
-#include "i915_vgpu.h"
-#include "i915_trace.h"
-#include "intel_drv.h"
-#include "intel_frontbuffer.h"
-#include "intel_mocs.h"
-#include "intel_workarounds.h"
-#include "i915_gemfs.h"
+
 #include <linux/dma-fence-array.h>
 #include <linux/kthread.h>
 #include <linux/reservation.h>
@@ -47,6 +39,17 @@
 #include <linux/pci.h>
 #include <linux/dma-buf.h>
 
+#include "i915_drv.h"
+#include "i915_gemfs.h"
+#include "i915_gem_clflush.h"
+#include "i915_vgpu.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include "intel_frontbuffer.h"
+#include "intel_gt_pm.h"
+#include "intel_mocs.h"
+#include "intel_workarounds.h"
+
 static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
 
 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index fe93e6731062..697800f607c8 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -33,9 +33,11 @@
 #include <linux/circ_buf.h>
 #include <drm/drmP.h>
 #include <drm/i915_drm.h>
+
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
+#include "intel_gt_pm.h"
 
 /**
  * DOC: interrupt handling
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index dc87797db500..f374af971395 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -5,6 +5,7 @@
  */
 
 #include "i915_pmu.h"
+#include "intel_gt_pm.h"
 #include "intel_ringbuffer.h"
 #include "i915_drv.h"
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 6466d8f475f1..b47875d46c84 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -29,6 +29,7 @@
 #include <linux/sched/signal.h>
 
 #include "i915_drv.h"
+#include "intel_gt_pm.h"
 
 static const char *i915_fence_get_driver_name(struct dma_fence *fence)
 {
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 55554697133b..fde5f0139ca1 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -31,6 +31,7 @@
 #include <linux/sysfs.h>
 
 #include "intel_drv.h"
+#include "intel_gt_pm.h"
 #include "intel_sideband.h"
 #include "i915_drv.h"
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index fa2d4ea13a3a..84ef96659b51 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -50,6 +50,7 @@
 #include "intel_dsi.h"
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
+#include "intel_gt_pm.h"
 #include "intel_sideband.h"
 
 /* Primary plane formats for gen <= 3 */
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index d7dbca1aabff..7cfb71ade096 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -2022,18 +2022,6 @@ void intel_update_watermarks(struct intel_crtc *crtc);
 void intel_init_pm(struct drm_i915_private *dev_priv);
 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv);
 void intel_pm_setup(struct drm_i915_private *dev_priv);
-void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
-void intel_gpu_ips_teardown(void);
-void intel_init_gt_powersave(struct drm_i915_private *dev_priv);
-void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv);
-void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv);
-void intel_enable_gt_powersave(struct drm_i915_private *dev_priv);
-void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
-void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv);
-void gen6_rps_busy(struct drm_i915_private *dev_priv);
-void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
-void gen6_rps_idle(struct drm_i915_private *dev_priv);
-void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
 void g4x_wm_get_hw_state(struct drm_device *dev);
 void vlv_wm_get_hw_state(struct drm_device *dev);
 void ilk_wm_get_hw_state(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
new file mode 100644
index 000000000000..733d346601ca
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -0,0 +1,2424 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2012-2018 Intel Corporation
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/module.h>
+
+#include "../../../platform/x86/intel_ips.h"
+
+#include "intel_gt_pm.h"
+
+#include "i915_drv.h"
+#include "intel_drv.h"
+#include "intel_sideband.h"
+
+/**
+ * DOC: RC6
+ *
+ * RC6 is a special power stage which allows the GPU to enter a very
+ * low-voltage mode when idle, using as little as 0V while in this stage.
+ * This stage is entered automatically when the GPU is idle if RC6 support
+ * is enabled, and as soon as a new workload arrives the GPU wakes up
+ * again automatically.
+ *
+ * There are different RC6 modes available on Intel GPUs, which differ in
+ * the latency required to enter and leave RC6, and in the voltage consumed
+ * by the GPU in the different states.
+ *
+ * The combination of the following flags defines which states the GPU is
+ * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6,
+ * and RC6pp is the deepest RC6. Their support by hardware varies according
+ * to the GPU, BIOS, chipset and platform. RC6 is usually the safest one and
+ * the one which brings the most power savings; deeper states save more
+ * power, but require higher latency to switch to and wake up.
+ */
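
To put the residency counters in concrete terms, a minimal sketch
(assuming the intel_rc6_residency_ns() helper declared in i915_drv.h and
the GEN6_GT_GFX_RC6 counter register; the deeper states have their own
counters) of estimating the percentage of wall time spent in RC6:

	/* Sketch: fraction of the last second spent in RC6, in percent. */
	static u64 rc6_residency_percent(struct drm_i915_private *i915)
	{
		u64 r0, r1, t0, t1;

		r0 = intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6);
		t0 = ktime_get_raw_ns();
		msleep(1000);
		r1 = intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6);
		t1 = ktime_get_raw_ns();

		return div64_u64(100 * (r1 - r0), t1 - t0);
	}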
+
+/*
+ * Lock protecting IPS related data structures
+ */
+DEFINE_SPINLOCK(mchdev_lock);
+
+bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
+{
+	u16 rgvswctl;
+
+	lockdep_assert_held(&mchdev_lock);
+
+	rgvswctl = I915_READ16(MEMSWCTL);
+	if (rgvswctl & MEMCTL_CMD_STS) {
+		DRM_DEBUG("gpu busy, RCS change rejected\n");
+		return false; /* still busy with another command */
+	}
+
+	rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
+		(val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
+	I915_WRITE16(MEMSWCTL, rgvswctl);
+	POSTING_READ16(MEMSWCTL);
+
+	rgvswctl |= MEMCTL_CMD_STS;
+	I915_WRITE16(MEMSWCTL, rgvswctl);
+
+	return true;
+}
+
+static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
+{
+	u32 rgvmodectl;
+	u8 fmax, fmin, fstart, vstart;
+
+	spin_lock_irq(&mchdev_lock);
+
+	rgvmodectl = I915_READ(MEMMODECTL);
+
+	/* Enable temp reporting */
+	I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
+	I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
+
+	/* 100ms RC evaluation intervals */
+	I915_WRITE(RCUPEI, 100000);
+	I915_WRITE(RCDNEI, 100000);
+
+	/* Set max/min thresholds to 90ms and 80ms respectively */
+	I915_WRITE(RCBMAXAVG, 90000);
+	I915_WRITE(RCBMINAVG, 80000);
+
+	I915_WRITE(MEMIHYST, 1);
+
+	/* Set up min, max, and cur for interrupt handling */
+	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
+	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
+	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
+		MEMMODE_FSTART_SHIFT;
+
+	vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
+		PXVFREQ_PX_SHIFT;
+
+	dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
+	dev_priv->ips.fstart = fstart;
+
+	dev_priv->ips.max_delay = fstart;
+	dev_priv->ips.min_delay = fmin;
+	dev_priv->ips.cur_delay = fstart;
+
+	DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
+			 fmax, fmin, fstart);
+
+	I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
+
+	/*
+	 * Interrupts will be enabled in ironlake_irq_postinstall
+	 */
+
+	I915_WRITE(VIDSTART, vstart);
+	POSTING_READ(VIDSTART);
+
+	rgvmodectl |= MEMMODE_SWMODE_EN;
+	I915_WRITE(MEMMODECTL, rgvmodectl);
+
+	if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
+		DRM_ERROR("stuck trying to change perf mode\n");
+	mdelay(1);
+
+	ironlake_set_drps(dev_priv, fstart);
+
+	dev_priv->ips.last_count1 = I915_READ(DMIEC) +
+		I915_READ(DDREC) + I915_READ(CSIEC);
+	dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
+	dev_priv->ips.last_count2 = I915_READ(GFXEC);
+	dev_priv->ips.last_time2 = ktime_get_raw_ns();
+
+	spin_unlock_irq(&mchdev_lock);
+}
+
+static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
+{
+	u16 rgvswctl;
+
+	spin_lock_irq(&mchdev_lock);
+
+	rgvswctl = I915_READ16(MEMSWCTL);
+
+	/* Ack interrupts, disable EFC interrupt */
+	I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
+	I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
+	I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
+	I915_WRITE(DEIIR, DE_PCU_EVENT);
+	I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
+
+	/* Go back to the starting frequency */
+	ironlake_set_drps(dev_priv, dev_priv->ips.fstart);
+	mdelay(1);
+	rgvswctl |= MEMCTL_CMD_STS;
+	I915_WRITE(MEMSWCTL, rgvswctl);
+	mdelay(1);
+
+	spin_unlock_irq(&mchdev_lock);
+}
+
+/*
+ * There's a funny hw issue where the hw returns all 0 when reading from
+ * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
+ * ourselves, instead of doing an rmw cycle (which might result in us clearing
+ * all limits and leaving the gpu stuck at whatever frequency it currently runs at).
+ */
+static u32 intel_rps_limits(struct drm_i915_private *i915, u8 val)
+{
+	struct intel_rps *rps = &i915->gt_pm.rps;
+	u32 limits;
+
+	/*
+	 * Only set the down limit when we've reached the lowest level to avoid
+	 * getting more interrupts, otherwise leave this clear. This prevents a
+	 * race in the hw when coming out of rc6: There's a tiny window where
+	 * the hw runs at the minimal clock before selecting the desired
+	 * frequency, if the down threshold expires in that window we will not
+	 * receive a down interrupt.
+	 */
+	if (INTEL_GEN(i915) >= 9) {
+		limits = (rps->max_freq_softlimit) << 23;
+		if (val <= rps->min_freq_softlimit)
+			limits |= (rps->min_freq_softlimit) << 14;
+	} else {
+		limits = rps->max_freq_softlimit << 24;
+		if (val <= rps->min_freq_softlimit)
+			limits |= rps->min_freq_softlimit << 16;
+	}
+
+	return limits;
+}
+
+static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	int new_power;
+	u32 threshold_up = 0, threshold_down = 0; /* in % */
+	u32 ei_up = 0, ei_down = 0;
+
+	new_power = rps->power;
+	switch (rps->power) {
+	case LOW_POWER:
+		if (val > rps->efficient_freq + 1 &&
+		    val > rps->cur_freq)
+			new_power = BETWEEN;
+		break;
+
+	case BETWEEN:
+		if (val <= rps->efficient_freq &&
+		    val < rps->cur_freq)
+			new_power = LOW_POWER;
+		else if (val >= rps->rp0_freq &&
+			 val > rps->cur_freq)
+			new_power = HIGH_POWER;
+		break;
+
+	case HIGH_POWER:
+		if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
+		    val < rps->cur_freq)
+			new_power = BETWEEN;
+		break;
+	}
+	/* Max/min bins are special */
+	if (val <= rps->min_freq_softlimit)
+		new_power = LOW_POWER;
+	if (val >= rps->max_freq_softlimit)
+		new_power = HIGH_POWER;
+	if (new_power == rps->power)
+		return;
+
+	/* Note the units here are not exactly 1us, but 1280ns. */
+	switch (new_power) {
+	case LOW_POWER:
+		/* Upclock if more than 95% busy over 16ms */
+		ei_up = 16000;
+		threshold_up = 95;
+
+		/* Downclock if less than 85% busy over 32ms */
+		ei_down = 32000;
+		threshold_down = 85;
+		break;
+
+	case BETWEEN:
+		/* Upclock if more than 90% busy over 13ms */
+		ei_up = 13000;
+		threshold_up = 90;
+
+		/* Downclock if less than 75% busy over 32ms */
+		ei_down = 32000;
+		threshold_down = 75;
+		break;
+
+	case HIGH_POWER:
+		/* Upclock if more than 85% busy over 10ms */
+		ei_up = 10000;
+		threshold_up = 85;
+
+		/* Downclock if less than 60% busy over 32ms */
+		ei_down = 32000;
+		threshold_down = 60;
+		break;
+	}
+
+	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
+		/*
+		 * Baytrail and Braswell control the gpu frequency via the
+		 * punit, which is very slow and expensive to communicate with,
+		 * as we synchronously force the package to C0. If we try and
+		 * update the gpufreq too often we cause measurable system
+		 * load for little benefit (effectively stealing CPU time for
+		 * the GPU, negatively impacting overall throughput).
+		 */
+		ei_up <<= 2;
+		ei_down <<= 2;
+	}
+
+	I915_WRITE(GEN6_RP_UP_EI,
+		   GT_INTERVAL_FROM_US(dev_priv, ei_up));
+	I915_WRITE(GEN6_RP_UP_THRESHOLD,
+		   GT_INTERVAL_FROM_US(dev_priv,
+				       ei_up * threshold_up / 100));
+
+	I915_WRITE(GEN6_RP_DOWN_EI,
+		   GT_INTERVAL_FROM_US(dev_priv, ei_down));
+	I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
+		   GT_INTERVAL_FROM_US(dev_priv,
+				       ei_down * threshold_down / 100));
+
+	I915_WRITE(GEN6_RP_CONTROL,
+		   GEN6_RP_MEDIA_TURBO |
+		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
+		   GEN6_RP_MEDIA_IS_GFX |
+		   GEN6_RP_ENABLE |
+		   GEN6_RP_UP_BUSY_AVG |
+		   GEN6_RP_DOWN_IDLE_AVG);
+
+	rps->power = new_power;
+	rps->up_threshold = threshold_up;
+	rps->down_threshold = threshold_down;
+	rps->last_adj = 0;
+}
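
Concretely: entering LOW_POWER programs GEN6_RP_UP_EI with
GT_INTERVAL_FROM_US(dev_priv, 16000) and GEN6_RP_UP_THRESHOLD with
GT_INTERVAL_FROM_US(dev_priv, 16000 * 95 / 100), i.e. the hardware fires
an up-threshold interrupt once we have been more than 95% busy over a
16ms evaluation interval (with both intervals quadrupled on
Valleyview/Cherryview, as above).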
+
+static u32 gen6_rps_pm_mask(struct drm_i915_private *i915, u8 val)
+{
+	struct intel_rps *rps = &i915->gt_pm.rps;
+	u32 mask = 0;
+
+	/* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
+	if (val > rps->min_freq_softlimit)
+		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
+			 GEN6_PM_RP_DOWN_THRESHOLD |
+			 GEN6_PM_RP_DOWN_TIMEOUT);
+
+	if (val < rps->max_freq_softlimit)
+		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
+			 GEN6_PM_RP_UP_THRESHOLD);
+
+	mask &= i915->pm_rps_events;
+
+	return gen6_sanitize_rps_pm_mask(i915, ~mask);
+}
+
+/*
+ * gen6_set_rps is called to update the frequency request, but should also be
+ * called when the range (min_delay and max_delay) is modified so that we can
+ * update the GEN6_RP_INTERRUPT_LIMITS register accordingly.
+ */
+static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	/*
+	 * min/max delay may still have been modified so be sure to
+	 * write the limits value.
+	 */
+	if (val != rps->cur_freq) {
+		gen6_set_rps_thresholds(dev_priv, val);
+
+		if (INTEL_GEN(dev_priv) >= 9)
+			I915_WRITE(GEN6_RPNSWREQ,
+				   GEN9_FREQUENCY(val));
+		else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
+			I915_WRITE(GEN6_RPNSWREQ,
+				   HSW_FREQUENCY(val));
+		else
+			I915_WRITE(GEN6_RPNSWREQ,
+				   GEN6_FREQUENCY(val) |
+				   GEN6_OFFSET(0) |
+				   GEN6_AGGRESSIVE_TURBO);
+	}
+
+	/*
+	 * Make sure we continue to get interrupts
+	 * until we hit the minimum or maximum frequencies.
+	 */
+	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
+	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
+
+	rps->cur_freq = val;
+	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
+
+	return 0;
+}
+
+static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
+{
+	int err;
+
+	if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1),
+		      "Odd GPU freq value\n"))
+		val &= ~1;
+
+	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
+
+	if (val != dev_priv->gt_pm.rps.cur_freq) {
+		vlv_punit_get(dev_priv);
+		err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
+		vlv_punit_put(dev_priv);
+		if (err)
+			return err;
+
+		gen6_set_rps_thresholds(dev_priv, val);
+	}
+
+	dev_priv->gt_pm.rps.cur_freq = val;
+	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
+
+	return 0;
+}
+
+/*
+ * vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
+ *
+ * If Gfx is Idle, then
+ * 1. Forcewake Media well.
+ * 2. Request idle freq.
+ * 3. Release Forcewake of Media well.
+ */
+static void vlv_set_rps_idle(struct drm_i915_private *i915)
+{
+	struct intel_rps *rps = &i915->gt_pm.rps;
+	u32 val = rps->idle_freq;
+	int err;
+
+	if (rps->cur_freq <= val)
+		return;
+
+	/*
+	 * The punit delays the write of the frequency and voltage until it
+	 * determines the GPU is awake. During normal usage we don't want to
+	 * waste power changing the frequency if the GPU is sleeping (rc6).
+	 * However, the GPU and driver are now idle and we do not want to delay
+	 * switching to minimum voltage (reducing power whilst idle) as we do
+	 * not expect to be woken in the near future and so must flush the
+	 * change by waking the device.
+	 *
+	 * We choose to take the media powerwell (either would do to trick the
+	 * punit into committing the voltage change) as that takes a lot less
+	 * power than the render powerwell.
+	 */
+	intel_uncore_forcewake_get(i915, FORCEWAKE_MEDIA);
+	err = valleyview_set_rps(i915, val);
+	intel_uncore_forcewake_put(i915, FORCEWAKE_MEDIA);
+
+	if (err)
+		DRM_ERROR("Failed to set RPS for idle\n");
+}
+
+void gen6_rps_busy(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	mutex_lock(&rps->lock);
+	if (rps->enabled) {
+		u8 freq;
+
+		if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
+			gen6_rps_reset_ei(dev_priv);
+		I915_WRITE(GEN6_PMINTRMSK,
+			   gen6_rps_pm_mask(dev_priv, rps->cur_freq));
+
+		gen6_enable_rps_interrupts(dev_priv);
+
+		/*
+		 * Use the user's desired frequency as a guide, but for better
+		 * performance, jump directly to RPe as our starting frequency.
+		 */
+		freq = max(rps->cur_freq,
+			   rps->efficient_freq);
+
+		if (intel_set_rps(dev_priv,
+				  clamp(freq,
+					rps->min_freq_softlimit,
+					rps->max_freq_softlimit)))
+			DRM_DEBUG_DRIVER("Failed to set busy frequency\n");
+	}
+	mutex_unlock(&rps->lock);
+}
+
+void gen6_rps_idle(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	/*
+	 * Flush our bottom-half so that it does not race with us
+	 * setting the idle frequency and so that it is bounded by
+	 * our rpm wakeref. And then disable the interrupts to stop any
+	 * further RPS reclocking whilst we are asleep.
+	 */
+	gen6_disable_rps_interrupts(dev_priv);
+
+	mutex_lock(&rps->lock);
+	if (rps->enabled) {
+		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+			vlv_set_rps_idle(dev_priv);
+		else
+			gen6_set_rps(dev_priv, rps->idle_freq);
+		rps->last_adj = 0;
+		I915_WRITE(GEN6_PMINTRMSK,
+			   gen6_sanitize_rps_pm_mask(dev_priv, ~0));
+	}
+	mutex_unlock(&rps->lock);
+}
+
+void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
+{
+	struct intel_rps *rps = &rq->i915->gt_pm.rps;
+	unsigned long flags;
+	bool boost;
+
+	/*
+	 * This is intentionally racy! We peek at the state here, then
+	 * validate inside the RPS worker.
+	 */
+	if (!rps->enabled)
+		return;
+
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
+		return;
+
+	/* Serializes with i915_request_retire() */
+	boost = false;
+	spin_lock_irqsave(&rq->lock, flags);
+	if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) {
+		boost = !atomic_fetch_inc(&rps->num_waiters);
+		rq->waitboost = true;
+	}
+	spin_unlock_irqrestore(&rq->lock, flags);
+	if (!boost)
+		return;
+
+	if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
+		schedule_work(&rps->work);
+
+	atomic_inc(client ? &client->boosts : &rps->boosts);
+}
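
On the wait side, a caller would typically kick the boost before blocking
on a not-yet-completed request (a sketch mirroring the request wait path;
rps_client stands in for whatever client pointer the caller tracks):

	/* Sketch: nudge RPS upwards before sleeping on an incomplete request. */
	if (!i915_request_completed(rq))
		gen6_rps_boost(rq, rps_client);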
+
+int intel_set_rps(struct drm_i915_private *i915, u8 val)
+{
+	struct intel_rps *rps = &i915->gt_pm.rps;
+	int err;
+
+	lockdep_assert_held(&rps->lock);
+	GEM_BUG_ON(val > rps->max_freq);
+	GEM_BUG_ON(val < rps->min_freq);
+
+	if (!rps->enabled) {
+		rps->cur_freq = val;
+		return 0;
+	}
+
+	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+		err = valleyview_set_rps(i915, val);
+	else
+		err = gen6_set_rps(i915, val);
+
+	return err;
+}
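
As a usage sketch (hypothetical, but roughly what the sysfs frequency
handlers do; the function name is illustrative), a softlimit update takes
rps->lock, adjusts the range and re-applies the current frequency so that
GEN6_RP_INTERRUPT_LIMITS and GEN6_PMINTRMSK are rewritten for the new
range:

	/* Hypothetical: tighten the max softlimit and re-apply the request. */
	static int set_max_freq_softlimit(struct drm_i915_private *i915, u8 val)
	{
		struct intel_rps *rps = &i915->gt_pm.rps;
		int err;

		mutex_lock(&rps->lock);
		rps->max_freq_softlimit = clamp(val, rps->min_freq, rps->max_freq);
		err = intel_set_rps(i915,
				    clamp(rps->cur_freq,
					  rps->min_freq_softlimit,
					  rps->max_freq_softlimit));
		mutex_unlock(&rps->lock);

		return err;
	}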
+
+static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
+{
+	I915_WRITE(GEN6_RC_CONTROL, 0);
+	I915_WRITE(GEN9_PG_ENABLE, 0);
+}
+
+static void gen9_disable_rps(struct drm_i915_private *dev_priv)
+{
+	I915_WRITE(GEN6_RP_CONTROL, 0);
+}
+
+static void gen6_disable_rc6(struct drm_i915_private *dev_priv)
+{
+	I915_WRITE(GEN6_RC_CONTROL, 0);
+}
+
+static void gen6_disable_rps(struct drm_i915_private *dev_priv)
+{
+	I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
+	I915_WRITE(GEN6_RP_CONTROL, 0);
+}
+
+static void cherryview_disable_rc6(struct drm_i915_private *dev_priv)
+{
+	I915_WRITE(GEN6_RC_CONTROL, 0);
+}
+
+static void cherryview_disable_rps(struct drm_i915_private *dev_priv)
+{
+	I915_WRITE(GEN6_RP_CONTROL, 0);
+}
+
+static void valleyview_disable_rc6(struct drm_i915_private *dev_priv)
+{
+	/*
+	 * We take forcewake before disabling RC6; this is what the BIOS
+	 * expects when going into suspend.
+	 */
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	I915_WRITE(GEN6_RC_CONTROL, 0);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
+{
+	I915_WRITE(GEN6_RP_CONTROL, 0);
+}
+
+static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
+{
+	bool enable_rc6 = true;
+	unsigned long rc6_ctx_base;
+	u32 rc_ctl;
+	int rc_sw_target;
+
+	rc_ctl = I915_READ(GEN6_RC_CONTROL);
+	rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >>
+		       RC_SW_TARGET_STATE_SHIFT;
+	DRM_DEBUG_DRIVER("BIOS enabled RC states: "
+			 "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
+			 onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
+			 onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
+			 rc_sw_target);
+
+	if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
+		DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
+		enable_rc6 = false;
+	}
+
+	/*
+	 * The exact context size is not known for BXT, so assume a page size
+	 * for this check.
+	 */
+	rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
+	if (!(rc6_ctx_base >= dev_priv->dsm_reserved.start &&
+	      rc6_ctx_base + PAGE_SIZE <= dev_priv->dsm_reserved.end + 1)) {
+		DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
+		enable_rc6 = false;
+	}
+
+	if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) &&
+	      ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) &&
+	      ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) &&
+	      ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) {
+		DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
+		enable_rc6 = false;
+	}
+
+	if (!I915_READ(GEN8_PUSHBUS_CONTROL) ||
+	    !I915_READ(GEN8_PUSHBUS_ENABLE) ||
+	    !I915_READ(GEN8_PUSHBUS_SHIFT)) {
+		DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
+		enable_rc6 = false;
+	}
+
+	if (!I915_READ(GEN6_GFXPAUSE)) {
+		DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
+		enable_rc6 = false;
+	}
+
+	if (!I915_READ(GEN8_MISC_CTRL0)) {
+		DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
+		enable_rc6 = false;
+	}
+
+	return enable_rc6;
+}
+
+static bool sanitize_rc6(struct drm_i915_private *i915)
+{
+	struct intel_device_info *info = mkwrite_device_info(i915);
+
+	/* Powersaving is controlled by the host when inside a VM */
+	if (intel_vgpu_active(i915))
+		info->has_rc6 = 0;
+
+	if (info->has_rc6 &&
+	    IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) {
+		DRM_INFO("RC6 disabled by BIOS\n");
+		info->has_rc6 = 0;
+	}
+
+	/*
+	 * We assume that we do not have any deep rc6 levels if we don't have
+	 * the previous rc6 level supported, i.e. we use HAS_RC6()
+	 * as the initial coarse check for rc6 in general, moving on to
+	 * progressively finer/deeper levels.
+	 */
+	if (!info->has_rc6 && info->has_rc6p)
+		info->has_rc6p = 0;
+
+	return info->has_rc6;
+}
+
+static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	/* All of these values are in units of 50MHz */
+
+	/* static values from HW: RP0 > RP1 > RPn (min_freq) */
+	if (IS_GEN9_LP(dev_priv)) {
+		u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
+
+		rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
+		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
+		rps->min_freq = (rp_state_cap >>  0) & 0xff;
+	} else {
+		u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
+
+		rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
+		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
+		rps->min_freq = (rp_state_cap >> 16) & 0xff;
+	}
+	/* hw_max = RP0 until we check for overclocking */
+	rps->max_freq = rps->rp0_freq;
+
+	rps->efficient_freq = rps->rp1_freq;
+	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
+	    IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
+		u32 ddcc_status = 0;
+
+		if (sandybridge_pcode_read(dev_priv,
+					   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
+					   &ddcc_status) == 0)
+			rps->efficient_freq =
+				clamp_t(u8,
+					((ddcc_status >> 8) & 0xff),
+					rps->min_freq,
+					rps->max_freq);
+	}
+
+	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
+		/*
+		 * Store the frequency values in 16.66 MHz units, which is
+		 * the natural hardware unit for SKL
+		 */
+		rps->rp0_freq *= GEN9_FREQ_SCALER;
+		rps->rp1_freq *= GEN9_FREQ_SCALER;
+		rps->min_freq *= GEN9_FREQ_SCALER;
+		rps->max_freq *= GEN9_FREQ_SCALER;
+		rps->efficient_freq *= GEN9_FREQ_SCALER;
+	}
+}
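
To make the units concrete: on pre-gen9 parts a stored value of 18
requests 18 * 50 = 900 MHz directly, whereas on SKL (assuming
GEN9_FREQ_SCALER is 3, i.e. 16.66 MHz hardware units) the same 900 MHz is
stored as 54; intel_gpu_freq() applies the matching conversion when
reporting frequencies back in MHz.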
+
+static void reset_rps(struct drm_i915_private *i915,
+		      int (*set)(struct drm_i915_private *, u8))
+{
+	struct intel_rps *rps = &i915->gt_pm.rps;
+	u8 freq = rps->cur_freq;
+
+	/* force a reset */
+	rps->power = -1;
+	rps->cur_freq = -1;
+
+	if (set(i915, freq))
+		DRM_ERROR("Failed to reset RPS to initial values\n");
+}
+
+/* See the Gen9_GT_PM_Programming_Guide doc for the below */
+static void gen9_enable_rps(struct drm_i915_private *dev_priv)
+{
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	/* Program defaults and thresholds for RPS */
+	if (IS_GEN9(dev_priv))
+		I915_WRITE(GEN6_RC_VIDEO_FREQ,
+			   GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq));
+
+	/* 1 second timeout*/
+	I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
+		   GT_INTERVAL_FROM_US(dev_priv, 1000000));
+
+	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
+
+	/*
+	 * Leaning on the below call to gen6_set_rps to program/setup the
+	 * Up/Down EI & threshold registers, as well as the RP_CONTROL,
+	 * RP_INTERRUPT_LIMITS & RPNSWREQ registers.
+	 */
+	reset_rps(dev_priv, gen6_set_rps);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	u32 rc6_mode;
+
+	/* 1a: Software RC state - RC0 */
+	I915_WRITE(GEN6_RC_STATE, 0);
+
+	/*
+	 * 1b: Get forcewake during program sequence. Although the driver
+	 * hasn't enabled a state yet where we need forcewake, BIOS may have.
+	 */
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	/* 2a: Disable RC states. */
+	I915_WRITE(GEN6_RC_CONTROL, 0);
+
+	/* 2b: Program RC6 thresholds.*/
+	if (INTEL_GEN(dev_priv) >= 10) {
+		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
+		I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
+	} else if (IS_SKYLAKE(dev_priv)) {
+		/*
+		 * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
+		 * when CPG is enabled
+		 */
+		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
+	} else {
+		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
+	}
+
+	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+	for_each_engine(engine, dev_priv, id)
+		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
+
+	if (HAS_GUC(dev_priv))
+		I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
+
+	I915_WRITE(GEN6_RC_SLEEP, 0);
+
+	/*
+	 * 2c: Program Coarse Power Gating Policies.
+	 *
+	 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
+	 * use instead is a more conservative estimate for the maximum time
+	 * it takes us to service a CS interrupt and submit a new ELSP - that
+	 * is the time which the GPU is idle waiting for the CPU to select the
+	 * next request to execute. If the idle hysteresis is less than that
+	 * interrupt service latency, the hardware will automatically gate
+	 * the power well and we will then incur the wake up cost on top of
+	 * the service latency. A similar guide from intel_pstate is that we
+	 * do not want the enable hysteresis to be less than the wakeup latency.
+	 *
+	 * igt/gem_exec_nop/sequential provides a rough estimate for the
+	 * service latency, and puts it around 10us for Broadwell (and other
+	 * big core) and around 40us for Broxton (and other low power cores).
+	 * [Note that for legacy ringbuffer submission, this is less than 1us!]
+	 * However, the wakeup latency on Broxton is closer to 100us. To be
+	 * conservative, we have to factor in a context switch on top (due
+	 * to ksoftirqd).
+	 */
+	I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
+	I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
+
+	/* 3a: Enable RC6 */
+	I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
+
+	/* WaRsUseTimeoutMode:cnl (pre-prod) */
+	if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0))
+		rc6_mode = GEN7_RC_CTL_TO_MODE;
+	else
+		rc6_mode = GEN6_RC_CTL_EI_MODE(1);
+
+	I915_WRITE(GEN6_RC_CONTROL,
+		   GEN6_RC_CTL_HW_ENABLE |
+		   GEN6_RC_CTL_RC6_ENABLE |
+		   rc6_mode);
+
+	/*
+	 * 3b: Enable Coarse Power Gating only when RC6 is enabled.
+	 * WaRsDisableCoarsePowerGating:skl,cnl
+	 *  - Render/Media PG need to be disabled with RC6.
+	 */
+	if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
+		I915_WRITE(GEN9_PG_ENABLE, 0);
+	else
+		I915_WRITE(GEN9_PG_ENABLE,
+			   GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+static void gen8_enable_rc6(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	/* 1a: Software RC state - RC0 */
+	I915_WRITE(GEN6_RC_STATE, 0);
+
+	/*
+	 * 1b: Get forcewake during program sequence. Although the driver
+	 * hasn't enabled a state yet where we need forcewake, BIOS may have.
+	 */
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	/* 2a: Disable RC states. */
+	I915_WRITE(GEN6_RC_CONTROL, 0);
+
+	/* 2b: Program RC6 thresholds.*/
+	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
+	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+	for_each_engine(engine, dev_priv, id)
+		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
+	I915_WRITE(GEN6_RC_SLEEP, 0);
+	I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
+
+	/* 3: Enable RC6 */
+
+	I915_WRITE(GEN6_RC_CONTROL,
+		   GEN6_RC_CTL_HW_ENABLE |
+		   GEN7_RC_CTL_TO_MODE |
+		   GEN6_RC_CTL_RC6_ENABLE);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+static void gen8_enable_rps(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	/* 1 Program defaults and thresholds for RPS*/
+	I915_WRITE(GEN6_RPNSWREQ,
+		   HSW_FREQUENCY(rps->rp1_freq));
+	I915_WRITE(GEN6_RC_VIDEO_FREQ,
+		   HSW_FREQUENCY(rps->rp1_freq));
+	/* NB: Docs say 1s, and 1000000 - which aren't equivalent */
+	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1s timeout */
+
+	/* Docs recommend 900MHz, and 300 MHz respectively */
+	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
+		   rps->max_freq_softlimit << 24 |
+		   rps->min_freq_softlimit << 16);
+
+	I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
+	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
+	I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
+	I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
+
+	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+
+	/* 2: Enable RPS */
+	I915_WRITE(GEN6_RP_CONTROL,
+		   GEN6_RP_MEDIA_TURBO |
+		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
+		   GEN6_RP_MEDIA_IS_GFX |
+		   GEN6_RP_ENABLE |
+		   GEN6_RP_UP_BUSY_AVG |
+		   GEN6_RP_DOWN_IDLE_AVG);
+
+	reset_rps(dev_priv, gen6_set_rps);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+static void gen6_fix_rc6_voltage(struct drm_i915_private *i915)
+{
+	u32 rc6vids = 0;
+
+	if (sandybridge_pcode_read(i915,
+				   GEN6_PCODE_READ_RC6VIDS,
+				   &rc6vids)) {
+		DRM_DEBUG_DRIVER("Couldn't check for BIOS rc6 w/a\n");
+		return;
+	}
+
+	if (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450) {
+		DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
+				 GEN6_DECODE_RC6_VID(rc6vids & 0xff),
+				 450);
+
+		rc6vids &= 0xffff00;
+		rc6vids |= GEN6_ENCODE_RC6_VID(450);
+		if (sandybridge_pcode_write(i915,
+					    GEN6_PCODE_WRITE_RC6VIDS,
+					    rc6vids))
+			DRM_ERROR("Unable to correct rc6 voltage\n");
+	}
+}
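
As a worked example, assuming the usual macros in i915_reg.h where
GEN6_DECODE_RC6_VID(v) yields v * 5 + 245 (in mV): the 450mV floor
encodes to (450 - 245) / 5 = 41, so any BIOS-programmed VID that decodes
below 450mV gets rewritten with GEN6_ENCODE_RC6_VID(450).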
+
+static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	u32 gtfifodbg;
+	u32 rc6_mask;
+
+	I915_WRITE(GEN6_RC_STATE, 0);
+
+	/* Clear the DBG now so we don't confuse earlier errors */
+	gtfifodbg = I915_READ(GTFIFODBG);
+	if (gtfifodbg) {
+		DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
+		I915_WRITE(GTFIFODBG, gtfifodbg);
+	}
+
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	/* disable the counters and set deterministic thresholds */
+	I915_WRITE(GEN6_RC_CONTROL, 0);
+
+	I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
+	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
+	I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
+	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
+	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
+
+	for_each_engine(engine, dev_priv, id)
+		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
+
+	I915_WRITE(GEN6_RC_SLEEP, 0);
+	I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
+	if (IS_IVYBRIDGE(dev_priv))
+		I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
+	else
+		I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
+	I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
+	I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
+
+	/* We don't use those on Haswell */
+	rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
+	if (HAS_RC6p(dev_priv))
+		rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
+	if (HAS_RC6pp(dev_priv))
+		rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
+	I915_WRITE(GEN6_RC_CONTROL,
+		   rc6_mask |
+		   GEN6_RC_CTL_EI_MODE(1) |
+		   GEN6_RC_CTL_HW_ENABLE);
+
+	if (IS_GEN6(dev_priv))
+		gen6_fix_rc6_voltage(dev_priv);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+static void gen6_enable_rps(struct drm_i915_private *dev_priv)
+{
+	/*
+	 * Here begins a magic sequence of register writes to enable
+	 * auto-downclocking.
+	 *
+	 * Perhaps there might be some value in exposing these to
+	 * userspace...
+	 */
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	/* Power down if completely idle for over 50ms */
+	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
+	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+
+	reset_rps(dev_priv, gen6_set_rps);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	struct cpufreq_policy *policy;
+	const unsigned int scaling_factor = 180 / 2;
+	unsigned int max_ia_freq, min_ring_freq;
+	unsigned int max_gpu_freq, min_gpu_freq;
+	unsigned int gpu_freq;
+	int min_freq = 15;
+
+	lockdep_assert_held(&rps->lock);
+
+	if (rps->max_freq <= rps->min_freq)
+		return;
+
+	policy = cpufreq_cpu_get(0);
+	if (policy) {
+		max_ia_freq = policy->cpuinfo.max_freq;
+		cpufreq_cpu_put(policy);
+	} else {
+		/*
+		 * Default to measured freq if none found, PCU will ensure we
+		 * don't go over
+		 */
+		max_ia_freq = tsc_khz;
+	}
+
+	/* Convert from kHz to MHz */
+	max_ia_freq /= 1000;
+
+	min_ring_freq = I915_READ(DCLK) & 0xf;
+	/* convert DDR frequency from units of 266.6MHz to bandwidth */
+	min_ring_freq = mult_frac(min_ring_freq, 8, 3);
+
+	min_gpu_freq = rps->min_freq;
+	max_gpu_freq = rps->max_freq;
+	if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
+		/* Convert GT frequency to 50 MHz units */
+		min_gpu_freq /= GEN9_FREQ_SCALER;
+		max_gpu_freq /= GEN9_FREQ_SCALER;
+	}
+
+	/*
+	 * For each potential GPU frequency, load a ring frequency we'd like
+	 * to use for memory access.  We do this by specifying the IA frequency
+	 * the PCU should use as a reference to determine the ring frequency.
+	 */
+	for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
+		int diff = max_gpu_freq - gpu_freq;
+		unsigned int ia_freq = 0, ring_freq = 0;
+
+		if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
+			/*
+			 * ring_freq = 2 * GT. ring_freq is in 100MHz units
+			 * No floor required for ring frequency on SKL.
+			 */
+			ring_freq = gpu_freq;
+		} else if (INTEL_GEN(dev_priv) >= 8) {
+			/* max(2 * GT, DDR). NB: GT is 50MHz units */
+			ring_freq = max(min_ring_freq, gpu_freq);
+		} else if (IS_HASWELL(dev_priv)) {
+			ring_freq = mult_frac(gpu_freq, 5, 4);
+			ring_freq = max(min_ring_freq, ring_freq);
+			/* leave ia_freq as the default, chosen by cpufreq */
+		} else {
+			/* On older processors, there is no separate ring
+			 * clock domain, so in order to boost the bandwidth
+			 * of the ring, we need to upclock the CPU (ia_freq).
+			 *
+			 * For GPU frequencies less than 750MHz,
+			 * just use the lowest ring freq.
+			 */
+			if (gpu_freq < min_freq)
+				ia_freq = 800;
+			else
+				ia_freq = max_ia_freq - diff * scaling_factor;
+			ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
+		}
+
+		sandybridge_pcode_write(dev_priv,
+					GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
+					ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
+					ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
+					gpu_freq);
+	}
+}
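
A worked entry for the oldest branch: assuming max_ia_freq is 3000 MHz,
an entry four bins below the GPU maximum (diff = 4, scaling_factor =
180 / 2 = 90) requests ia_freq = 3000 - 4 * 90 = 2640 MHz, which is sent
to the PCU as DIV_ROUND_CLOSEST(2640, 100) = 26 in 100 MHz units.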
+
+static int cherryview_rps_max_freq(struct drm_i915_private *i915)
+{
+	u32 val, rp0;
+
+	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
+
+	switch (INTEL_SSEU(i915)->eu_total) {
+	case 8:
+		/* (2 * 4) config */
+		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
+		break;
+	case 12:
+		/* (2 * 6) config */
+		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
+		break;
+	case 16:
+		/* (2 * 8) config */
+	default:
+		/* Setting (2 * 8) Min RP0 for any other combination */
+		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
+		break;
+	}
+
+	rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
+
+	return rp0;
+}
+
+static int cherryview_rps_rpe_freq(struct drm_i915_private *i915)
+{
+	u32 val, rpe;
+
+	val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG);
+	rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
+
+	return rpe;
+}
+
+static int cherryview_rps_guar_freq(struct drm_i915_private *i915)
+{
+	u32 val, rp1;
+
+	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
+	rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
+
+	return rp1;
+}
+
+static u32 cherryview_rps_min_freq(struct drm_i915_private *i915)
+{
+	u32 val, rpn;
+
+	val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE);
+	rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
+		       FB_GFX_FREQ_FUSE_MASK);
+
+	return rpn;
+}
+
+static int valleyview_rps_guar_freq(struct drm_i915_private *i915)
+{
+	u32 val, rp1;
+
+	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
+
+	rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
+
+	return rp1;
+}
+
+static int valleyview_rps_max_freq(struct drm_i915_private *i915)
+{
+	u32 val, rp0;
+
+	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
+
+	rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
+	/* Clamp to max */
+	rp0 = min_t(u32, rp0, 0xea);
+
+	return rp0;
+}
+
+static int valleyview_rps_rpe_freq(struct drm_i915_private *i915)
+{
+	u32 val, rpe;
+
+	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
+	rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
+	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
+	rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
+
+	return rpe;
+}
+
+static int valleyview_rps_min_freq(struct drm_i915_private *i915)
+{
+	u32 val;
+
+	val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff;
+	/*
+	 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
+	 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
+	 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
+	 * a frequency Punit will not allow values below 0xc0. Clamp it to 0xc0
+	 * to make sure it matches what Punit accepts.
+	 */
+	return max_t(u32, val, 0xc0);
+}
+
+/* Check that the pctx buffer wasn't moved under us. */
+static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
+{
+	unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
+
+	WARN_ON(pctx_addr != dev_priv->dsm.start +
+			     dev_priv->vlv_pctx->stolen->start);
+}
+
+/* Check that the pcbr address is not empty. */
+static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
+{
+	unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
+
+	WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
+}
+
+static void cherryview_setup_pctx(struct drm_i915_private *dev_priv)
+{
+	resource_size_t pctx_paddr, paddr;
+	resource_size_t pctx_size = 32 * 1024;
+	u32 pcbr;
+
+	pcbr = I915_READ(VLV_PCBR);
+	if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
+		DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
+		paddr = dev_priv->dsm.end - pctx_size + 1;
+		GEM_BUG_ON(paddr > U32_MAX);
+
+		pctx_paddr = (paddr & (~4095));
+		I915_WRITE(VLV_PCBR, pctx_paddr);
+	}
+
+	DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
+}
+
+static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
+{
+	struct drm_i915_gem_object *pctx;
+	resource_size_t pctx_paddr;
+	resource_size_t pctx_size = 24 * 1024;
+	u32 pcbr;
+
+	pcbr = I915_READ(VLV_PCBR);
+	if (pcbr) {
+		/* BIOS set it up already, grab the pre-alloc'd space */
+		resource_size_t pcbr_offset;
+
+		pcbr_offset = round_down(pcbr, 4096) - dev_priv->dsm.start;
+		pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv,
+								      pcbr_offset,
+								      I915_GTT_OFFSET_NONE,
+								      pctx_size);
+		goto out;
+	}
+
+	DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
+
+	/*
+	 * From the Gunit register HAS:
+	 * The Gfx driver is expected to program this register and ensure
+	 * proper allocation within Gfx stolen memory.  For example, this
+	 * register should be programmed such that the PCBR range does not
+	 * overlap with other ranges, such as the frame buffer, protected
+	 * memory, or any other relevant ranges.
+	 */
+	pctx = i915_gem_object_create_stolen(dev_priv, pctx_size);
+	if (!pctx) {
+		DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
+		goto out;
+	}
+
+	GEM_BUG_ON(range_overflows_t(u64,
+				     dev_priv->dsm.start,
+				     pctx->stolen->start,
+				     U32_MAX));
+	pctx_paddr = dev_priv->dsm.start + pctx->stolen->start;
+	I915_WRITE(VLV_PCBR, pctx_paddr);
+
+out:
+	DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
+	dev_priv->vlv_pctx = pctx;
+}
+
+static void valleyview_cleanup_pctx(struct drm_i915_private *i915)
+{
+	if (WARN_ON(!i915->vlv_pctx))
+		return;
+
+	i915_gem_object_put(i915->vlv_pctx);
+	i915->vlv_pctx = NULL;
+}
+
+static void vlv_init_gpll_ref_freq(struct drm_i915_private *i915)
+{
+	i915->gt_pm.rps.gpll_ref_freq =
+		vlv_get_cck_clock(i915, "GPLL ref",
+				  CCK_GPLL_CLOCK_CONTROL,
+				  i915->czclk_freq);
+
+	DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
+			 i915->gt_pm.rps.gpll_ref_freq);
+}
+
+static void valleyview_init_gt_powersave(struct drm_i915_private *i915)
+{
+	struct intel_rps *rps = &i915->gt_pm.rps;
+	u32 val;
+
+	valleyview_setup_pctx(i915);
+
+	vlv_iosf_sb_get(i915,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
+
+	vlv_init_gpll_ref_freq(i915);
+
+	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+	switch ((val >> 6) & 3) {
+	case 0:
+	case 1:
+		i915->mem_freq = 800;
+		break;
+	case 2:
+		i915->mem_freq = 1066;
+		break;
+	case 3:
+		i915->mem_freq = 1333;
+		break;
+	}
+	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);
+
+	rps->max_freq = valleyview_rps_max_freq(i915);
+	rps->rp0_freq = rps->max_freq;
+	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(i915, rps->max_freq),
+			 rps->max_freq);
+
+	rps->efficient_freq = valleyview_rps_rpe_freq(i915);
+	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(i915, rps->efficient_freq),
+			 rps->efficient_freq);
+
+	rps->rp1_freq = valleyview_rps_guar_freq(i915);
+	DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(i915, rps->rp1_freq),
+			 rps->rp1_freq);
+
+	rps->min_freq = valleyview_rps_min_freq(i915);
+	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(i915, rps->min_freq),
+			 rps->min_freq);
+
+	vlv_iosf_sb_put(i915,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
+}
+
+static void cherryview_init_gt_powersave(struct drm_i915_private *i915)
+{
+	struct intel_rps *rps = &i915->gt_pm.rps;
+	u32 val;
+
+	cherryview_setup_pctx(i915);
+
+	vlv_iosf_sb_get(i915,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
+
+	vlv_init_gpll_ref_freq(i915);
+
+	val = vlv_cck_read(i915, CCK_FUSE_REG);
+
+	switch ((val >> 2) & 0x7) {
+	case 3:
+		i915->mem_freq = 2000;
+		break;
+	default:
+		i915->mem_freq = 1600;
+		break;
+	}
+	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);
+
+	rps->max_freq = cherryview_rps_max_freq(i915);
+	rps->rp0_freq = rps->max_freq;
+	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(i915, rps->max_freq),
+			 rps->max_freq);
+
+	rps->efficient_freq = cherryview_rps_rpe_freq(i915);
+	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(i915, rps->efficient_freq),
+			 rps->efficient_freq);
+
+	rps->rp1_freq = cherryview_rps_guar_freq(i915);
+	DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(i915, rps->rp1_freq),
+			 rps->rp1_freq);
+
+	rps->min_freq = cherryview_rps_min_freq(i915);
+	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(i915, rps->min_freq),
+			 rps->min_freq);
+
+	vlv_iosf_sb_put(i915,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
+
+	WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
+		   rps->min_freq) & 1,
+		  "Odd GPU freq values\n");
+}
+
+static void valleyview_cleanup_gt_powersave(struct drm_i915_private *i915)
+{
+	valleyview_cleanup_pctx(i915);
+}
+
+static void cherryview_enable_rc6(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	u32 gtfifodbg, rc6_mode, pcbr;
+
+	gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
+					     GT_FIFO_FREE_ENTRIES_CHV);
+	if (gtfifodbg) {
+		DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
+				 gtfifodbg);
+		I915_WRITE(GTFIFODBG, gtfifodbg);
+	}
+
+	cherryview_check_pctx(dev_priv);
+
+	/*
+	 * 1a & 1b: Get forcewake during program sequence. Although the driver
+	 * hasn't enabled a state yet where we need forcewake, BIOS may have.
+	 */
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	/*  Disable RC states. */
+	I915_WRITE(GEN6_RC_CONTROL, 0);
+
+	/* 2a: Program RC6 thresholds.*/
+	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
+	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+
+	for_each_engine(engine, dev_priv, id)
+		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
+	I915_WRITE(GEN6_RC_SLEEP, 0);
+
+	/* TO threshold set to 500 us ( 0x186 * 1.28 us) */
+	I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
+
+	/* Allows RC6 residency counter to work */
+	I915_WRITE(VLV_COUNTER_CONTROL,
+		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
+				      VLV_MEDIA_RC6_COUNT_EN |
+				      VLV_RENDER_RC6_COUNT_EN));
+
+	/* For now we assume BIOS is allocating and populating the PCBR  */
+	pcbr = I915_READ(VLV_PCBR);
+
+	/* 3: Enable RC6 */
+	rc6_mode = 0;
+	if (pcbr >> VLV_PCBR_ADDR_SHIFT)
+		rc6_mode = GEN7_RC_CTL_TO_MODE;
+	I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
+{
+	u32 val;
+
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	/* 1: Program defaults and thresholds for RPS*/
+	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
+	I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
+	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
+	I915_WRITE(GEN6_RP_UP_EI, 66000);
+	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
+
+	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+
+	/* 2: Enable RPS */
+	I915_WRITE(GEN6_RP_CONTROL,
+		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
+		   GEN6_RP_MEDIA_IS_GFX |
+		   GEN6_RP_ENABLE |
+		   GEN6_RP_UP_BUSY_AVG |
+		   GEN6_RP_DOWN_IDLE_AVG);
+
+	/* Setting Fixed Bias */
+	vlv_punit_get(dev_priv);
+
+	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
+	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
+
+	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
+
+	vlv_punit_put(dev_priv);
+
+	/* RPS code assumes GPLL is used */
+	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
+
+	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
+
+	reset_rps(dev_priv, valleyview_set_rps);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+static void valleyview_enable_rc6(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	u32 gtfifodbg;
+
+	valleyview_check_pctx(dev_priv);
+
+	gtfifodbg = I915_READ(GTFIFODBG);
+	if (gtfifodbg) {
+		DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
+				 gtfifodbg);
+		I915_WRITE(GTFIFODBG, gtfifodbg);
+	}
+
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	/*  Disable RC states. */
+	I915_WRITE(GEN6_RC_CONTROL, 0);
+
+	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
+	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
+	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
+
+	for_each_engine(engine, dev_priv, id)
+		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
+
+	I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
+
+	/* Allows RC6 residency counter to work */
+	I915_WRITE(VLV_COUNTER_CONTROL,
+		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
+				      VLV_MEDIA_RC0_COUNT_EN |
+				      VLV_RENDER_RC0_COUNT_EN |
+				      VLV_MEDIA_RC6_COUNT_EN |
+				      VLV_RENDER_RC6_COUNT_EN));
+
+	I915_WRITE(GEN6_RC_CONTROL,
+		   GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
+{
+	u32 val;
+
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
+	I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
+	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
+	I915_WRITE(GEN6_RP_UP_EI, 66000);
+	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
+
+	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+
+	I915_WRITE(GEN6_RP_CONTROL,
+		   GEN6_RP_MEDIA_TURBO |
+		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
+		   GEN6_RP_MEDIA_IS_GFX |
+		   GEN6_RP_ENABLE |
+		   GEN6_RP_UP_BUSY_AVG |
+		   GEN6_RP_DOWN_IDLE_CONT);
+
+	vlv_punit_get(dev_priv);
+
+	/* Setting Fixed Bias */
+	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
+	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
+
+	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
+
+	vlv_punit_put(dev_priv);
+
+	/* RPS code assumes GPLL is used */
+	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
+
+	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
+
+	reset_rps(dev_priv, valleyview_set_rps);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+static unsigned int intel_pxfreq(u32 vidfreq)
+{
+	unsigned int div = (vidfreq & 0x3f0000) >> 16;
+	unsigned int post = (vidfreq & 0x3000) >> 12;
+	unsigned int pre = (vidfreq & 0x7);
+
+	if (!pre)
+		return 0;
+
+	return (div * 133333) / (pre << post);
+}
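
The PXVFREQ decode above is easy to sanity-check in isolation. Below is a
minimal standalone sketch of the same bitfield math; the register value
0x1e1001 is a made-up example (div = 30, post = 1, pre = 1), not a reading
from real hardware.

	#include <stdio.h>

	static unsigned int intel_pxfreq(unsigned int vidfreq)
	{
		unsigned int div = (vidfreq & 0x3f0000) >> 16;
		unsigned int post = (vidfreq & 0x3000) >> 12;
		unsigned int pre = vidfreq & 0x7;

		if (!pre)
			return 0;	/* avoid dividing by zero */

		return (div * 133333) / (pre << post);
	}

	int main(void)
	{
		/* div = 30, post = 1, pre = 1 => 30 * 133333 / 2 = 1999995 */
		printf("%u\n", intel_pxfreq(0x1e1001));
		return 0;
	}
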
+
+static const struct cparams {
+	u16 i;
+	u16 t;
+	u16 m;
+	u16 c;
+} cparams[] = {
+	{ 1, 1333, 301, 28664 },
+	{ 1, 1066, 294, 24460 },
+	{ 1, 800, 294, 25192 },
+	{ 0, 1333, 276, 27605 },
+	{ 0, 1066, 276, 27605 },
+	{ 0, 800, 231, 23784 },
+};
+
+static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
+{
+	u64 total_count, diff, ret;
+	u32 count1, count2, count3, m = 0, c = 0;
+	unsigned long now = jiffies_to_msecs(jiffies), diff1;
+	int i;
+
+	lockdep_assert_held(&mchdev_lock);
+
+	diff1 = now - dev_priv->ips.last_time1;
+
+	/*
+	 * Prevent division-by-zero if we are asking too fast.
+	 * Also, we don't get interesting results if we are polling
+	 * faster than once in 10ms, so just return the saved value
+	 * in such cases.
+	 */
+	if (diff1 <= 10)
+		return dev_priv->ips.chipset_power;
+
+	count1 = I915_READ(DMIEC);
+	count2 = I915_READ(DDREC);
+	count3 = I915_READ(CSIEC);
+
+	total_count = count1 + count2 + count3;
+
+	/* FIXME: handle per-counter overflow */
+	if (total_count < dev_priv->ips.last_count1) {
+		diff = ~0UL - dev_priv->ips.last_count1;
+		diff += total_count;
+	} else {
+		diff = total_count - dev_priv->ips.last_count1;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
+		if (cparams[i].i == dev_priv->ips.c_m &&
+		    cparams[i].t == dev_priv->ips.r_t) {
+			m = cparams[i].m;
+			c = cparams[i].c;
+			break;
+		}
+	}
+
+	diff = div_u64(diff, diff1);
+	ret = ((m * diff) + c);
+	ret = div_u64(ret, 10);
+
+	dev_priv->ips.last_count1 = total_count;
+	dev_priv->ips.last_time1 = now;
+
+	dev_priv->ips.chipset_power = ret;
+
+	return ret;
+}
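
The overflow branch above is the usual wrap-safe delta for a free-running
counter (modulo the per-counter caveat in the FIXME). A standalone sketch of
the same arithmetic, with hypothetical readings straddling the wrap:

	#include <stdint.h>
	#include <stdio.h>

	/*
	 * Wrap-safe delta, same shape as the overflow branch above
	 * (the driver writes ~0UL, which is 64-bit in the kernel).
	 */
	static uint64_t counter_delta(uint64_t prev, uint64_t cur)
	{
		if (cur < prev)
			return (UINT64_MAX - prev) + cur;
		return cur - prev;
	}

	int main(void)
	{
		/* Hypothetical readings straddling the wrap point. */
		uint64_t prev = UINT64_MAX - 5;
		uint64_t cur = 10;

		/*
		 * Prints 15; the exact modular distance is 16, as the
		 * expression elides the tick at the wrap itself, matching
		 * the driver's arithmetic.
		 */
		printf("%llu\n", (unsigned long long)counter_delta(prev, cur));
		return 0;
	}
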
+
+unsigned long i915_chipset_val(struct drm_i915_private *i915)
+{
+	unsigned long val;
+
+	if (!IS_GEN5(i915))
+		return 0;
+
+	intel_runtime_pm_get(i915);
+	spin_lock_irq(&mchdev_lock);
+
+	val = __i915_chipset_val(i915);
+
+	spin_unlock_irq(&mchdev_lock);
+	intel_runtime_pm_put(i915);
+
+	return val;
+}
+
+unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
+{
+	unsigned long m, x, b;
+	u32 tsfs;
+
+	tsfs = I915_READ(TSFS);
+
+	m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
+	x = I915_READ8(TR1);
+
+	b = tsfs & TSFS_INTR_MASK;
+
+	return ((m * x) / 127) - b;
+}
+
+static int _pxvid_to_vd(u8 pxvid)
+{
+	if (pxvid == 0)
+		return 0;
+
+	if (pxvid >= 8 && pxvid < 31)
+		pxvid = 31;
+
+	return (pxvid + 2) * 125;
+}
+
+static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid)
+{
+	const int vd = _pxvid_to_vd(pxvid);
+	const int vm = vd - 1125;
+
+	if (IS_MOBILE(i915))
+		return vm > 0 ? vm : 0;
+
+	return vd;
+}
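
To make the clamping concrete: every PXVID code in 8..30 collapses to the
same voltage as code 31. A worked sketch (codes 18 and 40 are arbitrary
examples, not values from any platform):

	#include <stdio.h>

	static int _pxvid_to_vd(unsigned char pxvid)
	{
		if (pxvid == 0)
			return 0;
		if (pxvid >= 8 && pxvid < 31)
			pxvid = 31;	/* codes 8..30 all map to the floor */
		return (pxvid + 2) * 125;
	}

	int main(void)
	{
		printf("%d\n", _pxvid_to_vd(18));	/* (31 + 2) * 125 = 4125 */
		printf("%d\n", _pxvid_to_vd(40));	/* (40 + 2) * 125 = 5250 */
		return 0;
	}
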
+
+static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
+{
+	u64 now, diff, diffms;
+	u32 count;
+
+	lockdep_assert_held(&mchdev_lock);
+
+	now = ktime_get_raw_ns();
+	diffms = now - dev_priv->ips.last_time2;
+	do_div(diffms, NSEC_PER_MSEC);
+
+	/* Don't divide by 0 */
+	if (!diffms)
+		return;
+
+	count = I915_READ(GFXEC);
+
+	if (count < dev_priv->ips.last_count2) {
+		diff = ~0UL - dev_priv->ips.last_count2;
+		diff += count;
+	} else {
+		diff = count - dev_priv->ips.last_count2;
+	}
+
+	dev_priv->ips.last_count2 = count;
+	dev_priv->ips.last_time2 = now;
+
+	/* More magic constants... */
+	diff = diff * 1181;
+	diff = div_u64(diff, diffms * 10);
+	dev_priv->ips.gfx_power = diff;
+}
+
+void i915_update_gfx_val(struct drm_i915_private *i915)
+{
+	if (!IS_GEN5(i915))
+		return;
+
+	intel_runtime_pm_get(i915);
+	spin_lock_irq(&mchdev_lock);
+
+	__i915_update_gfx_val(i915);
+
+	spin_unlock_irq(&mchdev_lock);
+	intel_runtime_pm_put(i915);
+}
+
+static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
+{
+	unsigned long t, corr, state1, corr2, state2;
+	u32 pxvid, ext_v;
+
+	lockdep_assert_held(&mchdev_lock);
+
+	pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq));
+	pxvid = (pxvid >> 24) & 0x7f;
+	ext_v = pvid_to_extvid(dev_priv, pxvid);
+
+	state1 = ext_v;
+
+	t = i915_mch_val(dev_priv);
+
+	/* Revel in the empirically derived constants */
+
+	/* Correction factor in 1/100000 units */
+	if (t > 80)
+		corr = ((t * 2349) + 135940);
+	else if (t >= 50)
+		corr = ((t * 964) + 29317);
+	else /* < 50 */
+		corr = ((t * 301) + 1004);
+
+	corr = corr * ((150142 * state1) / 10000 - 78642);
+	corr /= 100000;
+	corr2 = (corr * dev_priv->ips.corr);
+
+	state2 = (corr2 * state1) / 10000;
+	state2 /= 100; /* convert to mW */
+
+	__i915_update_gfx_val(dev_priv);
+
+	return dev_priv->ips.gfx_power + state2;
+}
+
+unsigned long i915_gfx_val(struct drm_i915_private *i915)
+{
+	unsigned long val;
+
+	if (!IS_GEN5(i915))
+		return 0;
+
+	intel_runtime_pm_get(i915);
+	spin_lock_irq(&mchdev_lock);
+
+	val = __i915_gfx_val(i915);
+
+	spin_unlock_irq(&mchdev_lock);
+	intel_runtime_pm_put(i915);
+
+	return val;
+}
+
+static struct drm_i915_private *i915_mch_dev;
+
+static struct drm_i915_private *mchdev_get(void)
+{
+	struct drm_i915_private *i915;
+
+	rcu_read_lock();
+	i915 = i915_mch_dev;
+	/* i915_mch_dev is NULL until intel_gpu_ips_init() publishes it */
+	if (!i915 || !kref_get_unless_zero(&i915->drm.ref))
+		i915 = NULL;
+	rcu_read_unlock();
+
+	return i915;
+}
+
+/**
+ * i915_read_mch_val - return value for IPS use
+ *
+ * Calculate and return a value for the IPS driver to use when deciding whether
+ * we have thermal and power headroom to increase CPU or GPU power budget.
+ */
+unsigned long i915_read_mch_val(void)
+{
+	struct drm_i915_private *i915;
+	unsigned long chipset_val, graphics_val;
+
+	i915 = mchdev_get();
+	if (!i915)
+		return 0;
+
+	intel_runtime_pm_get(i915);
+	spin_lock_irq(&mchdev_lock);
+	chipset_val = __i915_chipset_val(i915);
+	graphics_val = __i915_gfx_val(i915);
+	spin_unlock_irq(&mchdev_lock);
+	intel_runtime_pm_put(i915);
+
+	drm_dev_put(&i915->drm);
+	return chipset_val + graphics_val;
+}
+EXPORT_SYMBOL_GPL(i915_read_mch_val);
+
+/**
+ * i915_gpu_raise - raise GPU frequency limit
+ *
+ * Raise the limit; IPS indicates we have thermal headroom.
+ */
+bool i915_gpu_raise(void)
+{
+	struct drm_i915_private *i915;
+
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
+
+	spin_lock_irq(&mchdev_lock);
+	if (i915->ips.max_delay > i915->ips.fmax)
+		i915->ips.max_delay--;
+	spin_unlock_irq(&mchdev_lock);
+
+	drm_dev_put(&i915->drm);
+	return true;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_raise);
+
+/**
+ * i915_gpu_lower - lower GPU frequency limit
+ *
+ * IPS indicates we're close to a thermal limit, so throttle back the GPU
+ * frequency maximum.
+ */
+bool i915_gpu_lower(void)
+{
+	struct drm_i915_private *i915;
+
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
+
+	spin_lock_irq(&mchdev_lock);
+	if (i915->ips.max_delay < i915->ips.min_delay)
+		i915->ips.max_delay++;
+	spin_unlock_irq(&mchdev_lock);
+
+	drm_dev_put(&i915->drm);
+	return true;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_lower);
+
+/**
+ * i915_gpu_busy - indicate GPU business to IPS
+ *
+ * Tell the IPS driver whether or not the GPU is busy.
+ */
+bool i915_gpu_busy(void)
+{
+	struct drm_i915_private *i915;
+	bool ret;
+
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
+
+	ret = i915->gt.awake;
+
+	drm_dev_put(&i915->drm);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_busy);
+
+/**
+ * i915_gpu_turbo_disable - disable graphics turbo
+ *
+ * Disable graphics turbo by resetting the max frequency and setting the
+ * current frequency to the default.
+ */
+bool i915_gpu_turbo_disable(void)
+{
+	struct drm_i915_private *i915;
+	bool ret;
+
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
+
+	spin_lock_irq(&mchdev_lock);
+	i915->ips.max_delay = i915->ips.fstart;
+	ret = ironlake_set_drps(i915, i915->ips.fstart);
+	spin_unlock_irq(&mchdev_lock);
+
+	drm_dev_put(&i915->drm);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
+
+/**
+ * Tells the intel_ips driver that the i915 driver is now loaded, if
+ * IPS got loaded first.
+ *
+ * This awkward dance is so that neither module has to depend on the
+ * other in order for IPS to do the appropriate communication of
+ * GPU turbo limits to i915.
+ */
+static void
+ips_ping_for_i915_load(void)
+{
+	void (*link)(void);
+
+	link = symbol_get(ips_link_to_i915_driver);
+	if (link) {
+		link();
+		symbol_put(ips_link_to_i915_driver);
+	}
+}
+
+void intel_gpu_ips_init(struct drm_i915_private *i915)
+{
+	/*
+	 * We only register the i915 ips part with intel-ips once everything is
+	 * set up, to avoid intel-ips sneaking in and reading bogus values.
+	 */
+	rcu_assign_pointer(i915_mch_dev, i915);
+
+	ips_ping_for_i915_load();
+}
+
+void intel_gpu_ips_teardown(void)
+{
+	rcu_assign_pointer(i915_mch_dev, NULL);
+}
+
+static void intel_init_emon(struct drm_i915_private *dev_priv)
+{
+	u32 lcfuse;
+	u8 pxw[16];
+	int i;
+
+	/* Disable to program */
+	I915_WRITE(ECR, 0);
+	POSTING_READ(ECR);
+
+	/* Program energy weights for various events */
+	I915_WRITE(SDEW, 0x15040d00);
+	I915_WRITE(CSIEW0, 0x007f0000);
+	I915_WRITE(CSIEW1, 0x1e220004);
+	I915_WRITE(CSIEW2, 0x04000004);
+
+	for (i = 0; i < 5; i++)
+		I915_WRITE(PEW(i), 0);
+	for (i = 0; i < 3; i++)
+		I915_WRITE(DEW(i), 0);
+
+	/* Program P-state weights to account for frequency power adjustment */
+	for (i = 0; i < 16; i++) {
+		u32 pxvidfreq = I915_READ(PXVFREQ(i));
+		unsigned long freq = intel_pxfreq(pxvidfreq);
+		unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
+			PXVFREQ_PX_SHIFT;
+		unsigned long val;
+
+		val = vid * vid;
+		val *= freq / 1000;
+		val *= 255;
+		val /= 127 * 127 * 900;
+		if (val > 0xff)
+			DRM_ERROR("bad pxval: %ld\n", val);
+		pxw[i] = val;
+	}
+	/* Render standby states get 0 weight */
+	pxw[14] = 0;
+	pxw[15] = 0;
+
+	for (i = 0; i < 4; i++)
+		I915_WRITE(PXW(i),
+			   pxw[4 * i + 0] << 24 |
+			   pxw[4 * i + 1] << 16 |
+			   pxw[4 * i + 2] << 8 |
+			   pxw[4 * i + 3] << 0);
+
+	/* Adjust magic regs to magic values (more experimental results) */
+	I915_WRITE(OGW0, 0);
+	I915_WRITE(OGW1, 0);
+	I915_WRITE(EG0, 0x00007f00);
+	I915_WRITE(EG1, 0x0000000e);
+	I915_WRITE(EG2, 0x000e0000);
+	I915_WRITE(EG3, 0x68000300);
+	I915_WRITE(EG4, 0x42000000);
+	I915_WRITE(EG5, 0x00140031);
+	I915_WRITE(EG6, 0);
+	I915_WRITE(EG7, 0);
+
+	for (i = 0; i < 8; i++)
+		I915_WRITE(PXWL(i), 0);
+
+	/* Enable PMON + select events */
+	I915_WRITE(ECR, 0x80000019);
+
+	lcfuse = I915_READ(LCFUSE02);
+
+	dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
+}
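
The fixed-point weight computation in the loop above is easiest to follow
with numbers plugged in. A standalone sketch with hypothetical inputs
(vid = 45, and freq = 200000 as if returned by intel_pxfreq()):

	#include <stdio.h>

	int main(void)
	{
		unsigned long vid = 45, freq = 200000, val;

		val = vid * vid;		/* 2025 */
		val *= freq / 1000;		/* x200 = 405000 */
		val *= 255;			/* 103275000 */
		val /= 127 * 127 * 900;		/* /14516100 = 7 */

		printf("pxw = %lu\n", val);	/* fits in the u8 weight table */
		return 0;
	}
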
+
+void intel_init_gt_powersave(struct drm_i915_private *i915)
+{
+	struct intel_rps *rps = &i915->gt_pm.rps;
+
+	mutex_init(&rps->lock);
+
+	/*
+	 * RPM depends on RC6 to save/restore the GT HW context, so make RC6 a
+	 * requirement.
+	 */
+	if (!sanitize_rc6(i915)) {
+		DRM_INFO("RC6 disabled, disabling runtime PM support\n");
+		intel_runtime_pm_get(i915);
+	}
+
+	mutex_lock(&rps->lock);
+
+	/* Initialize RPS limits (for userspace) */
+	if (IS_CHERRYVIEW(i915))
+		cherryview_init_gt_powersave(i915);
+	else if (IS_VALLEYVIEW(i915))
+		valleyview_init_gt_powersave(i915);
+	else if (INTEL_GEN(i915) >= 6)
+		gen6_init_rps_frequencies(i915);
+
+	/* Derive initial user preferences/limits from the hardware limits */
+	rps->idle_freq = rps->min_freq;
+	rps->cur_freq = rps->idle_freq;
+
+	rps->max_freq_softlimit = rps->max_freq;
+	rps->min_freq_softlimit = rps->min_freq;
+
+	if (IS_HASWELL(i915) || IS_BROADWELL(i915))
+		rps->min_freq_softlimit =
+			max_t(int,
+			      rps->efficient_freq,
+			      intel_freq_opcode(i915, 450));
+
+	/* After setting max-softlimit, find the overclock max freq */
+	if (IS_GEN6(i915) || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) {
+		u32 params = 0;
+
+		sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS, &params);
+		if (params & BIT(31)) { /* OC supported */
+			DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
+					 (rps->max_freq & 0xff) * 50,
+					 (params & 0xff) * 50);
+			rps->max_freq = params & 0xff;
+		}
+	}
+
+	/* Finally allow us to boost to max by default */
+	rps->boost_freq = rps->max_freq;
+
+	mutex_unlock(&rps->lock);
+}
+
+void intel_cleanup_gt_powersave(struct drm_i915_private *i915)
+{
+	if (IS_VALLEYVIEW(i915))
+		valleyview_cleanup_gt_powersave(i915);
+
+	if (!HAS_RC6(i915))
+		intel_runtime_pm_put(i915);
+}
+
+/**
+ * intel_suspend_gt_powersave - suspend PM work and helper threads
+ * @i915: i915 device
+ *
+ * We don't want to disable RC6 or other features here, we just want
+ * to make sure any work we've queued has finished and won't bother
+ * us while we're suspended.
+ */
+void intel_suspend_gt_powersave(struct drm_i915_private *i915)
+{
+	if (INTEL_GEN(i915) < 6)
+		return;
+
+	/* gen6_rps_idle() will be called later to disable interrupts */
+}
+
+void intel_sanitize_gt_powersave(struct drm_i915_private *i915)
+{
+	i915->gt_pm.rps.enabled = true; /* force RPS disabling */
+	i915->gt_pm.rc6.enabled = true; /* force RC6 disabling */
+	intel_disable_gt_powersave(i915);
+
+	if (INTEL_GEN(i915) >= 11)
+		gen11_reset_rps_interrupts(i915);
+	else
+		gen6_reset_rps_interrupts(i915);
+}
+
+static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
+{
+	lockdep_assert_held(&i915->gt_pm.rps.lock);
+
+	if (!i915->gt_pm.llc_pstate.enabled)
+		return;
+
+	/* Currently there is no HW configuration to be done to disable. */
+
+	i915->gt_pm.llc_pstate.enabled = false;
+}
+
+static void intel_disable_rc6(struct drm_i915_private *i915)
+{
+	lockdep_assert_held(&i915->gt_pm.rps.lock);
+
+	if (!i915->gt_pm.rc6.enabled)
+		return;
+
+	if (INTEL_GEN(i915) >= 9)
+		gen9_disable_rc6(i915);
+	else if (IS_CHERRYVIEW(i915))
+		cherryview_disable_rc6(i915);
+	else if (IS_VALLEYVIEW(i915))
+		valleyview_disable_rc6(i915);
+	else if (INTEL_GEN(i915) >= 6)
+		gen6_disable_rc6(i915);
+
+	i915->gt_pm.rc6.enabled = false;
+}
+
+static void intel_disable_rps(struct drm_i915_private *i915)
+{
+	lockdep_assert_held(&i915->gt_pm.rps.lock);
+
+	if (!i915->gt_pm.rps.enabled)
+		return;
+
+	if (INTEL_GEN(i915) >= 9)
+		gen9_disable_rps(i915);
+	else if (IS_CHERRYVIEW(i915))
+		cherryview_disable_rps(i915);
+	else if (IS_VALLEYVIEW(i915))
+		valleyview_disable_rps(i915);
+	else if (INTEL_GEN(i915) >= 6)
+		gen6_disable_rps(i915);
+	else if (IS_IRONLAKE_M(i915))
+		ironlake_disable_drps(i915);
+
+	i915->gt_pm.rps.enabled = false;
+}
+
+void intel_disable_gt_powersave(struct drm_i915_private *i915)
+{
+	mutex_lock(&i915->gt_pm.rps.lock);
+
+	intel_disable_rc6(i915);
+	intel_disable_rps(i915);
+	if (HAS_LLC(i915))
+		intel_disable_llc_pstate(i915);
+
+	mutex_unlock(&i915->gt_pm.rps.lock);
+}
+
+static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
+{
+	lockdep_assert_held(&i915->gt_pm.rps.lock);
+
+	if (i915->gt_pm.llc_pstate.enabled)
+		return;
+
+	gen6_update_ring_freq(i915);
+
+	i915->gt_pm.llc_pstate.enabled = true;
+}
+
+static void intel_enable_rc6(struct drm_i915_private *i915)
+{
+	lockdep_assert_held(&i915->gt_pm.rps.lock);
+
+	if (i915->gt_pm.rc6.enabled)
+		return;
+
+	if (IS_CHERRYVIEW(i915))
+		cherryview_enable_rc6(i915);
+	else if (IS_VALLEYVIEW(i915))
+		valleyview_enable_rc6(i915);
+	else if (INTEL_GEN(i915) >= 9)
+		gen9_enable_rc6(i915);
+	else if (IS_BROADWELL(i915))
+		gen8_enable_rc6(i915);
+	else if (INTEL_GEN(i915) >= 6)
+		gen6_enable_rc6(i915);
+
+	i915->gt_pm.rc6.enabled = true;
+}
+
+static void intel_enable_rps(struct drm_i915_private *i915)
+{
+	struct intel_rps *rps = &i915->gt_pm.rps;
+
+	lockdep_assert_held(&rps->lock);
+
+	if (rps->enabled)
+		return;
+
+	if (IS_CHERRYVIEW(i915)) {
+		cherryview_enable_rps(i915);
+	} else if (IS_VALLEYVIEW(i915)) {
+		valleyview_enable_rps(i915);
+	} else if (INTEL_GEN(i915) >= 9) {
+		gen9_enable_rps(i915);
+	} else if (IS_BROADWELL(i915)) {
+		gen8_enable_rps(i915);
+	} else if (INTEL_GEN(i915) >= 6) {
+		gen6_enable_rps(i915);
+	} else if (IS_IRONLAKE_M(i915)) {
+		ironlake_enable_drps(i915);
+		intel_init_emon(i915);
+	}
+
+	WARN_ON(rps->max_freq < rps->min_freq);
+	WARN_ON(rps->idle_freq > rps->max_freq);
+
+	WARN_ON(rps->efficient_freq < rps->min_freq);
+	WARN_ON(rps->efficient_freq > rps->max_freq);
+
+	rps->enabled = true;
+}
+
+void intel_enable_gt_powersave(struct drm_i915_private *i915)
+{
+	/* Powersaving is controlled by the host when inside a VM */
+	if (intel_vgpu_active(i915))
+		return;
+
+	mutex_lock(&i915->gt_pm.rps.lock);
+
+	if (HAS_RC6(i915))
+		intel_enable_rc6(i915);
+	intel_enable_rps(i915);
+	if (HAS_LLC(i915))
+		intel_enable_llc_pstate(i915);
+
+	mutex_unlock(&i915->gt_pm.rps.lock);
+}
+
+static int byt_gpu_freq(const struct drm_i915_private *i915, int val)
+{
+	const struct intel_rps *rps = &i915->gt_pm.rps;
+
+	/*
+	 * N = val - 0xb7
+	 * Slow = Fast = GPLL ref * N
+	 */
+	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
+}
+
+static int byt_freq_opcode(const struct drm_i915_private *i915, int val)
+{
+	const struct intel_rps *rps = &i915->gt_pm.rps;
+
+	return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
+}
+
+static int chv_gpu_freq(const struct drm_i915_private *i915, int val)
+{
+	const struct intel_rps *rps = &i915->gt_pm.rps;
+
+	/*
+	 * N = val / 2
+	 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
+	 */
+	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
+}
+
+static int chv_freq_opcode(const struct drm_i915_private *i915, int val)
+{
+	const struct intel_rps *rps = &i915->gt_pm.rps;
+
+	/* CHV needs even values */
+	return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
+}
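
The two CHV helpers are inverses of each other, up to the even-value
rounding. A userspace sketch of the round-trip, using a made-up
gpll_ref_freq of 6000 (the real value is read from the CCK unit at init):

	#include <stdio.h>

	#define DIV_ROUND_CLOSEST(x, d)	(((x) + (d) / 2) / (d))

	/* Hypothetical GPLL reference frequency, for illustration only. */
	static const int gpll_ref_freq = 6000;

	static int chv_gpu_freq(int val)
	{
		return DIV_ROUND_CLOSEST(gpll_ref_freq * val, 2 * 2 * 1000);
	}

	static int chv_freq_opcode(int val)
	{
		/* CHV opcodes must be even, hence the final * 2 */
		return DIV_ROUND_CLOSEST(2 * 1000 * val, gpll_ref_freq) * 2;
	}

	int main(void)
	{
		int op = chv_freq_opcode(300);

		/* prints: 300 MHz -> opcode 200 -> 300 MHz */
		printf("300 MHz -> opcode %d -> %d MHz\n", op, chv_gpu_freq(op));
		return 0;
	}
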
+
+int intel_gpu_freq(const struct drm_i915_private *i915, int val)
+{
+	if (INTEL_GEN(i915) >= 9)
+		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
+					 GEN9_FREQ_SCALER);
+	else if (IS_CHERRYVIEW(i915))
+		return chv_gpu_freq(i915, val);
+	else if (IS_VALLEYVIEW(i915))
+		return byt_gpu_freq(i915, val);
+	else
+		return val * GT_FREQUENCY_MULTIPLIER;
+}
+
+int intel_freq_opcode(const struct drm_i915_private *i915, int val)
+{
+	if (INTEL_GEN(i915) >= 9)
+		return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
+					 GT_FREQUENCY_MULTIPLIER);
+	else if (IS_CHERRYVIEW(i915))
+		return chv_freq_opcode(i915, val);
+	else if (IS_VALLEYVIEW(i915))
+		return byt_freq_opcode(i915, val);
+	else
+		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
+}
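
For gen9+ the opcode unit is 50/3 MHz rather than the legacy 50 MHz, hence
the extra scaler. A quick sketch, assuming the driver's usual values of
GT_FREQUENCY_MULTIPLIER (50) and GEN9_FREQ_SCALER (3):

	#include <stdio.h>

	#define DIV_ROUND_CLOSEST(x, d)	(((x) + (d) / 2) / (d))
	#define GT_FREQUENCY_MULTIPLIER	50
	#define GEN9_FREQ_SCALER	3

	int main(void)
	{
		int val = 36;	/* hypothetical gen9 RPS opcode */
		int mhz = DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
					    GEN9_FREQ_SCALER);

		/* 36 * 50 / 3 = 600 MHz; pre-gen9 the same opcode means 1800 MHz */
		printf("%d MHz\n", mhz);
		return 0;
	}
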
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
new file mode 100644
index 000000000000..20e937d6c7e0
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_gt_pm.h
@@ -0,0 +1,105 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2012-2018 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_PM_H__
+#define __INTEL_GT_PM_H__
+
+struct drm_i915_private;
+struct i915_request;
+struct intel_rps_client;
+
+struct intel_rps_ei {
+	ktime_t ktime;
+	u32 render_c0;
+	u32 media_c0;
+};
+
+struct intel_rps {
+	struct mutex lock;
+
+	/*
+	 * work, interrupts_enabled and pm_iir are protected by
+	 * i915->irq_lock
+	 */
+	struct work_struct work;
+	bool interrupts_enabled;
+	u32 pm_iir;
+
+	/* PM interrupt bits that should never be masked */
+	u32 pm_intrmsk_mbz;
+
+	/*
+	 * Frequencies are stored in potentially platform dependent multiples.
+	 * In other words, *_freq needs to be multiplied by X to be interesting.
+	 * Soft limits are those which are used for the dynamic reclocking done
+	 * by the driver (raise frequencies under heavy loads, and lower for
+	 * lighter loads). Hard limits are those imposed by the hardware.
+	 *
+	 * A distinction is made for overclocking, which is never enabled by
+	 * default, and is considered to be above the hard limit if it's
+	 * possible at all.
+	 */
+	u8 cur_freq;		/* Current frequency (cached, may not == HW) */
+	u8 min_freq_softlimit;	/* Minimum frequency permitted by the driver */
+	u8 max_freq_softlimit;	/* Max frequency permitted by the driver */
+	u8 max_freq;		/* Maximum frequency, RP0 if not overclocking */
+	u8 min_freq;		/* AKA RPn. Minimum frequency */
+	u8 boost_freq;		/* Frequency to request when wait boosting */
+	u8 idle_freq;		/* Frequency to request when we are idle */
+	u8 efficient_freq;	/* AKA RPe. Pre-determined balanced frequency */
+	u8 rp1_freq;		/* "less than" RP0 power/frequency */
+	u8 rp0_freq;		/* Non-overclocked max frequency. */
+	u16 gpll_ref_freq;	/* vlv/chv GPLL reference frequency */
+
+	u8 up_threshold; /* Current %busy required to upclock */
+	u8 down_threshold; /* Current %busy required to downclock */
+
+	int last_adj;
+	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
+
+	bool enabled;
+	atomic_t num_waiters;
+	atomic_t boosts;
+
+	/* manual wa residency calculations */
+	struct intel_rps_ei ei;
+};
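
To illustrate how the soft limits above bound a request: even a boost is
clamped into [min_freq_softlimit, max_freq_softlimit] before it is written,
mirroring the clamp() in gen6_rps_busy(). A minimal sketch with hypothetical
values:

	#include <stdio.h>

	#define clamp(v, lo, hi) ((v) < (lo) ? (lo) : (v) > (hi) ? (hi) : (v))

	int main(void)
	{
		/* Hypothetical limits, all in platform frequency units. */
		unsigned char min_soft = 6, max_soft = 22, boost = 26;

		/* The boost request is still bounded by the soft limits. */
		printf("%u\n", clamp(boost, min_soft, max_soft));	/* 22 */
		return 0;
	}
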
+
+struct intel_rc6 {
+	bool enabled;
+	u64 prev_hw_residency[4];
+	u64 cur_residency[4];
+};
+
+struct intel_llc_pstate {
+	bool enabled;
+};
+
+struct intel_gt_pm {
+	struct intel_rps rps;
+	struct intel_rc6 rc6;
+	struct intel_llc_pstate llc_pstate;
+};
+
+void intel_gpu_ips_init(struct drm_i915_private *i915);
+void intel_gpu_ips_teardown(void);
+
+void intel_init_gt_powersave(struct drm_i915_private *i915);
+void intel_cleanup_gt_powersave(struct drm_i915_private *i915);
+void intel_sanitize_gt_powersave(struct drm_i915_private *i915);
+void intel_enable_gt_powersave(struct drm_i915_private *i915);
+void intel_disable_gt_powersave(struct drm_i915_private *i915);
+void intel_suspend_gt_powersave(struct drm_i915_private *i915);
+
+void gen6_rps_busy(struct drm_i915_private *i915);
+void gen6_rps_reset_ei(struct drm_i915_private *i915);
+void gen6_rps_idle(struct drm_i915_private *i915);
+void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
+
+int intel_gpu_freq(const struct drm_i915_private *i915, int val);
+int intel_freq_opcode(const struct drm_i915_private *i915, int val);
+
+#endif /* __INTEL_GT_PM_H__ */
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index f94fc65c5149..b12290db300c 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -34,27 +34,6 @@
 #include "i915_drv.h"
 #include "intel_drv.h"
 #include "intel_sideband.h"
-#include "../../../platform/x86/intel_ips.h"
-
-/**
- * DOC: RC6
- *
- * RC6 is a special power stage which allows the GPU to enter an very
- * low-voltage mode when idle, using down to 0V while at this stage.  This
- * stage is entered automatically when the GPU is idle when RC6 support is
- * enabled, and as soon as new workload arises GPU wakes up automatically as well.
- *
- * There are different RC6 modes available in Intel GPU, which differentiate
- * among each other with the latency required to enter and leave RC6 and
- * voltage consumed by the GPU in different states.
- *
- * The combination of the following flags define which states GPU is allowed
- * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and
- * RC6pp is deepest RC6. Their support by hardware varies according to the
- * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
- * which brings the most power savings; deeper states save more power, but
- * require higher latency to switch to and wake up.
- */
 
 static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
 {
@@ -6102,2549 +6081,267 @@ void intel_init_ipc(struct drm_i915_private *dev_priv)
 	intel_enable_ipc(dev_priv);
 }
 
-/*
- * Lock protecting IPS related data structures
- */
-DEFINE_SPINLOCK(mchdev_lock);
+static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
+{
+	/*
+	 * On Ibex Peak and Cougar Point, we need to disable clock
+	 * gating for the panel power sequencer or it will fail to
+	 * start up when no ports are active.
+	 */
+	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
+}
 
-bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
+static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
 {
-	u16 rgvswctl;
+	enum pipe pipe;
 
-	lockdep_assert_held(&mchdev_lock);
+	for_each_pipe(dev_priv, pipe) {
+		I915_WRITE(DSPCNTR(pipe),
+			   I915_READ(DSPCNTR(pipe)) |
+			   DISPPLANE_TRICKLE_FEED_DISABLE);
 
-	rgvswctl = I915_READ16(MEMSWCTL);
-	if (rgvswctl & MEMCTL_CMD_STS) {
-		DRM_DEBUG("gpu busy, RCS change rejected\n");
-		return false; /* still busy with another command */
+		I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
+		POSTING_READ(DSPSURF(pipe));
 	}
-
-	rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
-		(val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
-	I915_WRITE16(MEMSWCTL, rgvswctl);
-	POSTING_READ16(MEMSWCTL);
-
-	rgvswctl |= MEMCTL_CMD_STS;
-	I915_WRITE16(MEMSWCTL, rgvswctl);
-
-	return true;
 }
 
-static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
+static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
 {
-	u32 rgvmodectl;
-	u8 fmax, fmin, fstart, vstart;
-
-	spin_lock_irq(&mchdev_lock);
-
-	rgvmodectl = I915_READ(MEMMODECTL);
+	u32 dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
 
-	/* Enable temp reporting */
-	I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
-	I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
-
-	/* 100ms RC evaluation intervals */
-	I915_WRITE(RCUPEI, 100000);
-	I915_WRITE(RCDNEI, 100000);
-
-	/* Set max/min thresholds to 90ms and 80ms respectively */
-	I915_WRITE(RCBMAXAVG, 90000);
-	I915_WRITE(RCBMINAVG, 80000);
-
-	I915_WRITE(MEMIHYST, 1);
-
-	/* Set up min, max, and cur for interrupt handling */
-	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
-	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
-	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
-		MEMMODE_FSTART_SHIFT;
-
-	vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
-		PXVFREQ_PX_SHIFT;
-
-	dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
-	dev_priv->ips.fstart = fstart;
-
-	dev_priv->ips.max_delay = fstart;
-	dev_priv->ips.min_delay = fmin;
-	dev_priv->ips.cur_delay = fstart;
+	/*
+	 * Required for FBC
+	 * WaFbcDisableDpfcClockGating:ilk
+	 */
+	dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
+		   ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
+		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
 
-	DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
-			 fmax, fmin, fstart);
+	I915_WRITE(PCH_3DCGDIS0,
+		   MARIUNIT_CLOCK_GATE_DISABLE |
+		   SVSMUNIT_CLOCK_GATE_DISABLE);
+	I915_WRITE(PCH_3DCGDIS1,
+		   VFMUNIT_CLOCK_GATE_DISABLE);
 
-	I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
+	/*
+	 * According to the spec the following bits should be set in
+	 * order to enable memory self-refresh
+	 * The bit 22/21 of 0x42004
+	 * The bit 5 of 0x42020
+	 * The bit 15 of 0x45000
+	 */
+	I915_WRITE(ILK_DISPLAY_CHICKEN2,
+		   (I915_READ(ILK_DISPLAY_CHICKEN2) |
+		    ILK_DPARB_GATE | ILK_VSDPFD_FULL));
+	dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
+	I915_WRITE(DISP_ARB_CTL,
+		   (I915_READ(DISP_ARB_CTL) |
+		    DISP_FBC_WM_DIS));
 
 	/*
-	 * Interrupts will be enabled in ironlake_irq_postinstall
+	 * Based on the document from hardware guys the following bits
+	 * should be set unconditionally in order to enable FBC.
+	 * The bit 22 of 0x42000
+	 * The bit 22 of 0x42004
+	 * The bit 7,8,9 of 0x42020.
 	 */
+	if (IS_IRONLAKE_M(dev_priv)) {
+		/* WaFbcAsynchFlipDisableFbcQueue:ilk */
+		I915_WRITE(ILK_DISPLAY_CHICKEN1,
+			   I915_READ(ILK_DISPLAY_CHICKEN1) |
+			   ILK_FBCQ_DIS);
+		I915_WRITE(ILK_DISPLAY_CHICKEN2,
+			   I915_READ(ILK_DISPLAY_CHICKEN2) |
+			   ILK_DPARB_GATE);
+	}
 
-	I915_WRITE(VIDSTART, vstart);
-	POSTING_READ(VIDSTART);
+	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
 
-	rgvmodectl |= MEMMODE_SWMODE_EN;
-	I915_WRITE(MEMMODECTL, rgvmodectl);
+	I915_WRITE(ILK_DISPLAY_CHICKEN2,
+		   I915_READ(ILK_DISPLAY_CHICKEN2) |
+		   ILK_ELPIN_409_SELECT);
+	I915_WRITE(_3D_CHICKEN2,
+		   _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
+		   _3D_CHICKEN2_WM_READ_PIPELINED);
 
-	if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
-		DRM_ERROR("stuck trying to change perf mode\n");
-	mdelay(1);
+	/* WaDisableRenderCachePipelinedFlush:ilk */
+	I915_WRITE(CACHE_MODE_0,
+		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
 
-	ironlake_set_drps(dev_priv, fstart);
+	/* WaDisable_RenderCache_OperationalFlush:ilk */
+	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
 
-	dev_priv->ips.last_count1 = I915_READ(DMIEC) +
-		I915_READ(DDREC) + I915_READ(CSIEC);
-	dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
-	dev_priv->ips.last_count2 = I915_READ(GFXEC);
-	dev_priv->ips.last_time2 = ktime_get_raw_ns();
+	g4x_disable_trickle_feed(dev_priv);
 
-	spin_unlock_irq(&mchdev_lock);
+	ibx_init_clock_gating(dev_priv);
 }
 
-static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
+static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
 {
-	u16 rgvswctl;
-
-	spin_lock_irq(&mchdev_lock);
-
-	rgvswctl = I915_READ16(MEMSWCTL);
-
-	/* Ack interrupts, disable EFC interrupt */
-	I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
-	I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
-	I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
-	I915_WRITE(DEIIR, DE_PCU_EVENT);
-	I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
-
-	/* Go back to the starting frequency */
-	ironlake_set_drps(dev_priv, dev_priv->ips.fstart);
-	mdelay(1);
-	rgvswctl |= MEMCTL_CMD_STS;
-	I915_WRITE(MEMSWCTL, rgvswctl);
-	mdelay(1);
+	int pipe;
+	u32 val;
 
-	spin_unlock_irq(&mchdev_lock);
+	/*
+	 * On Ibex Peak and Cougar Point, we need to disable clock
+	 * gating for the panel power sequencer or it will fail to
+	 * start up when no ports are active.
+	 */
+	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
+		   PCH_DPLUNIT_CLOCK_GATE_DISABLE |
+		   PCH_CPUNIT_CLOCK_GATE_DISABLE);
+	I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
+		   DPLS_EDP_PPS_FIX_DIS);
+	/*
+	 * The below fixes the weird display corruption, a few pixels shifted
+	 * downward, on (only) LVDS of some HP laptops with IVY.
+	 */
+	for_each_pipe(dev_priv, pipe) {
+		val = I915_READ(TRANS_CHICKEN2(pipe));
+		val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
+		val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
+		if (dev_priv->vbt.fdi_rx_polarity_inverted)
+			val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
+		val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
+		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
+		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
+		I915_WRITE(TRANS_CHICKEN2(pipe), val);
+	}
+	/* WADP0ClockGatingDisable */
+	for_each_pipe(dev_priv, pipe) {
+		I915_WRITE(TRANS_CHICKEN1(pipe),
+			   TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
+	}
 }
 
-/* There's a funny hw issue where the hw returns all 0 when reading from
- * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
- * ourselves, instead of doing a rmw cycle (which might result in us clearing
- * all limits and the gpu stuck at whatever frequency it is at atm).
- */
-static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
+static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
 {
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 limits;
-
-	/* Only set the down limit when we've reached the lowest level to avoid
-	 * getting more interrupts, otherwise leave this clear. This prevents a
-	 * race in the hw when coming out of rc6: There's a tiny window where
-	 * the hw runs at the minimal clock before selecting the desired
-	 * frequency, if the down threshold expires in that window we will not
-	 * receive a down interrupt. */
-	if (INTEL_GEN(dev_priv) >= 9) {
-		limits = (rps->max_freq_softlimit) << 23;
-		if (val <= rps->min_freq_softlimit)
-			limits |= (rps->min_freq_softlimit) << 14;
-	} else {
-		limits = rps->max_freq_softlimit << 24;
-		if (val <= rps->min_freq_softlimit)
-			limits |= rps->min_freq_softlimit << 16;
-	}
+	u32 tmp;
 
-	return limits;
+	tmp = I915_READ(MCH_SSKPD);
+	if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
+		DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
+			      tmp);
 }
 
-static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
+static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
 {
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	int new_power;
-	u32 threshold_up = 0, threshold_down = 0; /* in % */
-	u32 ei_up = 0, ei_down = 0;
-
-	new_power = rps->power;
-	switch (rps->power) {
-	case LOW_POWER:
-		if (val > rps->efficient_freq + 1 &&
-		    val > rps->cur_freq)
-			new_power = BETWEEN;
-		break;
+	I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
 
-	case BETWEEN:
-		if (val <= rps->efficient_freq &&
-		    val < rps->cur_freq)
-			new_power = LOW_POWER;
-		else if (val >= rps->rp0_freq &&
-			 val > rps->cur_freq)
-			new_power = HIGH_POWER;
-		break;
+	I915_WRITE(ILK_DISPLAY_CHICKEN2,
+		   I915_READ(ILK_DISPLAY_CHICKEN2) |
+		   ILK_ELPIN_409_SELECT);
 
-	case HIGH_POWER:
-		if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
-		    val < rps->cur_freq)
-			new_power = BETWEEN;
-		break;
-	}
-	/* Max/min bins are special */
-	if (val <= rps->min_freq_softlimit)
-		new_power = LOW_POWER;
-	if (val >= rps->max_freq_softlimit)
-		new_power = HIGH_POWER;
-	if (new_power == rps->power)
-		return;
+	/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
+	I915_WRITE(_3D_CHICKEN,
+		   _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
 
-	/* Note the units here are not exactly 1us, but 1280ns. */
-	switch (new_power) {
-	case LOW_POWER:
-		/* Upclock if more than 95% busy over 16ms */
-		ei_up = 16000;
-		threshold_up = 95;
+	/* WaDisable_RenderCache_OperationalFlush:snb */
+	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
 
-		/* Downclock if less than 85% busy over 32ms */
-		ei_down = 32000;
-		threshold_down = 85;
-		break;
+	/*
+	 * BSpec recommends 8x4 when MSAA is used,
+	 * however in practice 16x4 seems fastest.
+	 *
+	 * Note that PS/WM thread counts depend on the WIZ hashing
+	 * disable bit, which we don't touch here, but it's good
+	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+	 */
+	I915_WRITE(GEN6_GT_MODE,
+		   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
 
-	case BETWEEN:
-		/* Upclock if more than 90% busy over 13ms */
-		ei_up = 13000;
-		threshold_up = 90;
+	I915_WRITE(CACHE_MODE_0,
+		   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
 
-		/* Downclock if less than 75% busy over 32ms */
-		ei_down = 32000;
-		threshold_down = 75;
-		break;
+	I915_WRITE(GEN6_UCGCTL1,
+		   I915_READ(GEN6_UCGCTL1) |
+		   GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
+		   GEN6_CSUNIT_CLOCK_GATE_DISABLE);
 
-	case HIGH_POWER:
-		/* Upclock if more than 85% busy over 10ms */
-		ei_up = 10000;
-		threshold_up = 85;
+	/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
+	 * gating disable must be set.  Failure to set it results in
+	 * flickering pixels due to Z write ordering failures after
+	 * some amount of runtime in the Mesa "fire" demo, and Unigine
+	 * Sanctuary and Tropics, and apparently anything else with
+	 * alpha test or pixel discard.
+	 *
+	 * According to the spec, bit 11 (RCCUNIT) must also be set,
+	 * but we didn't debug actual testcases to find it out.
+	 *
+	 * WaDisableRCCUnitClockGating:snb
+	 * WaDisableRCPBUnitClockGating:snb
+	 */
+	I915_WRITE(GEN6_UCGCTL2,
+		   GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
+		   GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
 
-		/* Downclock if less than 60% busy over 32ms */
-		ei_down = 32000;
-		threshold_down = 60;
-		break;
-	}
+	/* WaStripsFansDisableFastClipPerformanceFix:snb */
+	I915_WRITE(_3D_CHICKEN3,
+		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
 
-	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
-		/*
-		 * Baytrail and Braswell control the gpu frequency via the
-		 * punit, which is very slow and expensive to communicate with,
-		 * as we synchronously force the package to C0. If we try and
-		 * update the gpufreq too often we cause measurable system
-		 * load for little benefit (effectively stealing CPU time for
-		 * the GPU, negatively impacting overall throughput).
-		 */
-		ei_up <<= 2;
-		ei_down <<= 2;
-	}
+	/*
+	 * Bspec says:
+	 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
+	 * 3DSTATE_SF number of SF output attributes is more than 16."
+	 */
+	I915_WRITE(_3D_CHICKEN3,
+		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
 
-	I915_WRITE(GEN6_RP_UP_EI,
-		   GT_INTERVAL_FROM_US(dev_priv, ei_up));
-	I915_WRITE(GEN6_RP_UP_THRESHOLD,
-		   GT_INTERVAL_FROM_US(dev_priv,
-				       ei_up * threshold_up / 100));
+	/*
+	 * According to the spec the following bits should be
+	 * set in order to enable memory self-refresh and fbc:
+	 * The bit21 and bit22 of 0x42000
+	 * The bit21 and bit22 of 0x42004
+	 * The bit5 and bit7 of 0x42020
+	 * The bit14 of 0x70180
+	 * The bit14 of 0x71180
+	 *
+	 * WaFbcAsynchFlipDisableFbcQueue:snb
+	 */
+	I915_WRITE(ILK_DISPLAY_CHICKEN1,
+		   I915_READ(ILK_DISPLAY_CHICKEN1) |
+		   ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
+	I915_WRITE(ILK_DISPLAY_CHICKEN2,
+		   I915_READ(ILK_DISPLAY_CHICKEN2) |
+		   ILK_DPARB_GATE | ILK_VSDPFD_FULL);
+	I915_WRITE(ILK_DSPCLK_GATE_D,
+		   I915_READ(ILK_DSPCLK_GATE_D) |
+		   ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
+		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
 
-	I915_WRITE(GEN6_RP_DOWN_EI,
-		   GT_INTERVAL_FROM_US(dev_priv, ei_down));
-	I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
-		   GT_INTERVAL_FROM_US(dev_priv,
-				       ei_down * threshold_down / 100));
+	g4x_disable_trickle_feed(dev_priv);
 
-	I915_WRITE(GEN6_RP_CONTROL,
-		   GEN6_RP_MEDIA_TURBO |
-		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
-		   GEN6_RP_MEDIA_IS_GFX |
-		   GEN6_RP_ENABLE |
-		   GEN6_RP_UP_BUSY_AVG |
-		   GEN6_RP_DOWN_IDLE_AVG);
+	cpt_init_clock_gating(dev_priv);
 
-	rps->power = new_power;
-	rps->up_threshold = threshold_up;
-	rps->down_threshold = threshold_down;
-	rps->last_adj = 0;
+	gen6_check_mch_setup(dev_priv);
 }
 
-static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
+static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
 {
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 mask = 0;
-
-	/* We use UP_EI_EXPIRED interupts for both up/down in manual mode */
-	if (val > rps->min_freq_softlimit)
-		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
-	if (val < rps->max_freq_softlimit)
-		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
+	u32 reg = I915_READ(GEN7_FF_THREAD_MODE);
 
-	mask &= dev_priv->pm_rps_events;
+	/*
+	 * WaVSThreadDispatchOverride:ivb,vlv
+	 *
+	 * This actually overrides the dispatch
+	 * mode for all thread types.
+	 */
+	reg &= ~GEN7_FF_SCHED_MASK;
+	reg |= GEN7_FF_TS_SCHED_HW;
+	reg |= GEN7_FF_VS_SCHED_HW;
+	reg |= GEN7_FF_DS_SCHED_HW;
 
-	return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
+	I915_WRITE(GEN7_FF_THREAD_MODE, reg);
 }
 
-/* gen6_set_rps is called to update the frequency request, but should also be
- * called when the range (min_delay and max_delay) is modified so that we can
- * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
-static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
+static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
 {
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	/* min/max delay may still have been modified so be sure to
-	 * write the limits value.
+	/*
+	 * TODO: this bit should only be enabled when really needed, then
+	 * disabled when not needed anymore in order to save power.
 	 */
-	if (val != rps->cur_freq) {
-		gen6_set_rps_thresholds(dev_priv, val);
-
-		if (INTEL_GEN(dev_priv) >= 9)
-			I915_WRITE(GEN6_RPNSWREQ,
-				   GEN9_FREQUENCY(val));
-		else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
-			I915_WRITE(GEN6_RPNSWREQ,
-				   HSW_FREQUENCY(val));
-		else
-			I915_WRITE(GEN6_RPNSWREQ,
-				   GEN6_FREQUENCY(val) |
-				   GEN6_OFFSET(0) |
-				   GEN6_AGGRESSIVE_TURBO);
-	}
-
-	/* Make sure we continue to get interrupts
-	 * until we hit the minimum or maximum frequencies.
-	 */
-	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
-	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
-
-	rps->cur_freq = val;
-	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
-
-	return 0;
-}
-
-static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
-{
-	int err;
-
-	if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1),
-		      "Odd GPU freq value\n"))
-		val &= ~1;
-
-	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
-
-	if (val != dev_priv->gt_pm.rps.cur_freq) {
-		vlv_punit_get(dev_priv);
-		err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
-		vlv_punit_put(dev_priv);
-		if (err)
-			return err;
-
-		gen6_set_rps_thresholds(dev_priv, val);
-	}
-
-	dev_priv->gt_pm.rps.cur_freq = val;
-	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
-
-	return 0;
-}
-
-/* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
- *
- * * If Gfx is Idle, then
- * 1. Forcewake Media well.
- * 2. Request idle freq.
- * 3. Release Forcewake of Media well.
-*/
-static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 val = rps->idle_freq;
-	int err;
-
-	if (rps->cur_freq <= val)
-		return;
-
-	/* The punit delays the write of the frequency and voltage until it
-	 * determines the GPU is awake. During normal usage we don't want to
-	 * waste power changing the frequency if the GPU is sleeping (rc6).
-	 * However, the GPU and driver is now idle and we do not want to delay
-	 * switching to minimum voltage (reducing power whilst idle) as we do
-	 * not expect to be woken in the near future and so must flush the
-	 * change by waking the device.
-	 *
-	 * We choose to take the media powerwell (either would do to trick the
-	 * punit into committing the voltage change) as that takes a lot less
-	 * power than the render powerwell.
-	 */
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
-	err = valleyview_set_rps(dev_priv, val);
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
-
-	if (err)
-		DRM_ERROR("Failed to set RPS for idle\n");
-}
-
-void gen6_rps_busy(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	mutex_lock(&rps->lock);
-	if (rps->enabled) {
-		u8 freq;
-
-		if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
-			gen6_rps_reset_ei(dev_priv);
-		I915_WRITE(GEN6_PMINTRMSK,
-			   gen6_rps_pm_mask(dev_priv, rps->cur_freq));
-
-		gen6_enable_rps_interrupts(dev_priv);
-
-		/* Use the user's desired frequency as a guide, but for better
-		 * performance, jump directly to RPe as our starting frequency.
-		 */
-		freq = max(rps->cur_freq,
-			   rps->efficient_freq);
-
-		if (intel_set_rps(dev_priv,
-				  clamp(freq,
-					rps->min_freq_softlimit,
-					rps->max_freq_softlimit)))
-			DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
-	}
-	mutex_unlock(&rps->lock);
-}
-
-void gen6_rps_idle(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	/* Flush our bottom-half so that it does not race with us
-	 * setting the idle frequency and so that it is bounded by
-	 * our rpm wakeref. And then disable the interrupts to stop any
-	 * futher RPS reclocking whilst we are asleep.
-	 */
-	gen6_disable_rps_interrupts(dev_priv);
-
-	mutex_lock(&rps->lock);
-	if (rps->enabled) {
-		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-			vlv_set_rps_idle(dev_priv);
-		else
-			gen6_set_rps(dev_priv, rps->idle_freq);
-		rps->last_adj = 0;
-		I915_WRITE(GEN6_PMINTRMSK,
-			   gen6_sanitize_rps_pm_mask(dev_priv, ~0));
-	}
-	mutex_unlock(&rps->lock);
-}
-
-void gen6_rps_boost(struct i915_request *rq,
-		    struct intel_rps_client *rps_client)
-{
-	struct intel_rps *rps = &rq->i915->gt_pm.rps;
-	unsigned long flags;
-	bool boost;
-
-	/* This is intentionally racy! We peek at the state here, then
-	 * validate inside the RPS worker.
-	 */
-	if (!rps->enabled)
-		return;
-
-	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
-		return;
-
-	/* Serializes with i915_request_retire() */
-	boost = false;
-	spin_lock_irqsave(&rq->lock, flags);
-	if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) {
-		boost = !atomic_fetch_inc(&rps->num_waiters);
-		rq->waitboost = true;
-	}
-	spin_unlock_irqrestore(&rq->lock, flags);
-	if (!boost)
-		return;
-
-	if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
-		schedule_work(&rps->work);
-
-	atomic_inc(rps_client ? &rps_client->boosts : &rps->boosts);
-}
-
-int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	int err;
-
-	lockdep_assert_held(&rps->lock);
-	GEM_BUG_ON(val > rps->max_freq);
-	GEM_BUG_ON(val < rps->min_freq);
-
-	if (!rps->enabled) {
-		rps->cur_freq = val;
-		return 0;
-	}
-
-	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-		err = valleyview_set_rps(dev_priv, val);
-	else
-		err = gen6_set_rps(dev_priv, val);
-
-	return err;
-}
-
-static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-	I915_WRITE(GEN9_PG_ENABLE, 0);
-}
-
-static void gen9_disable_rps(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(GEN6_RP_CONTROL, 0);
-}
-
-static void gen6_disable_rc6(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-}
-
-static void gen6_disable_rps(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
-	I915_WRITE(GEN6_RP_CONTROL, 0);
-}
-
-static void cherryview_disable_rc6(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-}
-
-static void cherryview_disable_rps(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(GEN6_RP_CONTROL, 0);
-}
-
-static void valleyview_disable_rc6(struct drm_i915_private *dev_priv)
-{
-	/* We're doing forcewake before Disabling RC6,
-	 * This what the BIOS expects when going into suspend */
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-}
-
-static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(GEN6_RP_CONTROL, 0);
-}
-
-static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
-{
-	bool enable_rc6 = true;
-	unsigned long rc6_ctx_base;
-	u32 rc_ctl;
-	int rc_sw_target;
-
-	rc_ctl = I915_READ(GEN6_RC_CONTROL);
-	rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >>
-		       RC_SW_TARGET_STATE_SHIFT;
-	DRM_DEBUG_DRIVER("BIOS enabled RC states: "
-			 "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
-			 onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
-			 onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
-			 rc_sw_target);
-
-	if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
-		DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
-		enable_rc6 = false;
-	}
-
-	/*
-	 * The exact context size is not known for BXT, so assume a page size
-	 * for this check.
-	 */
-	rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
-	if (!((rc6_ctx_base >= dev_priv->dsm_reserved.start) &&
-	      (rc6_ctx_base + PAGE_SIZE < dev_priv->dsm_reserved.end))) {
-		DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
-		enable_rc6 = false;
-	}
-
-	if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) &&
-	      ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) &&
-	      ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) &&
-	      ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) {
-		DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
-		enable_rc6 = false;
-	}
-
-	if (!I915_READ(GEN8_PUSHBUS_CONTROL) ||
-	    !I915_READ(GEN8_PUSHBUS_ENABLE) ||
-	    !I915_READ(GEN8_PUSHBUS_SHIFT)) {
-		DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
-		enable_rc6 = false;
-	}
-
-	if (!I915_READ(GEN6_GFXPAUSE)) {
-		DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
-		enable_rc6 = false;
-	}
-
-	if (!I915_READ(GEN8_MISC_CTRL0)) {
-		DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
-		enable_rc6 = false;
-	}
-
-	return enable_rc6;
-}
-
-static bool sanitize_rc6(struct drm_i915_private *i915)
-{
-	struct intel_device_info *info = mkwrite_device_info(i915);
-
-	/* Powersaving is controlled by the host when inside a VM */
-	if (intel_vgpu_active(i915))
-		info->has_rc6 = 0;
-
-	if (info->has_rc6 &&
-	    IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) {
-		DRM_INFO("RC6 disabled by BIOS\n");
-		info->has_rc6 = 0;
-	}
-
-	/*
-	 * We assume that we do not have any deep rc6 levels if we don't have
-	 * have the previous rc6 level supported, i.e. we use HAS_RC6()
-	 * as the initial coarse check for rc6 in general, moving on to
-	 * progressively finer/deeper levels.
-	 */
-	if (!info->has_rc6 && info->has_rc6p)
-		info->has_rc6p = 0;
-
-	return info->has_rc6;
-}
-
-static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	/* All of these values are in units of 50MHz */
-
-	/* static values from HW: RP0 > RP1 > RPn (min_freq) */
-	if (IS_GEN9_LP(dev_priv)) {
-		u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
-		rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
-		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
-		rps->min_freq = (rp_state_cap >>  0) & 0xff;
-	} else {
-		u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
-		rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
-		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
-		rps->min_freq = (rp_state_cap >> 16) & 0xff;
-	}
-	/* hw_max = RP0 until we check for overclocking */
-	rps->max_freq = rps->rp0_freq;
-
-	rps->efficient_freq = rps->rp1_freq;
-	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
-	    IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
-		u32 ddcc_status = 0;
-
-		if (sandybridge_pcode_read(dev_priv,
-					   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
-					   &ddcc_status) == 0)
-			rps->efficient_freq =
-				clamp_t(u8,
-					((ddcc_status >> 8) & 0xff),
-					rps->min_freq,
-					rps->max_freq);
-	}
-
-	if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
-		/* Store the frequency values in 16.66 MHZ units, which is
-		 * the natural hardware unit for SKL
-		 */
-		rps->rp0_freq *= GEN9_FREQ_SCALER;
-		rps->rp1_freq *= GEN9_FREQ_SCALER;
-		rps->min_freq *= GEN9_FREQ_SCALER;
-		rps->max_freq *= GEN9_FREQ_SCALER;
-		rps->efficient_freq *= GEN9_FREQ_SCALER;
-	}
-}
-
-static void reset_rps(struct drm_i915_private *dev_priv,
-		      int (*set)(struct drm_i915_private *, u8))
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u8 freq = rps->cur_freq;
-
-	/* force a reset */
-	rps->power = -1;
-	rps->cur_freq = -1;
-
-	if (set(dev_priv, freq))
-		DRM_ERROR("Failed to reset RPS to initial values\n");
-}
-
-/* See the Gen9_GT_PM_Programming_Guide doc for the below */
-static void gen9_enable_rps(struct drm_i915_private *dev_priv)
-{
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	/* Program defaults and thresholds for RPS */
-	if (IS_GEN9(dev_priv))
-		I915_WRITE(GEN6_RC_VIDEO_FREQ,
-			GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq));
-
-	/* 1 second timeout*/
-	I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
-		GT_INTERVAL_FROM_US(dev_priv, 1000000));
-
-	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
-
-	/* Leaning on the below call to gen6_set_rps to program/setup the
-	 * Up/Down EI & threshold registers, as well as the RP_CONTROL,
-	 * RP_INTERRUPT_LIMITS & RPNSWREQ registers */
-	reset_rps(dev_priv, gen6_set_rps);
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-}
-
-static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	u32 rc6_mode;
-
-	/* 1a: Software RC state - RC0 */
-	I915_WRITE(GEN6_RC_STATE, 0);
-
-	/* 1b: Get forcewake during program sequence. Although the driver
-	 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	/* 2a: Disable RC states. */
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-
-	/* 2b: Program RC6 thresholds.*/
-	if (INTEL_GEN(dev_priv) >= 10) {
-		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
-		I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
-	} else if (IS_SKYLAKE(dev_priv)) {
-		/*
-		 * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
-		 * when CPG is enabled
-		 */
-		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
-	} else {
-		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
-	}
-
-	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
-	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
-	for_each_engine(engine, dev_priv, id)
-		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
-
-	if (HAS_GUC(dev_priv))
-		I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
-
-	I915_WRITE(GEN6_RC_SLEEP, 0);
-
-	/*
-	 * 2c: Program Coarse Power Gating Policies.
-	 *
-	 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
-	 * use instead is a more conservative estimate for the maximum time
-	 * it takes us to service a CS interrupt and submit a new ELSP - that
-	 * is the time which the GPU is idle waiting for the CPU to select the
-	 * next request to execute. If the idle hysteresis is less than that
-	 * interrupt service latency, the hardware will automatically gate
-	 * the power well and we will then incur the wake up cost on top of
-	 * the service latency. A similar guide from intel_pstate is that we
-	 * do not want the enable hysteresis to less than the wakeup latency.
-	 *
-	 * igt/gem_exec_nop/sequential provides a rough estimate for the
-	 * service latency, and puts it around 10us for Broadwell (and other
-	 * big core) and around 40us for Broxton (and other low power cores).
-	 * [Note that for legacy ringbuffer submission, this is less than 1us!]
-	 * However, the wakeup latency on Broxton is closer to 100us. To be
-	 * conservative, we have to factor in a context switch on top (due
-	 * to ksoftirqd).
-	 */
-	I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
-	I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
-
-	/* 3a: Enable RC6 */
-	I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
-
-	/* WaRsUseTimeoutMode:cnl (pre-prod) */
-	if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0))
-		rc6_mode = GEN7_RC_CTL_TO_MODE;
-	else
-		rc6_mode = GEN6_RC_CTL_EI_MODE(1);
-
-	I915_WRITE(GEN6_RC_CONTROL,
-		   GEN6_RC_CTL_HW_ENABLE |
-		   GEN6_RC_CTL_RC6_ENABLE |
-		   rc6_mode);
-
-	/*
-	 * 3b: Enable Coarse Power Gating only when RC6 is enabled.
-	 * WaRsDisableCoarsePowerGating:skl,cnl - Render/Media PG need to be disabled with RC6.
-	 */
-	if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
-		I915_WRITE(GEN9_PG_ENABLE, 0);
-	else
-		I915_WRITE(GEN9_PG_ENABLE,
-			   GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-}
-
-static void gen8_enable_rc6(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	/* 1a: Software RC state - RC0 */
-	I915_WRITE(GEN6_RC_STATE, 0);
-
-	/* 1b: Get forcewake during program sequence. Although the driver
-	 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	/* 2a: Disable RC states. */
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-
-	/* 2b: Program RC6 thresholds.*/
-	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
-	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
-	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
-	for_each_engine(engine, dev_priv, id)
-		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
-	I915_WRITE(GEN6_RC_SLEEP, 0);
-	I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
-
-	/* 3: Enable RC6 */
-
-	I915_WRITE(GEN6_RC_CONTROL,
-		   GEN6_RC_CTL_HW_ENABLE |
-		   GEN7_RC_CTL_TO_MODE |
-		   GEN6_RC_CTL_RC6_ENABLE);
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-}
-
-static void gen8_enable_rps(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	/* 1 Program defaults and thresholds for RPS*/
-	I915_WRITE(GEN6_RPNSWREQ,
-		   HSW_FREQUENCY(rps->rp1_freq));
-	I915_WRITE(GEN6_RC_VIDEO_FREQ,
-		   HSW_FREQUENCY(rps->rp1_freq));
-	/* NB: Docs say 1s, and 1000000 - which aren't equivalent */
-	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
-
-	/* Docs recommend 900MHz, and 300 MHz respectively */
-	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
-		   rps->max_freq_softlimit << 24 |
-		   rps->min_freq_softlimit << 16);
-
-	I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
-	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
-	I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
-	I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
-
-	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
-
-	/* 2: Enable RPS */
-	I915_WRITE(GEN6_RP_CONTROL,
-		   GEN6_RP_MEDIA_TURBO |
-		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
-		   GEN6_RP_MEDIA_IS_GFX |
-		   GEN6_RP_ENABLE |
-		   GEN6_RP_UP_BUSY_AVG |
-		   GEN6_RP_DOWN_IDLE_AVG);
-
-	reset_rps(dev_priv, gen6_set_rps);
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-}
-
-static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	u32 rc6vids, rc6_mask;
-	u32 gtfifodbg;
-	int ret;
-
-	I915_WRITE(GEN6_RC_STATE, 0);
-
-	/* Clear the DBG now so we don't confuse earlier errors */
-	gtfifodbg = I915_READ(GTFIFODBG);
-	if (gtfifodbg) {
-		DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
-		I915_WRITE(GTFIFODBG, gtfifodbg);
-	}
-
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	/* disable the counters and set deterministic thresholds */
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-
-	I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
-	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
-	I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
-	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
-	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
-
-	for_each_engine(engine, dev_priv, id)
-		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
-
-	I915_WRITE(GEN6_RC_SLEEP, 0);
-	I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
-	if (IS_IVYBRIDGE(dev_priv))
-		I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
-	else
-		I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
-	I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
-	I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
-
-	/* We don't use those on Haswell */
-	rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
-	if (HAS_RC6p(dev_priv))
-		rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
-	if (HAS_RC6pp(dev_priv))
-		rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
-	I915_WRITE(GEN6_RC_CONTROL,
-		   rc6_mask |
-		   GEN6_RC_CTL_EI_MODE(1) |
-		   GEN6_RC_CTL_HW_ENABLE);
-
-	rc6vids = 0;
-	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
-	if (IS_GEN6(dev_priv) && ret) {
-		DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
-	} else if (IS_GEN6(dev_priv) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
-		DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
-			  GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
-		rc6vids &= 0xffff00;
-		rc6vids |= GEN6_ENCODE_RC6_VID(450);
-		ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
-		if (ret)
-			DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
-	}
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-}
-
-static void gen6_enable_rps(struct drm_i915_private *dev_priv)
-{
-	/* Here begins a magic sequence of register writes to enable
-	 * auto-downclocking.
-	 *
-	 * Perhaps there might be some value in exposing these to
-	 * userspace...
-	 */
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	/* Power down if completely idle for over 50ms */
-	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
-	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
-
-	reset_rps(dev_priv, gen6_set_rps);
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-}
-
-static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	const int min_freq = 15;
-	const int scaling_factor = 180;
-	unsigned int gpu_freq;
-	unsigned int max_ia_freq, min_ring_freq;
-	unsigned int max_gpu_freq, min_gpu_freq;
-	struct cpufreq_policy *policy;
-
-	lockdep_assert_held(&rps->lock);
-
-	if (rps->max_freq <= rps->min_freq)
-		return;
-
-	policy = cpufreq_cpu_get(0);
-	if (policy) {
-		max_ia_freq = policy->cpuinfo.max_freq;
-		cpufreq_cpu_put(policy);
-	} else {
-		/*
-		 * Default to measured freq if none found, PCU will ensure we
-		 * don't go over
-		 */
-		max_ia_freq = tsc_khz;
-	}
-
-	/* Convert from kHz to MHz */
-	max_ia_freq /= 1000;
-
-	min_ring_freq = I915_READ(DCLK) & 0xf;
-	/* convert DDR frequency from units of 266.6MHz to bandwidth */
-	min_ring_freq = mult_frac(min_ring_freq, 8, 3);
-
-	min_gpu_freq = rps->min_freq;
-	max_gpu_freq = rps->max_freq;
-	if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
-		/* Convert GT frequency to 50 MHz units */
-		min_gpu_freq /= GEN9_FREQ_SCALER;
-		max_gpu_freq /= GEN9_FREQ_SCALER;
-	}
-
-	/*
-	 * For each potential GPU frequency, load a ring frequency we'd like
-	 * to use for memory access.  We do this by specifying the IA frequency
-	 * the PCU should use as a reference to determine the ring frequency.
-	 */
-	for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
-		const int diff = max_gpu_freq - gpu_freq;
-		unsigned int ia_freq = 0, ring_freq = 0;
-
-		if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
-			/*
-			 * ring_freq = 2 * GT. ring_freq is in 100MHz units
-			 * No floor required for ring frequency on SKL.
-			 */
-			ring_freq = gpu_freq;
-		} else if (INTEL_GEN(dev_priv) >= 8) {
-			/* max(2 * GT, DDR). NB: GT is 50MHz units */
-			ring_freq = max(min_ring_freq, gpu_freq);
-		} else if (IS_HASWELL(dev_priv)) {
-			ring_freq = mult_frac(gpu_freq, 5, 4);
-			ring_freq = max(min_ring_freq, ring_freq);
-			/* leave ia_freq as the default, chosen by cpufreq */
-		} else {
-			/* On older processors, there is no separate ring
-			 * clock domain, so in order to boost the bandwidth
-			 * of the ring, we need to upclock the CPU (ia_freq).
-			 *
-			 * For GPU frequencies less than 750MHz,
-			 * just use the lowest ring freq.
-			 */
-			if (gpu_freq < min_freq)
-				ia_freq = 800;
-			else
-				ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
-			ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
-		}
-
-		sandybridge_pcode_write(dev_priv,
-					GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
-					ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
-					ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
-					gpu_freq);
-	}
-}
-
-static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val, rp0;
-
-	val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
-
-	switch (INTEL_INFO(dev_priv)->sseu.eu_total) {
-	case 8:
-		/* (2 * 4) config */
-		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
-		break;
-	case 12:
-		/* (2 * 6) config */
-		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
-		break;
-	case 16:
-		/* (2 * 8) config */
-	default:
-		/* Setting (2 * 8) Min RP0 for any other combination */
-		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
-		break;
-	}
-
-	rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
-
-	return rp0;
-}
-
-static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val, rpe;
-
-	val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
-	rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
-
-	return rpe;
-}
-
-static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val, rp1;
-
-	val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
-	rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
-
-	return rp1;
-}
-
-static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val, rpn;
-
-	val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
-	rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
-		       FB_GFX_FREQ_FUSE_MASK);
-
-	return rpn;
-}
-
-static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val, rp1;
-
-	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
-
-	rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
-
-	return rp1;
-}
-
-static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val, rp0;
-
-	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
-
-	rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
-	/* Clamp to max */
-	rp0 = min_t(u32, rp0, 0xea);
-
-	return rp0;
-}
-
-static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val, rpe;
-
-	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
-	rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
-	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
-	rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
-
-	return rpe;
-}
-
-static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val;
-
-	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
-	/*
-	 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
-	 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
-	 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
-	 * a frequency Punit will not allow values below 0xc0. Clamp it to 0xc0
-	 * to make sure it matches what Punit accepts.
-	 */
-	return max_t(u32, val, 0xc0);
-}
-
-/* Check that the pctx buffer wasn't moved under us. */
-static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
-{
-	unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
-
-	WARN_ON(pctx_addr != dev_priv->dsm.start +
-			     dev_priv->vlv_pctx->stolen->start);
-}
-
-
-/* Check that the pcbr address is not empty. */
-static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
-{
-	unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
-
-	WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
-}
-
-static void cherryview_setup_pctx(struct drm_i915_private *dev_priv)
-{
-	resource_size_t pctx_paddr, paddr;
-	resource_size_t pctx_size = 32*1024;
-	u32 pcbr;
-
-	pcbr = I915_READ(VLV_PCBR);
-	if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
-		DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
-		paddr = dev_priv->dsm.end + 1 - pctx_size;
-		GEM_BUG_ON(paddr > U32_MAX);
-
-		pctx_paddr = (paddr & (~4095));
-		I915_WRITE(VLV_PCBR, pctx_paddr);
-	}
-
-	DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
-}
-
-static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
-{
-	struct drm_i915_gem_object *pctx;
-	resource_size_t pctx_paddr;
-	resource_size_t pctx_size = 24*1024;
-	u32 pcbr;
-
-	pcbr = I915_READ(VLV_PCBR);
-	if (pcbr) {
-		/* BIOS set it up already, grab the pre-alloc'd space */
-		resource_size_t pcbr_offset;
-
-		pcbr_offset = (pcbr & (~4095)) - dev_priv->dsm.start;
-		pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv,
-								      pcbr_offset,
-								      I915_GTT_OFFSET_NONE,
-								      pctx_size);
-		goto out;
-	}
-
-	DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
-
-	/*
-	 * From the Gunit register HAS:
-	 * The Gfx driver is expected to program this register and ensure
-	 * proper allocation within Gfx stolen memory.  For example, this
-	 * register should be programmed such that the PCBR range does not
-	 * overlap with other ranges, such as the frame buffer, protected
-	 * memory, or any other relevant ranges.
-	 */
-	pctx = i915_gem_object_create_stolen(dev_priv, pctx_size);
-	if (!pctx) {
-		DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
-		goto out;
-	}
-
-	GEM_BUG_ON(range_overflows_t(u64,
-				     dev_priv->dsm.start,
-				     pctx->stolen->start,
-				     U32_MAX));
-	pctx_paddr = dev_priv->dsm.start + pctx->stolen->start;
-	I915_WRITE(VLV_PCBR, pctx_paddr);
-
-out:
-	DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
-	dev_priv->vlv_pctx = pctx;
-}
-
-static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
-{
-	if (WARN_ON(!dev_priv->vlv_pctx))
-		return;
-
-	i915_gem_object_put(dev_priv->vlv_pctx);
-	dev_priv->vlv_pctx = NULL;
-}
-
-static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
-{
-	dev_priv->gt_pm.rps.gpll_ref_freq =
-		vlv_get_cck_clock(dev_priv, "GPLL ref",
-				  CCK_GPLL_CLOCK_CONTROL,
-				  dev_priv->czclk_freq);
-
-	DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
-			 dev_priv->gt_pm.rps.gpll_ref_freq);
-}
-
-static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 val;
-
-	valleyview_setup_pctx(dev_priv);
-
-	vlv_iosf_sb_get(dev_priv,
-			BIT(VLV_IOSF_SB_PUNIT) |
-			BIT(VLV_IOSF_SB_NC) |
-			BIT(VLV_IOSF_SB_CCK));
-
-	vlv_init_gpll_ref_freq(dev_priv);
-
-	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
-	switch ((val >> 6) & 3) {
-	case 0:
-	case 1:
-		dev_priv->mem_freq = 800;
-		break;
-	case 2:
-		dev_priv->mem_freq = 1066;
-		break;
-	case 3:
-		dev_priv->mem_freq = 1333;
-		break;
-	}
-	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
-
-	rps->max_freq = valleyview_rps_max_freq(dev_priv);
-	rps->rp0_freq = rps->max_freq;
-	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->max_freq),
-			 rps->max_freq);
-
-	rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv);
-	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->efficient_freq),
-			 rps->efficient_freq);
-
-	rps->rp1_freq = valleyview_rps_guar_freq(dev_priv);
-	DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->rp1_freq),
-			 rps->rp1_freq);
-
-	rps->min_freq = valleyview_rps_min_freq(dev_priv);
-	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->min_freq),
-			 rps->min_freq);
-
-	vlv_iosf_sb_put(dev_priv,
-			BIT(VLV_IOSF_SB_PUNIT) |
-			BIT(VLV_IOSF_SB_NC) |
-			BIT(VLV_IOSF_SB_CCK));
-}
-
-static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 val;
-
-	cherryview_setup_pctx(dev_priv);
-
-	vlv_iosf_sb_get(dev_priv,
-			BIT(VLV_IOSF_SB_PUNIT) |
-			BIT(VLV_IOSF_SB_NC) |
-			BIT(VLV_IOSF_SB_CCK));
-
-	vlv_init_gpll_ref_freq(dev_priv);
-
-	val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
-
-	switch ((val >> 2) & 0x7) {
-	case 3:
-		dev_priv->mem_freq = 2000;
-		break;
-	default:
-		dev_priv->mem_freq = 1600;
-		break;
-	}
-	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
-
-	rps->max_freq = cherryview_rps_max_freq(dev_priv);
-	rps->rp0_freq = rps->max_freq;
-	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->max_freq),
-			 rps->max_freq);
-
-	rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv);
-	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->efficient_freq),
-			 rps->efficient_freq);
-
-	rps->rp1_freq = cherryview_rps_guar_freq(dev_priv);
-	DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->rp1_freq),
-			 rps->rp1_freq);
-
-	rps->min_freq = cherryview_rps_min_freq(dev_priv);
-	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->min_freq),
-			 rps->min_freq);
-
-	vlv_iosf_sb_put(dev_priv,
-			BIT(VLV_IOSF_SB_PUNIT) |
-			BIT(VLV_IOSF_SB_NC) |
-			BIT(VLV_IOSF_SB_CCK));
-
-	WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
-		   rps->min_freq) & 1,
-		  "Odd GPU freq values\n");
-}
-
-static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	valleyview_cleanup_pctx(dev_priv);
-}
-
-static void cherryview_enable_rc6(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	u32 gtfifodbg, rc6_mode, pcbr;
-
-	gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
-					     GT_FIFO_FREE_ENTRIES_CHV);
-	if (gtfifodbg) {
-		DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
-				 gtfifodbg);
-		I915_WRITE(GTFIFODBG, gtfifodbg);
-	}
-
-	cherryview_check_pctx(dev_priv);
-
-	/* 1a & 1b: Get forcewake during program sequence. Although the driver
-	 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	/*  Disable RC states. */
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-
-	/* 2a: Program RC6 thresholds.*/
-	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
-	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
-	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
-
-	for_each_engine(engine, dev_priv, id)
-		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
-	I915_WRITE(GEN6_RC_SLEEP, 0);
-
-	/* TO threshold set to 500 us ( 0x186 * 1.28 us) */
-	I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
-
-	/* Allows RC6 residency counter to work */
-	I915_WRITE(VLV_COUNTER_CONTROL,
-		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
-				      VLV_MEDIA_RC6_COUNT_EN |
-				      VLV_RENDER_RC6_COUNT_EN));
-
-	/* For now we assume BIOS is allocating and populating the PCBR  */
-	pcbr = I915_READ(VLV_PCBR);
-
-	/* 3: Enable RC6 */
-	rc6_mode = 0;
-	if (pcbr >> VLV_PCBR_ADDR_SHIFT)
-		rc6_mode = GEN7_RC_CTL_TO_MODE;
-	I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-}
-
-static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
-{
-	u32 val;
-
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	/* 1: Program defaults and thresholds for RPS*/
-	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
-	I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
-	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
-	I915_WRITE(GEN6_RP_UP_EI, 66000);
-	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
-
-	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
-
-	/* 2: Enable RPS */
-	I915_WRITE(GEN6_RP_CONTROL,
-		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
-		   GEN6_RP_MEDIA_IS_GFX |
-		   GEN6_RP_ENABLE |
-		   GEN6_RP_UP_BUSY_AVG |
-		   GEN6_RP_DOWN_IDLE_AVG);
-
-	/* Setting Fixed Bias */
-	vlv_punit_get(dev_priv);
-
-	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
-	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
-
-	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
-
-	vlv_punit_put(dev_priv);
-
-	/* RPS code assumes GPLL is used */
-	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
-
-	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
-	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
-
-	reset_rps(dev_priv, valleyview_set_rps);
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-}
-
-static void valleyview_enable_rc6(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	u32 gtfifodbg;
-
-	valleyview_check_pctx(dev_priv);
-
-	gtfifodbg = I915_READ(GTFIFODBG);
-	if (gtfifodbg) {
-		DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
-				 gtfifodbg);
-		I915_WRITE(GTFIFODBG, gtfifodbg);
-	}
-
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	/*  Disable RC states. */
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-
-	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
-	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
-	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
-
-	for_each_engine(engine, dev_priv, id)
-		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
-
-	I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
-
-	/* Allows RC6 residency counter to work */
-	I915_WRITE(VLV_COUNTER_CONTROL,
-		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
-				      VLV_MEDIA_RC0_COUNT_EN |
-				      VLV_RENDER_RC0_COUNT_EN |
-				      VLV_MEDIA_RC6_COUNT_EN |
-				      VLV_RENDER_RC6_COUNT_EN));
-
-	I915_WRITE(GEN6_RC_CONTROL,
-		   GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-}
-
-static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
-{
-	u32 val;
-
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
-	I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
-	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
-	I915_WRITE(GEN6_RP_UP_EI, 66000);
-	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
-
-	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
-
-	I915_WRITE(GEN6_RP_CONTROL,
-		   GEN6_RP_MEDIA_TURBO |
-		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
-		   GEN6_RP_MEDIA_IS_GFX |
-		   GEN6_RP_ENABLE |
-		   GEN6_RP_UP_BUSY_AVG |
-		   GEN6_RP_DOWN_IDLE_CONT);
-
-	vlv_punit_get(dev_priv);
-
-	/* Setting Fixed Bias */
-	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
-	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
-
-	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
-
-	vlv_punit_put(dev_priv);
-
-	/* RPS code assumes GPLL is used */
-	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
-
-	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
-	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
-
-	reset_rps(dev_priv, valleyview_set_rps);
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-}
-
-static unsigned long intel_pxfreq(u32 vidfreq)
-{
-	unsigned long freq;
-	int div = (vidfreq & 0x3f0000) >> 16;
-	int post = (vidfreq & 0x3000) >> 12;
-	int pre = (vidfreq & 0x7);
-
-	if (!pre)
-		return 0;
-
-	freq = ((div * 133333) / ((1<<post) * pre));
-
-	return freq;
-}
-
-static const struct cparams {
-	u16 i;
-	u16 t;
-	u16 m;
-	u16 c;
-} cparams[] = {
-	{ 1, 1333, 301, 28664 },
-	{ 1, 1066, 294, 24460 },
-	{ 1, 800, 294, 25192 },
-	{ 0, 1333, 276, 27605 },
-	{ 0, 1066, 276, 27605 },
-	{ 0, 800, 231, 23784 },
-};
-
-static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
-{
-	u64 total_count, diff, ret;
-	u32 count1, count2, count3, m = 0, c = 0;
-	unsigned long now = jiffies_to_msecs(jiffies), diff1;
-	int i;
-
-	lockdep_assert_held(&mchdev_lock);
-
-	diff1 = now - dev_priv->ips.last_time1;
-
-	/* Prevent division-by-zero if we are asking too fast.
-	 * Also, we don't get interesting results if we are polling
-	 * faster than once in 10ms, so just return the saved value
-	 * in such cases.
-	 */
-	if (diff1 <= 10)
-		return dev_priv->ips.chipset_power;
-
-	count1 = I915_READ(DMIEC);
-	count2 = I915_READ(DDREC);
-	count3 = I915_READ(CSIEC);
-
-	total_count = count1 + count2 + count3;
-
-	/* FIXME: handle per-counter overflow */
-	if (total_count < dev_priv->ips.last_count1) {
-		diff = ~0UL - dev_priv->ips.last_count1;
-		diff += total_count;
-	} else {
-		diff = total_count - dev_priv->ips.last_count1;
-	}
-
-	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
-		if (cparams[i].i == dev_priv->ips.c_m &&
-		    cparams[i].t == dev_priv->ips.r_t) {
-			m = cparams[i].m;
-			c = cparams[i].c;
-			break;
-		}
-	}
-
-	diff = div_u64(diff, diff1);
-	ret = ((m * diff) + c);
-	ret = div_u64(ret, 10);
-
-	dev_priv->ips.last_count1 = total_count;
-	dev_priv->ips.last_time1 = now;
-
-	dev_priv->ips.chipset_power = ret;
-
-	return ret;
-}
-
-unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
-{
-	unsigned long val;
-
-	if (!IS_GEN5(dev_priv))
-		return 0;
-
-	intel_runtime_pm_get(dev_priv);
-	spin_lock_irq(&mchdev_lock);
-
-	val = __i915_chipset_val(dev_priv);
-
-	spin_unlock_irq(&mchdev_lock);
-	intel_runtime_pm_put(dev_priv);
-
-	return val;
-}
-
-unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
-{
-	unsigned long m, x, b;
-	u32 tsfs;
-
-	tsfs = I915_READ(TSFS);
-
-	m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
-	x = I915_READ8(TR1);
-
-	b = tsfs & TSFS_INTR_MASK;
-
-	return ((m * x) / 127) - b;
-}
-
-static int _pxvid_to_vd(u8 pxvid)
-{
-	if (pxvid == 0)
-		return 0;
-
-	if (pxvid >= 8 && pxvid < 31)
-		pxvid = 31;
-
-	return (pxvid + 2) * 125;
-}
-
-static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
-{
-	const int vd = _pxvid_to_vd(pxvid);
-	const int vm = vd - 1125;
-
-	if (INTEL_INFO(dev_priv)->is_mobile)
-		return vm > 0 ? vm : 0;
-
-	return vd;
-}
-
-static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
-{
-	u64 now, diff, diffms;
-	u32 count;
-
-	lockdep_assert_held(&mchdev_lock);
-
-	now = ktime_get_raw_ns();
-	diffms = now - dev_priv->ips.last_time2;
-	do_div(diffms, NSEC_PER_MSEC);
-
-	/* Don't divide by 0 */
-	if (!diffms)
-		return;
-
-	count = I915_READ(GFXEC);
-
-	if (count < dev_priv->ips.last_count2) {
-		diff = ~0UL - dev_priv->ips.last_count2;
-		diff += count;
-	} else {
-		diff = count - dev_priv->ips.last_count2;
-	}
-
-	dev_priv->ips.last_count2 = count;
-	dev_priv->ips.last_time2 = now;
-
-	/* More magic constants... */
-	diff = diff * 1181;
-	diff = div_u64(diff, diffms * 10);
-	dev_priv->ips.gfx_power = diff;
-}
-
-void i915_update_gfx_val(struct drm_i915_private *dev_priv)
-{
-	if (!IS_GEN5(dev_priv))
-		return;
-
-	intel_runtime_pm_get(dev_priv);
-	spin_lock_irq(&mchdev_lock);
-
-	__i915_update_gfx_val(dev_priv);
-
-	spin_unlock_irq(&mchdev_lock);
-	intel_runtime_pm_put(dev_priv);
-}
-
-static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
-{
-	unsigned long t, corr, state1, corr2, state2;
-	u32 pxvid, ext_v;
-
-	lockdep_assert_held(&mchdev_lock);
-
-	pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq));
-	pxvid = (pxvid >> 24) & 0x7f;
-	ext_v = pvid_to_extvid(dev_priv, pxvid);
-
-	state1 = ext_v;
-
-	t = i915_mch_val(dev_priv);
-
-	/* Revel in the empirically derived constants */
-
-	/* Correction factor in 1/100000 units */
-	if (t > 80)
-		corr = ((t * 2349) + 135940);
-	else if (t >= 50)
-		corr = ((t * 964) + 29317);
-	else /* < 50 */
-		corr = ((t * 301) + 1004);
-
-	corr = corr * ((150142 * state1) / 10000 - 78642);
-	corr /= 100000;
-	corr2 = (corr * dev_priv->ips.corr);
-
-	state2 = (corr2 * state1) / 10000;
-	state2 /= 100; /* convert to mW */
-
-	__i915_update_gfx_val(dev_priv);
-
-	return dev_priv->ips.gfx_power + state2;
-}
-
-unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
-{
-	unsigned long val;
-
-	if (!IS_GEN5(dev_priv))
-		return 0;
-
-	intel_runtime_pm_get(dev_priv);
-	spin_lock_irq(&mchdev_lock);
-
-	val = __i915_gfx_val(dev_priv);
-
-	spin_unlock_irq(&mchdev_lock);
-	intel_runtime_pm_put(dev_priv);
-
-	return val;
-}
-
-static struct drm_i915_private *i915_mch_dev;
-
-static struct drm_i915_private *mchdev_get(void)
-{
-	struct drm_i915_private *i915;
-
-	rcu_read_lock();
-	i915 = i915_mch_dev;
-	if (!kref_get_unless_zero(&i915->drm.ref))
-		i915 = NULL;
-	rcu_read_unlock();
-
-	return i915;
-}
-
-/**
- * i915_read_mch_val - return value for IPS use
- *
- * Calculate and return a value for the IPS driver to use when deciding whether
- * we have thermal and power headroom to increase CPU or GPU power budget.
- */
-unsigned long i915_read_mch_val(void)
-{
-	struct drm_i915_private *i915;
-	unsigned long chipset_val, graphics_val;
-
-	i915 = mchdev_get();
-	if (!i915)
-		return 0;
-
-	intel_runtime_pm_get(i915);
-	spin_lock_irq(&mchdev_lock);
-	chipset_val = __i915_chipset_val(i915);
-	graphics_val = __i915_gfx_val(i915);
-	spin_unlock_irq(&mchdev_lock);
-	intel_runtime_pm_put(i915);
-
-	drm_dev_put(&i915->drm);
-	return chipset_val + graphics_val;
-}
-EXPORT_SYMBOL_GPL(i915_read_mch_val);
-
-/**
- * i915_gpu_raise - raise GPU frequency limit
- *
- * Raise the limit; IPS indicates we have thermal headroom.
- */
-bool i915_gpu_raise(void)
-{
-	struct drm_i915_private *i915;
-
-	i915 = mchdev_get();
-	if (!i915)
-		return false;
-
-	spin_lock_irq(&mchdev_lock);
-	if (i915->ips.max_delay > i915->ips.fmax)
-		i915->ips.max_delay--;
-	spin_unlock_irq(&mchdev_lock);
-
-	drm_dev_put(&i915->drm);
-	return true;
-}
-EXPORT_SYMBOL_GPL(i915_gpu_raise);
-
-/**
- * i915_gpu_lower - lower GPU frequency limit
- *
- * IPS indicates we're close to a thermal limit, so throttle back the GPU
- * frequency maximum.
- */
-bool i915_gpu_lower(void)
-{
-	struct drm_i915_private *i915;
-
-	i915 = mchdev_get();
-	if (!i915)
-		return false;
-
-	spin_lock_irq(&mchdev_lock);
-	if (i915->ips.max_delay < i915->ips.min_delay)
-		i915->ips.max_delay++;
-	spin_unlock_irq(&mchdev_lock);
-
-	drm_dev_put(&i915->drm);
-	return true;
-}
-EXPORT_SYMBOL_GPL(i915_gpu_lower);
-
-/**
- * i915_gpu_busy - indicate GPU busyness to IPS
- *
- * Tell the IPS driver whether or not the GPU is busy.
- */
-bool i915_gpu_busy(void)
-{
-	struct drm_i915_private *i915;
-	bool ret;
-
-	i915 = mchdev_get();
-	if (!i915)
-		return false;
-
-	ret = i915->gt.awake;
-
-	drm_dev_put(&i915->drm);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(i915_gpu_busy);
-
-/**
- * i915_gpu_turbo_disable - disable graphics turbo
- *
- * Disable graphics turbo by resetting the max frequency and setting the
- * current frequency to the default.
- */
-bool i915_gpu_turbo_disable(void)
-{
-	struct drm_i915_private *i915;
-	bool ret;
-
-	i915 = mchdev_get();
-	if (!i915)
-		return false;
-
-	spin_lock_irq(&mchdev_lock);
-	i915->ips.max_delay = i915->ips.fstart;
-	ret = ironlake_set_drps(i915, i915->ips.fstart);
-	spin_unlock_irq(&mchdev_lock);
-
-	drm_dev_put(&i915->drm);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
-
-/**
- * Tells the intel_ips driver that the i915 driver is now loaded, if
- * IPS got loaded first.
- *
- * This awkward dance is so that neither module has to depend on the
- * other in order for IPS to do the appropriate communication of
- * GPU turbo limits to i915.
- */
-static void
-ips_ping_for_i915_load(void)
-{
-	void (*link)(void);
-
-	link = symbol_get(ips_link_to_i915_driver);
-	if (link) {
-		link();
-		symbol_put(ips_link_to_i915_driver);
-	}
-}
-
-void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
-{
-	/* We only register the i915 ips part with intel-ips once everything is
-	 * set up, to avoid intel-ips sneaking in and reading bogus values. */
-	rcu_assign_pointer(i915_mch_dev, dev_priv);
-
-	ips_ping_for_i915_load();
-}
-
-void intel_gpu_ips_teardown(void)
-{
-	rcu_assign_pointer(i915_mch_dev, NULL);
-}
-
-static void intel_init_emon(struct drm_i915_private *dev_priv)
-{
-	u32 lcfuse;
-	u8 pxw[16];
-	int i;
-
-	/* Disable to program */
-	I915_WRITE(ECR, 0);
-	POSTING_READ(ECR);
-
-	/* Program energy weights for various events */
-	I915_WRITE(SDEW, 0x15040d00);
-	I915_WRITE(CSIEW0, 0x007f0000);
-	I915_WRITE(CSIEW1, 0x1e220004);
-	I915_WRITE(CSIEW2, 0x04000004);
-
-	for (i = 0; i < 5; i++)
-		I915_WRITE(PEW(i), 0);
-	for (i = 0; i < 3; i++)
-		I915_WRITE(DEW(i), 0);
-
-	/* Program P-state weights to account for frequency power adjustment */
-	for (i = 0; i < 16; i++) {
-		u32 pxvidfreq = I915_READ(PXVFREQ(i));
-		unsigned long freq = intel_pxfreq(pxvidfreq);
-		unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
-			PXVFREQ_PX_SHIFT;
-		unsigned long val;
-
-		val = vid * vid;
-		val *= (freq / 1000);
-		val *= 255;
-		val /= (127*127*900);
-		if (val > 0xff)
-			DRM_ERROR("bad pxval: %ld\n", val);
-		pxw[i] = val;
-	}
-	/* Render standby states get 0 weight */
-	pxw[14] = 0;
-	pxw[15] = 0;
-
-	for (i = 0; i < 4; i++) {
-		u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
-			(pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
-		I915_WRITE(PXW(i), val);
-	}
-
-	/* Adjust magic regs to magic values (more experimental results) */
-	I915_WRITE(OGW0, 0);
-	I915_WRITE(OGW1, 0);
-	I915_WRITE(EG0, 0x00007f00);
-	I915_WRITE(EG1, 0x0000000e);
-	I915_WRITE(EG2, 0x000e0000);
-	I915_WRITE(EG3, 0x68000300);
-	I915_WRITE(EG4, 0x42000000);
-	I915_WRITE(EG5, 0x00140031);
-	I915_WRITE(EG6, 0);
-	I915_WRITE(EG7, 0);
-
-	for (i = 0; i < 8; i++)
-		I915_WRITE(PXWL(i), 0);
-
-	/* Enable PMON + select events */
-	I915_WRITE(ECR, 0x80000019);
-
-	lcfuse = I915_READ(LCFUSE02);
-
-	dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
-}
-
-void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	/*
-	 * RPM depends on RC6 to save/restore the GT HW context, so make RC6 a
-	 * requirement.
-	 */
-	if (!sanitize_rc6(dev_priv)) {
-		DRM_INFO("RC6 disabled, disabling runtime PM support\n");
-		intel_runtime_pm_get(dev_priv);
-	}
-
-	mutex_lock(&rps->lock);
-
-	/* Initialize RPS limits (for userspace) */
-	if (IS_CHERRYVIEW(dev_priv))
-		cherryview_init_gt_powersave(dev_priv);
-	else if (IS_VALLEYVIEW(dev_priv))
-		valleyview_init_gt_powersave(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 6)
-		gen6_init_rps_frequencies(dev_priv);
-
-	/* Derive initial user preferences/limits from the hardware limits */
-	rps->idle_freq = rps->min_freq;
-	rps->cur_freq = rps->idle_freq;
-
-	rps->max_freq_softlimit = rps->max_freq;
-	rps->min_freq_softlimit = rps->min_freq;
-
-	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
-		rps->min_freq_softlimit =
-			max_t(int,
-			      rps->efficient_freq,
-			      intel_freq_opcode(dev_priv, 450));
-
-	/* After setting max-softlimit, find the overclock max freq */
-	if (IS_GEN6(dev_priv) ||
-	    IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
-		u32 params = 0;
-
-		sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
-		if (params & BIT(31)) { /* OC supported */
-			DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
-					 (rps->max_freq & 0xff) * 50,
-					 (params & 0xff) * 50);
-			rps->max_freq = params & 0xff;
-		}
-	}
-
-	/* Finally allow us to boost to max by default */
-	rps->boost_freq = rps->max_freq;
-
-	mutex_unlock(&rps->lock);
-}
-
-void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	if (IS_VALLEYVIEW(dev_priv))
-		valleyview_cleanup_gt_powersave(dev_priv);
-
-	if (!HAS_RC6(dev_priv))
-		intel_runtime_pm_put(dev_priv);
-}
-
-/**
- * intel_suspend_gt_powersave - suspend PM work and helper threads
- * @dev_priv: i915 device
- *
- * We don't want to disable RC6 or other features here, we just want
- * to make sure any work we've queued has finished and won't bother
- * us while we're suspended.
- */
-void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	if (INTEL_GEN(dev_priv) < 6)
-		return;
-
-	/* gen6_rps_idle() will be called later to disable interrupts */
-}
-
-void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
-	dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
-	intel_disable_gt_powersave(dev_priv);
-
-	if (INTEL_GEN(dev_priv) >= 11)
-		gen11_reset_rps_interrupts(dev_priv);
-	else
-		gen6_reset_rps_interrupts(dev_priv);
-}
-
-static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
-{
-	lockdep_assert_held(&i915->gt_pm.rps.lock);
-
-	if (!i915->gt_pm.llc_pstate.enabled)
-		return;
-
-	/* Currently there is no HW configuration to be done to disable. */
-
-	i915->gt_pm.llc_pstate.enabled = false;
-}
-
-static void intel_disable_rc6(struct drm_i915_private *dev_priv)
-{
-	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
-
-	if (!dev_priv->gt_pm.rc6.enabled)
-		return;
-
-	if (INTEL_GEN(dev_priv) >= 9)
-		gen9_disable_rc6(dev_priv);
-	else if (IS_CHERRYVIEW(dev_priv))
-		cherryview_disable_rc6(dev_priv);
-	else if (IS_VALLEYVIEW(dev_priv))
-		valleyview_disable_rc6(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 6)
-		gen6_disable_rc6(dev_priv);
-
-	dev_priv->gt_pm.rc6.enabled = false;
-}
-
-static void intel_disable_rps(struct drm_i915_private *dev_priv)
-{
-	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
-
-	if (!dev_priv->gt_pm.rps.enabled)
-		return;
-
-	if (INTEL_GEN(dev_priv) >= 9)
-		gen9_disable_rps(dev_priv);
-	else if (IS_CHERRYVIEW(dev_priv))
-		cherryview_disable_rps(dev_priv);
-	else if (IS_VALLEYVIEW(dev_priv))
-		valleyview_disable_rps(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 6)
-		gen6_disable_rps(dev_priv);
-	else if (IS_IRONLAKE_M(dev_priv))
-		ironlake_disable_drps(dev_priv);
-
-	dev_priv->gt_pm.rps.enabled = false;
-}
-
-void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	mutex_lock(&dev_priv->gt_pm.rps.lock);
-
-	intel_disable_rc6(dev_priv);
-	intel_disable_rps(dev_priv);
-	if (HAS_LLC(dev_priv))
-		intel_disable_llc_pstate(dev_priv);
-
-	mutex_unlock(&dev_priv->gt_pm.rps.lock);
-}
-
-static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
-{
-	lockdep_assert_held(&i915->gt_pm.rps.lock);
-
-	if (i915->gt_pm.llc_pstate.enabled)
-		return;
-
-	gen6_update_ring_freq(i915);
-
-	i915->gt_pm.llc_pstate.enabled = true;
-}
-
-static void intel_enable_rc6(struct drm_i915_private *dev_priv)
-{
-	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
-
-	if (dev_priv->gt_pm.rc6.enabled)
-		return;
-
-	if (IS_CHERRYVIEW(dev_priv))
-		cherryview_enable_rc6(dev_priv);
-	else if (IS_VALLEYVIEW(dev_priv))
-		valleyview_enable_rc6(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 9)
-		gen9_enable_rc6(dev_priv);
-	else if (IS_BROADWELL(dev_priv))
-		gen8_enable_rc6(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 6)
-		gen6_enable_rc6(dev_priv);
-
-	dev_priv->gt_pm.rc6.enabled = true;
-}
-
-static void intel_enable_rps(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	lockdep_assert_held(&rps->lock);
-
-	if (rps->enabled)
-		return;
-
-	if (IS_CHERRYVIEW(dev_priv)) {
-		cherryview_enable_rps(dev_priv);
-	} else if (IS_VALLEYVIEW(dev_priv)) {
-		valleyview_enable_rps(dev_priv);
-	} else if (INTEL_GEN(dev_priv) >= 9) {
-		gen9_enable_rps(dev_priv);
-	} else if (IS_BROADWELL(dev_priv)) {
-		gen8_enable_rps(dev_priv);
-	} else if (INTEL_GEN(dev_priv) >= 6) {
-		gen6_enable_rps(dev_priv);
-	} else if (IS_IRONLAKE_M(dev_priv)) {
-		ironlake_enable_drps(dev_priv);
-		intel_init_emon(dev_priv);
-	}
-
-	WARN_ON(rps->max_freq < rps->min_freq);
-	WARN_ON(rps->idle_freq > rps->max_freq);
-
-	WARN_ON(rps->efficient_freq < rps->min_freq);
-	WARN_ON(rps->efficient_freq > rps->max_freq);
-
-	rps->enabled = true;
-}
-
-void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	/* Powersaving is controlled by the host when inside a VM */
-	if (intel_vgpu_active(dev_priv))
-		return;
-
-	mutex_lock(&dev_priv->gt_pm.rps.lock);
-
-	if (HAS_RC6(dev_priv))
-		intel_enable_rc6(dev_priv);
-	intel_enable_rps(dev_priv);
-	if (HAS_LLC(dev_priv))
-		intel_enable_llc_pstate(dev_priv);
-
-	mutex_unlock(&dev_priv->gt_pm.rps.lock);
-}
-
-static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
-{
-	/*
-	 * On Ibex Peak and Cougar Point, we need to disable clock
-	 * gating for the panel power sequencer or it will fail to
-	 * start up when no ports are active.
-	 */
-	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
-}
-
-static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
-{
-	enum pipe pipe;
-
-	for_each_pipe(dev_priv, pipe) {
-		I915_WRITE(DSPCNTR(pipe),
-			   I915_READ(DSPCNTR(pipe)) |
-			   DISPPLANE_TRICKLE_FEED_DISABLE);
-
-		I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
-		POSTING_READ(DSPSURF(pipe));
-	}
-}
-
-static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
-{
-	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
-
-	/*
-	 * Required for FBC
-	 * WaFbcDisableDpfcClockGating:ilk
-	 */
-	dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
-		   ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
-		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
-
-	I915_WRITE(PCH_3DCGDIS0,
-		   MARIUNIT_CLOCK_GATE_DISABLE |
-		   SVSMUNIT_CLOCK_GATE_DISABLE);
-	I915_WRITE(PCH_3DCGDIS1,
-		   VFMUNIT_CLOCK_GATE_DISABLE);
-
-	/*
-	 * According to the spec the following bits should be set in
-	 * order to enable memory self-refresh
-	 * The bit 22/21 of 0x42004
-	 * The bit 5 of 0x42020
-	 * The bit 15 of 0x45000
-	 */
-	I915_WRITE(ILK_DISPLAY_CHICKEN2,
-		   (I915_READ(ILK_DISPLAY_CHICKEN2) |
-		    ILK_DPARB_GATE | ILK_VSDPFD_FULL));
-	dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
-	I915_WRITE(DISP_ARB_CTL,
-		   (I915_READ(DISP_ARB_CTL) |
-		    DISP_FBC_WM_DIS));
-
-	/*
-	 * Based on the document from hardware guys the following bits
-	 * should be set unconditionally in order to enable FBC.
-	 * The bit 22 of 0x42000
-	 * The bit 22 of 0x42004
-	 * The bit 7,8,9 of 0x42020.
-	 */
-	if (IS_IRONLAKE_M(dev_priv)) {
-		/* WaFbcAsynchFlipDisableFbcQueue:ilk */
-		I915_WRITE(ILK_DISPLAY_CHICKEN1,
-			   I915_READ(ILK_DISPLAY_CHICKEN1) |
-			   ILK_FBCQ_DIS);
-		I915_WRITE(ILK_DISPLAY_CHICKEN2,
-			   I915_READ(ILK_DISPLAY_CHICKEN2) |
-			   ILK_DPARB_GATE);
-	}
-
-	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
-
-	I915_WRITE(ILK_DISPLAY_CHICKEN2,
-		   I915_READ(ILK_DISPLAY_CHICKEN2) |
-		   ILK_ELPIN_409_SELECT);
-	I915_WRITE(_3D_CHICKEN2,
-		   _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
-		   _3D_CHICKEN2_WM_READ_PIPELINED);
-
-	/* WaDisableRenderCachePipelinedFlush:ilk */
-	I915_WRITE(CACHE_MODE_0,
-		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
-
-	/* WaDisable_RenderCache_OperationalFlush:ilk */
-	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
-
-	g4x_disable_trickle_feed(dev_priv);
-
-	ibx_init_clock_gating(dev_priv);
-}
-
-static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
-{
-	int pipe;
-	uint32_t val;
-
-	/*
-	 * On Ibex Peak and Cougar Point, we need to disable clock
-	 * gating for the panel power sequencer or it will fail to
-	 * start up when no ports are active.
-	 */
-	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
-		   PCH_DPLUNIT_CLOCK_GATE_DISABLE |
-		   PCH_CPUNIT_CLOCK_GATE_DISABLE);
-	I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
-		   DPLS_EDP_PPS_FIX_DIS);
-	/* The below fixes the weird display corruption, a few pixels shifted
-	 * downward, on (only) LVDS of some HP laptops with IVY.
-	 */
-	for_each_pipe(dev_priv, pipe) {
-		val = I915_READ(TRANS_CHICKEN2(pipe));
-		val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
-		val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
-		if (dev_priv->vbt.fdi_rx_polarity_inverted)
-			val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
-		val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
-		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
-		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
-		I915_WRITE(TRANS_CHICKEN2(pipe), val);
-	}
-	/* WADP0ClockGatingDisable */
-	for_each_pipe(dev_priv, pipe) {
-		I915_WRITE(TRANS_CHICKEN1(pipe),
-			   TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
-	}
-}
-
-static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
-{
-	uint32_t tmp;
-
-	tmp = I915_READ(MCH_SSKPD);
-	if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
-		DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
-			      tmp);
-}
-
-static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
-{
-	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
-
-	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
-
-	I915_WRITE(ILK_DISPLAY_CHICKEN2,
-		   I915_READ(ILK_DISPLAY_CHICKEN2) |
-		   ILK_ELPIN_409_SELECT);
-
-	/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
-	I915_WRITE(_3D_CHICKEN,
-		   _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
-
-	/* WaDisable_RenderCache_OperationalFlush:snb */
-	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
-
-	/*
-	 * BSpec recommends 8x4 when MSAA is used,
-	 * however in practice 16x4 seems fastest.
-	 *
-	 * Note that PS/WM thread counts depend on the WIZ hashing
-	 * disable bit, which we don't touch here, but it's good
-	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
-	 */
-	I915_WRITE(GEN6_GT_MODE,
-		   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
-
-	I915_WRITE(CACHE_MODE_0,
-		   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
-
-	I915_WRITE(GEN6_UCGCTL1,
-		   I915_READ(GEN6_UCGCTL1) |
-		   GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
-		   GEN6_CSUNIT_CLOCK_GATE_DISABLE);
-
-	/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
-	 * gating disable must be set.  Failure to set it results in
-	 * flickering pixels due to Z write ordering failures after
-	 * some amount of runtime in the Mesa "fire" demo, and Unigine
-	 * Sanctuary and Tropics, and apparently anything else with
-	 * alpha test or pixel discard.
-	 *
-	 * According to the spec, bit 11 (RCCUNIT) must also be set,
-	 * but we didn't debug actual testcases to find it out.
-	 *
-	 * WaDisableRCCUnitClockGating:snb
-	 * WaDisableRCPBUnitClockGating:snb
-	 */
-	I915_WRITE(GEN6_UCGCTL2,
-		   GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
-		   GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
-
-	/* WaStripsFansDisableFastClipPerformanceFix:snb */
-	I915_WRITE(_3D_CHICKEN3,
-		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
-
-	/*
-	 * Bspec says:
-	 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
-	 * 3DSTATE_SF number of SF output attributes is more than 16."
-	 */
-	I915_WRITE(_3D_CHICKEN3,
-		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
-
-	/*
-	 * According to the spec the following bits should be
-	 * set in order to enable memory self-refresh and fbc:
-	 * The bit21 and bit22 of 0x42000
-	 * The bit21 and bit22 of 0x42004
-	 * The bit5 and bit7 of 0x42020
-	 * The bit14 of 0x70180
-	 * The bit14 of 0x71180
-	 *
-	 * WaFbcAsynchFlipDisableFbcQueue:snb
-	 */
-	I915_WRITE(ILK_DISPLAY_CHICKEN1,
-		   I915_READ(ILK_DISPLAY_CHICKEN1) |
-		   ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
-	I915_WRITE(ILK_DISPLAY_CHICKEN2,
-		   I915_READ(ILK_DISPLAY_CHICKEN2) |
-		   ILK_DPARB_GATE | ILK_VSDPFD_FULL);
-	I915_WRITE(ILK_DSPCLK_GATE_D,
-		   I915_READ(ILK_DSPCLK_GATE_D) |
-		   ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
-		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
-
-	g4x_disable_trickle_feed(dev_priv);
-
-	cpt_init_clock_gating(dev_priv);
-
-	gen6_check_mch_setup(dev_priv);
-}
-
-static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
-{
-	uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
-
-	/*
-	 * WaVSThreadDispatchOverride:ivb,vlv
-	 *
-	 * This actually overrides the dispatch
-	 * mode for all thread types.
-	 */
-	reg &= ~GEN7_FF_SCHED_MASK;
-	reg |= GEN7_FF_TS_SCHED_HW;
-	reg |= GEN7_FF_VS_SCHED_HW;
-	reg |= GEN7_FF_DS_SCHED_HW;
-
-	I915_WRITE(GEN7_FF_THREAD_MODE, reg);
-}
-
-static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
-{
-	/*
-	 * TODO: this bit should only be enabled when really needed, then
-	 * disabled when not needed anymore in order to save power.
-	 */
-	if (HAS_PCH_LPT_LP(dev_priv))
-		I915_WRITE(SOUTH_DSPCLK_GATE_D,
-			   I915_READ(SOUTH_DSPCLK_GATE_D) |
-			   PCH_LP_PARTITION_LEVEL_DISABLE);
+	if (HAS_PCH_LPT_LP(dev_priv))
+		I915_WRITE(SOUTH_DSPCLK_GATE_D,
+			   I915_READ(SOUTH_DSPCLK_GATE_D) |
+			   PCH_LP_PARTITION_LEVEL_DISABLE);
 
 	/* WADPOClockGatingDisable:hsw */
 	I915_WRITE(TRANS_CHICKEN1(PIPE_A),
@@ -9348,74 +7045,8 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
 	}
 }
 
-static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	/*
-	 * N = val - 0xb7
-	 * Slow = Fast = GPLL ref * N
-	 */
-	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
-}
-
-static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
-}
-
-static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	/*
-	 * N = val / 2
-	 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
-	 */
-	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
-}
-
-static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	/* CHV needs even values */
-	return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
-}
-
-int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
-{
-	if (INTEL_GEN(dev_priv) >= 9)
-		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
-					 GEN9_FREQ_SCALER);
-	else if (IS_CHERRYVIEW(dev_priv))
-		return chv_gpu_freq(dev_priv, val);
-	else if (IS_VALLEYVIEW(dev_priv))
-		return byt_gpu_freq(dev_priv, val);
-	else
-		return val * GT_FREQUENCY_MULTIPLIER;
-}
-
-int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
-{
-	if (INTEL_GEN(dev_priv) >= 9)
-		return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
-					 GT_FREQUENCY_MULTIPLIER);
-	else if (IS_CHERRYVIEW(dev_priv))
-		return chv_freq_opcode(dev_priv, val);
-	else if (IS_VALLEYVIEW(dev_priv))
-		return byt_freq_opcode(dev_priv, val);
-	else
-		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
-}
-
 void intel_pm_setup(struct drm_i915_private *dev_priv)
 {
-	mutex_init(&dev_priv->gt_pm.rps.lock);
-	atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);
-
 	dev_priv->runtime_pm.suspended = false;
 	atomic_set(&dev_priv->runtime_pm.wakeref_count, 0);
 }
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index b36a3b5736a0..bfffa91ba111 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -578,8 +578,6 @@ void intel_uncore_runtime_resume(struct drm_i915_private *dev_priv)
 
 void intel_uncore_sanitize(struct drm_i915_private *dev_priv)
 {
-	/* BIOS often leaves RC6 enabled, but disable it for hw init */
-	intel_sanitize_gt_powersave(dev_priv);
 }
 
 static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv,
-- 
2.17.0


* [PATCH 048/262] drm/i915: Move rps worker to intel_gt_pm.c
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (45 preceding siblings ...)
  2018-05-17  6:04 ` [PATCH 047/262] drm/i915: Split GT powermanagement functions to intel_gt_pm.c Chris Wilson
@ 2018-05-17  6:04 ` Chris Wilson
  2018-05-17  6:04 ` [PATCH 049/262] drm/i915: Move all the RPS irq handlers to intel_gt_pm Chris Wilson
                   ` (8 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:04 UTC (permalink / raw)
  To: intel-gfx

The RPS worker exists to do the bidding of the GT powermanagement, so
move it from i915_irq to intel_gt_pm.c where it can be hidden from the
rest of the world. The goal is that the RPS worker is the one true
way through which all RPS updates are coordinated.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
---
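Not part of the patch -- a rough sketch for reviewers of the flow this
change establishes, going by the i915_sysfs.c hunk below: the sysfs
store paths now only record the new softlimit and kick the worker,
which owns the clamping and the actual frequency write. Names follow
the diff; validation, error paths and the worker body are elided.

	static ssize_t store_freq_softlimit(struct intel_rps *rps, u32 val)
	{
		mutex_lock(&rps->lock);
		rps->max_freq_softlimit = val;	/* record the new user limit */
		schedule_work(&rps->work);	/* worker re-clamps cur_freq */
		mutex_unlock(&rps->lock);

		/* wait for the worker so the store is synchronous for users */
		flush_work(&rps->work);
		return 0;
	}
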
 drivers/gpu/drm/i915/i915_drv.h    |   1 -
 drivers/gpu/drm/i915/i915_irq.c    | 141 ---------------------
 drivers/gpu/drm/i915/i915_sysfs.c  |  38 ++----
 drivers/gpu/drm/i915/intel_gt_pm.c | 189 +++++++++++++++++++++++------
 drivers/gpu/drm/i915/intel_gt_pm.h |   1 -
 5 files changed, 163 insertions(+), 207 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e11b8c7dbf4f..af0ac85615d0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3365,7 +3365,6 @@ extern void i915_redisable_vga(struct drm_i915_private *dev_priv);
 extern void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv);
 extern bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val);
 extern void intel_init_pch_refclk(struct drm_i915_private *dev_priv);
-extern int intel_set_rps(struct drm_i915_private *dev_priv, u8 val);
 extern bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv,
 				  bool enable);
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 697800f607c8..24f10c1e9889 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1193,145 +1193,6 @@ static void notify_ring(struct intel_engine_cs *engine)
 	trace_intel_engine_notify(engine, wait);
 }
 
-static void vlv_c0_read(struct drm_i915_private *dev_priv,
-			struct intel_rps_ei *ei)
-{
-	ei->ktime = ktime_get_raw();
-	ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT);
-	ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
-}
-
-void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
-{
-	memset(&dev_priv->gt_pm.rps.ei, 0, sizeof(dev_priv->gt_pm.rps.ei));
-}
-
-static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	const struct intel_rps_ei *prev = &rps->ei;
-	struct intel_rps_ei now;
-	u32 events = 0;
-
-	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
-		return 0;
-
-	vlv_c0_read(dev_priv, &now);
-
-	if (prev->ktime) {
-		u64 time, c0;
-		u32 render, media;
-
-		time = ktime_us_delta(now.ktime, prev->ktime);
-
-		time *= dev_priv->czclk_freq;
-
-		/* Workload can be split between render + media,
-		 * e.g. SwapBuffers being blitted in X after being rendered in
-		 * mesa. To account for this we need to combine both engines
-		 * into our activity counter.
-		 */
-		render = now.render_c0 - prev->render_c0;
-		media = now.media_c0 - prev->media_c0;
-		c0 = max(render, media);
-		c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
-
-		if (c0 > time * rps->up_threshold)
-			events = GEN6_PM_RP_UP_THRESHOLD;
-		else if (c0 < time * rps->down_threshold)
-			events = GEN6_PM_RP_DOWN_THRESHOLD;
-	}
-
-	rps->ei = now;
-	return events;
-}
-
-static void gen6_pm_rps_work(struct work_struct *work)
-{
-	struct drm_i915_private *dev_priv =
-		container_of(work, struct drm_i915_private, gt_pm.rps.work);
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	bool client_boost = false;
-	int new_delay, adj, min, max;
-	u32 pm_iir = 0;
-
-	spin_lock_irq(&dev_priv->irq_lock);
-	if (rps->interrupts_enabled) {
-		pm_iir = fetch_and_zero(&rps->pm_iir);
-		client_boost = atomic_read(&rps->num_waiters);
-	}
-	spin_unlock_irq(&dev_priv->irq_lock);
-
-	/* Make sure we didn't queue anything we're not going to process. */
-	WARN_ON(pm_iir & ~dev_priv->pm_rps_events);
-	if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost)
-		goto out;
-
-	mutex_lock(&rps->lock);
-
-	pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
-
-	adj = rps->last_adj;
-	new_delay = rps->cur_freq;
-	min = rps->min_freq_softlimit;
-	max = rps->max_freq_softlimit;
-	if (client_boost)
-		max = rps->max_freq;
-	if (client_boost && new_delay < rps->boost_freq) {
-		new_delay = rps->boost_freq;
-		adj = 0;
-	} else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
-		if (adj > 0)
-			adj *= 2;
-		else /* CHV needs even encode values */
-			adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1;
-
-		if (new_delay >= rps->max_freq_softlimit)
-			adj = 0;
-	} else if (client_boost) {
-		adj = 0;
-	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
-		if (rps->cur_freq > rps->efficient_freq)
-			new_delay = rps->efficient_freq;
-		else if (rps->cur_freq > rps->min_freq_softlimit)
-			new_delay = rps->min_freq_softlimit;
-		adj = 0;
-	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
-		if (adj < 0)
-			adj *= 2;
-		else /* CHV needs even encode values */
-			adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1;
-
-		if (new_delay <= rps->min_freq_softlimit)
-			adj = 0;
-	} else { /* unknown event */
-		adj = 0;
-	}
-
-	rps->last_adj = adj;
-
-	/* sysfs frequency interfaces may have snuck in while servicing the
-	 * interrupt
-	 */
-	new_delay += adj;
-	new_delay = clamp_t(int, new_delay, min, max);
-
-	if (intel_set_rps(dev_priv, new_delay)) {
-		DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
-		rps->last_adj = 0;
-	}
-
-	mutex_unlock(&rps->lock);
-
-out:
-	/* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */
-	spin_lock_irq(&dev_priv->irq_lock);
-	if (rps->interrupts_enabled)
-		gen6_unmask_pm_irq(dev_priv, dev_priv->pm_rps_events);
-	spin_unlock_irq(&dev_priv->irq_lock);
-}
-
-
 /**
  * ivybridge_parity_work - Workqueue called when a parity error interrupt
  * occurred.
@@ -4363,8 +4224,6 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
 
 	intel_hpd_init_work(dev_priv);
 
-	INIT_WORK(&rps->work, gen6_pm_rps_work);
-
 	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
 	for (i = 0; i < MAX_L3_SLICES; ++i)
 		dev_priv->l3_parity.remap_info[i] = NULL;
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index fde5f0139ca1..a72aab28399f 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -355,17 +355,16 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 {
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 val;
 	ssize_t ret;
+	u32 val;
 
 	ret = kstrtou32(buf, 0, &val);
 	if (ret)
 		return ret;
 
-	intel_runtime_pm_get(dev_priv);
-	mutex_lock(&rps->lock);
-
 	val = intel_freq_opcode(dev_priv, val);
+
+	mutex_lock(&rps->lock);
 	if (val < rps->min_freq ||
 	    val > rps->max_freq ||
 	    val < rps->min_freq_softlimit) {
@@ -378,19 +377,11 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 			  intel_gpu_freq(dev_priv, val));
 
 	rps->max_freq_softlimit = val;
-
-	val = clamp_t(int, rps->cur_freq,
-		      rps->min_freq_softlimit,
-		      rps->max_freq_softlimit);
-
-	/* We still need *_set_rps to process the new max_delay and
-	 * update the interrupt limits and PMINTRMSK even though
-	 * frequency request may be unchanged. */
-	ret = intel_set_rps(dev_priv, val);
+	schedule_work(&rps->work);
 
 unlock:
 	mutex_unlock(&rps->lock);
-	intel_runtime_pm_put(dev_priv);
+	flush_work(&rps->work);
 
 	return ret ?: count;
 }
@@ -410,17 +401,16 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 {
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 val;
 	ssize_t ret;
+	u32 val;
 
 	ret = kstrtou32(buf, 0, &val);
 	if (ret)
 		return ret;
 
-	intel_runtime_pm_get(dev_priv);
-	mutex_lock(&rps->lock);
-
 	val = intel_freq_opcode(dev_priv, val);
+
+	mutex_lock(&rps->lock);
 	if (val < rps->min_freq ||
 	    val > rps->max_freq ||
 	    val > rps->max_freq_softlimit) {
@@ -429,19 +419,11 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 	}
 
 	rps->min_freq_softlimit = val;
-
-	val = clamp_t(int, rps->cur_freq,
-		      rps->min_freq_softlimit,
-		      rps->max_freq_softlimit);
-
-	/* We still need *_set_rps to process the new min_delay and
-	 * update the interrupt limits and PMINTRMSK even though
-	 * frequency request may be unchanged. */
-	ret = intel_set_rps(dev_priv, val);
+	schedule_work(&rps->work);
 
 unlock:
 	mutex_unlock(&rps->lock);
-	intel_runtime_pm_put(dev_priv);
+	flush_work(&rps->work);
 
 	return ret ?: count;
 }
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index 733d346601ca..c51b40c791f8 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -329,13 +329,7 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
-	/*
-	 * min/max delay may still have been modified so be sure to
-	 * write the limits value.
-	 */
 	if (val != rps->cur_freq) {
-		gen6_set_rps_thresholds(dev_priv, val);
-
 		if (INTEL_GEN(dev_priv) >= 9)
 			I915_WRITE(GEN6_RPNSWREQ,
 				   GEN9_FREQUENCY(val));
@@ -349,6 +343,8 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
 				   GEN6_AGGRESSIVE_TURBO);
 	}
 
+	gen6_set_rps_thresholds(dev_priv, val);
+
 	/*
 	 * Make sure we continue to get interrupts
 	 * until we hit the minimum or maximum frequencies.
@@ -370,18 +366,17 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
 		      "Odd GPU freq value\n"))
 		val &= ~1;
 
-	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
-
 	if (val != dev_priv->gt_pm.rps.cur_freq) {
 		vlv_punit_get(dev_priv);
 		err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
 		vlv_punit_put(dev_priv);
 		if (err)
 			return err;
-
-		gen6_set_rps_thresholds(dev_priv, val);
 	}
 
+	gen6_set_rps_thresholds(dev_priv, val);
+	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
+
 	dev_priv->gt_pm.rps.cur_freq = val;
 	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
 
@@ -426,6 +421,151 @@ static void vlv_set_rps_idle(struct drm_i915_private *i915)
 		DRM_ERROR("Failed to set RPS for idle\n");
 }
 
+static int intel_set_rps(struct drm_i915_private *i915, u8 val)
+{
+	struct intel_rps *rps = &i915->gt_pm.rps;
+	int err;
+
+	lockdep_assert_held(&rps->lock);
+	GEM_BUG_ON(val > rps->max_freq);
+	GEM_BUG_ON(val < rps->min_freq);
+
+	if (!rps->enabled) {
+		rps->cur_freq = val;
+		return 0;
+	}
+
+	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+		err = valleyview_set_rps(i915, val);
+	else
+		err = gen6_set_rps(i915, val);
+
+	return err;
+}
+
+static void vlv_c0_read(struct drm_i915_private *dev_priv,
+			struct intel_rps_ei *ei)
+{
+	ei->ktime = ktime_get_raw();
+	ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT);
+	ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
+}
+
+static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	const struct intel_rps_ei *prev = &rps->ei;
+	struct intel_rps_ei now;
+	u32 events = 0;
+
+	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
+		return 0;
+
+	vlv_c0_read(dev_priv, &now);
+
+	if (prev->ktime) {
+		u64 time, c0;
+		u32 render, media;
+
+		time = ktime_us_delta(now.ktime, prev->ktime);
+
+		time *= dev_priv->czclk_freq;
+
+		/* Workload can be split between render + media,
+		 * e.g. SwapBuffers being blitted in X after being rendered in
+		 * mesa. To account for this we need to combine both engines
+		 * into our activity counter.
+		 */
+		render = now.render_c0 - prev->render_c0;
+		media = now.media_c0 - prev->media_c0;
+		c0 = max(render, media);
+		c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
+
+		if (c0 > time * rps->up_threshold)
+			events = GEN6_PM_RP_UP_THRESHOLD;
+		else if (c0 < time * rps->down_threshold)
+			events = GEN6_PM_RP_DOWN_THRESHOLD;
+	}
+
+	rps->ei = now;
+	return events;
+}
+
+static void intel_rps_work(struct work_struct *work)
+{
+	struct drm_i915_private *i915 =
+		container_of(work, struct drm_i915_private, gt_pm.rps.work);
+	struct intel_rps *rps = &i915->gt_pm.rps;
+	int freq, adj, min, max;
+	bool client_boost;
+	u32 pm_iir;
+
+	pm_iir = xchg(&rps->pm_iir, 0) & ~i915->pm_rps_events;
+	pm_iir |= vlv_wa_c0_ei(i915, pm_iir);
+
+	client_boost = atomic_read(&rps->num_waiters);
+
+	mutex_lock(&rps->lock);
+
+	min = rps->min_freq_softlimit;
+	max = rps->max_freq_softlimit;
+	if (client_boost && max < rps->boost_freq)
+		max = rps->boost_freq;
+
+	GEM_BUG_ON(min < rps->min_freq);
+	GEM_BUG_ON(max > rps->max_freq);
+	GEM_BUG_ON(max < min);
+
+	adj = rps->last_adj;
+	freq = rps->cur_freq;
+	if (client_boost && freq < rps->boost_freq) {
+		freq = rps->boost_freq;
+		adj = 0;
+	} else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
+		if (adj > 0)
+			adj *= 2;
+		else /* CHV needs even encode values */
+			adj = IS_CHERRYVIEW(i915) ? 2 : 1;
+
+		if (freq >= max)
+			adj = 0;
+	} else if (client_boost) {
+		adj = 0;
+	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
+		if (freq > max_t(int, rps->efficient_freq, min))
+			freq = max_t(int, rps->efficient_freq, min);
+		else if (freq > min_t(int, rps->efficient_freq, min))
+			freq = min_t(int, rps->efficient_freq, min);
+
+		adj = 0;
+	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
+		if (adj < 0)
+			adj *= 2;
+		else /* CHV needs even encode values */
+			adj = IS_CHERRYVIEW(i915) ? -2 : -1;
+
+		if (freq <= min)
+			adj = 0;
+	} else { /* unknown/external event */
+		adj = 0;
+	}
+
+	if (intel_set_rps(i915, clamp_t(int, freq + adj, min, max))) {
+		DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
+		adj = 0;
+	}
+
+	mutex_unlock(&rps->lock);
+
+	if (pm_iir) {
+		spin_lock_irq(&i915->irq_lock);
+		if (rps->interrupts_enabled)
+			gen6_unmask_pm_irq(i915, i915->pm_rps_events);
+		spin_unlock_irq(&i915->irq_lock);
+		rps->last_adj = adj;
+	}
+}
+
 void gen6_rps_busy(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
@@ -434,19 +574,17 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
 	if (rps->enabled) {
 		u8 freq;
 
-		if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
-			gen6_rps_reset_ei(dev_priv);
 		I915_WRITE(GEN6_PMINTRMSK,
 			   gen6_rps_pm_mask(dev_priv, rps->cur_freq));
 
 		gen6_enable_rps_interrupts(dev_priv);
+		memset(&rps->ei, 0, sizeof(rps->ei));
 
 		/*
 		 * Use the user's desired frequency as a guide, but for better
 		 * performance, jump directly to RPe as our starting frequency.
 		 */
-		freq = max(rps->cur_freq,
-			   rps->efficient_freq);
+		freq = max(rps->cur_freq, rps->efficient_freq);
 
 		if (intel_set_rps(dev_priv,
 				  clamp(freq,
@@ -515,28 +653,6 @@ void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
 	atomic_inc(client ? &client->boosts : &rps->boosts);
 }
 
-int intel_set_rps(struct drm_i915_private *i915, u8 val)
-{
-	struct intel_rps *rps = &i915->gt_pm.rps;
-	int err;
-
-	lockdep_assert_held(&rps->lock);
-	GEM_BUG_ON(val > rps->max_freq);
-	GEM_BUG_ON(val < rps->min_freq);
-
-	if (!rps->enabled) {
-		rps->cur_freq = val;
-		return 0;
-	}
-
-	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
-		err = valleyview_set_rps(i915, val);
-	else
-		err = gen6_set_rps(i915, val);
-
-	return err;
-}
-
 static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
 {
 	I915_WRITE(GEN6_RC_CONTROL, 0);
@@ -2124,6 +2240,7 @@ void intel_init_gt_powersave(struct drm_i915_private *i915)
 	struct intel_rps *rps = &i915->gt_pm.rps;
 
 	mutex_init(&rps->lock);
+	INIT_WORK(&rps->work, intel_rps_work);
 
 	/*
 	 * RPM depends on RC6 to save restore the GT HW context, so make RC6 a
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
index 20e937d6c7e0..5c52ca208df1 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/intel_gt_pm.h
@@ -95,7 +95,6 @@ void intel_disable_gt_powersave(struct drm_i915_private *i915);
 void intel_suspend_gt_powersave(struct drm_i915_private *i915);
 
 void gen6_rps_busy(struct drm_i915_private *i915);
-void gen6_rps_reset_ei(struct drm_i915_private *i915);
 void gen6_rps_idle(struct drm_i915_private *i915);
 void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
 
-- 
2.17.0


* [PATCH 049/262] drm/i915: Move all the RPS irq handlers to intel_gt_pm
From: Chris Wilson @ 2018-05-17  6:04 UTC (permalink / raw)
  To: intel-gfx

Since all the RPS handling code is in intel_gt_pm, move the irq handlers
there as well, so that it is all contained within one file.
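
The interrupt flow itself does not change with the move: the hard irq
masks the RPS bits and defers the (forcewake-requiring) work to the
bottom half, which unmasks them again once serviced. Condensed from the
handler consolidated below:

	void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv,
				     u32 pm_iir)
	{
		struct intel_rps *rps = &dev_priv->gt_pm.rps;

		if (pm_iir & rps->pm_events) {
			spin_lock(&dev_priv->irq_lock);
			/* Mask the bits until the worker has serviced them */
			gen6_mask_pm_irq(dev_priv, pm_iir & rps->pm_events);
			if (rps->interrupts_enabled) {
				rps->pm_iir |= pm_iir & rps->pm_events;
				schedule_work(&rps->work);
			}
			spin_unlock(&dev_priv->irq_lock);
		}
	}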

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h         |   4 -
 drivers/gpu/drm/i915/i915_irq.c         | 314 +++---------------------
 drivers/gpu/drm/i915/intel_drv.h        |  10 +-
 drivers/gpu/drm/i915/intel_gt_pm.c      | 236 +++++++++++++++++-
 drivers/gpu/drm/i915/intel_gt_pm.h      |  11 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |   1 +
 6 files changed, 277 insertions(+), 299 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index af0ac85615d0..61ee44868e9d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1584,10 +1584,6 @@ struct drm_i915_private {
 		u32 de_irq_mask[I915_MAX_PIPES];
 	};
 	u32 gt_irq_mask;
-	u32 pm_imr;
-	u32 pm_ier;
-	u32 pm_rps_events;
-	u32 pm_guc_events;
 	u32 pipestat_irq_mask[I915_MAX_PIPES];
 
 	struct i915_hotplug hotplug;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 24f10c1e9889..5685d8610e69 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -204,7 +204,6 @@ static void gen2_assert_iir_is_zero(struct drm_i915_private *dev_priv,
 	POSTING_READ16(type##IMR); \
 } while (0)
 
-static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
 static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
 
 /* For display hotplug interrupt */
@@ -343,220 +342,6 @@ void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask)
 	ilk_update_gt_irq(dev_priv, mask, 0);
 }
 
-static i915_reg_t gen6_pm_iir(struct drm_i915_private *dev_priv)
-{
-	WARN_ON_ONCE(INTEL_GEN(dev_priv) >= 11);
-
-	return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IIR(2) : GEN6_PMIIR;
-}
-
-static i915_reg_t gen6_pm_imr(struct drm_i915_private *dev_priv)
-{
-	if (INTEL_GEN(dev_priv) >= 11)
-		return GEN11_GPM_WGBOXPERF_INTR_MASK;
-	else if (INTEL_GEN(dev_priv) >= 8)
-		return GEN8_GT_IMR(2);
-	else
-		return GEN6_PMIMR;
-}
-
-static i915_reg_t gen6_pm_ier(struct drm_i915_private *dev_priv)
-{
-	if (INTEL_GEN(dev_priv) >= 11)
-		return GEN11_GPM_WGBOXPERF_INTR_ENABLE;
-	else if (INTEL_GEN(dev_priv) >= 8)
-		return GEN8_GT_IER(2);
-	else
-		return GEN6_PMIER;
-}
-
-/**
- * snb_update_pm_irq - update GEN6_PMIMR
- * @dev_priv: driver private
- * @interrupt_mask: mask of interrupt bits to update
- * @enabled_irq_mask: mask of interrupt bits to enable
- */
-static void snb_update_pm_irq(struct drm_i915_private *dev_priv,
-			      uint32_t interrupt_mask,
-			      uint32_t enabled_irq_mask)
-{
-	uint32_t new_val;
-
-	WARN_ON(enabled_irq_mask & ~interrupt_mask);
-
-	lockdep_assert_held(&dev_priv->irq_lock);
-
-	new_val = dev_priv->pm_imr;
-	new_val &= ~interrupt_mask;
-	new_val |= (~enabled_irq_mask & interrupt_mask);
-
-	if (new_val != dev_priv->pm_imr) {
-		dev_priv->pm_imr = new_val;
-		I915_WRITE(gen6_pm_imr(dev_priv), dev_priv->pm_imr);
-		POSTING_READ(gen6_pm_imr(dev_priv));
-	}
-}
-
-void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask)
-{
-	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
-		return;
-
-	snb_update_pm_irq(dev_priv, mask, mask);
-}
-
-static void __gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask)
-{
-	snb_update_pm_irq(dev_priv, mask, 0);
-}
-
-void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask)
-{
-	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
-		return;
-
-	__gen6_mask_pm_irq(dev_priv, mask);
-}
-
-static void gen6_reset_pm_iir(struct drm_i915_private *dev_priv, u32 reset_mask)
-{
-	i915_reg_t reg = gen6_pm_iir(dev_priv);
-
-	lockdep_assert_held(&dev_priv->irq_lock);
-
-	I915_WRITE(reg, reset_mask);
-	I915_WRITE(reg, reset_mask);
-	POSTING_READ(reg);
-}
-
-static void gen6_enable_pm_irq(struct drm_i915_private *dev_priv, u32 enable_mask)
-{
-	lockdep_assert_held(&dev_priv->irq_lock);
-
-	dev_priv->pm_ier |= enable_mask;
-	I915_WRITE(gen6_pm_ier(dev_priv), dev_priv->pm_ier);
-	gen6_unmask_pm_irq(dev_priv, enable_mask);
-	/* unmask_pm_irq provides an implicit barrier (POSTING_READ) */
-}
-
-static void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, u32 disable_mask)
-{
-	lockdep_assert_held(&dev_priv->irq_lock);
-
-	dev_priv->pm_ier &= ~disable_mask;
-	__gen6_mask_pm_irq(dev_priv, disable_mask);
-	I915_WRITE(gen6_pm_ier(dev_priv), dev_priv->pm_ier);
-	/* though a barrier is missing here, but don't really need a one */
-}
-
-void gen11_reset_rps_interrupts(struct drm_i915_private *dev_priv)
-{
-	spin_lock_irq(&dev_priv->irq_lock);
-
-	while (gen11_reset_one_iir(dev_priv, 0, GEN11_GTPM))
-		;
-
-	dev_priv->gt_pm.rps.pm_iir = 0;
-
-	spin_unlock_irq(&dev_priv->irq_lock);
-}
-
-void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv)
-{
-	spin_lock_irq(&dev_priv->irq_lock);
-	gen6_reset_pm_iir(dev_priv, dev_priv->pm_rps_events);
-	dev_priv->gt_pm.rps.pm_iir = 0;
-	spin_unlock_irq(&dev_priv->irq_lock);
-}
-
-void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	if (READ_ONCE(rps->interrupts_enabled))
-		return;
-
-	spin_lock_irq(&dev_priv->irq_lock);
-	WARN_ON_ONCE(rps->pm_iir);
-
-	if (INTEL_GEN(dev_priv) >= 11)
-		WARN_ON_ONCE(gen11_reset_one_iir(dev_priv, 0, GEN11_GTPM));
-	else
-		WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events);
-
-	rps->interrupts_enabled = true;
-	gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
-
-	spin_unlock_irq(&dev_priv->irq_lock);
-}
-
-void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	if (!READ_ONCE(rps->interrupts_enabled))
-		return;
-
-	spin_lock_irq(&dev_priv->irq_lock);
-	rps->interrupts_enabled = false;
-
-	I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u));
-
-	gen6_disable_pm_irq(dev_priv, dev_priv->pm_rps_events);
-
-	spin_unlock_irq(&dev_priv->irq_lock);
-	synchronize_irq(dev_priv->drm.irq);
-
-	/* Now that we will not be generating any more work, flush any
-	 * outstanding tasks. As we are called on the RPS idle path,
-	 * we will reset the GPU to minimum frequencies, so the current
-	 * state of the worker can be discarded.
-	 */
-	cancel_work_sync(&rps->work);
-	if (INTEL_GEN(dev_priv) >= 11)
-		gen11_reset_rps_interrupts(dev_priv);
-	else
-		gen6_reset_rps_interrupts(dev_priv);
-}
-
-void gen9_reset_guc_interrupts(struct drm_i915_private *dev_priv)
-{
-	assert_rpm_wakelock_held(dev_priv);
-
-	spin_lock_irq(&dev_priv->irq_lock);
-	gen6_reset_pm_iir(dev_priv, dev_priv->pm_guc_events);
-	spin_unlock_irq(&dev_priv->irq_lock);
-}
-
-void gen9_enable_guc_interrupts(struct drm_i915_private *dev_priv)
-{
-	assert_rpm_wakelock_held(dev_priv);
-
-	spin_lock_irq(&dev_priv->irq_lock);
-	if (!dev_priv->guc.interrupts_enabled) {
-		WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) &
-				       dev_priv->pm_guc_events);
-		dev_priv->guc.interrupts_enabled = true;
-		gen6_enable_pm_irq(dev_priv, dev_priv->pm_guc_events);
-	}
-	spin_unlock_irq(&dev_priv->irq_lock);
-}
-
-void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv)
-{
-	assert_rpm_wakelock_held(dev_priv);
-
-	spin_lock_irq(&dev_priv->irq_lock);
-	dev_priv->guc.interrupts_enabled = false;
-
-	gen6_disable_pm_irq(dev_priv, dev_priv->pm_guc_events);
-
-	spin_unlock_irq(&dev_priv->irq_lock);
-	synchronize_irq(dev_priv->drm.irq);
-
-	gen9_reset_guc_interrupts(dev_priv);
-}
-
 /**
  * bdw_update_port_irq - update DE port interrupt
  * @dev_priv: driver private
@@ -1385,12 +1170,13 @@ static void gen8_gt_irq_handler(struct drm_i915_private *i915, u32 master_ctl)
 
 	if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) {
 		iir = raw_reg_read(regs, GEN8_GT_IIR(2));
-		if (likely(iir & (i915->pm_rps_events | i915->pm_guc_events))) {
+		if (likely(iir & (i915->gt_pm.rps.pm_events |
+				  i915->gt_pm.rps.guc_events))) {
 			raw_reg_write(regs, GEN8_GT_IIR(2),
-				      iir & (i915->pm_rps_events |
-					     i915->pm_guc_events));
+				      iir & (i915->gt_pm.rps.pm_events |
+					     i915->gt_pm.rps.guc_events));
 
-			gen6_rps_irq_handler(i915, iir);
+			intel_gt_pm_irq_handler(i915, iir);
 			gen9_guc_irq_handler(i915, iir);
 		}
 	}
@@ -1642,35 +1428,6 @@ static void i9xx_pipe_crc_irq_handler(struct drm_i915_private *dev_priv,
 				     res1, res2);
 }
 
-/* The RPS events need forcewake, so we add them to a work queue and mask their
- * IMR bits until the work is done. Other interrupts can be processed without
- * the work queue. */
-static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	if (pm_iir & dev_priv->pm_rps_events) {
-		spin_lock(&dev_priv->irq_lock);
-		gen6_mask_pm_irq(dev_priv, pm_iir & dev_priv->pm_rps_events);
-		if (rps->interrupts_enabled) {
-			rps->pm_iir |= pm_iir & dev_priv->pm_rps_events;
-			schedule_work(&rps->work);
-		}
-		spin_unlock(&dev_priv->irq_lock);
-	}
-
-	if (INTEL_GEN(dev_priv) >= 8)
-		return;
-
-	if (HAS_VEBOX(dev_priv)) {
-		if (pm_iir & PM_VEBOX_USER_INTERRUPT)
-			notify_ring(dev_priv->engine[VECS]);
-
-		if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
-			DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
-	}
-}
-
 static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir)
 {
 	if (gt_iir & GEN9_GUC_TO_HOST_INT_EVENT)
@@ -1878,6 +1635,19 @@ static void i9xx_hpd_irq_handler(struct drm_i915_private *dev_priv,
 	}
 }
 
+static void gen6_pm_extra_irq_handler(struct drm_i915_private *dev_priv,
+				      u32 pm_iir)
+{
+	if (HAS_VEBOX(dev_priv)) {
+		if (pm_iir & PM_VEBOX_USER_INTERRUPT)
+			notify_ring(dev_priv->engine[VECS]);
+
+		if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
+			DRM_DEBUG("Command parser error, pm_iir 0x%08x\n",
+				  pm_iir);
+	}
+}
+
 static irqreturn_t valleyview_irq_handler(int irq, void *arg)
 {
 	struct drm_device *dev = arg;
@@ -1952,7 +1722,7 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg)
 		if (gt_iir)
 			snb_gt_irq_handler(dev_priv, gt_iir);
 		if (pm_iir)
-			gen6_rps_irq_handler(dev_priv, pm_iir);
+			intel_gt_pm_irq_handler(dev_priv, pm_iir);
 
 		if (hotplug_status)
 			i9xx_hpd_irq_handler(dev_priv, hotplug_status);
@@ -2401,7 +2171,8 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg)
 		if (pm_iir) {
 			I915_WRITE(GEN6_PMIIR, pm_iir);
 			ret = IRQ_HANDLED;
-			gen6_rps_irq_handler(dev_priv, pm_iir);
+			intel_gt_pm_irq_handler(dev_priv, pm_iir);
+			gen6_pm_extra_irq_handler(dev_priv, pm_iir);
 		}
 	}
 
@@ -2694,7 +2465,7 @@ gen11_other_irq_handler(struct drm_i915_private * const i915,
 			const u8 instance, const u16 iir)
 {
 	if (instance == OTHER_GTPM_INSTANCE)
-		return gen6_rps_irq_handler(i915, iir);
+		return intel_gt_pm_irq_handler(i915, iir);
 
 	WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n",
 		  instance, iir);
@@ -3604,11 +3375,11 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
 		 */
 		if (HAS_VEBOX(dev_priv)) {
 			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
-			dev_priv->pm_ier |= PM_VEBOX_USER_INTERRUPT;
+			dev_priv->gt_pm.ier |= PM_VEBOX_USER_INTERRUPT;
 		}
 
-		dev_priv->pm_imr = 0xffffffff;
-		GEN3_IRQ_INIT(GEN6_PM, dev_priv->pm_imr, pm_irqs);
+		dev_priv->gt_pm.imr = 0xffffffff;
+		GEN3_IRQ_INIT(GEN6_PM, dev_priv->gt_pm.imr, pm_irqs);
 	}
 }
 
@@ -3730,15 +3501,15 @@ static void gen8_gt_irq_postinstall(struct drm_i915_private *dev_priv)
 	if (HAS_L3_DPF(dev_priv))
 		gt_interrupts[0] |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
 
-	dev_priv->pm_ier = 0x0;
-	dev_priv->pm_imr = ~dev_priv->pm_ier;
+	dev_priv->gt_pm.ier = 0x0;
+	dev_priv->gt_pm.imr = ~dev_priv->gt_pm.ier;
 	GEN8_IRQ_INIT_NDX(GT, 0, ~gt_interrupts[0], gt_interrupts[0]);
 	GEN8_IRQ_INIT_NDX(GT, 1, ~gt_interrupts[1], gt_interrupts[1]);
 	/*
 	 * RPS interrupts will get enabled/disabled on demand when RPS itself
 	 * is enabled/disabled. Same wil be the case for GuC interrupts.
 	 */
-	GEN8_IRQ_INIT_NDX(GT, 2, dev_priv->pm_imr, dev_priv->pm_ier);
+	GEN8_IRQ_INIT_NDX(GT, 2, dev_priv->gt_pm.imr, dev_priv->gt_pm.ier);
 	GEN8_IRQ_INIT_NDX(GT, 3, ~gt_interrupts[3], gt_interrupts[3]);
 }
 
@@ -3835,8 +3606,8 @@ static void gen11_gt_irq_postinstall(struct drm_i915_private *dev_priv)
 	 * RPS interrupts will get enabled/disabled on demand when RPS itself
 	 * is enabled/disabled.
 	 */
-	dev_priv->pm_ier = 0x0;
-	dev_priv->pm_imr = ~dev_priv->pm_ier;
+	dev_priv->gt_pm.ier = 0x0;
+	dev_priv->gt_pm.imr = ~dev_priv->gt_pm.ier;
 	I915_WRITE(GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0);
 	I915_WRITE(GEN11_GPM_WGBOXPERF_INTR_MASK,  ~0);
 }
@@ -4219,7 +3990,6 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
 void intel_irq_init(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = &dev_priv->drm;
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 	int i;
 
 	intel_hpd_init_work(dev_priv);
@@ -4228,30 +3998,6 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
 	for (i = 0; i < MAX_L3_SLICES; ++i)
 		dev_priv->l3_parity.remap_info[i] = NULL;
 
-	if (HAS_GUC_SCHED(dev_priv))
-		dev_priv->pm_guc_events = GEN9_GUC_TO_HOST_INT_EVENT;
-
-	/* Let's track the enabled rps events */
-	if (IS_VALLEYVIEW(dev_priv))
-		/* WaGsvRC0ResidencyMethod:vlv */
-		dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
-	else
-		dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
-
-	rps->pm_intrmsk_mbz = 0;
-
-	/*
-	 * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer
-	 * if GEN6_PM_UP_EI_EXPIRED is masked.
-	 *
-	 * TODO: verify if this can be reproduced on VLV,CHV.
-	 */
-	if (INTEL_GEN(dev_priv) <= 7)
-		rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
-
-	if (INTEL_GEN(dev_priv) >= 8)
-		rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
-
 	if (IS_GEN2(dev_priv)) {
 		/* Gen2 doesn't have a hardware frame counter */
 		dev->max_vblank_count = 0;
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 7cfb71ade096..6bf8dd52070b 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1343,12 +1343,10 @@ bool gen11_reset_one_iir(struct drm_i915_private * const i915,
 			 const unsigned int bit);
 void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask);
 void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask);
-void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask);
-void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask);
-void gen11_reset_rps_interrupts(struct drm_i915_private *dev_priv);
-void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv);
-void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv);
-void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv);
+
+bool gen11_reset_one_iir(struct drm_i915_private * const i915,
+			 const unsigned int bank,
+			 const unsigned int bit);
 
 static inline u32 gen6_sanitize_rps_pm_mask(const struct drm_i915_private *i915,
 					    u32 mask)
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index c51b40c791f8..79cb4dbafbea 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -315,7 +315,7 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *i915, u8 val)
 		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
 			 GEN6_PM_RP_UP_THRESHOLD);
 
-	mask &= i915->pm_rps_events;
+	mask &= rps->pm_events;
 
 	return gen6_sanitize_rps_pm_mask(i915, ~mask);
 }
@@ -443,6 +443,145 @@ static int intel_set_rps(struct drm_i915_private *i915, u8 val)
 	return err;
 }
 
+static i915_reg_t gen6_pm_iir(struct drm_i915_private *dev_priv)
+{
+	return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IIR(2) : GEN6_PMIIR;
+}
+
+static i915_reg_t gen6_pm_ier(struct drm_i915_private *dev_priv)
+{
+	return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IER(2) : GEN6_PMIER;
+}
+
+static i915_reg_t gen6_pm_imr(struct drm_i915_private *dev_priv)
+{
+	return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IMR(2) : GEN6_PMIMR;
+}
+
+static void gen6_update_pm_irq(struct drm_i915_private *dev_priv,
+			       u32 interrupt_mask,
+			       u32 enabled_irq_mask)
+{
+	u32 new_val;
+
+	lockdep_assert_held(&dev_priv->irq_lock);
+	GEM_BUG_ON(enabled_irq_mask & ~interrupt_mask);
+
+	new_val = dev_priv->gt_pm.imr;
+	new_val &= ~interrupt_mask;
+	new_val |= ~enabled_irq_mask & interrupt_mask;
+
+	if (new_val != dev_priv->gt_pm.imr) {
+		dev_priv->gt_pm.imr = new_val;
+		I915_WRITE(gen6_pm_imr(dev_priv), dev_priv->gt_pm.imr);
+	}
+}
+
+static void gen6_reset_pm_iir(struct drm_i915_private *dev_priv,
+			      u32 reset_mask)
+{
+	i915_reg_t reg = gen6_pm_iir(dev_priv);
+
+	lockdep_assert_held(&dev_priv->irq_lock);
+
+	I915_WRITE(reg, reset_mask);
+	I915_WRITE(reg, reset_mask);
+	POSTING_READ(reg);
+}
+
+static void gen6_enable_pm_irq(struct drm_i915_private *dev_priv,
+			       u32 enable_mask)
+{
+	lockdep_assert_held(&dev_priv->irq_lock);
+
+	dev_priv->gt_pm.ier |= enable_mask;
+	I915_WRITE(gen6_pm_ier(dev_priv), dev_priv->gt_pm.ier);
+	gen6_unmask_pm_irq(dev_priv, enable_mask);
+	/* unmask_pm_irq provides an implicit barrier (POSTING_READ) */
+}
+
+static void gen6_disable_pm_irq(struct drm_i915_private *dev_priv,
+				u32 disable_mask)
+{
+	lockdep_assert_held(&dev_priv->irq_lock);
+
+	dev_priv->gt_pm.ier &= ~disable_mask;
+	gen6_update_pm_irq(dev_priv, disable_mask, 0);
+	I915_WRITE(gen6_pm_ier(dev_priv), dev_priv->gt_pm.ier);
+	/* though a barrier is missing here, but don't really need a one */
+}
+
+static void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	spin_lock_irq(&dev_priv->irq_lock);
+	gen6_reset_pm_iir(dev_priv, rps->pm_events);
+	rps->pm_iir = 0;
+	spin_unlock_irq(&dev_priv->irq_lock);
+}
+
+static void gen11_reset_rps_interrupts(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	spin_lock_irq(&dev_priv->irq_lock);
+
+	while (gen11_reset_one_iir(dev_priv, 0, GEN11_GTPM))
+		cpu_relax();
+	rps->pm_iir = 0;
+
+	spin_unlock_irq(&dev_priv->irq_lock);
+}
+
+static void enable_rps_interrupts(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	if (READ_ONCE(rps->interrupts_enabled))
+		return;
+
+	if (WARN_ON_ONCE(IS_GEN11(dev_priv)))
+		return;
+
+	spin_lock_irq(&dev_priv->irq_lock);
+	WARN_ON_ONCE(rps->pm_iir);
+	WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & rps->pm_events);
+	rps->interrupts_enabled = true;
+	gen6_enable_pm_irq(dev_priv, rps->pm_events);
+
+	spin_unlock_irq(&dev_priv->irq_lock);
+}
+
+static void disable_rps_interrupts(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	if (!READ_ONCE(rps->interrupts_enabled))
+		return;
+
+	if (WARN_ON_ONCE(IS_GEN11(dev_priv)))
+		return;
+
+	spin_lock_irq(&dev_priv->irq_lock);
+	rps->interrupts_enabled = false;
+
+	I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u));
+
+	gen6_disable_pm_irq(dev_priv, rps->pm_events);
+
+	spin_unlock_irq(&dev_priv->irq_lock);
+	synchronize_irq(dev_priv->drm.irq);
+
+	/* Now that we will not be generating any more work, flush any
+	 * outstanding tasks. As we are called on the RPS idle path,
+	 * we will reset the GPU to minimum frequencies, so the current
+	 * state of the worker can be discarded.
+	 */
+	cancel_work_sync(&rps->work);
+	gen6_reset_rps_interrupts(dev_priv);
+}
+
 static void vlv_c0_read(struct drm_i915_private *dev_priv,
 			struct intel_rps_ei *ei)
 {
@@ -500,7 +639,7 @@ static void intel_rps_work(struct work_struct *work)
 	bool client_boost;
 	u32 pm_iir;
 
-	pm_iir = xchg(&rps->pm_iir, 0) & ~i915->pm_rps_events;
+	pm_iir = xchg(&rps->pm_iir, 0) & ~rps->pm_events;
 	pm_iir |= vlv_wa_c0_ei(i915, pm_iir);
 
 	client_boost = atomic_read(&rps->num_waiters);
@@ -560,12 +699,27 @@ static void intel_rps_work(struct work_struct *work)
 	if (pm_iir) {
 		spin_lock_irq(&i915->irq_lock);
 		if (rps->interrupts_enabled)
-			gen6_unmask_pm_irq(i915, i915->pm_rps_events);
+			gen6_unmask_pm_irq(i915, rps->pm_events);
 		spin_unlock_irq(&i915->irq_lock);
 		rps->last_adj = adj;
 	}
 }
 
+void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	if (pm_iir & rps->pm_events) {
+		spin_lock(&dev_priv->irq_lock);
+		gen6_mask_pm_irq(dev_priv, pm_iir & rps->pm_events);
+		if (rps->interrupts_enabled) {
+			rps->pm_iir |= pm_iir & rps->pm_events;
+			schedule_work(&rps->work);
+		}
+		spin_unlock(&dev_priv->irq_lock);
+	}
+}
+
 void gen6_rps_busy(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
@@ -577,7 +731,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
 		I915_WRITE(GEN6_PMINTRMSK,
 			   gen6_rps_pm_mask(dev_priv, rps->cur_freq));
 
-		gen6_enable_rps_interrupts(dev_priv);
+		enable_rps_interrupts(dev_priv);
 		memset(&rps->ei, 0, sizeof(rps->ei));
 
 		/*
@@ -605,7 +759,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
 	 * our rpm wakeref. And then disable the interrupts to stop any
 	 * futher RPS reclocking whilst we are asleep.
 	 */
-	gen6_disable_rps_interrupts(dev_priv);
+	disable_rps_interrupts(dev_priv);
 
 	mutex_lock(&rps->lock);
 	if (rps->enabled) {
@@ -2242,6 +2396,30 @@ void intel_init_gt_powersave(struct drm_i915_private *i915)
 	mutex_init(&rps->lock);
 	INIT_WORK(&rps->work, intel_rps_work);
 
+	if (HAS_GUC_SCHED(i915))
+		rps->guc_events = GEN9_GUC_TO_HOST_INT_EVENT;
+
+	/* Let's track the enabled rps events */
+	if (IS_VALLEYVIEW(i915))
+		/* WaGsvRC0ResidencyMethod:vlv */
+		rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
+	else
+		rps->pm_events = GEN6_PM_RPS_EVENTS;
+
+	rps->pm_intrmsk_mbz = 0;
+
+	/*
+	 * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer
+	 * if GEN6_PM_UP_EI_EXPIRED is masked.
+	 *
+	 * TODO: verify if this can be reproduced on VLV,CHV.
+	 */
+	if (INTEL_GEN(i915) <= 7)
+		rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
+
+	if (INTEL_GEN(i915) >= 8)
+		rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
+
 	/*
 	 * RPM depends on RC6 to save restore the GT HW context, so make RC6 a
 	 * requirement.
@@ -2539,3 +2717,51 @@ int intel_freq_opcode(const struct drm_i915_private *i915, int val)
 	else
 		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
 }
+
+void gen6_unmask_pm_irq(struct drm_i915_private *i915, u32 mask)
+{
+	gen6_update_pm_irq(i915, mask, mask);
+}
+
+void gen6_mask_pm_irq(struct drm_i915_private *i915, u32 mask)
+{
+	gen6_update_pm_irq(i915, mask, 0);
+}
+
+void gen9_reset_guc_interrupts(struct drm_i915_private *i915)
+{
+	assert_rpm_wakelock_held(i915);
+
+	spin_lock_irq(&i915->irq_lock);
+	gen6_reset_pm_iir(i915, i915->gt_pm.rps.guc_events);
+	spin_unlock_irq(&i915->irq_lock);
+}
+
+void gen9_enable_guc_interrupts(struct drm_i915_private *dev_priv)
+{
+	assert_rpm_wakelock_held(dev_priv);
+
+	spin_lock_irq(&dev_priv->irq_lock);
+	if (!dev_priv->guc.interrupts_enabled) {
+		WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) &
+				       dev_priv->gt_pm.rps.guc_events);
+		dev_priv->guc.interrupts_enabled = true;
+		gen6_enable_pm_irq(dev_priv, dev_priv->gt_pm.rps.guc_events);
+	}
+	spin_unlock_irq(&dev_priv->irq_lock);
+}
+
+void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv)
+{
+	assert_rpm_wakelock_held(dev_priv);
+
+	spin_lock_irq(&dev_priv->irq_lock);
+	dev_priv->guc.interrupts_enabled = false;
+
+	gen6_disable_pm_irq(dev_priv, dev_priv->gt_pm.rps.guc_events);
+
+	spin_unlock_irq(&dev_priv->irq_lock);
+	synchronize_irq(dev_priv->drm.irq);
+
+	gen9_reset_guc_interrupts(dev_priv);
+}
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
index 5c52ca208df1..7a14a2e14b30 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/intel_gt_pm.h
@@ -31,6 +31,9 @@ struct intel_rps {
 	/* PM interrupt bits that should never be masked */
 	u32 pm_intrmsk_mbz;
 
+	u32 pm_events;
+	u32 guc_events;
+
 	/*
 	 * Frequencies are stored in potentially platform dependent multiples.
 	 * In other words, *_freq needs to be multiplied by X to be interesting.
@@ -82,6 +85,9 @@ struct intel_gt_pm {
 	struct intel_rps rps;
 	struct intel_rc6 rc6;
 	struct intel_llc_pstate llc_pstate;
+
+	u32 imr;
+	u32 ier;
 };
 
 void intel_gpu_ips_init(struct drm_i915_private *i915);
@@ -94,6 +100,8 @@ void intel_enable_gt_powersave(struct drm_i915_private *i915);
 void intel_disable_gt_powersave(struct drm_i915_private *i915);
 void intel_suspend_gt_powersave(struct drm_i915_private *i915);
 
+void intel_gt_pm_irq_handler(struct drm_i915_private *i915, u32 pm_iir);
+
 void gen6_rps_busy(struct drm_i915_private *i915);
 void gen6_rps_idle(struct drm_i915_private *i915);
 void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
@@ -101,4 +109,7 @@ void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
 int intel_gpu_freq(const struct drm_i915_private *i915, int val);
 int intel_freq_opcode(const struct drm_i915_private *i915, int val);
 
+void gen6_unmask_pm_irq(struct drm_i915_private *i915, u32 mask);
+void gen6_mask_pm_irq(struct drm_i915_private *i915, u32 mask);
+
 #endif /* __INTEL_GT_PM_H__ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 47591758069a..1663c13ec68d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -36,6 +36,7 @@
 #include "i915_gem_render_state.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
+#include "intel_gt_pm.h"
 #include "intel_workarounds.h"
 
 /* Rough estimate of the typical request size, performing a flush,
-- 
2.17.0


* [PATCH 050/262] drm/i915: Track HAS_RPS alongside HAS_RC6 in the device info
From: Chris Wilson @ 2018-05-17  6:04 UTC (permalink / raw)
  To: intel-gfx

For consistency (and elegance!), add intel_device_info.has_rps.
The immediate boon is that RPS support is now emitted alongside the
other capabilities in the debug log and in the GPU error state.
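
The flag is wired through the device-info X-macro list, so a single
func(has_rps) entry yields the struct member as well as the generic
capability dump. A simplified sketch of that pattern (the struct and
printer below are illustrative; the real list lives in
intel_device_info.h, see the hunk that follows):

	#define DEV_INFO_FOR_EACH_FLAG(func) \
		func(has_rc6); \
		func(has_rps) /* the new flag */

	struct device_info_sketch {
	/* Expands each list entry into a one-bit struct member */
	#define DEFINE_FLAG(name) u8 name : 1
		DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG);
	#undef DEFINE_FLAG
	};

	/*
	 * Dump side (illustrative): every flag in the list is printed
	 * generically, so has_rps appears in the logs for free.
	 */
	#define PRINT_FLAG(name) \
		drm_printf(p, #name ": %s\n", yesno(info->name))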

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |  2 ++
 drivers/gpu/drm/i915/i915_pci.c          |  6 ++++++
 drivers/gpu/drm/i915/intel_device_info.h |  1 +
 drivers/gpu/drm/i915/intel_gt_pm.c       | 20 ++++++++++++++++----
 4 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 61ee44868e9d..e873ff9cdae2 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2516,6 +2516,8 @@ intel_info(const struct drm_i915_private *dev_priv)
 #define HAS_RC6p(dev_priv)		 ((dev_priv)->info.has_rc6p)
 #define HAS_RC6pp(dev_priv)		 (false) /* HW was never validated */
 
+#define HAS_RPS(dev_priv)	(INTEL_INFO(dev_priv)->has_rps)
+
 #define HAS_CSR(dev_priv)	((dev_priv)->info.has_csr)
 
 #define HAS_RUNTIME_PM(dev_priv) ((dev_priv)->info.has_runtime_pm)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4364922e935d..b0070e914228 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -235,6 +235,7 @@ static const struct intel_device_info intel_ironlake_m_info = {
 	GEN5_FEATURES,
 	PLATFORM(INTEL_IRONLAKE),
 	.is_mobile = 1, .has_fbc = 1,
+	.has_rps = true,
 };
 
 #define GEN6_FEATURES \
@@ -246,6 +247,7 @@ static const struct intel_device_info intel_ironlake_m_info = {
 	.has_llc = 1, \
 	.has_rc6 = 1, \
 	.has_rc6p = 1, \
+	.has_rps = true, \
 	.has_aliasing_ppgtt = 1, \
 	GEN_DEFAULT_PIPEOFFSETS, \
 	GEN_DEFAULT_PAGE_SIZES, \
@@ -290,6 +292,7 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = {
 	.has_llc = 1, \
 	.has_rc6 = 1, \
 	.has_rc6p = 1, \
+	.has_rps = true, \
 	.has_aliasing_ppgtt = 1, \
 	.has_full_ppgtt = 1, \
 	GEN_DEFAULT_PIPEOFFSETS, \
@@ -343,6 +346,7 @@ static const struct intel_device_info intel_valleyview_info = {
 	.has_psr = 1,
 	.has_runtime_pm = 1,
 	.has_rc6 = 1,
+	.has_rps = true,
 	.has_gmch_display = 1,
 	.has_hotplug = 1,
 	.has_aliasing_ppgtt = 1,
@@ -437,6 +441,7 @@ static const struct intel_device_info intel_cherryview_info = {
 	.has_runtime_pm = 1,
 	.has_resource_streamer = 1,
 	.has_rc6 = 1,
+	.has_rps = true,
 	.has_logical_ring_contexts = 1,
 	.has_gmch_display = 1,
 	.has_aliasing_ppgtt = 1,
@@ -510,6 +515,7 @@ static const struct intel_device_info intel_skylake_gt4_info = {
 	.has_csr = 1, \
 	.has_resource_streamer = 1, \
 	.has_rc6 = 1, \
+	.has_rps = true, \
 	.has_dp_mst = 1, \
 	.has_logical_ring_contexts = 1, \
 	.has_logical_ring_preemption = 1, \
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 3f9881c548ef..2e01bc6eb5a2 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -103,6 +103,7 @@ enum intel_platform {
 	func(has_psr); \
 	func(has_rc6); \
 	func(has_rc6p); \
+	func(has_rps); \
 	func(has_resource_streamer); \
 	func(has_runtime_pm); \
 	func(has_snoop); \
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index 79cb4dbafbea..f096c5cb84ba 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -724,6 +724,9 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
+	if (!HAS_RPS(dev_priv))
+		return;
+
 	mutex_lock(&rps->lock);
 	if (rps->enabled) {
 		u8 freq;
@@ -753,6 +756,9 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
+	if (!HAS_RPS(dev_priv))
+		return;
+
 	/*
 	 * Flush our bottom-half so that it does not race with us
 	 * setting the idle frequency and so that it is bounded by
@@ -780,6 +786,9 @@ void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
 	unsigned long flags;
 	bool boost;
 
+	if (!HAS_RPS(rq->i915))
+		return;
+
 	/*
 	 * This is intentionally racy! We peek at the state here, then
 	 * validate inside the RPS worker.
@@ -922,8 +931,10 @@ static bool sanitize_rc6(struct drm_i915_private *i915)
 	struct intel_device_info *info = mkwrite_device_info(i915);
 
 	/* Powersaving is controlled by the host when inside a VM */
-	if (intel_vgpu_active(i915))
+	if (intel_vgpu_active(i915)) {
 		info->has_rc6 = 0;
+		info->has_rps = 0;
+	}
 
 	if (info->has_rc6 &&
 	    IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) {
@@ -2554,7 +2565,7 @@ static void intel_disable_rps(struct drm_i915_private *i915)
 		valleyview_disable_rps(i915);
 	else if (INTEL_GEN(i915) >= 6)
 		gen6_disable_rps(i915);
-	else if (IS_IRONLAKE_M(i915))
+	else if (INTEL_GEN(i915) >= 5)
 		ironlake_disable_drps(i915);
 
 	i915->gt_pm.rps.enabled = false;
@@ -2624,7 +2635,7 @@ static void intel_enable_rps(struct drm_i915_private *i915)
 		gen8_enable_rps(i915);
 	} else if (INTEL_GEN(i915) >= 6) {
 		gen6_enable_rps(i915);
-	} else if (IS_IRONLAKE_M(i915)) {
+	} else if (INTEL_GEN(i915) >= 5) {
 		ironlake_enable_drps(i915);
 		intel_init_emon(i915);
 	}
@@ -2648,7 +2659,8 @@ void intel_enable_gt_powersave(struct drm_i915_private *i915)
 
 	if (HAS_RC6(i915))
 		intel_enable_rc6(i915);
-	intel_enable_rps(i915);
+	if (HAS_RPS(i915))
+		intel_enable_rps(i915);
 	if (HAS_LLC(i915))
 		intel_enable_llc_pstate(i915);
 
-- 
2.17.0


* [PATCH 051/262] drm/i915: Remove defunct intel_suspend_gt_powersave()
From: Chris Wilson @ 2018-05-17  6:04 UTC (permalink / raw)
  To: intel-gfx

Since commit b7137e0cf1e5 ("drm/i915: Defer enabling rc6 til after we
submit the first batch/context"), intel_suspend_gt_powersave() has been
a no-op. As we still do not need to do anything explicitly on suspend
(we do everything required on idling), remove the defunct function.

References: b7137e0cf1e5 ("drm/i915: Defer enabling rc6 til after we submit the first batch/context")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c    |  1 -
 drivers/gpu/drm/i915/intel_gt_pm.c | 16 ----------------
 drivers/gpu/drm/i915/intel_gt_pm.h |  1 -
 3 files changed, 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b006d0e71a66..a19a94c8ac92 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4975,7 +4975,6 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
 	int ret;
 
 	intel_runtime_pm_get(dev_priv);
-	intel_suspend_gt_powersave(dev_priv);
 
 	mutex_lock(&dev->struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index f096c5cb84ba..f8b75814796d 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -2491,22 +2491,6 @@ void intel_cleanup_gt_powersave(struct drm_i915_private *i915)
 		intel_runtime_pm_put(i915);
 }
 
-/**
- * intel_suspend_gt_powersave - suspend PM work and helper threads
- * @i915: i915 device
- *
- * We don't want to disable RC6 or other features here, we just want
- * to make sure any work we've queued has finished and won't bother
- * us while we're suspended.
- */
-void intel_suspend_gt_powersave(struct drm_i915_private *i915)
-{
-	if (INTEL_GEN(i915) < 6)
-		return;
-
-	/* gen6_rps_idle() will be called later to disable interrupts */
-}
-
 void intel_sanitize_gt_powersave(struct drm_i915_private *i915)
 {
 	i915->gt_pm.rps.enabled = true; /* force RPS disabling */
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
index 7a14a2e14b30..afb7a5858dff 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/intel_gt_pm.h
@@ -98,7 +98,6 @@ void intel_cleanup_gt_powersave(struct drm_i915_private *i915);
 void intel_sanitize_gt_powersave(struct drm_i915_private *i915);
 void intel_enable_gt_powersave(struct drm_i915_private *i915);
 void intel_disable_gt_powersave(struct drm_i915_private *i915);
-void intel_suspend_gt_powersave(struct drm_i915_private *i915);
 
 void intel_gt_pm_irq_handler(struct drm_i915_private *i915, u32 pm_iir);
 
-- 
2.17.0


* [PATCH 052/262] drm/i915: Reorder GT interface code
From: Chris Wilson @ 2018-05-17  6:04 UTC (permalink / raw)
  To: intel-gfx

Try to order the intel_gt_pm code to match the order in which it is
used:
	init
	enable
	disable
	cleanup
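
As a rough map of those phases onto the driver's life (illustrative
ordering only, not a literal call site):

	intel_init_gt_powersave(i915);    /* driver load */
	intel_enable_gt_powersave(i915);  /* first GT unpark */
	...
	intel_disable_gt_powersave(i915); /* modeset/driver teardown */
	intel_cleanup_gt_powersave(i915); /* driver unload */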

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
---
 drivers/gpu/drm/i915/intel_gt_pm.c | 170 ++++++++++++++---------------
 drivers/gpu/drm/i915/intel_gt_pm.h |   5 +-
 2 files changed, 88 insertions(+), 87 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index f8b75814796d..b69ddb5be3e4 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -2400,6 +2400,18 @@ static void intel_init_emon(struct drm_i915_private *dev_priv)
 	dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
 }
 
+void intel_sanitize_gt_powersave(struct drm_i915_private *i915)
+{
+	i915->gt_pm.rps.enabled = true; /* force RPS disabling */
+	i915->gt_pm.rc6.enabled = true; /* force RC6 disabling */
+	intel_disable_gt_powersave(i915);
+
+	if (INTEL_GEN(i915) >= 11)
+		gen11_reset_rps_interrupts(i915);
+	else
+		gen6_reset_rps_interrupts(i915);
+}
+
 void intel_init_gt_powersave(struct drm_i915_private *i915)
 {
 	struct intel_rps *rps = &i915->gt_pm.rps;
@@ -2482,91 +2494,6 @@ void intel_init_gt_powersave(struct drm_i915_private *i915)
 	mutex_unlock(&rps->lock);
 }
 
-void intel_cleanup_gt_powersave(struct drm_i915_private *i915)
-{
-	if (IS_VALLEYVIEW(i915))
-		valleyview_cleanup_gt_powersave(i915);
-
-	if (!HAS_RC6(i915))
-		intel_runtime_pm_put(i915);
-}
-
-void intel_sanitize_gt_powersave(struct drm_i915_private *i915)
-{
-	i915->gt_pm.rps.enabled = true; /* force RPS disabling */
-	i915->gt_pm.rc6.enabled = true; /* force RC6 disabling */
-	intel_disable_gt_powersave(i915);
-
-	if (INTEL_GEN(i915) >= 11)
-		gen11_reset_rps_interrupts(i915);
-	else
-		gen6_reset_rps_interrupts(i915);
-}
-
-static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
-{
-	lockdep_assert_held(&i915->gt_pm.rps.lock);
-
-	if (!i915->gt_pm.llc_pstate.enabled)
-		return;
-
-	/* Currently there is no HW configuration to be done to disable. */
-
-	i915->gt_pm.llc_pstate.enabled = false;
-}
-
-static void intel_disable_rc6(struct drm_i915_private *i915)
-{
-	lockdep_assert_held(&i915->gt_pm.rps.lock);
-
-	if (!i915->gt_pm.rc6.enabled)
-		return;
-
-	if (INTEL_GEN(i915) >= 9)
-		gen9_disable_rc6(i915);
-	else if (IS_CHERRYVIEW(i915))
-		cherryview_disable_rc6(i915);
-	else if (IS_VALLEYVIEW(i915))
-		valleyview_disable_rc6(i915);
-	else if (INTEL_GEN(i915) >= 6)
-		gen6_disable_rc6(i915);
-
-	i915->gt_pm.rc6.enabled = false;
-}
-
-static void intel_disable_rps(struct drm_i915_private *i915)
-{
-	lockdep_assert_held(&i915->gt_pm.rps.lock);
-
-	if (!i915->gt_pm.rps.enabled)
-		return;
-
-	if (INTEL_GEN(i915) >= 9)
-		gen9_disable_rps(i915);
-	else if (IS_CHERRYVIEW(i915))
-		cherryview_disable_rps(i915);
-	else if (IS_VALLEYVIEW(i915))
-		valleyview_disable_rps(i915);
-	else if (INTEL_GEN(i915) >= 6)
-		gen6_disable_rps(i915);
-	else if (INTEL_GEN(i915) >= 5)
-		ironlake_disable_drps(i915);
-
-	i915->gt_pm.rps.enabled = false;
-}
-
-void intel_disable_gt_powersave(struct drm_i915_private *i915)
-{
-	mutex_lock(&i915->gt_pm.rps.lock);
-
-	intel_disable_rc6(i915);
-	intel_disable_rps(i915);
-	if (HAS_LLC(i915))
-		intel_disable_llc_pstate(i915);
-
-	mutex_unlock(&i915->gt_pm.rps.lock);
-}
-
 static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
 {
 	lockdep_assert_held(&i915->gt_pm.rps.lock);
@@ -2651,6 +2578,79 @@ void intel_enable_gt_powersave(struct drm_i915_private *i915)
 	mutex_unlock(&i915->gt_pm.rps.lock);
 }
 
+static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
+{
+	lockdep_assert_held(&i915->gt_pm.rps.lock);
+
+	if (!i915->gt_pm.llc_pstate.enabled)
+		return;
+
+	/* Currently there is no HW configuration to be done to disable. */
+
+	i915->gt_pm.llc_pstate.enabled = false;
+}
+
+static void intel_disable_rc6(struct drm_i915_private *i915)
+{
+	lockdep_assert_held(&i915->gt_pm.rps.lock);
+
+	if (!i915->gt_pm.rc6.enabled)
+		return;
+
+	if (INTEL_GEN(i915) >= 9)
+		gen9_disable_rc6(i915);
+	else if (IS_CHERRYVIEW(i915))
+		cherryview_disable_rc6(i915);
+	else if (IS_VALLEYVIEW(i915))
+		valleyview_disable_rc6(i915);
+	else if (INTEL_GEN(i915) >= 6)
+		gen6_disable_rc6(i915);
+
+	i915->gt_pm.rc6.enabled = false;
+}
+
+static void intel_disable_rps(struct drm_i915_private *i915)
+{
+	lockdep_assert_held(&i915->gt_pm.rps.lock);
+
+	if (!i915->gt_pm.rps.enabled)
+		return;
+
+	if (INTEL_GEN(i915) >= 9)
+		gen9_disable_rps(i915);
+	else if (IS_CHERRYVIEW(i915))
+		cherryview_disable_rps(i915);
+	else if (IS_VALLEYVIEW(i915))
+		valleyview_disable_rps(i915);
+	else if (INTEL_GEN(i915) >= 6)
+		gen6_disable_rps(i915);
+	else if (INTEL_GEN(i915) >= 5)
+		ironlake_disable_drps(i915);
+
+	i915->gt_pm.rps.enabled = false;
+}
+
+void intel_disable_gt_powersave(struct drm_i915_private *i915)
+{
+	mutex_lock(&i915->gt_pm.rps.lock);
+
+	intel_disable_rc6(i915);
+	intel_disable_rps(i915);
+	if (HAS_LLC(i915))
+		intel_disable_llc_pstate(i915);
+
+	mutex_unlock(&i915->gt_pm.rps.lock);
+}
+
+void intel_cleanup_gt_powersave(struct drm_i915_private *i915)
+{
+	if (IS_VALLEYVIEW(i915))
+		valleyview_cleanup_gt_powersave(i915);
+
+	if (!HAS_RC6(i915))
+		intel_runtime_pm_put(i915);
+}
+
 static int byt_gpu_freq(const struct drm_i915_private *i915, int val)
 {
 	const struct intel_rps *rps = &i915->gt_pm.rps;
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
index afb7a5858dff..bd400c9aed7c 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/intel_gt_pm.h
@@ -93,11 +93,12 @@ struct intel_gt_pm {
 void intel_gpu_ips_init(struct drm_i915_private *i915);
 void intel_gpu_ips_teardown(void);
 
-void intel_init_gt_powersave(struct drm_i915_private *i915);
-void intel_cleanup_gt_powersave(struct drm_i915_private *i915);
 void intel_sanitize_gt_powersave(struct drm_i915_private *i915);
+
+void intel_init_gt_powersave(struct drm_i915_private *i915);
 void intel_enable_gt_powersave(struct drm_i915_private *i915);
 void intel_disable_gt_powersave(struct drm_i915_private *i915);
+void intel_cleanup_gt_powersave(struct drm_i915_private *i915);
 
 void intel_gt_pm_irq_handler(struct drm_i915_private *i915, u32 pm_iir);
 
-- 
2.17.0


* [PATCH 053/262] drm/i915: Split control of rps and rc6
From: Chris Wilson @ 2018-05-17  6:04 UTC (permalink / raw)
  To: intel-gfx

Allow ourselves to individually toggle rps or rc6. This will be used
later when we want to enable rps/rc6 at different phases during
device bring-up.

Whilst here, convert the intel_$verb_gt_powersave functions over to
the intel_gt_pm_$verb scheme.

v2: Resurrect llc_pstate; we will need to restore its state on resume.
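
The upshot at the two call sites touched below: unpark enables each
feature individually, and modeset teardown disables them in the
reverse order (condensed from the hunks that follow):

	/* i915_gem_unpark(): the GT is about to become busy */
	intel_gt_pm_enable_rps(i915);
	intel_gt_pm_enable_rc6(i915);
	intel_gt_pm_enable_llc(i915);

	/* intel_modeset_cleanup(): tear down in the reverse order */
	intel_gt_pm_disable_llc(i915);
	intel_gt_pm_disable_rc6(i915);
	intel_gt_pm_disable_rps(i915);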

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c      |   7 +-
 drivers/gpu/drm/i915/i915_gem.c      |  23 +++---
 drivers/gpu/drm/i915/intel_display.c |   6 +-
 drivers/gpu/drm/i915/intel_gt_pm.c   | 109 +++++++++++++++++----------
 drivers/gpu/drm/i915/intel_gt_pm.h   |  18 +++--
 5 files changed, 102 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5c6fe51eb45a..6914b08c69c0 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -944,6 +944,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
 	intel_wopcm_init_early(&dev_priv->wopcm);
 	intel_uc_init_early(dev_priv);
 	intel_pm_setup(dev_priv);
+	intel_gt_pm_init_early(dev_priv);
 	intel_init_dpio(dev_priv);
 	intel_power_domains_init(dev_priv);
 	intel_irq_init(dev_priv);
@@ -1073,7 +1074,7 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv)
  */
 static void i915_driver_cleanup_mmio(struct drm_i915_private *dev_priv)
 {
-	intel_sanitize_gt_powersave(dev_priv);
+	intel_gt_pm_sanitize(dev_priv);
 	intel_uncore_fini(dev_priv);
 	i915_mmio_cleanup(dev_priv);
 	pci_dev_put(dev_priv->bridge_dev);
@@ -1182,7 +1183,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 	intel_uncore_sanitize(dev_priv);
 
 	/* BIOS often leaves RC6 enabled, but disable it for hw init */
-	intel_sanitize_gt_powersave(dev_priv);
+	intel_gt_pm_sanitize(dev_priv);
 
 	intel_opregion_setup(dev_priv);
 
@@ -1722,7 +1723,7 @@ static int i915_drm_resume(struct drm_device *dev)
 	int ret;
 
 	disable_rpm_wakeref_asserts(dev_priv);
-	intel_sanitize_gt_powersave(dev_priv);
+	intel_gt_pm_sanitize(dev_priv);
 
 	ret = i915_ggtt_enable_hw(dev_priv);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a19a94c8ac92..b056c6d63e18 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -202,7 +202,10 @@ void i915_gem_unpark(struct drm_i915_private *i915)
 	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
 		i915->gt.epoch = 1;
 
-	intel_enable_gt_powersave(i915);
+	intel_gt_pm_enable_rps(i915);
+	intel_gt_pm_enable_rc6(i915);
+	intel_gt_pm_enable_llc(i915);
+
 	i915_update_gfx_val(i915);
 	if (INTEL_GEN(i915) >= 6)
 		gen6_rps_busy(i915);
@@ -5394,10 +5397,12 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 		goto err_unlock;
 	}
 
+	intel_gt_pm_init(dev_priv);
+
 	ret = i915_gem_contexts_init(dev_priv);
 	if (ret) {
 		GEM_BUG_ON(ret == -EIO);
-		goto err_ggtt;
+		goto err_pm;
 	}
 
 	ret = intel_engines_init(dev_priv);
@@ -5406,11 +5411,9 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 		goto err_context;
 	}
 
-	intel_init_gt_powersave(dev_priv);
-
 	ret = intel_uc_init(dev_priv);
 	if (ret)
-		goto err_pm;
+		goto err_engines;
 
 	ret = i915_gem_init_hw(dev_priv);
 	if (ret)
@@ -5458,15 +5461,15 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	intel_uc_fini_hw(dev_priv);
 err_uc_init:
 	intel_uc_fini(dev_priv);
-err_pm:
-	if (ret != -EIO) {
-		intel_cleanup_gt_powersave(dev_priv);
+err_engines:
+	if (ret != -EIO)
 		i915_gem_cleanup_engines(dev_priv);
-	}
 err_context:
 	if (ret != -EIO)
 		i915_gem_contexts_fini(dev_priv);
-err_ggtt:
+err_pm:
+	if (ret != -EIO)
+		intel_gt_pm_fini(dev_priv);
 err_unlock:
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 84ef96659b51..65688f7c9642 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -15685,7 +15685,9 @@ void intel_modeset_cleanup(struct drm_device *dev)
 	flush_work(&dev_priv->atomic_helper.free_work);
 	WARN_ON(!llist_empty(&dev_priv->atomic_helper.free_list));
 
-	intel_disable_gt_powersave(dev_priv);
+	intel_gt_pm_disable_llc(dev_priv);
+	intel_gt_pm_disable_rc6(dev_priv);
+	intel_gt_pm_disable_rps(dev_priv);
 
 	/*
 	 * Interrupts and polling as the first thing to avoid creating havoc.
@@ -15714,7 +15716,7 @@ void intel_modeset_cleanup(struct drm_device *dev)
 
 	intel_cleanup_overlay(dev_priv);
 
-	intel_cleanup_gt_powersave(dev_priv);
+	intel_gt_pm_fini(dev_priv);
 
 	intel_teardown_gmbus(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index b69ddb5be3e4..35d7410fdf8f 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -2400,11 +2400,15 @@ static void intel_init_emon(struct drm_i915_private *dev_priv)
 	dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
 }
 
-void intel_sanitize_gt_powersave(struct drm_i915_private *i915)
+void intel_gt_pm_sanitize(struct drm_i915_private *i915)
 {
-	i915->gt_pm.rps.enabled = true; /* force RPS disabling */
+	intel_gt_pm_disable_llc(i915);
+
 	i915->gt_pm.rc6.enabled = true; /* force RC6 disabling */
-	intel_disable_gt_powersave(i915);
+	intel_gt_pm_disable_rc6(i915);
+
+	i915->gt_pm.rps.enabled = true; /* force RPS disabling */
+	intel_gt_pm_disable_rps(i915);
 
 	if (INTEL_GEN(i915) >= 11)
 		gen11_reset_rps_interrupts(i915);
@@ -2412,12 +2416,17 @@ void intel_sanitize_gt_powersave(struct drm_i915_private *i915)
 		gen6_reset_rps_interrupts(i915);
 }
 
-void intel_init_gt_powersave(struct drm_i915_private *i915)
+void intel_gt_pm_init_early(struct drm_i915_private *i915)
 {
 	struct intel_rps *rps = &i915->gt_pm.rps;
 
 	mutex_init(&rps->lock);
 	INIT_WORK(&rps->work, intel_rps_work);
+}
+
+void intel_gt_pm_init(struct drm_i915_private *i915)
+{
+	struct intel_rps *rps = &i915->gt_pm.rps;
 
 	if (HAS_GUC_SCHED(i915))
 		rps->guc_events = GEN9_GUC_TO_HOST_INT_EVENT;
@@ -2494,19 +2503,7 @@ void intel_init_gt_powersave(struct drm_i915_private *i915)
 	mutex_unlock(&rps->lock);
 }
 
-static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
-{
-	lockdep_assert_held(&i915->gt_pm.rps.lock);
-
-	if (i915->gt_pm.llc_pstate.enabled)
-		return;
-
-	gen6_update_ring_freq(i915);
-
-	i915->gt_pm.llc_pstate.enabled = true;
-}
-
-static void intel_enable_rc6(struct drm_i915_private *i915)
+static void __enable_rc6(struct drm_i915_private *i915)
 {
 	lockdep_assert_held(&i915->gt_pm.rps.lock);
 
@@ -2527,7 +2524,17 @@ static void intel_enable_rc6(struct drm_i915_private *i915)
 	i915->gt_pm.rc6.enabled = true;
 }
 
-static void intel_enable_rps(struct drm_i915_private *i915)
+void intel_gt_pm_enable_rc6(struct drm_i915_private *i915)
+{
+	if (!HAS_RC6(i915))
+		return;
+
+	mutex_lock(&i915->gt_pm.rps.lock);
+	__enable_rc6(i915);
+	mutex_unlock(&i915->gt_pm.rps.lock);
+}
+
+static void __enable_rps(struct drm_i915_private *i915)
 {
 	struct intel_rps *rps = &i915->gt_pm.rps;
 
@@ -2560,37 +2567,38 @@ static void intel_enable_rps(struct drm_i915_private *i915)
 	rps->enabled = true;
 }
 
-void intel_enable_gt_powersave(struct drm_i915_private *i915)
+void intel_gt_pm_enable_rps(struct drm_i915_private *i915)
 {
-	/* Powersaving is controlled by the host when inside a VM */
-	if (intel_vgpu_active(i915))
+	if (!HAS_RPS(i915))
 		return;
 
 	mutex_lock(&i915->gt_pm.rps.lock);
-
-	if (HAS_RC6(i915))
-		intel_enable_rc6(i915);
-	if (HAS_RPS(i915))
-		intel_enable_rps(i915);
-	if (HAS_LLC(i915))
-		intel_enable_llc_pstate(i915);
-
+	__enable_rps(i915);
 	mutex_unlock(&i915->gt_pm.rps.lock);
 }
 
-static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
+static void __enable_llc(struct drm_i915_private *i915)
 {
 	lockdep_assert_held(&i915->gt_pm.rps.lock);
 
-	if (!i915->gt_pm.llc_pstate.enabled)
+	if (i915->gt_pm.llc_pstate.enabled)
 		return;
 
-	/* Currently there is no HW configuration to be done to disable. */
+	gen6_update_ring_freq(i915);
+	i915->gt_pm.llc_pstate.enabled = true;
+}
 
-	i915->gt_pm.llc_pstate.enabled = false;
+void intel_gt_pm_enable_llc(struct drm_i915_private *i915)
+{
+	if (!HAS_LLC(i915))
+		return;
+
+	mutex_lock(&i915->gt_pm.rps.lock);
+	__enable_llc(i915);
+	mutex_unlock(&i915->gt_pm.rps.lock);
 }
 
-static void intel_disable_rc6(struct drm_i915_private *i915)
+static void __disable_rc6(struct drm_i915_private *i915)
 {
 	lockdep_assert_held(&i915->gt_pm.rps.lock);
 
@@ -2609,7 +2617,14 @@ static void intel_disable_rc6(struct drm_i915_private *i915)
 	i915->gt_pm.rc6.enabled = false;
 }
 
-static void intel_disable_rps(struct drm_i915_private *i915)
+void intel_gt_pm_disable_rc6(struct drm_i915_private *i915)
+{
+	mutex_lock(&i915->gt_pm.rps.lock);
+	__disable_rc6(i915);
+	mutex_unlock(&i915->gt_pm.rps.lock);
+}
+
+static void __disable_rps(struct drm_i915_private *i915)
 {
 	lockdep_assert_held(&i915->gt_pm.rps.lock);
 
@@ -2630,19 +2645,31 @@ static void intel_disable_rps(struct drm_i915_private *i915)
 	i915->gt_pm.rps.enabled = false;
 }
 
-void intel_disable_gt_powersave(struct drm_i915_private *i915)
+void intel_gt_pm_disable_rps(struct drm_i915_private *i915)
 {
 	mutex_lock(&i915->gt_pm.rps.lock);
+	__disable_rps(i915);
+	mutex_unlock(&i915->gt_pm.rps.lock);
+}
+
+static void __disable_llc(struct drm_i915_private *i915)
+{
+	lockdep_assert_held(&i915->gt_pm.rps.lock);
 
-	intel_disable_rc6(i915);
-	intel_disable_rps(i915);
-	if (HAS_LLC(i915))
-		intel_disable_llc_pstate(i915);
+	if (!i915->gt_pm.llc_pstate.enabled)
+		return;
 
+	i915->gt_pm.llc_pstate.enabled = false;
+}
+
+void intel_gt_pm_disable_llc(struct drm_i915_private *i915)
+{
+	mutex_lock(&i915->gt_pm.rps.lock);
+	__disable_llc(i915);
 	mutex_unlock(&i915->gt_pm.rps.lock);
 }
 
-void intel_cleanup_gt_powersave(struct drm_i915_private *i915)
+void intel_gt_pm_fini(struct drm_i915_private *i915)
 {
 	if (IS_VALLEYVIEW(i915))
 		valleyview_cleanup_gt_powersave(i915);
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
index bd400c9aed7c..0d95a52c1941 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/intel_gt_pm.h
@@ -93,12 +93,20 @@ struct intel_gt_pm {
 void intel_gpu_ips_init(struct drm_i915_private *i915);
 void intel_gpu_ips_teardown(void);
 
-void intel_sanitize_gt_powersave(struct drm_i915_private *i915);
+void intel_gt_pm_sanitize(struct drm_i915_private *i915);
 
-void intel_init_gt_powersave(struct drm_i915_private *i915);
-void intel_enable_gt_powersave(struct drm_i915_private *i915);
-void intel_disable_gt_powersave(struct drm_i915_private *i915);
-void intel_cleanup_gt_powersave(struct drm_i915_private *i915);
+void intel_gt_pm_init_early(struct drm_i915_private *i915);
+void intel_gt_pm_init(struct drm_i915_private *i915);
+void intel_gt_pm_fini(struct drm_i915_private *i915);
+
+void intel_gt_pm_enable_rps(struct drm_i915_private *i915);
+void intel_gt_pm_disable_rps(struct drm_i915_private *i915);
+
+void intel_gt_pm_enable_rc6(struct drm_i915_private *i915);
+void intel_gt_pm_disable_rc6(struct drm_i915_private *i915);
+
+void intel_gt_pm_enable_llc(struct drm_i915_private *i915);
+void intel_gt_pm_disable_llc(struct drm_i915_private *i915);
 
 void intel_gt_pm_irq_handler(struct drm_i915_private *i915, u32 pm_iir);
 
-- 
2.17.0


* [PATCH 054/262] drm/i915: Enabling rc6 and rps have different requirements, so separate them
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (51 preceding siblings ...)
  2018-05-17  6:04 ` [PATCH 053/262] drm/i915: Split control of rps and rc6 Chris Wilson
@ 2018-05-17  6:04 ` Chris Wilson
  2018-05-17  6:04 ` [PATCH 055/262] drm/i915: Simplify rc6/rps enabling Chris Wilson
                   ` (2 subsequent siblings)
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:04 UTC (permalink / raw)
  To: intel-gfx

On Ironlake, we are required not to enable rc6 until the GPU is loaded
with a valid context; after that point, it can start to use a powersaving
context for rc6. This seems a reasonable requirement to impose on all
generations, as we are already priming the system by loading a context on
resume. We can then simply delay enabling rc6 until we know the GPU is
awake.

v2: Reorder intel_gt_pm_fini in i915_gem_fini to match setup ordering,
and remove the superfluous intel_gt_pm_sanitize() on mmio cleanup.
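
The bring-up ordering is captured in a new load_power_context() helper,
shown in outline here (taken from the diff below, with comments added):

static int load_power_context(struct drm_i915_private *i915)
{
	int err;

	intel_gt_pm_sanitize(i915);	/* start from a known-off state */
	intel_gt_pm_enable_rps(i915);

	/* Prime the GPU with a valid context before we enable rc6 */
	err = i915_gem_switch_to_kernel_context(i915);
	if (err)
		goto err;

	err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED);
	if (err)
		goto err;

	intel_gt_pm_enable_rc6(i915);	/* now safe, even on Ironlake */
	intel_gt_pm_enable_llc(i915);

	return 0;

err:
	intel_gt_pm_sanitize(i915);
	return err;
}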

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c      |  2 +-
 drivers/gpu/drm/i915/i915_gem.c      | 40 +++++++++++++++++++++-------
 drivers/gpu/drm/i915/intel_display.c |  6 -----
 drivers/gpu/drm/i915/intel_gt_pm.c   |  2 ++
 4 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 6914b08c69c0..f4851b06f2af 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -645,6 +645,7 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv)
 	intel_uc_fini(dev_priv);
 	i915_gem_cleanup_engines(dev_priv);
 	i915_gem_contexts_fini(dev_priv);
+	intel_gt_pm_fini(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	intel_uc_fini_misc(dev_priv);
@@ -1074,7 +1075,6 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv)
  */
 static void i915_driver_cleanup_mmio(struct drm_i915_private *dev_priv)
 {
-	intel_gt_pm_sanitize(dev_priv);
 	intel_uncore_fini(dev_priv);
 	i915_mmio_cleanup(dev_priv);
 	pci_dev_put(dev_priv->bridge_dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b056c6d63e18..fc89c7d7417b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -202,10 +202,6 @@ void i915_gem_unpark(struct drm_i915_private *i915)
 	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
 		i915->gt.epoch = 1;
 
-	intel_gt_pm_enable_rps(i915);
-	intel_gt_pm_enable_rc6(i915);
-	intel_gt_pm_enable_llc(i915);
-
 	i915_update_gfx_val(i915);
 	if (INTEL_GEN(i915) >= 6)
 		gen6_rps_busy(i915);
@@ -3342,11 +3338,38 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 		i915_gem_reset_finish_engine(engine);
 	}
 
+	intel_gt_pm_sanitize(i915);
+
 	GEM_TRACE("end\n");
 
 	wake_up_all(&i915->gpu_error.reset_queue);
 }
 
+static int load_power_context(struct drm_i915_private *i915)
+{
+	int err;
+
+	intel_gt_pm_sanitize(i915);
+	intel_gt_pm_enable_rps(i915);
+
+	err = i915_gem_switch_to_kernel_context(i915);
+	if (err)
+		goto err;
+
+	err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED);
+	if (err)
+		goto err;
+
+	intel_gt_pm_enable_rc6(i915);
+	intel_gt_pm_enable_llc(i915);
+
+	return 0;
+
+err:
+	intel_gt_pm_sanitize(i915);
+	return err;
+}
+
 bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 {
 	struct i915_timeline *tl;
@@ -5076,7 +5099,7 @@ void i915_gem_resume(struct drm_i915_private *i915)
 	intel_uc_resume(i915);
 
 	/* Always reload a context for powersaving. */
-	if (i915_gem_switch_to_kernel_context(i915))
+	if (load_power_context(i915))
 		goto err_wedged;
 
 out_unlock:
@@ -5271,11 +5294,8 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 			goto err_active;
 	}
 
-	err = i915_gem_switch_to_kernel_context(i915);
-	if (err)
-		goto err_active;
-
-	err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED);
+	/* Flush the default context image to memory, and enable powersaving. */
+	err = load_power_context(i915);
 	if (err)
 		goto err_active;
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 65688f7c9642..80ab7ef254a1 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -15685,10 +15685,6 @@ void intel_modeset_cleanup(struct drm_device *dev)
 	flush_work(&dev_priv->atomic_helper.free_work);
 	WARN_ON(!llist_empty(&dev_priv->atomic_helper.free_list));
 
-	intel_gt_pm_disable_llc(dev_priv);
-	intel_gt_pm_disable_rc6(dev_priv);
-	intel_gt_pm_disable_rps(dev_priv);
-
 	/*
 	 * Interrupts and polling as the first thing to avoid creating havoc.
 	 * Too much stuff here (turning of connectors, ...) would
@@ -15716,8 +15712,6 @@ void intel_modeset_cleanup(struct drm_device *dev)
 
 	intel_cleanup_overlay(dev_priv);
 
-	intel_gt_pm_fini(dev_priv);
-
 	intel_teardown_gmbus(dev_priv);
 
 	destroy_workqueue(dev_priv->modeset_wq);
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index 35d7410fdf8f..8d79ba7c0805 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -2671,6 +2671,8 @@ void intel_gt_pm_disable_llc(struct drm_i915_private *i915)
 
 void intel_gt_pm_fini(struct drm_i915_private *i915)
 {
+	intel_gt_pm_sanitize(i915);
+
 	if (IS_VALLEYVIEW(i915))
 		valleyview_cleanup_gt_powersave(i915);
 
-- 
2.17.0


* [PATCH 055/262] drm/i915: Simplify rc6/rps enabling
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (52 preceding siblings ...)
  2018-05-17  6:04 ` [PATCH 054/262] drm/i915: Enabling rc6 and rps have different requirements, so separate them Chris Wilson
@ 2018-05-17  6:04 ` Chris Wilson
  2018-05-17  6:04 ` [PATCH 056/262] drm/i915: Refactor frequency bounds computation Chris Wilson
  2018-05-17  6:12 ` [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:04 UTC (permalink / raw)
  To: intel-gfx

Since we know that rc6 and rps are enabled (if available) whenever the
GT is awake, we can drop the individual enabled tracking and move the
enabling into the gen6_rps_busy()/gen6_rps_idle() entry points (now
called intel_gt_pm_busy() and intel_gt_pm_idle()).
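
The per-feature enabled flags are replaced by a single rps->active flag
that is set on entry to intel_gt_pm_busy() and cleared in
intel_gt_pm_idle(); the RPS worker and waitboosting simply bail out when
it is clear. A condensed sketch of the new entry points (the full
implementations below also handle the initial reclocking, forcewake and
interrupt masking):

void intel_gt_pm_busy(struct drm_i915_private *i915)
{
	struct intel_rps *rps = &i915->gt_pm.rps;

	mutex_lock(&rps->lock);
	rps->active = true;
	/* ... reclock to max(cur_freq, efficient_freq), enable interrupts ... */
	mutex_unlock(&rps->lock);
}

void intel_gt_pm_idle(struct drm_i915_private *i915)
{
	struct intel_rps *rps = &i915->gt_pm.rps;

	mutex_lock(&rps->lock);
	/* ... drop to idle_freq, mask the RPS interrupts ... */
	rps->active = false;
	mutex_unlock(&rps->lock);
}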

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c  |   6 +-
 drivers/gpu/drm/i915/i915_drv.c      |   3 -
 drivers/gpu/drm/i915/i915_gem.c      |  15 +-
 drivers/gpu/drm/i915/i915_sysfs.c    |   6 +-
 drivers/gpu/drm/i915/intel_display.c |   4 +-
 drivers/gpu/drm/i915/intel_gt_pm.c   | 291 +++++++++------------------
 drivers/gpu/drm/i915/intel_gt_pm.h   |  26 +--
 7 files changed, 121 insertions(+), 230 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 75c89014033c..0b8f62eaf10d 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2191,9 +2191,9 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 	struct drm_file *file;
 
-	seq_printf(m, "RPS enabled? %d\n", rps->enabled);
 	seq_printf(m, "GPU busy? %s [%d requests]\n",
 		   yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
+	seq_printf(m, "RPS active? %s\n", yesno(rps->active));
 	seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
 	seq_printf(m, "Boosts outstanding? %d\n",
 		   atomic_read(&rps->num_waiters));
@@ -2226,9 +2226,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 		   atomic_read(&rps->boosts));
 	mutex_unlock(&dev->filelist_mutex);
 
-	if (INTEL_GEN(dev_priv) >= 6 &&
-	    rps->enabled &&
-	    dev_priv->gt.active_requests) {
+	if (INTEL_GEN(dev_priv) >= 6 && dev_priv->gt.awake) {
 		u32 rpup, rpupei;
 		u32 rpdown, rpdownei;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f4851b06f2af..43fc0fffd9cd 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -2588,9 +2588,6 @@ static int intel_runtime_suspend(struct device *kdev)
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	int ret;
 
-	if (WARN_ON_ONCE(!(dev_priv->gt_pm.rc6.enabled && HAS_RC6(dev_priv))))
-		return -ENODEV;
-
 	if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev_priv)))
 		return -ENODEV;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fc89c7d7417b..d4b0774a3367 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -151,10 +151,9 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
 	i915_pmu_gt_parked(i915);
 	i915_vma_parked(i915);
 
-	i915->gt.awake = false;
+	intel_gt_pm_idle(i915);
 
-	if (INTEL_GEN(i915) >= 6)
-		gen6_rps_idle(i915);
+	i915->gt.awake = false;
 
 	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ);
 
@@ -202,9 +201,9 @@ void i915_gem_unpark(struct drm_i915_private *i915)
 	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
 		i915->gt.epoch = 1;
 
+	intel_gt_pm_busy(i915);
 	i915_update_gfx_val(i915);
-	if (INTEL_GEN(i915) >= 6)
-		gen6_rps_busy(i915);
+
 	i915_pmu_gt_unparked(i915);
 
 	intel_engines_unpark(i915);
@@ -465,10 +464,8 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
 	 * forcing the clocks too high for the whole system, we only allow
 	 * each client to waitboost once in a busy period.
 	 */
-	if (rps_client && !i915_request_started(rq)) {
-		if (INTEL_GEN(rq->i915) >= 6)
-			gen6_rps_boost(rq, rps_client);
-	}
+	if (rps_client && !i915_request_started(rq))
+		intel_rps_boost(rq, rps_client);
 
 	timeout = i915_request_wait(rq, flags, timeout);
 
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index a72aab28399f..db9d55fe449b 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -377,7 +377,8 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 			  intel_gpu_freq(dev_priv, val));
 
 	rps->max_freq_softlimit = val;
-	schedule_work(&rps->work);
+	if (rps->active)
+		schedule_work(&rps->work);
 
 unlock:
 	mutex_unlock(&rps->lock);
@@ -419,7 +420,8 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 	}
 
 	rps->min_freq_softlimit = val;
-	schedule_work(&rps->work);
+	if (rps->active)
+		schedule_work(&rps->work);
 
 unlock:
 	mutex_unlock(&rps->lock);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 80ab7ef254a1..1da55f7f5714 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12799,7 +12799,7 @@ static int do_rps_boost(struct wait_queue_entry *_wait,
 	 * vblank without our intervention, so leave RPS alone.
 	 */
 	if (!i915_request_started(rq))
-		gen6_rps_boost(rq, NULL);
+		intel_rps_boost(rq, NULL);
 	i915_request_put(rq);
 
 	drm_crtc_vblank_put(wait->crtc);
@@ -12817,7 +12817,7 @@ static void add_rps_boost_after_vblank(struct drm_crtc *crtc,
 	if (!dma_fence_is_i915(fence))
 		return;
 
-	if (INTEL_GEN(to_i915(crtc->dev)) < 6)
+	if (!HAS_RPS(to_i915(crtc->dev)))
 		return;
 
 	if (drm_crtc_vblank_get(crtc))
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index 8d79ba7c0805..328f15db22e4 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -327,15 +327,11 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *i915, u8 val)
  */
 static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
 {
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	if (val != rps->cur_freq) {
+	if (val != dev_priv->gt_pm.rps.cur_freq) {
 		if (INTEL_GEN(dev_priv) >= 9)
-			I915_WRITE(GEN6_RPNSWREQ,
-				   GEN9_FREQUENCY(val));
+			I915_WRITE(GEN6_RPNSWREQ, GEN9_FREQUENCY(val));
 		else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
-			I915_WRITE(GEN6_RPNSWREQ,
-				   HSW_FREQUENCY(val));
+			I915_WRITE(GEN6_RPNSWREQ, HSW_FREQUENCY(val));
 		else
 			I915_WRITE(GEN6_RPNSWREQ,
 				   GEN6_FREQUENCY(val) |
@@ -352,9 +348,6 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
 	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
 	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
 
-	rps->cur_freq = val;
-	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
-
 	return 0;
 }
 
@@ -377,48 +370,17 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
 	gen6_set_rps_thresholds(dev_priv, val);
 	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
 
-	dev_priv->gt_pm.rps.cur_freq = val;
-	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
-
 	return 0;
 }
 
-/*
- * vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
- *
- * If Gfx is Idle, then
- * 1. Forcewake Media well.
- * 2. Request idle freq.
- * 3. Release Forcewake of Media well.
- */
-static void vlv_set_rps_idle(struct drm_i915_private *i915)
+static int __intel_set_rps(struct drm_i915_private *i915, u8 val)
 {
-	struct intel_rps *rps = &i915->gt_pm.rps;
-	u32 val = rps->idle_freq;
-	int err;
-
-	if (rps->cur_freq <= val)
-		return;
-
-	/*
-	 * The punit delays the write of the frequency and voltage until it
-	 * determines the GPU is awake. During normal usage we don't want to
-	 * waste power changing the frequency if the GPU is sleeping (rc6).
-	 * However, the GPU and driver is now idle and we do not want to delay
-	 * switching to minimum voltage (reducing power whilst idle) as we do
-	 * not expect to be woken in the near future and so must flush the
-	 * change by waking the device.
-	 *
-	 * We choose to take the media powerwell (either would do to trick the
-	 * punit into committing the voltage change) as that takes a lot less
-	 * power than the render powerwell.
-	 */
-	intel_uncore_forcewake_get(i915, FORCEWAKE_MEDIA);
-	err = valleyview_set_rps(i915, val);
-	intel_uncore_forcewake_put(i915, FORCEWAKE_MEDIA);
-
-	if (err)
-		DRM_ERROR("Failed to set RPS for idle\n");
+	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+		return valleyview_set_rps(i915, val);
+	else if (INTEL_GEN(i915) >= 6)
+		return gen6_set_rps(i915, val);
+	else
+		return 0;
 }
 
 static int intel_set_rps(struct drm_i915_private *i915, u8 val)
@@ -427,20 +389,20 @@ static int intel_set_rps(struct drm_i915_private *i915, u8 val)
 	int err;
 
 	lockdep_assert_held(&rps->lock);
+	GEM_BUG_ON(!rps->active);
 	GEM_BUG_ON(val > rps->max_freq);
 	GEM_BUG_ON(val < rps->min_freq);
 
-	if (!rps->enabled) {
+	err = __intel_set_rps(i915, val);
+	if (err)
+		return err;
+
+	if (val != rps->cur_freq) {
+		trace_intel_gpu_freq_change(intel_gpu_freq(i915, val));
 		rps->cur_freq = val;
-		return 0;
 	}
 
-	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
-		err = valleyview_set_rps(i915, val);
-	else
-		err = gen6_set_rps(i915, val);
-
-	return err;
+	return 0;
 }
 
 static i915_reg_t gen6_pm_iir(struct drm_i915_private *dev_priv)
@@ -538,18 +500,11 @@ static void enable_rps_interrupts(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
-	if (READ_ONCE(rps->interrupts_enabled))
-		return;
-
 	if (WARN_ON_ONCE(IS_GEN11(dev_priv)))
 		return;
 
 	spin_lock_irq(&dev_priv->irq_lock);
-	WARN_ON_ONCE(rps->pm_iir);
-	WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & rps->pm_events);
-	rps->interrupts_enabled = true;
 	gen6_enable_pm_irq(dev_priv, rps->pm_events);
-
 	spin_unlock_irq(&dev_priv->irq_lock);
 }
 
@@ -557,29 +512,15 @@ static void disable_rps_interrupts(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
-	if (!READ_ONCE(rps->interrupts_enabled))
-		return;
-
 	if (WARN_ON_ONCE(IS_GEN11(dev_priv)))
 		return;
 
 	spin_lock_irq(&dev_priv->irq_lock);
-	rps->interrupts_enabled = false;
-
 	I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u));
-
 	gen6_disable_pm_irq(dev_priv, rps->pm_events);
-
 	spin_unlock_irq(&dev_priv->irq_lock);
-	synchronize_irq(dev_priv->drm.irq);
 
-	/* Now that we will not be generating any more work, flush any
-	 * outstanding tasks. As we are called on the RPS idle path,
-	 * we will reset the GPU to minimum frequencies, so the current
-	 * state of the worker can be discarded.
-	 */
-	cancel_work_sync(&rps->work);
-	gen6_reset_rps_interrupts(dev_priv);
+	synchronize_irq(dev_priv->drm.irq);
 }
 
 static void vlv_c0_read(struct drm_i915_private *dev_priv,
@@ -646,6 +587,9 @@ static void intel_rps_work(struct work_struct *work)
 
 	mutex_lock(&rps->lock);
 
+	if (!rps->active)
+		goto unlock;
+
 	min = rps->min_freq_softlimit;
 	max = rps->max_freq_softlimit;
 	if (client_boost && max < rps->boost_freq)
@@ -694,106 +638,125 @@ static void intel_rps_work(struct work_struct *work)
 		adj = 0;
 	}
 
-	mutex_unlock(&rps->lock);
-
 	if (pm_iir) {
 		spin_lock_irq(&i915->irq_lock);
-		if (rps->interrupts_enabled)
-			gen6_unmask_pm_irq(i915, rps->pm_events);
+		gen6_unmask_pm_irq(i915, rps->pm_events);
 		spin_unlock_irq(&i915->irq_lock);
 		rps->last_adj = adj;
 	}
+
+unlock:
+	mutex_unlock(&rps->lock);
 }
 
 void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
-	if (pm_iir & rps->pm_events) {
+	if (rps->active && pm_iir & rps->pm_events) {
 		spin_lock(&dev_priv->irq_lock);
 		gen6_mask_pm_irq(dev_priv, pm_iir & rps->pm_events);
-		if (rps->interrupts_enabled) {
-			rps->pm_iir |= pm_iir & rps->pm_events;
-			schedule_work(&rps->work);
-		}
+		rps->pm_iir |= pm_iir & rps->pm_events;
 		spin_unlock(&dev_priv->irq_lock);
+
+		schedule_work(&rps->work);
 	}
 }
 
-void gen6_rps_busy(struct drm_i915_private *dev_priv)
+void intel_gt_pm_busy(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	u8 freq;
 
 	if (!HAS_RPS(dev_priv))
 		return;
 
-	mutex_lock(&rps->lock);
-	if (rps->enabled) {
-		u8 freq;
+	GEM_BUG_ON(rps->pm_iir);
+	GEM_BUG_ON(rps->active);
 
-		I915_WRITE(GEN6_PMINTRMSK,
-			   gen6_rps_pm_mask(dev_priv, rps->cur_freq));
+	mutex_lock(&rps->lock);
+	rps->active = true;
 
-		enable_rps_interrupts(dev_priv);
-		memset(&rps->ei, 0, sizeof(rps->ei));
+	/*
+	 * Use the user's desired frequency as a guide, but for better
+	 * performance, jump directly to RPe as our starting frequency.
+	 */
+	freq = max(rps->cur_freq, rps->efficient_freq);
+	if (intel_set_rps(dev_priv,
+			  clamp(freq,
+				rps->min_freq_softlimit,
+				rps->max_freq_softlimit)))
+		DRM_DEBUG_DRIVER("Failed to set busy frequency\n");
 
-		/*
-		 * Use the user's desired frequency as a guide, but for better
-		 * performance, jump directly to RPe as our starting frequency.
-		 */
-		freq = max(rps->cur_freq, rps->efficient_freq);
+	rps->last_adj = 0;
 
-		if (intel_set_rps(dev_priv,
-				  clamp(freq,
-					rps->min_freq_softlimit,
-					rps->max_freq_softlimit)))
-			DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
+	if (INTEL_GEN(dev_priv) >= 6) {
+		memset(&rps->ei, 0, sizeof(rps->ei));
+		enable_rps_interrupts(dev_priv);
 	}
+
 	mutex_unlock(&rps->lock);
 }
 
-void gen6_rps_idle(struct drm_i915_private *dev_priv)
+void intel_gt_pm_idle(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
-	if (!HAS_RPS(dev_priv))
+	if (!rps->active)
 		return;
 
-	/*
-	 * Flush our bottom-half so that it does not race with us
-	 * setting the idle frequency and so that it is bounded by
-	 * our rpm wakeref. And then disable the interrupts to stop any
-	 * futher RPS reclocking whilst we are asleep.
-	 */
+	mutex_lock(&rps->lock);
+
 	disable_rps_interrupts(dev_priv);
 
-	mutex_lock(&rps->lock);
-	if (rps->enabled) {
-		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-			vlv_set_rps_idle(dev_priv);
-		else
-			gen6_set_rps(dev_priv, rps->idle_freq);
-		rps->last_adj = 0;
+	if (rps->cur_freq > rps->idle_freq) {
+		/*
+		 * The punit delays the write of the frequency and voltage
+		 * until it determines the GPU is awake. During normal usage we
+		 * don't want to waste power changing the frequency if the GPU
+		 * is sleeping (rc6).  However, the GPU and driver is now idle
+		 * and we do not want to delay switching to minimum voltage
+		 * (reducing power whilst idle) as we do not expect to be woken
+		 * in the near future and so must flush the change by waking
+		 * the device.
+		 *
+		 * We choose to take the media powerwell (either would do to
+		 * trick the punit into committing the voltage change) as that
+		 * takes a lot less power than the render powerwell.
+		 */
+		intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
+		if (__intel_set_rps(dev_priv, rps->idle_freq))
+			DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
+		rps->cur_freq = rps->idle_freq;
+		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
+	}
+
+	if (INTEL_GEN(dev_priv) >= 6) {
 		I915_WRITE(GEN6_PMINTRMSK,
 			   gen6_sanitize_rps_pm_mask(dev_priv, ~0));
 	}
+
+	rps->last_adj = 0;
+	rps->active = false;
 	mutex_unlock(&rps->lock);
+
+	/*
+	 * Now that we will not be generating any more work, flush any
+	 * outstanding tasks. As we are called on the RPS idle path,
+	 * we will reset the GPU to minimum frequencies, so the current
+	 * state of the worker can be discarded.
+	 */
+	cancel_work_sync(&rps->work);
+	gen6_reset_rps_interrupts(dev_priv);
 }
 
-void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
+void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
 {
 	struct intel_rps *rps = &rq->i915->gt_pm.rps;
 	unsigned long flags;
 	bool boost;
 
-	if (!HAS_RPS(rq->i915))
-		return;
-
-	/*
-	 * This is intentionally racy! We peek at the state here, then
-	 * validate inside the RPS worker.
-	 */
-	if (!rps->enabled)
+	if (!READ_ONCE(rps->active))
 		return;
 
 	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
@@ -1005,20 +968,6 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
 	}
 }
 
-static void reset_rps(struct drm_i915_private *i915,
-		      int (*set)(struct drm_i915_private *, u8))
-{
-	struct intel_rps *rps = &i915->gt_pm.rps;
-	u8 freq = rps->cur_freq;
-
-	/* force a reset */
-	rps->power = -1;
-	rps->cur_freq = -1;
-
-	if (set(i915, freq))
-		DRM_ERROR("Failed to reset RPS to initial values\n");
-}
-
 /* See the Gen9_GT_PM_Programming_Guide doc for the below */
 static void gen9_enable_rps(struct drm_i915_private *dev_priv)
 {
@@ -1040,7 +989,6 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv)
 	 * Up/Down EI & threshold registers, as well as the RP_CONTROL,
 	 * RP_INTERRUPT_LIMITS & RPNSWREQ registers.
 	 */
-	reset_rps(dev_priv, gen6_set_rps);
 
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
@@ -1210,8 +1158,6 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv)
 		   GEN6_RP_UP_BUSY_AVG |
 		   GEN6_RP_DOWN_IDLE_AVG);
 
-	reset_rps(dev_priv, gen6_set_rps);
-
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
 
@@ -1311,8 +1257,6 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv)
 	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
 	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
 
-	reset_rps(dev_priv, gen6_set_rps);
-
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
 
@@ -1829,8 +1773,6 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
 	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
 	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
 
-	reset_rps(dev_priv, valleyview_set_rps);
-
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
 
@@ -1915,8 +1857,6 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
 	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
 	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
 
-	reset_rps(dev_priv, valleyview_set_rps);
-
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
 
@@ -2403,11 +2343,7 @@ static void intel_init_emon(struct drm_i915_private *dev_priv)
 void intel_gt_pm_sanitize(struct drm_i915_private *i915)
 {
 	intel_gt_pm_disable_llc(i915);
-
-	i915->gt_pm.rc6.enabled = true; /* force RC6 disabling */
 	intel_gt_pm_disable_rc6(i915);
-
-	i915->gt_pm.rps.enabled = true; /* force RPS disabling */
 	intel_gt_pm_disable_rps(i915);
 
 	if (INTEL_GEN(i915) >= 11)
@@ -2507,9 +2443,6 @@ static void __enable_rc6(struct drm_i915_private *i915)
 {
 	lockdep_assert_held(&i915->gt_pm.rps.lock);
 
-	if (i915->gt_pm.rc6.enabled)
-		return;
-
 	if (IS_CHERRYVIEW(i915))
 		cherryview_enable_rc6(i915);
 	else if (IS_VALLEYVIEW(i915))
@@ -2520,8 +2453,6 @@ static void __enable_rc6(struct drm_i915_private *i915)
 		gen8_enable_rc6(i915);
 	else if (INTEL_GEN(i915) >= 6)
 		gen6_enable_rc6(i915);
-
-	i915->gt_pm.rc6.enabled = true;
 }
 
 void intel_gt_pm_enable_rc6(struct drm_i915_private *i915)
@@ -2540,9 +2471,6 @@ static void __enable_rps(struct drm_i915_private *i915)
 
 	lockdep_assert_held(&rps->lock);
 
-	if (rps->enabled)
-		return;
-
 	if (IS_CHERRYVIEW(i915)) {
 		cherryview_enable_rps(i915);
 	} else if (IS_VALLEYVIEW(i915)) {
@@ -2564,7 +2492,12 @@ static void __enable_rps(struct drm_i915_private *i915)
 	WARN_ON(rps->efficient_freq < rps->min_freq);
 	WARN_ON(rps->efficient_freq > rps->max_freq);
 
-	rps->enabled = true;
+	/* Force a reset */
+	rps->cur_freq = rps->max_freq;
+	rps->power = -1;
+	__intel_set_rps(i915, rps->idle_freq);
+
+	rps->cur_freq = rps->idle_freq;
 }
 
 void intel_gt_pm_enable_rps(struct drm_i915_private *i915)
@@ -2581,11 +2514,7 @@ static void __enable_llc(struct drm_i915_private *i915)
 {
 	lockdep_assert_held(&i915->gt_pm.rps.lock);
 
-	if (i915->gt_pm.llc_pstate.enabled)
-		return;
-
 	gen6_update_ring_freq(i915);
-	i915->gt_pm.llc_pstate.enabled = true;
 }
 
 void intel_gt_pm_enable_llc(struct drm_i915_private *i915)
@@ -2602,9 +2531,6 @@ static void __disable_rc6(struct drm_i915_private *i915)
 {
 	lockdep_assert_held(&i915->gt_pm.rps.lock);
 
-	if (!i915->gt_pm.rc6.enabled)
-		return;
-
 	if (INTEL_GEN(i915) >= 9)
 		gen9_disable_rc6(i915);
 	else if (IS_CHERRYVIEW(i915))
@@ -2613,8 +2539,6 @@ static void __disable_rc6(struct drm_i915_private *i915)
 		valleyview_disable_rc6(i915);
 	else if (INTEL_GEN(i915) >= 6)
 		gen6_disable_rc6(i915);
-
-	i915->gt_pm.rc6.enabled = false;
 }
 
 void intel_gt_pm_disable_rc6(struct drm_i915_private *i915)
@@ -2628,9 +2552,6 @@ static void __disable_rps(struct drm_i915_private *i915)
 {
 	lockdep_assert_held(&i915->gt_pm.rps.lock);
 
-	if (!i915->gt_pm.rps.enabled)
-		return;
-
 	if (INTEL_GEN(i915) >= 9)
 		gen9_disable_rps(i915);
 	else if (IS_CHERRYVIEW(i915))
@@ -2641,8 +2562,6 @@ static void __disable_rps(struct drm_i915_private *i915)
 		gen6_disable_rps(i915);
 	else if (INTEL_GEN(i915) >= 5)
 		ironlake_disable_drps(i915);
-
-	i915->gt_pm.rps.enabled = false;
 }
 
 void intel_gt_pm_disable_rps(struct drm_i915_private *i915)
@@ -2652,21 +2571,9 @@ void intel_gt_pm_disable_rps(struct drm_i915_private *i915)
 	mutex_unlock(&i915->gt_pm.rps.lock);
 }
 
-static void __disable_llc(struct drm_i915_private *i915)
-{
-	lockdep_assert_held(&i915->gt_pm.rps.lock);
-
-	if (!i915->gt_pm.llc_pstate.enabled)
-		return;
-
-	i915->gt_pm.llc_pstate.enabled = false;
-}
-
 void intel_gt_pm_disable_llc(struct drm_i915_private *i915)
 {
-	mutex_lock(&i915->gt_pm.rps.lock);
-	__disable_llc(i915);
-	mutex_unlock(&i915->gt_pm.rps.lock);
+	/* Nothing to do here. */
 }
 
 void intel_gt_pm_fini(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
index 0d95a52c1941..029114155ee4 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/intel_gt_pm.h
@@ -19,14 +19,10 @@ struct intel_rps_ei {
 
 struct intel_rps {
 	struct mutex lock;
-
-	/*
-	 * work, interrupts_enabled and pm_iir are protected by
-	 * i915->irq_lock
-	 */
 	struct work_struct work;
-	bool interrupts_enabled;
-	u32 pm_iir;
+
+	bool active;
+	u32 pm_iir; /* protected by i915->irq_lock */
 
 	/* PM interrupt bits that should never be masked */
 	u32 pm_intrmsk_mbz;
@@ -63,7 +59,6 @@ struct intel_rps {
 	int last_adj;
 	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
 
-	bool enabled;
 	atomic_t num_waiters;
 	atomic_t boosts;
 
@@ -72,19 +67,13 @@ struct intel_rps {
 };
 
 struct intel_rc6 {
-	bool enabled;
 	u64 prev_hw_residency[4];
 	u64 cur_residency[4];
 };
 
-struct intel_llc_pstate {
-	bool enabled;
-};
-
 struct intel_gt_pm {
-	struct intel_rps rps;
 	struct intel_rc6 rc6;
-	struct intel_llc_pstate llc_pstate;
+	struct intel_rps rps;
 
 	u32 imr;
 	u32 ier;
@@ -108,11 +97,12 @@ void intel_gt_pm_disable_rc6(struct drm_i915_private *i915);
 void intel_gt_pm_enable_llc(struct drm_i915_private *i915);
 void intel_gt_pm_disable_llc(struct drm_i915_private *i915);
 
+void intel_gt_pm_busy(struct drm_i915_private *i915);
+void intel_gt_pm_idle(struct drm_i915_private *i915);
+
 void intel_gt_pm_irq_handler(struct drm_i915_private *i915, u32 pm_iir);
 
-void gen6_rps_busy(struct drm_i915_private *i915);
-void gen6_rps_idle(struct drm_i915_private *i915);
-void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
+void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
 
 int intel_gpu_freq(const struct drm_i915_private *i915, int val);
 int intel_freq_opcode(const struct drm_i915_private *i915, int val);
-- 
2.17.0


* [PATCH 056/262] drm/i915: Refactor frequency bounds computation
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (53 preceding siblings ...)
  2018-05-17  6:04 ` [PATCH 055/262] drm/i915: Simplify rc6/rps enabling Chris Wilson
@ 2018-05-17  6:04 ` Chris Wilson
  2018-05-17  6:12 ` [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:04 UTC (permalink / raw)
  To: intel-gfx

When choosing the initial frequency in intel_gt_pm_busy() we also need
to calculate the current min/max bounds. As this calculation is going to
become more complex with the intersection of several different limits,
refactor it into a common function. The alternative would be to feed the
initial reclocking through the RPS worker, but the latency in that case
is undesirable.

v2: Only apply the rps->last_adj update if the frequency was unclamped.
The intention is that we don't continue to accumulate the adjustment
when we hit the bounds.
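
In outline, the common helper intersects the limits, clamps the
requested frequency, and only carries the adjustment forward when the
request was not clamped (as in the diff below):

static int adjust_rps(struct drm_i915_private *i915, int freq, int adj)
{
	struct intel_rps *rps = &i915->gt_pm.rps;
	int min, max, val;
	int err;

	min = rps->min_freq_softlimit;
	max = rps->max_freq_softlimit;
	if (atomic_read(&rps->num_waiters) && max < rps->boost_freq)
		max = rps->boost_freq; /* waiters may boost past the softlimit */

	val = clamp(freq + adj, min, max);

	err = __intel_set_rps(i915, val);
	if (err)
		return err;

	if (val != rps->cur_freq) {
		trace_intel_gpu_freq_change(intel_gpu_freq(i915, val));
		rps->cur_freq = val;
		/* only accumulate last_adj if we were not clamped */
		rps->last_adj = val == freq + adj ? adj : 0;
	}

	return 0;
}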

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
---
 drivers/gpu/drm/i915/intel_gt_pm.c | 57 +++++++++++-------------------
 1 file changed, 21 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index 328f15db22e4..ac653a37d9ba 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -383,15 +383,25 @@ static int __intel_set_rps(struct drm_i915_private *i915, u8 val)
 		return 0;
 }
 
-static int intel_set_rps(struct drm_i915_private *i915, u8 val)
+static int adjust_rps(struct drm_i915_private *i915, int freq, int adj)
 {
 	struct intel_rps *rps = &i915->gt_pm.rps;
+	int min, max, val;
 	int err;
 
 	lockdep_assert_held(&rps->lock);
 	GEM_BUG_ON(!rps->active);
-	GEM_BUG_ON(val > rps->max_freq);
-	GEM_BUG_ON(val < rps->min_freq);
+
+	min = rps->min_freq_softlimit;
+	max = rps->max_freq_softlimit;
+	if (atomic_read(&rps->num_waiters) && max < rps->boost_freq)
+		max = rps->boost_freq;
+
+	GEM_BUG_ON(min < rps->min_freq);
+	GEM_BUG_ON(max > rps->max_freq);
+	GEM_BUG_ON(max < min);
+
+	val = clamp(freq + adj, min, max);
 
 	err = __intel_set_rps(i915, val);
 	if (err)
@@ -400,6 +410,7 @@ static int intel_set_rps(struct drm_i915_private *i915, u8 val)
 	if (val != rps->cur_freq) {
 		trace_intel_gpu_freq_change(intel_gpu_freq(i915, val));
 		rps->cur_freq = val;
+		rps->last_adj = val == freq + adj ? adj : 0;
 	}
 
 	return 0;
@@ -576,8 +587,8 @@ static void intel_rps_work(struct work_struct *work)
 	struct drm_i915_private *i915 =
 		container_of(work, struct drm_i915_private, gt_pm.rps.work);
 	struct intel_rps *rps = &i915->gt_pm.rps;
-	int freq, adj, min, max;
 	bool client_boost;
+	int freq, adj;
 	u32 pm_iir;
 
 	pm_iir = xchg(&rps->pm_iir, 0) & ~rps->pm_events;
@@ -590,15 +601,6 @@ static void intel_rps_work(struct work_struct *work)
 	if (!rps->active)
 		goto unlock;
 
-	min = rps->min_freq_softlimit;
-	max = rps->max_freq_softlimit;
-	if (client_boost && max < rps->boost_freq)
-		max = rps->boost_freq;
-
-	GEM_BUG_ON(min < rps->min_freq);
-	GEM_BUG_ON(max > rps->max_freq);
-	GEM_BUG_ON(max < min);
-
 	adj = rps->last_adj;
 	freq = rps->cur_freq;
 	if (client_boost && freq < rps->boost_freq) {
@@ -609,16 +611,13 @@ static void intel_rps_work(struct work_struct *work)
 			adj *= 2;
 		else /* CHV needs even encode values */
 			adj = IS_CHERRYVIEW(i915) ? 2 : 1;
-
-		if (freq >= max)
-			adj = 0;
 	} else if (client_boost) {
 		adj = 0;
 	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
-		if (freq > max_t(int, rps->efficient_freq, min))
-			freq = max_t(int, rps->efficient_freq, min);
-		else if (freq > min_t(int, rps->efficient_freq, min))
-			freq = min_t(int, rps->efficient_freq, min);
+		if (freq > rps->efficient_freq)
+			freq = rps->efficient_freq;
+		else if (freq > rps->idle_freq)
+			freq = rps->idle_freq;
 
 		adj = 0;
 	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
@@ -626,23 +625,17 @@ static void intel_rps_work(struct work_struct *work)
 			adj *= 2;
 		else /* CHV needs even encode values */
 			adj = IS_CHERRYVIEW(i915) ? -2 : -1;
-
-		if (freq <= min)
-			adj = 0;
 	} else { /* unknown/external event */
 		adj = 0;
 	}
 
-	if (intel_set_rps(i915, clamp_t(int, freq + adj, min, max))) {
+	if (adjust_rps(i915, freq, adj))
 		DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
-		adj = 0;
-	}
 
 	if (pm_iir) {
 		spin_lock_irq(&i915->irq_lock);
 		gen6_unmask_pm_irq(i915, rps->pm_events);
 		spin_unlock_irq(&i915->irq_lock);
-		rps->last_adj = adj;
 	}
 
 unlock:
@@ -666,7 +659,6 @@ void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
 void intel_gt_pm_busy(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u8 freq;
 
 	if (!HAS_RPS(dev_priv))
 		return;
@@ -681,14 +673,7 @@ void intel_gt_pm_busy(struct drm_i915_private *dev_priv)
 	 * Use the user's desired frequency as a guide, but for better
 	 * performance, jump directly to RPe as our starting frequency.
 	 */
-	freq = max(rps->cur_freq, rps->efficient_freq);
-	if (intel_set_rps(dev_priv,
-			  clamp(freq,
-				rps->min_freq_softlimit,
-				rps->max_freq_softlimit)))
-		DRM_DEBUG_DRIVER("Failed to set busy frequency\n");
-
-	rps->last_adj = 0;
+	adjust_rps(dev_priv, max(rps->cur_freq, rps->efficient_freq), 0);
 
 	if (INTEL_GEN(dev_priv) >= 6) {
 		memset(&rps->ei, 0, sizeof(rps->ei));
-- 
2.17.0


* Re: [PATCH 001/262] drm/i915: Move request->ctx aside
  2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
                   ` (54 preceding siblings ...)
  2018-05-17  6:04 ` [PATCH 056/262] drm/i915: Refactor frequency bounds computation Chris Wilson
@ 2018-05-17  6:12 ` Chris Wilson
  55 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-17  6:12 UTC (permalink / raw)
  To: intel-gfx

Quoting Chris Wilson (2018-05-17 07:03:17)
> In the next patch, we want to store the intel_context pointer inside
> i915_request, as it is frequently access via a convoluted dance when
> submitting the request to hw. Having two context pointers inside
> i915_request leads to confusion so first rename the existing
> i915_gem_context pointer to i915_request.gem_context.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

/o\ Oops, you weren't meant to get all 262. Still too early. I think I
need more than a cup of tea this morning.
-Chris

* Re: [PATCH 029/262] drm/i915: Track the purgeable objects on a separate eviction list
  2018-05-17  6:03 ` [PATCH 029/262] drm/i915: Track the purgeable objects on a separate eviction list Chris Wilson
@ 2018-05-18 11:36   ` Matthew Auld
  2018-05-18 11:59     ` Chris Wilson
  0 siblings, 1 reply; 62+ messages in thread
From: Matthew Auld @ 2018-05-18 11:36 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development

On 17 May 2018 at 07:03, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Currently the purgeable objects, I915_MADV_DONTNEED, are mixed into the
> normal bound/unbound lists. Every shrinker pass starts with an attempt
> to purge from this set of unneeded objects, which entails us doing a
> walk over both lists looking for any candidates. If there are none (and
> since we are shrinking, we can reasonably assume that the lists are
> full!), this becomes a very slow, futile walk.
>
> If we separate out the purgeable objects into their own list, this
> search then becomes its own phase that is preferentially handled during
> shrinking. Instead, the cost becomes that we then need to filter the
> purgeable list if we want to distinguish between bound and unbound
> objects.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h          | 13 ++++---
>  drivers/gpu/drm/i915/i915_gem.c          | 49 ++++++++++++++++++------
>  drivers/gpu/drm/i915/i915_gem_shrinker.c | 28 +++++++-------
>  drivers/gpu/drm/i915/i915_vma.c          |  2 +-
>  4 files changed, 60 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 56ffdd523893..1eeee043e164 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -926,6 +926,10 @@ struct i915_gem_mm {
>          * not actually have any pages attached.
>          */
>         struct list_head unbound_list;
> +       /**
> +        * List of objects which are purgeable. May be active.
> +        */
> +       struct list_head purge_list;
>
>         /** List of all objects in gtt_space, currently mmaped by userspace.
>          * All objects within this list must also be on bound_list.
> @@ -3310,11 +3314,10 @@ unsigned long i915_gem_shrink(struct drm_i915_private *i915,
>                               unsigned long target,
>                               unsigned long *nr_scanned,
>                               unsigned flags);
> -#define I915_SHRINK_PURGEABLE 0x1
> -#define I915_SHRINK_UNBOUND 0x2
> -#define I915_SHRINK_BOUND 0x4
> -#define I915_SHRINK_ACTIVE 0x8
> -#define I915_SHRINK_VMAPS 0x10
> +#define I915_SHRINK_UNBOUND BIT(0)
> +#define I915_SHRINK_BOUND BIT(1)
> +#define I915_SHRINK_ACTIVE BIT(2)
> +#define I915_SHRINK_VMAPS BIT(3)
>  unsigned long i915_gem_shrink_all(struct drm_i915_private *i915);
>  void i915_gem_shrinker_register(struct drm_i915_private *i915);
>  void i915_gem_shrinker_unregister(struct drm_i915_private *i915);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 42d95f6490f8..be3092a03722 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1672,8 +1672,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>
>  static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
>  {
> -       struct drm_i915_private *i915;
> -       struct list_head *list;
>         struct i915_vma *vma;
>
>         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
> @@ -1688,11 +1686,16 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
>                 list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
>         }
>
> -       i915 = to_i915(obj->base.dev);
> -       spin_lock(&i915->mm.obj_lock);
> -       list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
> -       list_move_tail(&obj->mm.link, list);
> -       spin_unlock(&i915->mm.obj_lock);
> +       if (obj->mm.madv == I915_MADV_WILLNEED) {
> +               struct drm_i915_private *i915 = to_i915(obj->base.dev);
> +               struct list_head *list;
> +
> +               spin_lock(&i915->mm.obj_lock);
> +               list = obj->bind_count ?
> +                       &i915->mm.bound_list : &i915->mm.unbound_list;
> +               list_move_tail(&obj->mm.link, list);
> +               spin_unlock(&i915->mm.obj_lock);
> +       }
>  }
>
>  /**
> @@ -2531,7 +2534,7 @@ static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>         sg_page_sizes = 0;
>         for (i = 0; i < page_count; i++) {
>                 const unsigned int shrink[] = {
> -                       I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
> +                       I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
>                         0,
>                 }, *s = shrink;
>                 gfp_t gfp = noreclaim;
> @@ -4519,7 +4522,7 @@ int
>  i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
>                        struct drm_file *file_priv)
>  {
> -       struct drm_i915_private *dev_priv = to_i915(dev);
> +       struct drm_i915_private *i915 = to_i915(dev);
>         struct drm_i915_gem_madvise *args = data;
>         struct drm_i915_gem_object *obj;
>         int err;
> @@ -4542,7 +4545,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
>
>         if (i915_gem_object_has_pages(obj) &&
>             i915_gem_object_is_tiled(obj) &&
> -           dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
> +           i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
>                 if (obj->mm.madv == I915_MADV_WILLNEED) {
>                         GEM_BUG_ON(!obj->mm.quirked);
>                         __i915_gem_object_unpin_pages(obj);
> @@ -4558,6 +4561,20 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
>         if (obj->mm.madv != __I915_MADV_PURGED)
>                 obj->mm.madv = args->madv;
>
> +       if (i915_gem_object_has_pages(obj)) {
> +               struct list_head *list;
> +
> +               spin_lock(&i915->mm.obj_lock);
> +               if (obj->mm.madv != I915_MADV_WILLNEED)
> +                       list = &i915->mm.purge_list;
> +               else if (obj->bind_count)
> +                       list = &i915->mm.bound_list;
> +               else
> +                       list = &i915->mm.unbound_list;
> +               list_move_tail(&obj->mm.link, list);
> +               spin_unlock(&i915->mm.obj_lock);
> +       }
> +
>         /* if the object is no longer attached, discard its backing storage */
>         if (obj->mm.madv == I915_MADV_DONTNEED &&
>             !i915_gem_object_has_pages(obj))
> @@ -4876,9 +4893,18 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>         if (obj->mm.quirked)
>                 __i915_gem_object_unpin_pages(obj);
>
> -       if (discard_backing_storage(obj))
> +       if (discard_backing_storage(obj)) {
> +               struct drm_i915_private *i915 = to_i915(obj->base.dev);
> +
>                 obj->mm.madv = I915_MADV_DONTNEED;
>
> +               if (i915_gem_object_has_pages(obj)) {
> +                       spin_lock(&i915->mm.obj_lock);
> +                       list_move_tail(&obj->mm.link, &i915->mm.purge_list);
> +                       spin_unlock(&i915->mm.obj_lock);
> +               }
> +       }
> +
>         /*
>          * Before we free the object, make sure any pure RCU-only
>          * read-side critical sections are complete, e.g.
> @@ -5527,6 +5553,7 @@ static void i915_gem_init__mm(struct drm_i915_private *i915)
>
>         init_llist_head(&i915->mm.free_list);
>
> +       INIT_LIST_HEAD(&i915->mm.purge_list);
>         INIT_LIST_HEAD(&i915->mm.unbound_list);
>         INIT_LIST_HEAD(&i915->mm.bound_list);
>         INIT_LIST_HEAD(&i915->mm.fence_list);
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index 5757fb7c4b5a..ea5a6e2d62d2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -151,6 +151,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
>                 struct list_head *list;
>                 unsigned int bit;
>         } phases[] = {
> +               { &i915->mm.purge_list, 0 },
>                 { &i915->mm.unbound_list, I915_SHRINK_UNBOUND },
>                 { &i915->mm.bound_list, I915_SHRINK_BOUND },
>                 { NULL, 0 },
> @@ -182,8 +183,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
>          * device just to recover a little memory. If absolutely necessary,
>          * we will force the wake during oom-notifier.
>          */
> -       if ((flags & I915_SHRINK_BOUND) &&
> -           !intel_runtime_pm_get_if_in_use(i915))
> +       if (flags & I915_SHRINK_BOUND && !intel_runtime_pm_get_if_in_use(i915))
>                 flags &= ~I915_SHRINK_BOUND;
>
>         /*
> @@ -209,7 +209,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
>                 struct list_head still_in_list;
>                 struct drm_i915_gem_object *obj;
>
> -               if ((flags & phase->bit) == 0)
> +               if (phase->bit && (flags & phase->bit) == 0)
>                         continue;
>
>                 INIT_LIST_HEAD(&still_in_list);
> @@ -228,10 +228,6 @@ i915_gem_shrink(struct drm_i915_private *i915,
>                                                        mm.link))) {
>                         list_move_tail(&obj->mm.link, &still_in_list);
>
> -                       if (flags & I915_SHRINK_PURGEABLE &&
> -                           obj->mm.madv != I915_MADV_DONTNEED)
> -                               continue;
> -
>                         if (flags & I915_SHRINK_VMAPS &&
>                             !is_vmalloc_addr(obj->mm.mapping))
>                                 continue;
> @@ -241,6 +237,10 @@ i915_gem_shrink(struct drm_i915_private *i915,
>                              i915_gem_object_is_framebuffer(obj)))
>                                 continue;
>
> +                       if (!(flags & I915_SHRINK_BOUND) &&
> +                           READ_ONCE(obj->bind_count))
> +                               continue;
> +
>                         if (!can_release_pages(obj))
>                                 continue;
>
> @@ -325,6 +325,11 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
>                         count += obj->base.size >> PAGE_SHIFT;
>                         num_objects++;
>                 }
> +       list_for_each_entry(obj, &i915->mm.purge_list, mm.link)
> +               if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) {
> +                       count += obj->base.size >> PAGE_SHIFT;
> +                       num_objects++;
> +               }
>         spin_unlock(&i915->mm.obj_lock);
>
>         /* Update our preferred vmscan batch size for the next pass.
> @@ -361,14 +366,7 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
>                                 sc->nr_to_scan,
>                                 &sc->nr_scanned,
>                                 I915_SHRINK_BOUND |
> -                               I915_SHRINK_UNBOUND |
> -                               I915_SHRINK_PURGEABLE);
> -       if (sc->nr_scanned < sc->nr_to_scan)
> -               freed += i915_gem_shrink(i915,
> -                                        sc->nr_to_scan - sc->nr_scanned,
> -                                        &sc->nr_scanned,
> -                                        I915_SHRINK_BOUND |
> -                                        I915_SHRINK_UNBOUND);
> +                               I915_SHRINK_UNBOUND);
>         if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
>                 intel_runtime_pm_get(i915);
>                 freed += i915_gem_shrink(i915,
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 9324d476e0a7..96039c4ef434 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -624,7 +624,7 @@ i915_vma_remove(struct i915_vma *vma)
>          * no more VMAs exist.
>          */
>         spin_lock(&i915->mm.obj_lock);
> -       if (--obj->bind_count == 0)
> +       if (--obj->bind_count == 0 && obj->mm.madv == I915_MADV_WILLNEED)

How does this play with volatile objects, like gem object internal,
since they are DONTNEED while pinned?

>                 list_move_tail(&obj->mm.link, &i915->mm.unbound_list);
>         spin_unlock(&i915->mm.obj_lock);
>
> --
> 2.17.0
>

* Re: [PATCH 029/262] drm/i915: Track the purgeable objects on a separate eviction list
  2018-05-18 11:36   ` Matthew Auld
@ 2018-05-18 11:59     ` Chris Wilson
  0 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-18 11:59 UTC (permalink / raw)
  To: Matthew Auld; +Cc: Intel Graphics Development

Quoting Matthew Auld (2018-05-18 12:36:30)
> On 17 May 2018 at 07:03, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> > index 9324d476e0a7..96039c4ef434 100644
> > --- a/drivers/gpu/drm/i915/i915_vma.c
> > +++ b/drivers/gpu/drm/i915/i915_vma.c
> > @@ -624,7 +624,7 @@ i915_vma_remove(struct i915_vma *vma)
> >          * no more VMAs exist.
> >          */
> >         spin_lock(&i915->mm.obj_lock);
> > -       if (--obj->bind_count == 0)
> > +       if (--obj->bind_count == 0 && obj->mm.madv == I915_MADV_WILLNEED)
> 
> How does this play with volatile objects, like gem object internal,
> since they are DONTNEED while pinned?

Not brilliant, as they are left on the unbound_list rather than promoted to
the purge_list. They will still be reaped eventually, but I don't expect
it to make any difference in practice (since they are either short-lived
shadow batches, or longer-lived and oft-pinned ringbuffers). It
really just means we should stop poking at mm.madv directly, and probably
just make the decision to mark them as purgeable from the start and play
fewer games.
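
Something along the lines of this sketch (create_volatile_internal() is a
hypothetical helper here, and writing mm.madv without taking obj->mm.lock
is only safe on the assumption that the object is not yet visible to
anyone else):

static struct drm_i915_gem_object *
create_volatile_internal(struct drm_i915_private *i915, phys_addr_t size)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_internal(i915, size);
	if (IS_ERR(obj))
		return obj;

	/*
	 * Mark the backing storage volatile from the start, so the object
	 * lands on the purge_list as soon as it has pages and nobody has
	 * to flip mm.madv behind the VMA code's back later.
	 */
	obj->mm.madv = I915_MADV_DONTNEED;
	return obj;
}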
-Chris

* Re: [PATCH 030/262] drm/i915: Refactor unsetting obj->mm.pages
  2018-05-17  6:03 ` [PATCH 030/262] drm/i915: Refactor unsetting obj->mm.pages Chris Wilson
@ 2018-05-18 13:35   ` Matthew Auld
  0 siblings, 0 replies; 62+ messages in thread
From: Matthew Auld @ 2018-05-18 13:35 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development

On 17 May 2018 at 07:03, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> As i915_gem_object_attach_phys() wants to play dirty and mess around
> with obj->mm.pages itself (replacing the shmemfs with a DMA allocation),
> refactor the gubbins into i915_gem_object_unset_pages() so that we don't
> have to duplicate all the secrets.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c | 68 ++++++++++++++++++---------------
>  1 file changed, 37 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index be3092a03722..4e480874563f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2410,29 +2410,15 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
>         rcu_read_unlock();
>  }
>
> -void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> -                                enum i915_mm_subclass subclass)
> +static struct sg_table *
> +__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
>  {
>         struct drm_i915_private *i915 = to_i915(obj->base.dev);
>         struct sg_table *pages;
>
> -       if (i915_gem_object_has_pinned_pages(obj))
> -               return;
> -
> -       GEM_BUG_ON(obj->bind_count);
> -       if (!i915_gem_object_has_pages(obj))
> -               return;
> -
> -       /* May be called by shrinker from within get_pages() (on another bo) */
> -       mutex_lock_nested(&obj->mm.lock, subclass);
> -       if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
> -               goto unlock;
> -
> -       /* ->put_pages might need to allocate memory for the bit17 swizzle
> -        * array, hence protect them from being reaped by removing them from gtt
> -        * lists early. */
>         pages = fetch_and_zero(&obj->mm.pages);
> -       GEM_BUG_ON(!pages);
> +       if (!pages)
> +               return NULL;
>
>         spin_lock(&i915->mm.obj_lock);
>         list_del(&obj->mm.link);
> @@ -2451,12 +2437,37 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
>         }
>
>         __i915_gem_object_reset_page_iter(obj);
> +       obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
> +
> +       return pages;
> +}
> +
> +void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> +                                enum i915_mm_subclass subclass)
> +{
> +       struct sg_table *pages;
> +
> +       if (i915_gem_object_has_pinned_pages(obj))
> +               return;
>
> +       GEM_BUG_ON(obj->bind_count);
> +       if (!i915_gem_object_has_pages(obj))
> +               return;
> +
> +       /* May be called by shrinker from within get_pages() (on another bo) */
> +       mutex_lock_nested(&obj->mm.lock, subclass);
> +       if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
> +               goto unlock;
> +
> +       /*
> +        * ->put_pages might need to allocate memory for the bit17 swizzle
> +        * array, hence protect them from being reaped by removing them from gtt
> +        * lists early.
> +        */
> +       pages = __i915_gem_object_unset_pages(obj);
>         if (!IS_ERR(pages))
>                 obj->ops->put_pages(obj, pages);
>
> -       obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
> -
>  unlock:
>         mutex_unlock(&obj->mm.lock);
>  }
> @@ -6013,16 +6024,7 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
>                 goto err_unlock;
>         }
>
> -       pages = fetch_and_zero(&obj->mm.pages);
> -       if (pages) {
> -               struct drm_i915_private *i915 = to_i915(obj->base.dev);
> -
> -               __i915_gem_object_reset_page_iter(obj);
> -
> -               spin_lock(&i915->mm.obj_lock);
> -               list_del(&obj->mm.link);
> -               spin_unlock(&i915->mm.obj_lock);
> -       }
> +       pages = __i915_gem_object_unset_pages(obj);
>
>         obj->ops = &i915_gem_phys_ops;
>
> @@ -6040,7 +6042,11 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
>
>  err_xfer:
>         obj->ops = &i915_gem_object_ops;
> -       obj->mm.pages = pages;
> +       if (!IS_ERR(pages)) {

!IS_ERR_OR_NULL
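
i.e. guard both the error-pointer and the NULL return (a sketch of the
suggested guard only; per the hunk above, __i915_gem_object_unset_pages()
returns NULL when the object had no pages, and IS_ERR(NULL) is false):

err_xfer:
	obj->ops = &i915_gem_object_ops;
	if (!IS_ERR_OR_NULL(pages)) {
		/* reinstate obj->mm.pages as in the patch; body elided here */
	}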

Reviewed-by: Matthew Auld <matthew.auld@intel.com>

* Re: [PATCH 031/262] drm/i915: Report all objects with allocated pages to the shrinker
  2018-05-17  6:03 ` [PATCH 031/262] drm/i915: Report all objects with allocated pages to the shrinker Chris Wilson
@ 2018-05-18 16:42   ` Matthew Auld
  2018-05-18 16:45     ` Chris Wilson
  0 siblings, 1 reply; 62+ messages in thread
From: Matthew Auld @ 2018-05-18 16:42 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development

On 17 May 2018 at 07:03, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Currently, we try to report to the shrinker the precise number of
> objects (pages) that are available to be reaped at this moment. This
> requires searching all objects with allocated pages to see if they
> fulfill the search criteria, and this count is performed quite
> frequently. (The shrinker tries to free ~128 pages on each invocation,
> before which we count all the objects; counting takes longer than
> unbinding the objects!) If we take the pragmatic view that with
> sufficient desire, all objects are eventually reapable (they become
> inactive, or are no longer used as a framebuffer etc.), we can simply return
> the count of pinned pages maintained during get_pages/put_pages rather
> than walk the lists every time.
>
> The downside is that we may (slightly) over-report the number of
> objects/pages we could shrink and so penalize ourselves by shrinking
> more than required. This is mitigated by keeping the order in which we
> shrink objects such that active and frequently used objects are reaped
> last; and if memory is so tight that we need to free those, we would
> have had to free them anyway.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c      |  2 +-
>  drivers/gpu/drm/i915/i915_drv.h          |  1 -
>  drivers/gpu/drm/i915/i915_gem.c          | 27 ++++-------------------
>  drivers/gpu/drm/i915/i915_gem_shrinker.c | 28 +++++-------------------
>  4 files changed, 11 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index ee8e2ff2c426..72d2238755db 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -434,7 +434,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
>         if (ret)
>                 return ret;
>
> -       seq_printf(m, "%u objects, %llu bytes\n",
> +       seq_printf(m, "%u active objects, %llu bytes\n",
>                    dev_priv->mm.object_count,
>                    dev_priv->mm.object_memory);
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 1eeee043e164..7fa727e62d6f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -986,7 +986,6 @@ struct i915_gem_mm {
>         uint32_t bit_6_swizzle_y;
>
>         /* accounting, useful for userland debugging */
> -       spinlock_t object_stat_lock;
>         u64 object_memory;
>         u32 object_count;
>  };
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 4e480874563f..a5694b0a7e6a 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -77,25 +77,6 @@ remove_mappable_node(struct drm_mm_node *node)
>         drm_mm_remove_node(node);
>  }
>
> -/* some bookkeeping */
> -static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
> -                                 u64 size)
> -{
> -       spin_lock(&dev_priv->mm.object_stat_lock);
> -       dev_priv->mm.object_count++;
> -       dev_priv->mm.object_memory += size;
> -       spin_unlock(&dev_priv->mm.object_stat_lock);
> -}
> -
> -static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
> -                                    u64 size)
> -{
> -       spin_lock(&dev_priv->mm.object_stat_lock);
> -       dev_priv->mm.object_count--;
> -       dev_priv->mm.object_memory -= size;
> -       spin_unlock(&dev_priv->mm.object_stat_lock);
> -}
> -
>  static int
>  i915_gem_wait_for_error(struct i915_gpu_error *error)
>  {
> @@ -2422,6 +2403,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
>
>         spin_lock(&i915->mm.obj_lock);
>         list_del(&obj->mm.link);
> +       i915->mm.object_count--;
> +       i915->mm.object_memory -= obj->base.size;
>         spin_unlock(&i915->mm.obj_lock);
>
>         if (obj->mm.mapping) {
> @@ -2708,6 +2691,8 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
>         GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
>
>         spin_lock(&i915->mm.obj_lock);
> +       i915->mm.object_count++;
> +       i915->mm.object_memory += obj->base.size;

Is it not worthwhile keeping the i915_gem_object_is_shrinkable() check?

* Re: [PATCH 031/262] drm/i915: Report all objects with allocated pages to the shrinker
  2018-05-18 16:42   ` Matthew Auld
@ 2018-05-18 16:45     ` Chris Wilson
  0 siblings, 0 replies; 62+ messages in thread
From: Chris Wilson @ 2018-05-18 16:45 UTC (permalink / raw)
  To: Matthew Auld; +Cc: Intel Graphics Development

Quoting Matthew Auld (2018-05-18 17:42:27)
> On 17 May 2018 at 07:03, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> >  static int
> >  i915_gem_wait_for_error(struct i915_gpu_error *error)
> >  {
> > @@ -2422,6 +2403,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
> >
> >         spin_lock(&i915->mm.obj_lock);
> >         list_del(&obj->mm.link);
> > +       i915->mm.object_count--;
> > +       i915->mm.object_memory -= obj->base.size;
> >         spin_unlock(&i915->mm.obj_lock);
> >
> >         if (obj->mm.mapping) {
> > @@ -2708,6 +2691,8 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
> >         GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
> >
> >         spin_lock(&i915->mm.obj_lock);
> > +       i915->mm.object_count++;
> > +       i915->mm.object_memory += obj->base.size;
> 
> Is it not worthwhile keeping the i915_gem_object_is_shrinkable() check?

If we rebrand these as i915->mm.shrink_count and shrink_memory, that
makes sense.
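
Something like this sketch, then (shrink_count/shrink_memory being the
rebranded fields, with the accounting guarded by the existing
i915_gem_object_is_shrinkable() predicate you mention):

	/* in __i915_gem_object_set_pages(), under i915->mm.obj_lock */
	if (i915_gem_object_is_shrinkable(obj)) {
		i915->mm.shrink_count++;
		i915->mm.shrink_memory += obj->base.size;
	}

	/* ...and the mirror image in __i915_gem_object_unset_pages() */
	if (i915_gem_object_is_shrinkable(obj)) {
		i915->mm.shrink_count--;
		i915->mm.shrink_memory -= obj->base.size;
	}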
-Chris


Thread overview: 62+ messages
2018-05-17  6:03 [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
2018-05-17  6:03 ` [PATCH 002/262] drm/i915: Move fiddling with engine->last_retired_context Chris Wilson
2018-05-17  6:03 ` [PATCH 003/262] drm/i915: Store a pointer to intel_context in i915_request Chris Wilson
2018-05-17  6:03 ` [PATCH 004/262] drm/i915: Pull the context->pin_count dec into the common intel_context_unpin Chris Wilson
2018-05-17  6:03 ` [PATCH 005/262] drm/i915: Be irqsafe inside reset Chris Wilson
2018-05-17  6:03 ` [PATCH 006/262] drm/i915: Make intel_engine_dump irqsafe Chris Wilson
2018-05-17  6:03 ` [PATCH 007/262] drm/i915/execlists: Handle copying default context state for atomic reset Chris Wilson
2018-05-17  6:03 ` [PATCH 008/262] drm/i915: Allow init_breadcrumbs to be used from irq context Chris Wilson
2018-05-17  6:03 ` [PATCH 009/262] drm/i915/execlists: HWACK checking superseded checking port[0].count Chris Wilson
2018-05-17  6:03 ` [PATCH 010/262] drm/i915: Remove USES_GUC_SUBMISSION() pointer chasing from gen8_cs_irq_handler Chris Wilson
2018-05-17  6:03 ` [PATCH 011/262] drm/i915/execlists: Double check rpm wakeref Chris Wilson
2018-05-17  6:03 ` [PATCH 012/262] drm/i915: After reset on sanitization, reset the engine backends Chris Wilson
2018-05-17  6:03 ` [PATCH 013/262] drm/i915/execlists: Reset the CSB head tracking on reset/sanitization Chris Wilson
2018-05-17  6:03 ` [PATCH 014/262] drm/i915/execlists: Pull submit after dequeue under timeline lock Chris Wilson
2018-05-17  6:03 ` [PATCH 015/262] drm/i915/execlists: Process one CSB interrupt at a time Chris Wilson
2018-05-17  6:03 ` [PATCH 016/262] drm/i915/execlists: Unify CSB access pointers Chris Wilson
2018-05-17  6:03 ` [PATCH 017/262] drm/i915/execlists: Process the CSB directly from inside the irq handler Chris Wilson
2018-05-17  6:03 ` [PATCH 018/262] drm/i915/execlists: Direct submission (avoid tasklet/ksoftirqd) Chris Wilson
2018-05-17  6:03 ` [PATCH 019/262] drm/i915: Combine gt irq ack/handlers Chris Wilson
2018-05-17  6:03 ` [PATCH 020/262] drm/i915/execlists: Force preemption via reset on timeout Chris Wilson
2018-05-17  6:03 ` [PATCH 021/262] drm/i915/execlists: Try preempt-reset from hardirq timer context Chris Wilson
2018-05-17  6:03 ` [PATCH 022/262] drm/i915/preemption: Select timeout when scheduling Chris Wilson
2018-05-17  6:03 ` [PATCH 023/262] drm/i915: Use a preemption timeout to enforce interactivity Chris Wilson
2018-05-17  6:03 ` [PATCH 024/262] drm/i915: Allow user control over preempt timeout on their important context Chris Wilson
2018-05-17  6:03 ` [PATCH 025/262] drm/mm: Reject over-sized allocation requests early Chris Wilson
2018-05-17  6:03 ` [PATCH 026/262] drm/mm: Add a search-by-address variant to only inspect a single hole Chris Wilson
2018-05-17  6:03 ` [PATCH 027/262] drm/i915: Limit searching for PIN_HIGH Chris Wilson
2018-05-17  6:03 ` [PATCH 028/262] drm/i915: Pin the ring high Chris Wilson
2018-05-17  6:03 ` [PATCH 029/262] drm/i915: Track the purgeable objects on a separate eviction list Chris Wilson
2018-05-18 11:36   ` Matthew Auld
2018-05-18 11:59     ` Chris Wilson
2018-05-17  6:03 ` [PATCH 030/262] drm/i915: Refactor unsetting obj->mm.pages Chris Wilson
2018-05-18 13:35   ` Matthew Auld
2018-05-17  6:03 ` [PATCH 031/262] drm/i915: Report all objects with allocated pages to the shrinker Chris Wilson
2018-05-18 16:42   ` Matthew Auld
2018-05-18 16:45     ` Chris Wilson
2018-05-17  6:03 ` [PATCH 032/262] drm/i915: Disable preemption and sleeping while using the punit sideband Chris Wilson
2018-05-17  6:03 ` [PATCH 033/262] drm/i915: Lift acquiring the vlv punit magic to a common sb-get Chris Wilson
2018-05-17  6:03 ` [PATCH 034/262] drm/i915: Lift sideband locking for vlv_punit_(read|write) Chris Wilson
2018-05-17  6:03 ` [PATCH 035/262] drm/i915: Reduce RPS update frequency on Valleyview/Cherryview Chris Wilson
2018-05-17  6:03 ` [PATCH 036/262] Revert "drm/i915: Avoid tweaking evaluation thresholds on Baytrail v3" Chris Wilson
2018-05-17  6:03 ` [PATCH 037/262] drm/i915: Replace pcu_lock with sb_lock Chris Wilson
2018-05-17  6:03 ` [PATCH 038/262] drm/i915: Separate sideband declarations to intel_sideband.h Chris Wilson
2018-05-17  6:03 ` [PATCH 039/262] drm/i915: Merge sbi read/write into a single accessor Chris Wilson
2018-05-17  6:03 ` [PATCH 040/262] drm/i915: Merge sandybridge_pcode_(read|write) Chris Wilson
2018-05-17  6:03 ` [PATCH 041/262] drm/i915: Move sandybridge pcode access to intel_sideband.c Chris Wilson
2018-05-17  6:03 ` [PATCH 042/262] drm/i915: Mark up Ironlake ips with rpm wakerefs Chris Wilson
2018-05-17  6:03 ` [PATCH 043/262] drm/i915: Record logical context support in driver caps Chris Wilson
2018-05-17  6:04 ` [PATCH 044/262] drm/i915: Generalize i915_gem_sanitize() to reset contexts Chris Wilson
2018-05-17  6:04 ` [PATCH 045/262] drm/i915: Enable render context support for Ironlake (gen5) Chris Wilson
2018-05-17  6:04 ` [PATCH 046/262] drm/i915: Enable render context support for gen4 (Broadwater to Cantiga) Chris Wilson
2018-05-17  6:04 ` [PATCH 047/262] drm/i915: Split GT powermanagement functions to intel_gt_pm.c Chris Wilson
2018-05-17  6:04 ` [PATCH 048/262] drm/i915: Move rps worker " Chris Wilson
2018-05-17  6:04 ` [PATCH 049/262] drm/i915: Move all the RPS irq handlers to intel_gt_pm Chris Wilson
2018-05-17  6:04 ` [PATCH 050/262] drm/i915: Track HAS_RPS alongside HAS_RC6 in the device info Chris Wilson
2018-05-17  6:04 ` [PATCH 051/262] drm/i915: Remove defunct intel_suspend_gt_powersave() Chris Wilson
2018-05-17  6:04 ` [PATCH 052/262] drm/i915: Reorder GT interface code Chris Wilson
2018-05-17  6:04 ` [PATCH 053/262] drm/i915: Split control of rps and rc6 Chris Wilson
2018-05-17  6:04 ` [PATCH 054/262] drm/i915: Enabling rc6 and rps have different requirements, so separate them Chris Wilson
2018-05-17  6:04 ` [PATCH 055/262] drm/i915: Simplify rc6/rps enabling Chris Wilson
2018-05-17  6:04 ` [PATCH 056/262] drm/i915: Refactor frequency bounds computation Chris Wilson
2018-05-17  6:12 ` [PATCH 001/262] drm/i915: Move request->ctx aside Chris Wilson
