From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH 089/190] drm/i915: Tidy execlists submission and tracking
Date: Mon, 11 Jan 2016 10:44:33 +0000
Message-ID: <1452509174-16671-3-git-send-email-chris@chris-wilson.co.uk>
In-Reply-To: <1452509174-16671-1-git-send-email-chris@chris-wilson.co.uk>

Besides dramatically simplifying the submission code (requests ftw), we
can reduce the execlist spinlock duration and, importantly, avoid having
to hold it across the context-switch register reads. The requests
occupying the two ELSP ports are now tracked directly in
engine->execlist_port[], and completed requests are moved onto a
separate execlist_completed list so that retirement happens outside the
spinlock.
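
As a rough illustration of where this lands us, a minimal userspace
sketch of the port tracking, with simplified stand-in types (the
execlist_port[] pair, the submission queue and the same-context pairing
rule mirror the patch; everything else is illustrative):

struct request {
	int ctx_id;		/* owning context */
	unsigned int seqno;	/* breadcrumb written on completion */
	struct request *next;	/* stand-in for execlist_link */
};

struct engine {
	struct request *queue;		/* submitted, not yet completed */
	struct request *port[2];	/* the two ELSP submission ports */
};

/* Fill both ports from the queue. A later request from the same
 * context overwrites the earlier one in its port, as its tail
 * subsumes the earlier workload.
 */
static int unqueue(struct engine *e)
{
	struct request *rq;
	int port = 0, submit = 0;

	if (e->port[0])
		return 0;	/* resubmission (wa_tail) handling omitted */

	for (rq = e->queue; rq; rq = rq->next) {
		if (e->port[port] && rq->ctx_id != e->port[port]->ctx_id) {
			if (++port == 2)
				break;
		}
		e->port[port] = rq;
		submit = 1;
	}

	return submit;	/* caller then writes both ELSP descriptors */
}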

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  20 +-
 drivers/gpu/drm/i915/i915_gem.c            |   8 +-
 drivers/gpu/drm/i915/i915_gem_request.h    |  21 +-
 drivers/gpu/drm/i915/i915_guc_submission.c |  31 +-
 drivers/gpu/drm/i915/intel_lrc.c           | 505 +++++++++++------------------
 drivers/gpu/drm/i915/intel_lrc.h           |   3 -
 drivers/gpu/drm/i915/intel_ringbuffer.h    |   8 +-
 7 files changed, 209 insertions(+), 387 deletions(-)
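
Sketching the completion side in the same simplified model as above:
the context-switch kthread pulls the last completed seqno out of the
status buffer, then everything from the head of the queue up to and
including the request in port[0] moves onto a completed list for later
retirement, and the ports shift down. The real
execlists_complete_requests() below splices list_heads under
execlist_lock; this is only a rough model of it.

static int seqno_passed(unsigned int a, unsigned int b)
{
	return (int)(a - b) >= 0;	/* wrap-safe, cf. i915_seqno_passed() */
}

static void complete_requests(struct engine *e, unsigned int seqno,
			      struct request **completed)
{
	while (e->port[0] && seqno_passed(seqno, e->port[0]->seqno)) {
		struct request *last = e->port[0];

		/* detach queue head..last, push onto the completed list
		 * (retirement order is not preserved in this sketch)
		 */
		while (e->queue != last->next) {
			struct request *rq = e->queue;

			e->queue = rq->next;
			rq->next = *completed;
			*completed = rq;
		}

		/* the hardware has finished this port and moves on */
		e->port[0] = e->port[1];
		e->port[1] = NULL;
	}
}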

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 15a6fddfb79b..a5ea90944bbb 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2005,8 +2005,7 @@ static void i915_dump_lrc_obj(struct seq_file *m,
 		return;
 	}
 
-	seq_printf(m, "CONTEXT: %s %u\n", ring->name,
-		   intel_execlists_ctx_id(ctx_obj));
+	seq_printf(m, "CONTEXT: %s\n", ring->name);
 
 	if (!i915_gem_obj_ggtt_bound(ctx_obj))
 		seq_puts(m, "\tNot bound in GGTT\n");
@@ -2092,7 +2091,6 @@ static int i915_execlists(struct seq_file *m, void *data)
 	intel_runtime_pm_get(dev_priv);
 
 	for_each_ring(ring, dev_priv, ring_id) {
-		struct drm_i915_gem_request *head_req = NULL;
 		int count = 0;
 
 		seq_printf(m, "%s\n", ring->name);
@@ -2105,8 +2103,8 @@ static int i915_execlists(struct seq_file *m, void *data)
 		status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
 		seq_printf(m, "\tStatus pointer: 0x%08X\n", status_pointer);
 
-		read_pointer = ring->next_context_status_buffer;
-		write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
+		read_pointer = (status_pointer >> 8) & GEN8_CSB_PTR_MASK;
+		write_pointer = status_pointer & GEN8_CSB_PTR_MASK;
 		if (read_pointer > write_pointer)
 			write_pointer += GEN8_CSB_ENTRIES;
 		seq_printf(m, "\tRead pointer: 0x%08X, write pointer 0x%08X\n",
@@ -2123,21 +2121,9 @@ static int i915_execlists(struct seq_file *m, void *data)
 		spin_lock(&ring->execlist_lock);
 		list_for_each(cursor, &ring->execlist_queue)
 			count++;
-		head_req = list_first_entry_or_null(&ring->execlist_queue,
-				struct drm_i915_gem_request, execlist_link);
 		spin_unlock(&ring->execlist_lock);
 
 		seq_printf(m, "\t%d requests in queue\n", count);
-		if (head_req) {
-			struct drm_i915_gem_object *ctx_obj;
-
-			ctx_obj = head_req->ctx->engine[ring_id].state;
-			seq_printf(m, "\tHead request id: %u\n",
-				   intel_execlists_ctx_id(ctx_obj));
-			seq_printf(m, "\tHead request tail: %u\n",
-				   head_req->tail);
-		}
-
 		seq_putc(m, '\n');
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index eb875ecd7907..054e11cff00f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2193,12 +2193,12 @@ static void i915_gem_reset_ring_cleanup(struct intel_engine_cs *engine)
 
 	if (i915.enable_execlists) {
 		spin_lock(&engine->execlist_lock);
-
-		/* list_splice_tail_init checks for empty lists */
 		list_splice_tail_init(&engine->execlist_queue,
-				      &engine->execlist_retired_req_list);
-
+				      &engine->execlist_completed);
+		memset(&engine->execlist_port, 0,
+		       sizeof(engine->execlist_port));
 		spin_unlock(&engine->execlist_lock);
+
 		intel_execlists_retire_requests(engine);
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 59957d5edfdb..c2e83584f8a2 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -63,10 +63,11 @@ struct drm_i915_gem_request {
 	 * This is required to calculate the maximum available ringbuffer
 	 * space without overwriting the postfix.
 	 */
-	 u32 postfix;
+	u32 postfix;
 
 	/** Position in the ringbuffer of the end of the whole request */
 	u32 tail;
+	u32 wa_tail;
 
 	/**
 	 * Context and ring buffer related to this request
@@ -99,24 +100,8 @@ struct drm_i915_gem_request {
 	/** process identifier submitting this request */
 	struct pid *pid;
 
-	/**
-	 * The ELSP only accepts two elements at a time, so we queue
-	 * context/tail pairs on a given queue (ring->execlist_queue) until the
-	 * hardware is available. The queue serves a double purpose: we also use
-	 * it to keep track of the up to 2 contexts currently in the hardware
-	 * (usually one in execution and the other queued up by the GPU): We
-	 * only remove elements from the head of the queue when the hardware
-	 * informs us that an element has been completed.
-	 *
-	 * All accesses to the queue are mediated by a spinlock
-	 * (ring->execlist_lock).
-	 */
-
 	/** Execlist link in the submission queue.*/
-	struct list_head execlist_link;
-
-	/** Execlists no. of times this request has been sent to the ELSP */
-	int elsp_submitted;
+	struct list_head execlist_link; /* guarded by engine->execlist_lock */
 };
 
 struct drm_i915_gem_request *
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 5a6251926367..f4e09952d52c 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -393,7 +393,6 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
 		struct intel_ring *ring = ctx->engine[i].ring;
 		struct intel_engine_cs *engine;
 		struct drm_i915_gem_object *obj;
-		uint64_t ctx_desc;
 
 		/* TODO: We have a design issue to be solved here. Only when we
 		 * receive the first batch, we know which engine is used by the
@@ -407,8 +406,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
 			break;	/* XXX: continue? */
 
 		engine = ring->engine;
-		ctx_desc = intel_lr_context_descriptor(ctx, engine);
-		lrc->context_desc = (u32)ctx_desc;
+		lrc->context_desc = engine->execlist_context_descriptor;
 
 		/* The state page is after PPHWSP */
 		lrc->ring_lcra = i915_gem_obj_ggtt_offset(obj) +
@@ -548,7 +546,7 @@ static int guc_add_workqueue_item(struct i915_guc_client *gc,
 			WQ_NO_WCFLUSH_WAIT;
 
 	/* The GuC wants only the low-order word of the context descriptor */
-	wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, rq->engine);
+	wqi->context_desc = rq->engine->execlist_context_descriptor;
 
 	/* The GuC firmware wants the tail index in QWords, not bytes */
 	tail = rq->ring->tail >> 3;
@@ -562,27 +560,6 @@ static int guc_add_workqueue_item(struct i915_guc_client *gc,
 
 #define CTX_RING_BUFFER_START		0x08
 
-/* Update the ringbuffer pointer in a saved context image */
-static void lr_context_update(struct drm_i915_gem_request *rq)
-{
-	enum intel_engine_id ring_id = rq->engine->id;
-	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring_id].state;
-	struct drm_i915_gem_object *rb_obj = rq->ring->obj;
-	struct page *page;
-	uint32_t *reg_state;
-
-	BUG_ON(!ctx_obj);
-	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj));
-	WARN_ON(!i915_gem_obj_is_pinned(rb_obj));
-
-	page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
-	reg_state = kmap_atomic(page);
-
-	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj);
-
-	kunmap_atomic(reg_state);
-}
-
 /**
  * i915_guc_submit() - Submit commands through GuC
  * @client:	the guc client where commands will go through
@@ -597,10 +574,6 @@ int i915_guc_submit(struct i915_guc_client *client,
 	enum intel_engine_id ring_id = rq->engine->id;
 	int q_ret, b_ret;
 
-	/* Need this because of the deferred pin ctx and ring */
-	/* Shall we move this right after ring is pinned? */
-	lr_context_update(rq);
-
 	q_ret = guc_add_workqueue_item(client, rq);
 	if (q_ret == 0)
 		b_ret = guc_ring_doorbell(client);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index de5889e95d6d..80b346a3fd8a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -265,233 +265,133 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists
 	return 0;
 }
 
-/**
- * intel_execlists_ctx_id() - get the Execlists Context ID
- * @ctx_obj: Logical Ring Context backing object.
- *
- * Do not confuse with ctx->id! Unfortunately we have a name overload
- * here: the old context ID we pass to userspace as a handler so that
- * they can refer to a context, and the new context ID we pass to the
- * ELSP so that the GPU can inform us of the context status via
- * interrupts.
- *
- * Return: 20-bits globally unique context ID.
- */
-u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
-{
-	u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj) +
-			LRC_PPHWSP_PN * PAGE_SIZE;
-
-	/* LRCA is required to be 4K aligned so the more significant 20 bits
-	 * are globally unique */
-	return lrca >> 12;
-}
-
-static bool disable_lite_restore_wa(struct intel_engine_cs *ring)
-{
-	return (IS_SKL_REVID(ring->dev, 0, SKL_REVID_B0) ||
-		IS_BXT_REVID(ring->dev, 0, BXT_REVID_A1)) &&
-		(ring->id == VCS || ring->id == VCS2);
-}
-
-uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
-				     struct intel_engine_cs *ring)
+static u32 execlists_request_write_tail(struct drm_i915_gem_request *req)
 {
-	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
-	uint64_t desc;
-	uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj) +
-			LRC_PPHWSP_PN * PAGE_SIZE;
-
-	WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
-
-	desc = GEN8_CTX_VALID;
-	desc |= GEN8_CTX_ADDRESSING_MODE(ring->i915) << GEN8_CTX_ADDRESSING_MODE_SHIFT;
-	if (IS_GEN8(ring->i915))
-		desc |= GEN8_CTX_L3LLC_COHERENT;
-	desc |= GEN8_CTX_PRIVILEGE;
-	desc |= lrca;
-	desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
-
-	/* TODO: WaDisableLiteRestore when we start using semaphore
-	 * signalling between Command Streamers */
-	/* desc |= GEN8_CTX_FORCE_RESTORE; */
+	struct intel_ring *ring = req->ring;
+	struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt;
 
-	/* WaEnableForceRestoreInCtxtDescForVCS:skl */
-	/* WaEnableForceRestoreInCtxtDescForVCS:bxt */
-	if (disable_lite_restore_wa(ring))
-		desc |= GEN8_CTX_FORCE_RESTORE;
+	if (ppgtt && !USES_FULL_48BIT_PPGTT(req->i915)) {
+		/* True 32b PPGTT with dynamic page allocation: update PDP
+		 * registers and point the unallocated PDPs to scratch page.
+		 * PML4 is allocated during ppgtt init, so this is not needed
+		 * in 48-bit mode.
+		 */
+		if (ppgtt->pd_dirty_rings & intel_engine_flag(req->engine)) {
+			ASSIGN_CTX_PDP(ppgtt, ring->registers, 3);
+			ASSIGN_CTX_PDP(ppgtt, ring->registers, 2);
+			ASSIGN_CTX_PDP(ppgtt, ring->registers, 1);
+			ASSIGN_CTX_PDP(ppgtt, ring->registers, 0);
+			ppgtt->pd_dirty_rings &= ~intel_engine_flag(req->engine);
+		}
+	}
 
-	return desc;
+	ring->registers[CTX_RING_TAIL+1] = req->tail;
+	return ring->context_descriptor;
 }
 
-static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
-				 struct drm_i915_gem_request *rq1)
+static void execlists_submit_pair(struct intel_engine_cs *ring)
 {
+	struct drm_i915_private *dev_priv = ring->i915;
+	uint32_t desc[4];
 
-	struct intel_engine_cs *engine = rq0->engine;
-	struct drm_i915_private *dev_priv = rq0->i915;
-	uint64_t desc[2];
-
-	if (rq1) {
-		desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->engine);
-		rq1->elsp_submitted++;
-	} else {
-		desc[1] = 0;
-	}
+	if (ring->execlist_port[1]) {
+		desc[0] = execlists_request_write_tail(ring->execlist_port[1]);
+		desc[1] = ring->execlist_port[1]->fence.seqno;
+	} else
+		desc[1] = desc[0] = 0;
 
-	desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->engine);
-	rq0->elsp_submitted++;
+	desc[2] = execlists_request_write_tail(ring->execlist_port[0]);
+	desc[3] = ring->execlist_port[0]->fence.seqno;
 
-	/* You must always write both descriptors in the order below. */
-	spin_lock_irq(&dev_priv->uncore.lock);
-	intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL);
-	I915_WRITE_FW(RING_ELSP(engine), upper_32_bits(desc[1]));
-	I915_WRITE_FW(RING_ELSP(engine), lower_32_bits(desc[1]));
+	/* Note: You must always write both descriptors in the order below. */
+	I915_WRITE_FW(RING_ELSP(ring), desc[1]);
+	I915_WRITE_FW(RING_ELSP(ring), desc[0]);
+	I915_WRITE_FW(RING_ELSP(ring), desc[3]);
 
-	I915_WRITE_FW(RING_ELSP(engine), upper_32_bits(desc[0]));
 	/* The context is automatically loaded after the following */
-	I915_WRITE_FW(RING_ELSP(engine), lower_32_bits(desc[0]));
-
-	/* ELSP is a wo register, use another nearby reg for posting */
-	POSTING_READ_FW(RING_EXECLIST_STATUS_LO(engine));
-	intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL);
-	spin_unlock_irq(&dev_priv->uncore.lock);
+	I915_WRITE_FW(RING_ELSP(ring), desc[2]);
 }
 
-static int execlists_update_context(struct drm_i915_gem_request *rq)
+static void execlists_context_unqueue(struct intel_engine_cs *engine)
 {
-	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
-	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[rq->engine->id].state;
-	struct drm_i915_gem_object *rb_obj = rq->ring->obj;
-	struct page *page;
-	uint32_t *reg_state;
-
-	BUG_ON(!ctx_obj);
-	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj));
-	WARN_ON(!i915_gem_obj_is_pinned(rb_obj));
-
-	page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
-	reg_state = kmap_atomic(page);
+	struct drm_i915_gem_request *cursor;
+	bool submit = false;
+	int port = 0;
 
-	reg_state[CTX_RING_TAIL+1] = rq->tail;
-	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj);
+	assert_spin_locked(&engine->execlist_lock);
 
-	if (ppgtt && !USES_FULL_48BIT_PPGTT(rq->i915)) {
-		/* True 32b PPGTT with dynamic page allocation: update PDP
-		 * registers and point the unallocated PDPs to scratch page.
-		 * PML4 is allocated during ppgtt init, so this is not needed
-		 * in 48-bit mode.
+	/* Try to read in pairs and fill both submission ports */
+	cursor = engine->execlist_port[port];
+	if (cursor != NULL) {
+		/* WaIdleLiteRestore:bdw,skl
+		 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
+		 * as we resubmit the request. See gen8_add_request()
+		 * for where we prepare the padding after the end of the
+		 * request.
 		 */
-		ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
-		ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
-		ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
-		ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
-	}
-
-	kunmap_atomic(reg_state);
-
-	return 0;
-}
+		cursor->tail = cursor->wa_tail;
+		cursor = list_next_entry(cursor, execlist_link);
+	} else
+		cursor = list_first_entry(&engine->execlist_queue,
+					  typeof(*cursor),
+					  execlist_link);
+	while (&cursor->execlist_link != &engine->execlist_queue) {
+		/* Same ctx: ignore earlier request, as the
+		 * second request extends the first.
+		 */
+		if (engine->execlist_port[port] &&
+		    cursor->ctx != engine->execlist_port[port]->ctx) {
+			if (++port == ARRAY_SIZE(engine->execlist_port))
+				break;
+		}
 
-static void execlists_submit_requests(struct drm_i915_gem_request *rq0,
-				      struct drm_i915_gem_request *rq1)
-{
-	execlists_update_context(rq0);
+		engine->execlist_port[port] = cursor;
+		submit = true;
 
-	if (rq1)
-		execlists_update_context(rq1);
+		cursor = list_next_entry(cursor, execlist_link);
+	}
 
-	execlists_elsp_write(rq0, rq1);
+	if (submit)
+		execlists_submit_pair(engine);
 }
 
-static void execlists_context_unqueue(struct intel_engine_cs *engine)
+static bool execlists_complete_requests(struct intel_engine_cs *engine,
+					u32 seqno)
 {
-	struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
-	struct drm_i915_gem_request *cursor = NULL, *tmp = NULL;
-
 	assert_spin_locked(&engine->execlist_lock);
 
-	/*
-	 * If irqs are not active generate a warning as batches that finish
-	 * without the irqs may get lost and a GPU Hang may occur.
-	 */
-	WARN_ON(!intel_irqs_enabled(engine->dev->dev_private));
+	do {
+		struct drm_i915_gem_request *req;
 
-	if (list_empty(&engine->execlist_queue))
-		return;
+		req = engine->execlist_port[0];
+		if (req == NULL)
+			break;
 
-	/* Try to read in pairs */
-	list_for_each_entry_safe(cursor, tmp, &engine->execlist_queue,
-				 execlist_link) {
-		if (!req0) {
-			req0 = cursor;
-		} else if (req0->ctx == cursor->ctx) {
-			/* Same ctx: ignore first request, as second request
-			 * will update tail past first request's workload */
-			cursor->elsp_submitted = req0->elsp_submitted;
-			list_del(&req0->execlist_link);
-			list_add_tail(&req0->execlist_link,
-				&engine->execlist_retired_req_list);
-			req0 = cursor;
-		} else {
-			req1 = cursor;
+		if (!i915_seqno_passed(seqno, req->fence.seqno))
 			break;
-		}
-	}
 
-	if (IS_GEN8(engine->dev) || IS_GEN9(engine->dev)) {
-		/*
-		 * WaIdleLiteRestore: make sure we never cause a lite
-		 * restore with HEAD==TAIL
+		/* Move the completed set of requests from the start of the
+		 * execlist_queue over to the tail of the execlist_completed.
 		 */
-		if (req0->elsp_submitted) {
-			/*
-			 * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL
-			 * as we resubmit the request. See gen8_add_request()
-			 * for where we prepare the padding after the end of the
-			 * request.
-			 */
-			struct intel_ring *ring;
-
-			ring = req0->ctx->engine[engine->id].ring;
-			req0->tail += 8;
-			req0->tail &= ring->size - 1;
-		}
-	}
-
-	WARN_ON(req1 && req1->elsp_submitted);
+		engine->execlist_completed.prev->next = engine->execlist_queue.next;
+		engine->execlist_completed.prev = &req->execlist_link;
 
-	execlists_submit_requests(req0, req1);
-}
-
-static bool execlists_check_remove_request(struct intel_engine_cs *ring,
-					   u32 request_id)
-{
-	struct drm_i915_gem_request *head_req;
+		engine->execlist_queue.next = req->execlist_link.next;
+		req->execlist_link.next->prev = &engine->execlist_queue;
 
-	assert_spin_locked(&ring->execlist_lock);
+		req->execlist_link.next = &engine->execlist_completed;
 
-	head_req = list_first_entry_or_null(&ring->execlist_queue,
-					    struct drm_i915_gem_request,
-					    execlist_link);
-
-	if (head_req != NULL) {
-		struct drm_i915_gem_object *ctx_obj =
-				head_req->ctx->engine[ring->id].state;
-		if (intel_execlists_ctx_id(ctx_obj) == request_id) {
-			WARN(head_req->elsp_submitted == 0,
-			     "Never submitted head request\n");
-
-			if (--head_req->elsp_submitted <= 0) {
-				list_del(&head_req->execlist_link);
-				list_add_tail(&head_req->execlist_link,
-					&ring->execlist_retired_req_list);
-				return true;
-			}
-		}
-	}
+		/* The hardware has completed the request on this port; it
+		 * will switch to the next.
+		 */
+		engine->execlist_port[0] = engine->execlist_port[1];
+		engine->execlist_port[1] = NULL;
+	} while (1);
 
-	return false;
+	if (engine->execlist_context_descriptor & GEN8_CTX_FORCE_RESTORE)
+		return engine->execlist_port[0] == NULL;
+	else
+		return engine->execlist_port[1] == NULL;
 }
 
 static void set_rtpriority(void)
@@ -504,23 +404,29 @@ static int intel_execlists_submit(void *arg)
 {
 	struct intel_engine_cs *ring = arg;
 	struct drm_i915_private *dev_priv = ring->i915;
+	const i915_reg_t ptrs = RING_CONTEXT_STATUS_PTR(ring);
 
 	set_rtpriority();
 
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 	do {
-		u32 status;
-		u32 status_id;
-		u32 submit_contexts;
 		u8 head, tail;
+		u32 seqno;
 
 		set_current_state(TASK_INTERRUPTIBLE);
-		head = ring->next_context_status_buffer;
-		tail = I915_READ(RING_CONTEXT_STATUS_PTR(ring)) & GEN8_CSB_PTR_MASK;
+		head = tail = 0;
+		if (READ_ONCE(ring->execlist_port[0])) {
+			u32 x = I915_READ_FW(ptrs);
+			head = x >> 8;
+			tail = x;
+		}
 		if (head == tail) {
+			intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 			if (kthread_should_stop())
 				return 0;
 
 			schedule();
+			intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 			continue;
 		}
 		__set_current_state(TASK_RUNNING);
@@ -528,86 +434,46 @@ static int intel_execlists_submit(void *arg)
 		if (head > tail)
 			tail += GEN8_CSB_ENTRIES;
 
-		status = 0;
-		submit_contexts = 0;
-
-		spin_lock(&ring->execlist_lock);
-
+		seqno = 0;
 		while (head++ < tail) {
-			status = I915_READ(RING_CONTEXT_STATUS_BUF_LO(ring, head % GEN8_CSB_ENTRIES));
-			status_id = I915_READ(RING_CONTEXT_STATUS_BUF_HI(ring, head % GEN8_CSB_ENTRIES));
-
-			if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
-				continue;
-
-			if (status & GEN8_CTX_STATUS_PREEMPTED) {
-				if (status & GEN8_CTX_STATUS_LITE_RESTORE) {
-					if (execlists_check_remove_request(ring, status_id))
-						WARN(1, "Lite Restored request removed from queue\n");
-				} else
-					WARN(1, "Preemption without Lite Restore\n");
-			}
-
-			if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) ||
-			    (status & GEN8_CTX_STATUS_ELEMENT_SWITCH)) {
-				if (execlists_check_remove_request(ring, status_id))
-					submit_contexts++;
+			u32 status = I915_READ_FW(RING_CONTEXT_STATUS_BUF_LO(ring,
+									     head % GEN8_CSB_ENTRIES));
+			if (unlikely(status & GEN8_CTX_STATUS_PREEMPTED && 0)) {
+				DRM_ERROR("Pre-empted request %x %s Lite Restore\n",
+					  I915_READ_FW(RING_CONTEXT_STATUS_BUF_HI(ring, head % GEN8_CSB_ENTRIES)),
+					  status & GEN8_CTX_STATUS_LITE_RESTORE ? "with" : "without");
 			}
+			if (status & (GEN8_CTX_STATUS_ACTIVE_IDLE |
+				      GEN8_CTX_STATUS_ELEMENT_SWITCH))
+				seqno = I915_READ_FW(RING_CONTEXT_STATUS_BUF_HI(ring,
+										head % GEN8_CSB_ENTRIES));
 		}
 
-		if (disable_lite_restore_wa(ring)) {
-			/* Prevent a ctx to preempt itself */
-			if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) &&
-					(submit_contexts != 0))
+		I915_WRITE_FW(ptrs,
+			      _MASKED_FIELD(GEN8_CSB_PTR_MASK<<8,
+					    (tail % GEN8_CSB_ENTRIES) << 8));
+
+		if (seqno) {
+			spin_lock(&ring->execlist_lock);
+			if (execlists_complete_requests(ring, seqno))
 				execlists_context_unqueue(ring);
-		} else if (submit_contexts != 0) {
-			execlists_context_unqueue(ring);
+			spin_unlock(&ring->execlist_lock);
 		}
-
-		spin_unlock(&ring->execlist_lock);
-
-		WARN(submit_contexts > 2, "More than two context complete events?\n");
-		ring->next_context_status_buffer = tail % GEN8_CSB_ENTRIES;
-		I915_WRITE(RING_CONTEXT_STATUS_PTR(ring),
-			   _MASKED_FIELD(GEN8_CSB_PTR_MASK << 8,
-					 ring->next_context_status_buffer<<8));
 	} while (1);
 }
 
 static int execlists_context_queue(struct drm_i915_gem_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
-	struct drm_i915_gem_request *cursor;
-	int num_elements = 0;
 
 	i915_gem_request_get(request);
 
 	spin_lock(&engine->execlist_lock);
-
-	list_for_each_entry(cursor, &engine->execlist_queue, execlist_link)
-		if (++num_elements > 2)
-			break;
-
-	if (num_elements > 2) {
-		struct drm_i915_gem_request *tail_req;
-
-		tail_req = list_last_entry(&engine->execlist_queue,
-					   struct drm_i915_gem_request,
-					   execlist_link);
-
-		if (request->ctx == tail_req->ctx) {
-			WARN(tail_req->elsp_submitted != 0,
-				"More than 2 already-submitted reqs queued\n");
-			list_del(&tail_req->execlist_link);
-			list_add_tail(&tail_req->execlist_link,
-				&engine->execlist_retired_req_list);
-		}
-	}
-
 	list_add_tail(&request->execlist_link, &engine->execlist_queue);
-	if (num_elements == 0)
-		execlists_context_unqueue(engine);
-
+	if (engine->execlist_port[0] == NULL) {
+		engine->execlist_port[0] = request;
+		execlists_submit_pair(engine);
+	}
 	spin_unlock(&engine->execlist_lock);
 
 	return 0;
@@ -641,56 +507,32 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
 	return 0;
 }
 
-/*
- * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload
- * @request: Request to advance the logical ringbuffer of.
- *
- * The tail is updated in our logical ringbuffer struct, not in the actual context. What
- * really happens during submission is that the context and current tail will be placed
- * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
- * point, the tail *inside* the context is updated and the ELSP written to.
- */
-static void
-intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
-{
-	struct drm_i915_private *dev_priv = request->i915;
-
-	intel_ring_advance(request->ring);
-	request->tail = request->ring->tail;
-
-	if (dev_priv->guc.execbuf_client)
-		i915_guc_submit(dev_priv->guc.execbuf_client, request);
-	else
-		execlists_context_queue(request);
-}
-
 bool intel_execlists_retire_requests(struct intel_engine_cs *ring)
 {
 	struct drm_i915_gem_request *req, *tmp;
-	struct list_head retired_list;
+	struct list_head list;
 
-	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
-	if (list_empty(&ring->execlist_retired_req_list))
+	lockdep_assert_held(&ring->dev->struct_mutex);
+	if (list_empty(&ring->execlist_completed))
 		goto out;
 
-	INIT_LIST_HEAD(&retired_list);
 	spin_lock(&ring->execlist_lock);
-	list_replace_init(&ring->execlist_retired_req_list, &retired_list);
+	list_replace_init(&ring->execlist_completed, &list);
 	spin_unlock(&ring->execlist_lock);
 
-	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
+	list_for_each_entry_safe(req, tmp, &list, execlist_link) {
 		struct intel_context *ctx = req->ctx;
 		struct drm_i915_gem_object *ctx_obj =
 				ctx->engine[ring->id].state;
 
 		if (ctx_obj && (ctx != ring->default_context))
 			intel_lr_context_unpin(req);
-		list_del(&req->execlist_link);
+
 		i915_gem_request_put(req);
 	}
 
 out:
-	return list_empty(&ring->execlist_queue);
+	return READ_ONCE(ring->execlist_port[0]) == NULL;
 }
 
 void intel_logical_ring_stop(struct intel_engine_cs *ring)
@@ -720,6 +562,7 @@ static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
 		struct intel_ring *ringbuf)
 {
 	struct drm_i915_private *dev_priv = ring->i915;
+	u32 ggtt_offset;
 	int ret = 0;
 
 	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
@@ -734,6 +577,16 @@ static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
 
 	ctx_obj->dirty = true;
 
+	ggtt_offset =
+		i915_gem_obj_ggtt_offset(ctx_obj) + LRC_PPHWSP_PN * PAGE_SIZE;
+	ringbuf->context_descriptor =
+		ggtt_offset | ring->execlist_context_descriptor;
+
+	ringbuf->registers =
+		kmap(i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN));
+	ringbuf->registers[CTX_RING_BUFFER_START+1] =
+		i915_gem_obj_ggtt_offset(ringbuf->obj);
+
 	/* Invalidate GuC TLB. */
 	if (i915.enable_guc_submission)
 		I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
@@ -768,6 +621,7 @@ static int intel_lr_context_pin(struct drm_i915_gem_request *rq)
 
 void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
 {
+	struct drm_i915_gem_object *ctx_obj;
 	int engine = rq->engine->id;
 
 	WARN_ON(!mutex_is_locked(&rq->i915->dev->struct_mutex));
@@ -775,7 +629,10 @@ void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
 		return;
 
 	intel_ring_unmap(rq->ring);
-	i915_gem_object_ggtt_unpin(rq->ctx->engine[engine].state);
+
+	ctx_obj = rq->ctx->engine[engine].state;
+	kunmap(i915_gem_object_get_page(ctx_obj, LRC_STATE_PN));
+	i915_gem_object_ggtt_unpin(ctx_obj);
 	i915_gem_context_unreference(rq->ctx);
 }
 
@@ -1168,12 +1025,39 @@ out:
 	return ret;
 }
 
+static bool disable_lite_restore_wa(struct intel_engine_cs *ring)
+{
+	return (IS_SKL_REVID(ring->i915, 0, SKL_REVID_B0) ||
+		IS_BXT_REVID(ring->i915, 0, BXT_REVID_A1)) &&
+		(ring->id == VCS || ring->id == VCS2);
+}
+
+static uint64_t lr_context_descriptor(struct intel_engine_cs *ring)
+{
+	uint64_t desc;
+
+	desc = GEN8_CTX_VALID;
+	desc |= GEN8_CTX_ADDRESSING_MODE(ring->i915) << GEN8_CTX_ADDRESSING_MODE_SHIFT;
+	if (IS_GEN8(ring->i915))
+		desc |= GEN8_CTX_L3LLC_COHERENT;
+	desc |= GEN8_CTX_PRIVILEGE;
+
+	/* TODO: WaDisableLiteRestore when we start using semaphore
+	 * signalling between Command Streamers */
+	/* desc |= GEN8_CTX_FORCE_RESTORE; */
+
+	/* WaEnableForceRestoreInCtxtDescForVCS:skl */
+	/* WaEnableForceRestoreInCtxtDescForVCS:bxt */
+	if (disable_lite_restore_wa(ring))
+		desc |= GEN8_CTX_FORCE_RESTORE;
+
+	return desc;
+}
+
 static int gen8_init_common_ring(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u8 next_context_status_buffer_hw;
-
 	lrc_setup_hardware_status_page(ring,
 				ring->default_context->engine[ring->id].state);
 
@@ -1197,18 +1081,6 @@ static int gen8_init_common_ring(struct intel_engine_cs *ring)
 	 * SKL  |         ?                |         ?            |
 	 * BXT  |         ?                |         ?            |
 	 */
-	next_context_status_buffer_hw =
-		GEN8_CSB_WRITE_PTR(I915_READ(RING_CONTEXT_STATUS_PTR(ring)));
-
-	/*
-	 * When the CSB registers are reset (also after power-up / gpu reset),
-	 * CSB write pointer is set to all 1's, which is not valid, use '5' in
-	 * this special case, so the first element read is CSB[0].
-	 */
-	if (next_context_status_buffer_hw == GEN8_CSB_PTR_MASK)
-		next_context_status_buffer_hw = (GEN8_CSB_ENTRIES - 1);
-
-	ring->next_context_status_buffer = next_context_status_buffer_hw;
 	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", ring->name);
 
 	memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
@@ -1482,7 +1354,8 @@ static int gen8_add_request(struct drm_i915_gem_request *request)
 	intel_ring_emit(ring, request->fence.seqno);
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	intel_ring_emit(ring, MI_NOOP);
-	intel_logical_ring_advance_and_submit(request);
+	intel_ring_advance(ring);
+	request->tail = ring->tail;
 
 	/*
 	 * Here we add two extra NOOPs as padding to avoid
@@ -1491,6 +1364,12 @@ static int gen8_add_request(struct drm_i915_gem_request *request)
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
+	request->wa_tail = ring->tail;
+
+	if (request->i915->guc.execbuf_client)
+		i915_guc_submit(request->i915->guc.execbuf_client, request);
+	else
+		execlists_context_queue(request);
 
 	return 0;
 }
@@ -1569,9 +1448,11 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
 
 	INIT_LIST_HEAD(&ring->buffers);
 	INIT_LIST_HEAD(&ring->execlist_queue);
-	INIT_LIST_HEAD(&ring->execlist_retired_req_list);
+	INIT_LIST_HEAD(&ring->execlist_completed);
 	spin_lock_init(&ring->execlist_lock);
 
+	ring->execlist_context_descriptor = lr_context_descriptor(ring);
+
 	ret = i915_cmd_parser_init_ring(ring);
 	if (ret)
 		goto error;
@@ -1592,8 +1473,6 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
 		goto error;
 	}
 
-	ring->next_context_status_buffer =
-			I915_READ(RING_CONTEXT_STATUS_PTR(ring)) & GEN8_CSB_PTR_MASK;
 	task = kthread_run(intel_execlists_submit, ring,
 			   "irq/i915:%de", ring->id);
 	if (IS_ERR(task))
@@ -1904,9 +1783,7 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
 					  CTX_CTRL_RS_CTX_ENABLE));
 	ASSIGN_CTX_REG(reg_state, CTX_RING_HEAD, RING_HEAD(ring->mmio_base), 0);
 	ASSIGN_CTX_REG(reg_state, CTX_RING_TAIL, RING_TAIL(ring->mmio_base), 0);
-	/* Ring buffer start address is not known until the buffer is pinned.
-	 * It is written to the context image in execlists_update_context()
-	 */
+	/* Ring buffer start address is not known until the buffer is pinned. */
 	ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_START, RING_START(ring->mmio_base), 0);
 	ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL, RING_CTL(ring->mmio_base),
 		       ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID);
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 33f82a84065a..37601a35d5fc 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -74,12 +74,9 @@ int intel_lr_context_deferred_alloc(struct intel_context *ctx,
 void intel_lr_context_unpin(struct drm_i915_gem_request *req);
 void intel_lr_context_reset(struct drm_device *dev,
 			struct intel_context *ctx);
-uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
-				     struct intel_engine_cs *ring);
 
 /* Execlists */
 int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
-u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
 
 bool intel_execlists_retire_requests(struct intel_engine_cs *ring);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index edaf07b2292e..3d4d5711aea9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -122,6 +122,9 @@ struct intel_ring {
 	 * we can detect new retirements.
 	 */
 	u32 last_retired_head;
+
+	u32 context_descriptor;
+	u32 *registers;
 };
 
 struct	intel_context;
@@ -293,9 +296,10 @@ struct intel_engine_cs {
 	/* Execlists */
 	struct task_struct *execlists_submit;
 	spinlock_t execlist_lock;
+	struct drm_i915_gem_request *execlist_port[2];
 	struct list_head execlist_queue;
-	struct list_head execlist_retired_req_list;
-	u8 next_context_status_buffer;
+	struct list_head execlist_completed;
+	u32 execlist_context_descriptor;
 	u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
 
 	/**
-- 
2.7.0.rc3
