All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] drm/i915: Do not call API requiring struct_mutex where it is not available
@ 2016-01-15 15:10 Tvrtko Ursulin
  2016-01-15 15:10 ` [PATCH 2/3] drm/i915: Cache ringbuffer GTT VMA Tvrtko Ursulin
                   ` (3 more replies)
  0 siblings, 4 replies; 13+ messages in thread
From: Tvrtko Ursulin @ 2016-01-15 15:10 UTC (permalink / raw)
  To: Intel-gfx; +Cc: Daniel Vetter

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

LRC code was calling GEM API like i915_gem_obj_ggtt_offset from
places where the struct_mutex cannot be grabbed (irq handlers).

To avoid that this patch caches some interesting bits and values
in the engine and context structures.

Some usages are also removed where they are not needed like a
few asserts which are either impossible or have been checked
already during engine initialization.

Side benefit is also that interrupt handlers and command
submission stop evaluating invariant conditionals, like what
Gen we are running on, on every interrupt and every command
submitted.

This patch deals with logical ring context id and descriptors
while subsequent patches will deal with the remaining issues.

v2:
 * Cache the VMA instead of the address. (Chris Wilson)
 * Incorporate Dave Gordon's good comments and function name.

v3:
 * Extract ctx descriptor template to a function and group
   functions dealing with ctx descriptor & co together near
   top of the file. (Dave Gordon)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Dave Gordon <david.s.gordon@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |  15 ++--
 drivers/gpu/drm/i915/i915_drv.h         |   2 +
 drivers/gpu/drm/i915/i915_gem_gtt.h     |   1 -
 drivers/gpu/drm/i915/intel_lrc.c        | 151 +++++++++++++++++++-------------
 drivers/gpu/drm/i915/intel_lrc.h        |   4 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h |   2 +
 6 files changed, 103 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index e3377abc0d4d..0b3550f05026 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1994,12 +1994,13 @@ static int i915_context_status(struct seq_file *m, void *unused)
 }
 
 static void i915_dump_lrc_obj(struct seq_file *m,
-			      struct intel_engine_cs *ring,
-			      struct drm_i915_gem_object *ctx_obj)
+			      struct intel_context *ctx,
+			      struct intel_engine_cs *ring)
 {
 	struct page *page;
 	uint32_t *reg_state;
 	int j;
+	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
 	unsigned long ggtt_offset = 0;
 
 	if (ctx_obj == NULL) {
@@ -2009,7 +2010,7 @@ static void i915_dump_lrc_obj(struct seq_file *m,
 	}
 
 	seq_printf(m, "CONTEXT: %s %u\n", ring->name,
-		   intel_execlists_ctx_id(ctx_obj));
+		   intel_execlists_ctx_id(ctx, ring));
 
 	if (!i915_gem_obj_ggtt_bound(ctx_obj))
 		seq_puts(m, "\tNot bound in GGTT\n");
@@ -2058,8 +2059,7 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
 	list_for_each_entry(ctx, &dev_priv->context_list, link) {
 		for_each_ring(ring, dev_priv, i) {
 			if (ring->default_context != ctx)
-				i915_dump_lrc_obj(m, ring,
-						  ctx->engine[i].state);
+				i915_dump_lrc_obj(m, ctx, ring);
 		}
 	}
 
@@ -2133,11 +2133,8 @@ static int i915_execlists(struct seq_file *m, void *data)
 
 		seq_printf(m, "\t%d requests in queue\n", count);
 		if (head_req) {
-			struct drm_i915_gem_object *ctx_obj;
-
-			ctx_obj = head_req->ctx->engine[ring_id].state;
 			seq_printf(m, "\tHead request id: %u\n",
-				   intel_execlists_ctx_id(ctx_obj));
+				   intel_execlists_ctx_id(head_req->ctx, ring));
 			seq_printf(m, "\tHead request tail: %u\n",
 				   head_req->tail);
 		}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index eb7bb97f7316..acff98b9c148 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -888,6 +888,8 @@ struct intel_context {
 		struct drm_i915_gem_object *state;
 		struct intel_ringbuffer *ringbuf;
 		int pin_count;
+		struct i915_vma *lrc_vma;
+		u64 lrc_desc;
 	} engine[I915_NUM_RINGS];
 
 	struct list_head link;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index b448ad832dcf..e5737963ab79 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -44,7 +44,6 @@ typedef uint64_t gen8_ppgtt_pml4e_t;
 
 #define gtt_total_entries(gtt) ((gtt).base.total >> PAGE_SHIFT)
 
-
 /* gen6-hsw has bit 11-4 for physical addr bit 39-32 */
 #define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
 #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5027699c5291..1a0d85325072 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -263,65 +263,92 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists
 	return 0;
 }
 
+static void
+logical_ring_init_platform_invariants(struct intel_engine_cs *ring)
+{
+	struct drm_device *dev = ring->dev;
+
+	ring->disable_lite_restore_wa = (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
+					IS_BXT_REVID(dev, 0, BXT_REVID_A1)) &&
+					(ring->id == VCS || ring->id == VCS2);
+
+	ring->ctx_desc_template = GEN8_CTX_VALID;
+	ring->ctx_desc_template |= GEN8_CTX_ADDRESSING_MODE(dev) <<
+				   GEN8_CTX_ADDRESSING_MODE_SHIFT;
+	if (IS_GEN8(dev))
+		ring->ctx_desc_template |= GEN8_CTX_L3LLC_COHERENT;
+	ring->ctx_desc_template |= GEN8_CTX_PRIVILEGE;
+
+	/* TODO: WaDisableLiteRestore when we start using semaphore
+	 * signalling between Command Streamers */
+	/* ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; */
+
+	/* WaEnableForceRestoreInCtxtDescForVCS:skl */
+	/* WaEnableForceRestoreInCtxtDescForVCS:bxt */
+	if (ring->disable_lite_restore_wa)
+		ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE;
+}
+
 /**
- * intel_execlists_ctx_id() - get the Execlists Context ID
- * @ctx_obj: Logical Ring Context backing object.
+ * intel_lr_context_descriptor_update() - calculate & cache the descriptor
+ * 					  descriptor for a pinned context
  *
- * Do not confuse with ctx->id! Unfortunately we have a name overload
- * here: the old context ID we pass to userspace as a handler so that
- * they can refer to a context, and the new context ID we pass to the
- * ELSP so that the GPU can inform us of the context status via
- * interrupts.
+ * @ctx: Context to work on
+ * @ring: Engine the descriptor will be used with
  *
- * Return: 20-bits globally unique context ID.
+ * The context descriptor encodes various attributes of a context,
+ * including its GTT address and some flags. Because it's fairly
+ * expensive to calculate, we'll just do it once and cache the result,
+ * which remains valid until the context is unpinned.
+ *
+ * This is what a descriptor looks like, from LSB to MSB:
+ *    bits 0-11:    flags, GEN8_CTX_* (cached in ctx_desc_template)
+ *    bits 12-31:    LRCA, GTT address of (the HWSP of) this context
+ *    bits 32-51:    ctx ID, a globally unique tag (the LRCA again!)
+ *    bits 52-63:    reserved, may encode the engine ID (for GuC)
  */
-u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
+static void
+intel_lr_context_descriptor_update(struct intel_context *ctx,
+				   struct intel_engine_cs *ring)
 {
-	u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj) +
-			LRC_PPHWSP_PN * PAGE_SIZE;
+	uint64_t lrca, desc;
 
-	/* LRCA is required to be 4K aligned so the more significant 20 bits
-	 * are globally unique */
-	return lrca >> 12;
-}
+	lrca = ctx->engine[ring->id].lrc_vma->node.start +
+	       LRC_PPHWSP_PN * PAGE_SIZE;
 
-static bool disable_lite_restore_wa(struct intel_engine_cs *ring)
-{
-	struct drm_device *dev = ring->dev;
+	desc = ring->ctx_desc_template;			   /* bits  0-11 */
+	desc |= lrca;					   /* bits 12-31 */
+	desc |= (lrca >> PAGE_SHIFT) << GEN8_CTX_ID_SHIFT; /* bits 32-51 */
 
-	return (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
-		IS_BXT_REVID(dev, 0, BXT_REVID_A1)) &&
-	       (ring->id == VCS || ring->id == VCS2);
+	ctx->engine[ring->id].lrc_desc = desc;
 }
 
 uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
 				     struct intel_engine_cs *ring)
 {
-	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
-	uint64_t desc;
-	uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj) +
-			LRC_PPHWSP_PN * PAGE_SIZE;
-
-	WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
-
-	desc = GEN8_CTX_VALID;
-	desc |= GEN8_CTX_ADDRESSING_MODE(dev) << GEN8_CTX_ADDRESSING_MODE_SHIFT;
-	if (IS_GEN8(ctx_obj->base.dev))
-		desc |= GEN8_CTX_L3LLC_COHERENT;
-	desc |= GEN8_CTX_PRIVILEGE;
-	desc |= lrca;
-	desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
-
-	/* TODO: WaDisableLiteRestore when we start using semaphore
-	 * signalling between Command Streamers */
-	/* desc |= GEN8_CTX_FORCE_RESTORE; */
-
-	/* WaEnableForceRestoreInCtxtDescForVCS:skl */
-	/* WaEnableForceRestoreInCtxtDescForVCS:bxt */
-	if (disable_lite_restore_wa(ring))
-		desc |= GEN8_CTX_FORCE_RESTORE;
+	return ctx->engine[ring->id].lrc_desc;
+}
 
-	return desc;
+/**
+ * intel_execlists_ctx_id() - get the Execlists Context ID
+ * @ctx: Context to get the ID for
+ * @ring: Engine to get the ID for
+ *
+ * Do not confuse with ctx->id! Unfortunately we have a name overload
+ * here: the old context ID we pass to userspace as a handler so that
+ * they can refer to a context, and the new context ID we pass to the
+ * ELSP so that the GPU can inform us of the context status via
+ * interrupts.
+ *
+ * The context ID is a portion of the context descriptor, so we can
+ * just extract the required part from the cached descriptor.
+ *
+ * Return: 20-bits globally unique context ID.
+ */
+u32 intel_execlists_ctx_id(struct intel_context *ctx,
+			   struct intel_engine_cs *ring)
+{
+	return intel_lr_context_descriptor(ctx, ring) >> GEN8_CTX_ID_SHIFT;
 }
 
 static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
@@ -369,8 +396,6 @@ static int execlists_update_context(struct drm_i915_gem_request *rq)
 	uint32_t *reg_state;
 
 	BUG_ON(!ctx_obj);
-	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj));
-	WARN_ON(!i915_gem_obj_is_pinned(rb_obj));
 
 	page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
 	reg_state = kmap_atomic(page);
@@ -477,9 +502,7 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring,
 					    execlist_link);
 
 	if (head_req != NULL) {
-		struct drm_i915_gem_object *ctx_obj =
-				head_req->ctx->engine[ring->id].state;
-		if (intel_execlists_ctx_id(ctx_obj) == request_id) {
+		if (intel_execlists_ctx_id(head_req->ctx, ring) == request_id) {
 			WARN(head_req->elsp_submitted == 0,
 			     "Never submitted head request\n");
 
@@ -556,7 +579,7 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring)
 		}
 	}
 
-	if (disable_lite_restore_wa(ring)) {
+	if (ring->disable_lite_restore_wa) {
 		/* Prevent a ctx to preempt itself */
 		if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) &&
 		    (submit_contexts != 0))
@@ -1039,14 +1062,16 @@ int logical_ring_flush_all_caches(struct drm_i915_gem_request *req)
 }
 
 static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
-		struct drm_i915_gem_object *ctx_obj,
-		struct intel_ringbuffer *ringbuf)
+				   struct intel_context *ctx)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret = 0;
+	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
+	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+	int ret;
 
 	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
+
 	ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN,
 			PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
 	if (ret)
@@ -1056,6 +1081,8 @@ static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
 	if (ret)
 		goto unpin_ctx_obj;
 
+	ctx->engine[ring->id].lrc_vma = i915_gem_obj_to_ggtt(ctx_obj);
+	intel_lr_context_descriptor_update(ctx, ring);
 	ctx_obj->dirty = true;
 
 	/* Invalidate GuC TLB. */
@@ -1074,11 +1101,9 @@ static int intel_lr_context_pin(struct drm_i915_gem_request *rq)
 {
 	int ret = 0;
 	struct intel_engine_cs *ring = rq->ring;
-	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
-	struct intel_ringbuffer *ringbuf = rq->ringbuf;
 
 	if (rq->ctx->engine[ring->id].pin_count++ == 0) {
-		ret = intel_lr_context_do_pin(ring, ctx_obj, ringbuf);
+		ret = intel_lr_context_do_pin(ring, rq->ctx);
 		if (ret)
 			goto reset_pin_count;
 	}
@@ -1100,6 +1125,8 @@ void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
 		if (--rq->ctx->engine[ring->id].pin_count == 0) {
 			intel_unpin_ringbuffer_obj(ringbuf);
 			i915_gem_object_ggtt_unpin(ctx_obj);
+			rq->ctx->engine[ring->id].lrc_vma = NULL;
+			rq->ctx->engine[ring->id].lrc_desc = 0;
 		}
 	}
 }
@@ -1938,6 +1965,9 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
 		ring->status_page.obj = NULL;
 	}
 
+	ring->disable_lite_restore_wa = false;
+	ring->ctx_desc_template = 0;
+
 	lrc_destroy_wa_ctx_obj(ring);
 	ring->dev = NULL;
 }
@@ -1988,6 +2018,8 @@ logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring)
 	INIT_LIST_HEAD(&ring->execlist_retired_req_list);
 	spin_lock_init(&ring->execlist_lock);
 
+	logical_ring_init_platform_invariants(ring);
+
 	ret = i915_cmd_parser_init_ring(ring);
 	if (ret)
 		goto error;
@@ -1997,10 +2029,7 @@ logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring)
 		goto error;
 
 	/* As this is the default context, always pin it */
-	ret = intel_lr_context_do_pin(
-			ring,
-			ring->default_context->engine[ring->id].state,
-			ring->default_context->engine[ring->id].ringbuf);
+	ret = intel_lr_context_do_pin(ring, ring->default_context);
 	if (ret) {
 		DRM_ERROR(
 			"Failed to pin and map ringbuffer %s: %d\n",
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index de41ad6cd63d..49af638f6213 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -107,13 +107,15 @@ void intel_lr_context_reset(struct drm_device *dev,
 uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
 				     struct intel_engine_cs *ring);
 
+u32 intel_execlists_ctx_id(struct intel_context *ctx,
+			   struct intel_engine_cs *ring);
+
 /* Execlists */
 int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
 struct i915_execbuffer_params;
 int intel_execlists_submission(struct i915_execbuffer_params *params,
 			       struct drm_i915_gem_execbuffer2 *args,
 			       struct list_head *vmas);
-u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
 
 void intel_lrc_irq_handler(struct intel_engine_cs *ring);
 void intel_execlists_retire_requests(struct intel_engine_cs *ring);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 7349d9258191..85ce2272f92c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -269,6 +269,8 @@ struct  intel_engine_cs {
 	struct list_head execlist_queue;
 	struct list_head execlist_retired_req_list;
 	u8 next_context_status_buffer;
+	bool disable_lite_restore_wa;
+	u32 ctx_desc_template;
 	u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
 	int		(*emit_request)(struct drm_i915_gem_request *request);
 	int		(*emit_flush)(struct drm_i915_gem_request *request,
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 2/3] drm/i915: Cache ringbuffer GTT VMA
  2016-01-15 15:10 [PATCH 1/3] drm/i915: Do not call API requiring struct_mutex where it is not available Tvrtko Ursulin
@ 2016-01-15 15:10 ` Tvrtko Ursulin
  2016-01-15 15:10 ` [PATCH 3/3] drm/i915: Cache LRC state page in the context Tvrtko Ursulin
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 13+ messages in thread
From: Tvrtko Ursulin @ 2016-01-15 15:10 UTC (permalink / raw)
  To: Intel-gfx; +Cc: Daniel Vetter

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Purpose is to avoid calling i915_gem_obj_ggtt_offset from the
interrupt context without the big lock held.

v2: Renamed gtt_start to gtt_offset. (Daniel Vetter)
v3: Cache the VMA instead of address. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c        | 3 +--
 drivers/gpu/drm/i915/intel_ringbuffer.c | 3 +++
 drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 1a0d85325072..545173c1b5c5 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -391,7 +391,6 @@ static int execlists_update_context(struct drm_i915_gem_request *rq)
 	struct intel_engine_cs *ring = rq->ring;
 	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
 	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
-	struct drm_i915_gem_object *rb_obj = rq->ringbuf->obj;
 	struct page *page;
 	uint32_t *reg_state;
 
@@ -401,7 +400,7 @@ static int execlists_update_context(struct drm_i915_gem_request *rq)
 	reg_state = kmap_atomic(page);
 
 	reg_state[CTX_RING_TAIL+1] = rq->tail;
-	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj);
+	reg_state[CTX_RING_BUFFER_START+1] = rq->ringbuf->vma->node.start;
 
 	if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
 		/* True 32b PPGTT with dynamic page allocation: update PDP
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 4060acf0601a..4150a240d47d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1997,6 +1997,7 @@ void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
 	else
 		iounmap(ringbuf->virtual_start);
 	ringbuf->virtual_start = NULL;
+	ringbuf->vma = NULL;
 	i915_gem_object_ggtt_unpin(ringbuf->obj);
 }
 
@@ -2063,6 +2064,8 @@ int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
 		}
 	}
 
+	ringbuf->vma = i915_gem_obj_to_ggtt(obj);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 85ce2272f92c..ede57954080e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -99,6 +99,7 @@ struct intel_ring_hangcheck {
 struct intel_ringbuffer {
 	struct drm_i915_gem_object *obj;
 	void __iomem *virtual_start;
+	struct i915_vma *vma;
 
 	struct intel_engine_cs *ring;
 	struct list_head link;
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 3/3] drm/i915: Cache LRC state page in the context
  2016-01-15 15:10 [PATCH 1/3] drm/i915: Do not call API requiring struct_mutex where it is not available Tvrtko Ursulin
  2016-01-15 15:10 ` [PATCH 2/3] drm/i915: Cache ringbuffer GTT VMA Tvrtko Ursulin
@ 2016-01-15 15:10 ` Tvrtko Ursulin
  2016-01-15 16:01   ` Chris Wilson
  2016-01-15 16:20 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Do not call API requiring struct_mutex where it is not available Patchwork
  2016-01-16  7:49 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Do not call API requiring struct_mutex where it is not available (rev3) Patchwork
  3 siblings, 1 reply; 13+ messages in thread
From: Tvrtko Ursulin @ 2016-01-15 15:10 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

LRC lifetime is well defined so we can cache the page pointing
to the object backing store in the context in order to avoid
walking over the object SG page list from the interrupt context
without the big lock held.

v2: Also cache the mapping. (Chris Wilson)
v3: Unmap on the error path.
v4: No need to cache the page. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dave Gordon <david.s.gordon@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  |  1 +
 drivers/gpu/drm/i915/intel_lrc.c | 53 ++++++++++++++++++++++++++--------------
 2 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index acff98b9c148..af301482e6f8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -890,6 +890,7 @@ struct intel_context {
 		int pin_count;
 		struct i915_vma *lrc_vma;
 		u64 lrc_desc;
+		uint32_t *lrc_reg_state;
 	} engine[I915_NUM_RINGS];
 
 	struct list_head link;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 545173c1b5c5..70bd28cc8887 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -390,14 +390,7 @@ static int execlists_update_context(struct drm_i915_gem_request *rq)
 {
 	struct intel_engine_cs *ring = rq->ring;
 	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
-	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
-	struct page *page;
-	uint32_t *reg_state;
-
-	BUG_ON(!ctx_obj);
-
-	page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
-	reg_state = kmap_atomic(page);
+	uint32_t *reg_state = rq->ctx->engine[ring->id].lrc_reg_state;
 
 	reg_state[CTX_RING_TAIL+1] = rq->tail;
 	reg_state[CTX_RING_BUFFER_START+1] = rq->ringbuf->vma->node.start;
@@ -414,8 +407,6 @@ static int execlists_update_context(struct drm_i915_gem_request *rq)
 		ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
 	}
 
-	kunmap_atomic(reg_state);
-
 	return 0;
 }
 
@@ -1067,6 +1058,8 @@ static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
 	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+	struct page *lrc_state_page;
+	uint32_t *lrc_reg_state;
 	int ret;
 
 	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
@@ -1076,12 +1069,25 @@ static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
 	if (ret)
 		return ret;
 
+	lrc_state_page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
+	if (WARN_ON(!lrc_state_page)) {
+		ret = -ENODEV;
+		goto unpin_ctx_obj;
+	}
+
+	lrc_reg_state = kmap(lrc_state_page);
+	if (!lrc_reg_state) {
+		ret = -ENOMEM;
+		goto unpin_ctx_obj;
+	}
+
 	ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
 	if (ret)
-		goto unpin_ctx_obj;
+		goto unmap_state_page;
 
 	ctx->engine[ring->id].lrc_vma = i915_gem_obj_to_ggtt(ctx_obj);
 	intel_lr_context_descriptor_update(ctx, ring);
+	ctx->engine[ring->id].lrc_reg_state = lrc_reg_state;
 	ctx_obj->dirty = true;
 
 	/* Invalidate GuC TLB. */
@@ -1090,6 +1096,8 @@ static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
 
 	return ret;
 
+unmap_state_page:
+	kunmap(lrc_state_page);
 unpin_ctx_obj:
 	i915_gem_object_ggtt_unpin(ctx_obj);
 
@@ -1118,15 +1126,22 @@ void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
 	struct intel_engine_cs *ring = rq->ring;
 	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
 	struct intel_ringbuffer *ringbuf = rq->ringbuf;
+	struct page *lrc_state_page;
 
-	if (ctx_obj) {
-		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
-		if (--rq->ctx->engine[ring->id].pin_count == 0) {
-			intel_unpin_ringbuffer_obj(ringbuf);
-			i915_gem_object_ggtt_unpin(ctx_obj);
-			rq->ctx->engine[ring->id].lrc_vma = NULL;
-			rq->ctx->engine[ring->id].lrc_desc = 0;
-		}
+	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
+
+	if (!ctx_obj)
+		return;
+
+	if (--rq->ctx->engine[ring->id].pin_count == 0) {
+		lrc_state_page = i915_gem_object_get_dirty_page(ctx_obj,
+								LRC_STATE_PN);
+		kunmap(lrc_state_page);
+		intel_unpin_ringbuffer_obj(ringbuf);
+		i915_gem_object_ggtt_unpin(ctx_obj);
+		rq->ctx->engine[ring->id].lrc_vma = NULL;
+		rq->ctx->engine[ring->id].lrc_desc = 0;
+		rq->ctx->engine[ring->id].lrc_reg_state = NULL;
 	}
 }
 
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/3] drm/i915: Cache LRC state page in the context
  2016-01-15 15:10 ` [PATCH 3/3] drm/i915: Cache LRC state page in the context Tvrtko Ursulin
@ 2016-01-15 16:01   ` Chris Wilson
  2016-01-15 16:05     ` Tvrtko Ursulin
  0 siblings, 1 reply; 13+ messages in thread
From: Chris Wilson @ 2016-01-15 16:01 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Fri, Jan 15, 2016 at 03:10:29PM +0000, Tvrtko Ursulin wrote:
> @@ -1118,15 +1126,22 @@ void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
> +	if (--rq->ctx->engine[ring->id].pin_count == 0) {
> +		lrc_state_page = i915_gem_object_get_dirty_page(ctx_obj,
> +								LRC_STATE_PN);

Interesting choice. We called set_page_dirty() when we took the mapping.
Should that page flag be preserved whilst we hold the kmap - I think so,
i.e. the mm cannot flush the page whilst it has an elevated mapcount. So
calling set_page_dirty() again is redundant, right?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/3] drm/i915: Cache LRC state page in the context
  2016-01-15 16:01   ` Chris Wilson
@ 2016-01-15 16:05     ` Tvrtko Ursulin
  2016-01-15 16:17       ` Chris Wilson
  0 siblings, 1 reply; 13+ messages in thread
From: Tvrtko Ursulin @ 2016-01-15 16:05 UTC (permalink / raw)
  To: Chris Wilson, Intel-gfx, Tvrtko Ursulin, Dave Gordon


On 15/01/16 16:01, Chris Wilson wrote:
> On Fri, Jan 15, 2016 at 03:10:29PM +0000, Tvrtko Ursulin wrote:
>> @@ -1118,15 +1126,22 @@ void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
>> +	if (--rq->ctx->engine[ring->id].pin_count == 0) {
>> +		lrc_state_page = i915_gem_object_get_dirty_page(ctx_obj,
>> +								LRC_STATE_PN);
>
> Interesting choice. We called set_page_dirty() when we took the mapping.
> Should that page flag be preserved whilst we hold the kmap - I think so,
> i.e. the mm cannot flush the page whilst it has an elevated mapcount. So
> calling set_page_dirty() again is redundant, right?

If you call mindless copy & paste interesting. :D

Any other concerns or I can respin with that only?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/3] drm/i915: Cache LRC state page in the context
  2016-01-15 16:05     ` Tvrtko Ursulin
@ 2016-01-15 16:17       ` Chris Wilson
  2016-01-15 16:39         ` [PATCH v5] " Tvrtko Ursulin
  0 siblings, 1 reply; 13+ messages in thread
From: Chris Wilson @ 2016-01-15 16:17 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Fri, Jan 15, 2016 at 04:05:24PM +0000, Tvrtko Ursulin wrote:
> 
> On 15/01/16 16:01, Chris Wilson wrote:
> >On Fri, Jan 15, 2016 at 03:10:29PM +0000, Tvrtko Ursulin wrote:
> >>@@ -1118,15 +1126,22 @@ void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
> >>+	if (--rq->ctx->engine[ring->id].pin_count == 0) {
> >>+		lrc_state_page = i915_gem_object_get_dirty_page(ctx_obj,
> >>+								LRC_STATE_PN);
> >
> >Interesting choice. We called set_page_dirty() when we took the mapping.
> >Should that page flag be preserved whilst we hold the kmap - I think so,
> >i.e. the mm cannot flush the page whilst it has an elevated mapcount. So
> >calling set_page_dirty() again is redundant, right?
> 
> If you call mindless copy & paste interesting. :D
> 
> Any other concerns or I can respin with that only?

No. I was quibbling over the excess clearing of state on unpinning :)

Pity we have to respin even for innoculous changes just to get a CI
tick.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Do not call API requiring struct_mutex where it is not available
  2016-01-15 15:10 [PATCH 1/3] drm/i915: Do not call API requiring struct_mutex where it is not available Tvrtko Ursulin
  2016-01-15 15:10 ` [PATCH 2/3] drm/i915: Cache ringbuffer GTT VMA Tvrtko Ursulin
  2016-01-15 15:10 ` [PATCH 3/3] drm/i915: Cache LRC state page in the context Tvrtko Ursulin
@ 2016-01-15 16:20 ` Patchwork
  2016-01-16  7:49 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Do not call API requiring struct_mutex where it is not available (rev3) Patchwork
  3 siblings, 0 replies; 13+ messages in thread
From: Patchwork @ 2016-01-15 16:20 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Summary ==

Built on 615fbad7f4cea73b1a8eccdcc942c8ca1a708dab drm-intel-nightly: 2016y-01m-15d-09h-46m-32s UTC integration manifest

Test gem_storedw_loop:
        Subgroup basic-render:
                pass       -> DMESG-WARN (skl-i7k-2) UNSTABLE
Test kms_flip:
        Subgroup basic-flip-vs-modeset:
                dmesg-warn -> PASS       (skl-i5k-2)
Test kms_pipe_crc_basic:
        Subgroup read-crc-pipe-b:
                pass       -> DMESG-WARN (ilk-hp8440p)

bdw-nuci7        total:138  pass:128  dwarn:1   dfail:0   fail:0   skip:9  
bdw-ultra        total:138  pass:132  dwarn:0   dfail:0   fail:0   skip:6  
bsw-nuc-2        total:141  pass:115  dwarn:2   dfail:0   fail:0   skip:24 
byt-nuc          total:141  pass:123  dwarn:3   dfail:0   fail:0   skip:15 
hsw-brixbox      total:141  pass:134  dwarn:0   dfail:0   fail:0   skip:7  
ilk-hp8440p      total:141  pass:100  dwarn:4   dfail:0   fail:0   skip:37 
ivb-t430s        total:135  pass:122  dwarn:3   dfail:4   fail:0   skip:6  
skl-i5k-2        total:141  pass:132  dwarn:1   dfail:0   fail:0   skip:8  
skl-i7k-2        total:141  pass:131  dwarn:2   dfail:0   fail:0   skip:8  
snb-dellxps      total:141  pass:122  dwarn:5   dfail:0   fail:0   skip:14 
snb-x220t        total:141  pass:122  dwarn:5   dfail:0   fail:1   skip:13 

HANGED hsw-gt2 in igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a

Results at /archive/results/CI_IGT_test/Patchwork_1199/

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v5] drm/i915: Cache LRC state page in the context
  2016-01-15 16:17       ` Chris Wilson
@ 2016-01-15 16:39         ` Tvrtko Ursulin
  2016-01-15 17:02           ` Chris Wilson
  0 siblings, 1 reply; 13+ messages in thread
From: Tvrtko Ursulin @ 2016-01-15 16:39 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

LRC lifetime is well defined so we can cache the page pointing
to the object backing store in the context in order to avoid
walking over the object SG page list from the interrupt context
without the big lock held.

v2: Also cache the mapping. (Chris Wilson)
v3: Unmap on the error path.
v4: No need to cache the page. (Chris Wilson)
v5: No need to dirty the page on unpin. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dave Gordon <david.s.gordon@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  |  1 +
 drivers/gpu/drm/i915/intel_lrc.c | 53 ++++++++++++++++++++++++++--------------
 2 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index acff98b9c148..af301482e6f8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -890,6 +890,7 @@ struct intel_context {
 		int pin_count;
 		struct i915_vma *lrc_vma;
 		u64 lrc_desc;
+		uint32_t *lrc_reg_state;
 	} engine[I915_NUM_RINGS];
 
 	struct list_head link;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 545173c1b5c5..dc3ea03a887d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -390,14 +390,7 @@ static int execlists_update_context(struct drm_i915_gem_request *rq)
 {
 	struct intel_engine_cs *ring = rq->ring;
 	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
-	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
-	struct page *page;
-	uint32_t *reg_state;
-
-	BUG_ON(!ctx_obj);
-
-	page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
-	reg_state = kmap_atomic(page);
+	uint32_t *reg_state = rq->ctx->engine[ring->id].lrc_reg_state;
 
 	reg_state[CTX_RING_TAIL+1] = rq->tail;
 	reg_state[CTX_RING_BUFFER_START+1] = rq->ringbuf->vma->node.start;
@@ -414,8 +407,6 @@ static int execlists_update_context(struct drm_i915_gem_request *rq)
 		ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
 	}
 
-	kunmap_atomic(reg_state);
-
 	return 0;
 }
 
@@ -1067,6 +1058,8 @@ static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
 	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+	struct page *lrc_state_page;
+	uint32_t *lrc_reg_state;
 	int ret;
 
 	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
@@ -1076,12 +1069,25 @@ static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
 	if (ret)
 		return ret;
 
+	lrc_state_page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
+	if (WARN_ON(!lrc_state_page)) {
+		ret = -ENODEV;
+		goto unpin_ctx_obj;
+	}
+
+	lrc_reg_state = kmap(lrc_state_page);
+	if (!lrc_reg_state) {
+		ret = -ENOMEM;
+		goto unpin_ctx_obj;
+	}
+
 	ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
 	if (ret)
-		goto unpin_ctx_obj;
+		goto unmap_state_page;
 
 	ctx->engine[ring->id].lrc_vma = i915_gem_obj_to_ggtt(ctx_obj);
 	intel_lr_context_descriptor_update(ctx, ring);
+	ctx->engine[ring->id].lrc_reg_state = lrc_reg_state;
 	ctx_obj->dirty = true;
 
 	/* Invalidate GuC TLB. */
@@ -1090,6 +1096,8 @@ static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
 
 	return ret;
 
+unmap_state_page:
+	kunmap(lrc_state_page);
 unpin_ctx_obj:
 	i915_gem_object_ggtt_unpin(ctx_obj);
 
@@ -1118,15 +1126,22 @@ void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
 	struct intel_engine_cs *ring = rq->ring;
 	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
 	struct intel_ringbuffer *ringbuf = rq->ringbuf;
+	struct page *lrc_state_page;
 
-	if (ctx_obj) {
-		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
-		if (--rq->ctx->engine[ring->id].pin_count == 0) {
-			intel_unpin_ringbuffer_obj(ringbuf);
-			i915_gem_object_ggtt_unpin(ctx_obj);
-			rq->ctx->engine[ring->id].lrc_vma = NULL;
-			rq->ctx->engine[ring->id].lrc_desc = 0;
-		}
+	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
+
+	if (!ctx_obj)
+		return;
+
+	if (--rq->ctx->engine[ring->id].pin_count == 0) {
+		lrc_state_page = i915_gem_object_get_page(ctx_obj,
+							  LRC_STATE_PN);
+		kunmap(lrc_state_page);
+		intel_unpin_ringbuffer_obj(ringbuf);
+		i915_gem_object_ggtt_unpin(ctx_obj);
+		rq->ctx->engine[ring->id].lrc_vma = NULL;
+		rq->ctx->engine[ring->id].lrc_desc = 0;
+		rq->ctx->engine[ring->id].lrc_reg_state = NULL;
 	}
 }
 
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH v5] drm/i915: Cache LRC state page in the context
  2016-01-15 16:39         ` [PATCH v5] " Tvrtko Ursulin
@ 2016-01-15 17:02           ` Chris Wilson
  2016-01-15 17:12             ` [PATCH v6] " Tvrtko Ursulin
  0 siblings, 1 reply; 13+ messages in thread
From: Chris Wilson @ 2016-01-15 17:02 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Fri, Jan 15, 2016 at 04:39:29PM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> LRC lifetime is well defined so we can cache the page pointing
> to the object backing store in the context in order to avoid
> walking over the object SG page list from the interrupt context
> without the big lock held.
> 
> v2: Also cache the mapping. (Chris Wilson)
> v3: Unmap on the error path.
> v4: No need to cache the page. (Chris Wilson)
> v5: No need to dirty the page on unpin. (Chris Wilson)

Sorry.

> @@ -1076,12 +1069,25 @@ static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
>  	if (ret)
>  		return ret;
>  
> +	lrc_state_page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
> +	if (WARN_ON(!lrc_state_page)) {
> +		ret = -ENODEV;
> +		goto unpin_ctx_obj;
> +	}
> +
> +	lrc_reg_state = kmap(lrc_state_page);

kmap() cannot fail.

> +	if (!lrc_reg_state) {
> +		ret = -ENOMEM;
> +		goto unpin_ctx_obj;
> +	}
> +	if (--rq->ctx->engine[ring->id].pin_count == 0) {

I was wondering if we should just use

kunmap(kmap_to_page(ce->lrc_reg_state));

Except until we get struct intel_context_engine that line will exceed
80cols!
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v6] drm/i915: Cache LRC state page in the context
  2016-01-15 17:02           ` Chris Wilson
@ 2016-01-15 17:12             ` Tvrtko Ursulin
  2016-01-15 20:51               ` Chris Wilson
  0 siblings, 1 reply; 13+ messages in thread
From: Tvrtko Ursulin @ 2016-01-15 17:12 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

LRC lifetime is well defined so we can cache the page pointing
to the object backing store in the context in order to avoid
walking over the object SG page list from the interrupt context
without the big lock held.

v2: Also cache the mapping. (Chris Wilson)
v3: Unmap on the error path.
v4: No need to cache the page. (Chris Wilson)
v5: No need to dirty the page on unpin. (Chris Wilson)
v6: kmap() cannot fail and use kmap_to_page to simplify unpin.
    (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dave Gordon <david.s.gordon@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  |  1 +
 drivers/gpu/drm/i915/intel_lrc.c | 39 +++++++++++++++++++++------------------
 2 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index acff98b9c148..af301482e6f8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -890,6 +890,7 @@ struct intel_context {
 		int pin_count;
 		struct i915_vma *lrc_vma;
 		u64 lrc_desc;
+		uint32_t *lrc_reg_state;
 	} engine[I915_NUM_RINGS];
 
 	struct list_head link;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 545173c1b5c5..285e7f26760c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -390,14 +390,7 @@ static int execlists_update_context(struct drm_i915_gem_request *rq)
 {
 	struct intel_engine_cs *ring = rq->ring;
 	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
-	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
-	struct page *page;
-	uint32_t *reg_state;
-
-	BUG_ON(!ctx_obj);
-
-	page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
-	reg_state = kmap_atomic(page);
+	uint32_t *reg_state = rq->ctx->engine[ring->id].lrc_reg_state;
 
 	reg_state[CTX_RING_TAIL+1] = rq->tail;
 	reg_state[CTX_RING_BUFFER_START+1] = rq->ringbuf->vma->node.start;
@@ -414,8 +407,6 @@ static int execlists_update_context(struct drm_i915_gem_request *rq)
 		ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
 	}
 
-	kunmap_atomic(reg_state);
-
 	return 0;
 }
 
@@ -1067,6 +1058,7 @@ static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
 	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+	struct page *lrc_state_page;
 	int ret;
 
 	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
@@ -1076,12 +1068,19 @@ static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
 	if (ret)
 		return ret;
 
+	lrc_state_page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
+	if (WARN_ON(!lrc_state_page)) {
+		ret = -ENODEV;
+		goto unpin_ctx_obj;
+	}
+
 	ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
 	if (ret)
 		goto unpin_ctx_obj;
 
 	ctx->engine[ring->id].lrc_vma = i915_gem_obj_to_ggtt(ctx_obj);
 	intel_lr_context_descriptor_update(ctx, ring);
+	ctx->engine[ring->id].lrc_reg_state = kmap(lrc_state_page);
 	ctx_obj->dirty = true;
 
 	/* Invalidate GuC TLB. */
@@ -1119,14 +1118,18 @@ void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
 	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
 	struct intel_ringbuffer *ringbuf = rq->ringbuf;
 
-	if (ctx_obj) {
-		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
-		if (--rq->ctx->engine[ring->id].pin_count == 0) {
-			intel_unpin_ringbuffer_obj(ringbuf);
-			i915_gem_object_ggtt_unpin(ctx_obj);
-			rq->ctx->engine[ring->id].lrc_vma = NULL;
-			rq->ctx->engine[ring->id].lrc_desc = 0;
-		}
+	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
+
+	if (!ctx_obj)
+		return;
+
+	if (--rq->ctx->engine[ring->id].pin_count == 0) {
+		kunmap(kmap_to_page(rq->ctx->engine[ring->id].lrc_reg_state));
+		intel_unpin_ringbuffer_obj(ringbuf);
+		i915_gem_object_ggtt_unpin(ctx_obj);
+		rq->ctx->engine[ring->id].lrc_vma = NULL;
+		rq->ctx->engine[ring->id].lrc_desc = 0;
+		rq->ctx->engine[ring->id].lrc_reg_state = NULL;
 	}
 }
 
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH v6] drm/i915: Cache LRC state page in the context
  2016-01-15 17:12             ` [PATCH v6] " Tvrtko Ursulin
@ 2016-01-15 20:51               ` Chris Wilson
  0 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2016-01-15 20:51 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Fri, Jan 15, 2016 at 05:12:45PM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> LRC lifetime is well defined so we can cache the page pointing
> to the object backing store in the context in order to avoid
> walking over the object SG page list from the interrupt context
> without the big lock held.
> 
> v2: Also cache the mapping. (Chris Wilson)
> v3: Unmap on the error path.
> v4: No need to cache the page. (Chris Wilson)
> v5: No need to dirty the page on unpin. (Chris Wilson)
> v6: kmap() cannot fail and use kmap_to_page to simplify unpin.
>     (Chris Wilson)
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Dave Gordon <david.s.gordon@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Do not call API requiring struct_mutex where it is not available (rev3)
  2016-01-15 15:10 [PATCH 1/3] drm/i915: Do not call API requiring struct_mutex where it is not available Tvrtko Ursulin
                   ` (2 preceding siblings ...)
  2016-01-15 16:20 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Do not call API requiring struct_mutex where it is not available Patchwork
@ 2016-01-16  7:49 ` Patchwork
  2016-01-18 10:04   ` Tvrtko Ursulin
  3 siblings, 1 reply; 13+ messages in thread
From: Patchwork @ 2016-01-16  7:49 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Summary ==

Built on 0ea9000bcb6f394edde5111494a92b0607214cfa drm-intel-nightly: 2016y-01m-15d-19h-09m-45s UTC integration manifest

Test gem_ctx_basic:
                pass       -> FAIL       (bdw-ultra)
Test gem_storedw_loop:
        Subgroup basic-render:
                dmesg-warn -> PASS       (bdw-ultra) UNSTABLE
Test kms_flip:
        Subgroup basic-flip-vs-wf_vblank:
                dmesg-warn -> PASS       (snb-dellxps)
Test kms_pipe_crc_basic:
        Subgroup hang-read-crc-pipe-a:
                pass       -> DMESG-WARN (snb-dellxps)
Test pm_rpm:
        Subgroup basic-rte:
                dmesg-warn -> PASS       (byt-nuc) UNSTABLE

bdw-nuci7        total:138  pass:128  dwarn:1   dfail:0   fail:0   skip:9  
bdw-ultra        total:138  pass:131  dwarn:0   dfail:0   fail:1   skip:6  
byt-nuc          total:141  pass:124  dwarn:2   dfail:0   fail:0   skip:15 
hsw-brixbox      total:141  pass:134  dwarn:0   dfail:0   fail:0   skip:7  
hsw-gt2          total:141  pass:137  dwarn:0   dfail:0   fail:0   skip:4  
ivb-t430s        total:135  pass:122  dwarn:3   dfail:4   fail:0   skip:6  
skl-i5k-2        total:141  pass:132  dwarn:1   dfail:0   fail:0   skip:8  
skl-i7k-2        total:141  pass:131  dwarn:2   dfail:0   fail:0   skip:8  
snb-dellxps      total:141  pass:122  dwarn:5   dfail:0   fail:0   skip:14 
snb-x220t        total:141  pass:122  dwarn:5   dfail:0   fail:1   skip:13 

Results at /archive/results/CI_IGT_test/Patchwork_1203/

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Do not call API requiring struct_mutex where it is not available (rev3)
  2016-01-16  7:49 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Do not call API requiring struct_mutex where it is not available (rev3) Patchwork
@ 2016-01-18 10:04   ` Tvrtko Ursulin
  0 siblings, 0 replies; 13+ messages in thread
From: Tvrtko Ursulin @ 2016-01-18 10:04 UTC (permalink / raw)
  To: Patchwork; +Cc: intel-gfx


On 16/01/16 07:49, Patchwork wrote:
> == Summary ==
>
> Built on 0ea9000bcb6f394edde5111494a92b0607214cfa drm-intel-nightly: 2016y-01m-15d-19h-09m-45s UTC integration manifest
>
> Test gem_ctx_basic:
>                  pass       -> FAIL       (bdw-ultra)
> Test gem_storedw_loop:
>          Subgroup basic-render:
>                  dmesg-warn -> PASS       (bdw-ultra) UNSTABLE
> Test kms_flip:
>          Subgroup basic-flip-vs-wf_vblank:
>                  dmesg-warn -> PASS       (snb-dellxps)
> Test kms_pipe_crc_basic:
>          Subgroup hang-read-crc-pipe-a:
>                  pass       -> DMESG-WARN (snb-dellxps)
> Test pm_rpm:
>          Subgroup basic-rte:
>                  dmesg-warn -> PASS       (byt-nuc) UNSTABLE

I've merged this series since the failures above are known and unrelated.

Regards,

Tvrtko

>
> bdw-nuci7        total:138  pass:128  dwarn:1   dfail:0   fail:0   skip:9
> bdw-ultra        total:138  pass:131  dwarn:0   dfail:0   fail:1   skip:6
> byt-nuc          total:141  pass:124  dwarn:2   dfail:0   fail:0   skip:15
> hsw-brixbox      total:141  pass:134  dwarn:0   dfail:0   fail:0   skip:7
> hsw-gt2          total:141  pass:137  dwarn:0   dfail:0   fail:0   skip:4
> ivb-t430s        total:135  pass:122  dwarn:3   dfail:4   fail:0   skip:6
> skl-i5k-2        total:141  pass:132  dwarn:1   dfail:0   fail:0   skip:8
> skl-i7k-2        total:141  pass:131  dwarn:2   dfail:0   fail:0   skip:8
> snb-dellxps      total:141  pass:122  dwarn:5   dfail:0   fail:0   skip:14
> snb-x220t        total:141  pass:122  dwarn:5   dfail:0   fail:1   skip:13
>
> Results at /archive/results/CI_IGT_test/Patchwork_1203/
>
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2016-01-18 10:04 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-01-15 15:10 [PATCH 1/3] drm/i915: Do not call API requiring struct_mutex where it is not available Tvrtko Ursulin
2016-01-15 15:10 ` [PATCH 2/3] drm/i915: Cache ringbuffer GTT VMA Tvrtko Ursulin
2016-01-15 15:10 ` [PATCH 3/3] drm/i915: Cache LRC state page in the context Tvrtko Ursulin
2016-01-15 16:01   ` Chris Wilson
2016-01-15 16:05     ` Tvrtko Ursulin
2016-01-15 16:17       ` Chris Wilson
2016-01-15 16:39         ` [PATCH v5] " Tvrtko Ursulin
2016-01-15 17:02           ` Chris Wilson
2016-01-15 17:12             ` [PATCH v6] " Tvrtko Ursulin
2016-01-15 20:51               ` Chris Wilson
2016-01-15 16:20 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Do not call API requiring struct_mutex where it is not available Patchwork
2016-01-16  7:49 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Do not call API requiring struct_mutex where it is not available (rev3) Patchwork
2016-01-18 10:04   ` Tvrtko Ursulin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.