All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] GuC premature LRC unpin
@ 2016-04-15 11:54 Tvrtko Ursulin
  2016-04-15 11:54 ` [PATCH 1/3] drm/i915: Refactor execlists default context pinning Tvrtko Ursulin
                   ` (4 more replies)
  0 siblings, 5 replies; 40+ messages in thread
From: Tvrtko Ursulin @ 2016-04-15 11:54 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Just exercising the fix for GuC premature LRC unpin on the CI.

Chris Wilson (1):
  drm/i915: Refactor execlists default context pinning

Tvrtko Ursulin (2):
  drm/i915/guc: Keep the previous context pinned until the next one has
    been completed
  DO NOT MERGE: drm/i915: Enable GuC submission

 drivers/gpu/drm/i915/i915_debugfs.c |   5 +-
 drivers/gpu/drm/i915/i915_drv.h     |   4 +-
 drivers/gpu/drm/i915/i915_gem.c     |   8 ++-
 drivers/gpu/drm/i915/i915_params.c  |   2 +-
 drivers/gpu/drm/i915/intel_lrc.c    | 126 ++++++++++++++++--------------------
 5 files changed, 69 insertions(+), 76 deletions(-)

-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 1/3] drm/i915: Refactor execlists default context pinning
  2016-04-15 11:54 [PATCH 0/3] GuC premature LRC unpin Tvrtko Ursulin
@ 2016-04-15 11:54 ` Tvrtko Ursulin
  2016-04-15 12:16   ` Chris Wilson
  2016-04-15 11:54 ` [PATCH 2/3] drm/i915/guc: Keep the previous context pinned until the next one has been completed Tvrtko Ursulin
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 40+ messages in thread
From: Tvrtko Ursulin @ 2016-04-15 11:54 UTC (permalink / raw)
  To: Intel-gfx

From: Chris Wilson <chris@chris-wilson.co.uk>

Refactor pinning and unpinning of contexts, such that the default
context for an engine is pinned during initialisation and unpinned
during teardown (pinning of the context handles the reference counting).
Thus we can eliminate the special case handling of the default context
that was required to mask that it was not being pinned normally.

v2:
  * Rebase on nightly;
  * put back pin in execlists_context_queue. (Tvrtko Ursulin)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c |   5 +-
 drivers/gpu/drm/i915/i915_gem.c     |   3 +-
 drivers/gpu/drm/i915/intel_lrc.c    | 111 ++++++++++++++----------------------
 3 files changed, 45 insertions(+), 74 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 46cc03b60183..1de3ee746641 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2067,9 +2067,8 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
 		return ret;
 
 	list_for_each_entry(ctx, &dev_priv->context_list, link)
-		if (ctx != dev_priv->kernel_context)
-			for_each_engine(engine, dev_priv)
-				i915_dump_lrc_obj(m, ctx, engine);
+		for_each_engine(engine, dev_priv)
+			i915_dump_lrc_obj(m, ctx, engine);
 
 	mutex_unlock(&dev->struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6ce2c31b9a81..e9db56c7a31f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2719,8 +2719,7 @@ void i915_gem_request_free(struct kref *req_ref)
 		i915_gem_request_remove_from_client(req);
 
 	if (ctx) {
-		if (i915.enable_execlists && ctx != req->i915->kernel_context)
-			intel_lr_context_unpin(ctx, req->engine);
+		intel_lr_context_unpin(ctx, req->engine);
 
 		i915_gem_context_unreference(ctx);
 	}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 1562a75ac9d1..2f49dfae5560 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -612,8 +612,7 @@ static void execlists_context_queue(struct drm_i915_gem_request *request)
 	struct drm_i915_gem_request *cursor;
 	int num_elements = 0;
 
-	if (request->ctx != request->i915->kernel_context)
-		intel_lr_context_pin(request->ctx, engine);
+	intel_lr_context_pin(request->ctx, engine);
 
 	i915_gem_request_reference(request);
 
@@ -715,10 +714,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
 			return ret;
 	}
 
-	if (request->ctx != request->i915->kernel_context)
-		ret = intel_lr_context_pin(request->ctx, request->engine);
-
-	return ret;
+	return intel_lr_context_pin(request->ctx, request->engine);
 }
 
 static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
@@ -798,12 +794,8 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
 	if (engine->last_context != request->ctx) {
 		if (engine->last_context)
 			intel_lr_context_unpin(engine->last_context, engine);
-		if (request->ctx != request->i915->kernel_context) {
-			intel_lr_context_pin(request->ctx, engine);
-			engine->last_context = request->ctx;
-		} else {
-			engine->last_context = NULL;
-		}
+		intel_lr_context_pin(request->ctx, engine);
+		engine->last_context = request->ctx;
 	}
 
 	if (dev_priv->guc.execbuf_client)
@@ -1026,12 +1018,7 @@ void intel_execlists_retire_requests(struct intel_engine_cs *engine)
 	spin_unlock_bh(&engine->execlist_lock);
 
 	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
-		struct intel_context *ctx = req->ctx;
-		struct drm_i915_gem_object *ctx_obj =
-				ctx->engine[engine->id].state;
-
-		if (ctx_obj && (ctx != req->i915->kernel_context))
-			intel_lr_context_unpin(ctx, engine);
+		intel_lr_context_unpin(req->ctx, engine);
 
 		list_del(&req->execlist_link);
 		i915_gem_request_unreference(req);
@@ -1076,23 +1063,26 @@ int logical_ring_flush_all_caches(struct drm_i915_gem_request *req)
 	return 0;
 }
 
-static int intel_lr_context_do_pin(struct intel_context *ctx,
-				   struct intel_engine_cs *engine)
+static int intel_lr_context_pin(struct intel_context *ctx,
+				struct intel_engine_cs *engine)
 {
-	struct drm_device *dev = engine->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_object *ctx_obj = ctx->engine[engine->id].state;
-	struct intel_ringbuffer *ringbuf = ctx->engine[engine->id].ringbuf;
+	struct drm_i915_private *dev_priv = to_i915(engine->dev);
+	struct drm_i915_gem_object *ctx_obj;
+	struct intel_ringbuffer *ringbuf;
 	void *vaddr;
 	u32 *lrc_reg_state;
 	int ret;
 
-	WARN_ON(!mutex_is_locked(&engine->dev->struct_mutex));
+	lockdep_assert_held(&dev_priv->dev->struct_mutex);
+
+	if (ctx->engine[engine->id].pin_count++)
+		return 0;
 
+	ctx_obj = ctx->engine[engine->id].state;
 	ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN,
-			PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
+				    PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
 	if (ret)
-		return ret;
+		goto err;
 
 	vaddr = i915_gem_object_pin_map(ctx_obj);
 	if (IS_ERR(vaddr)) {
@@ -1102,10 +1092,13 @@ static int intel_lr_context_do_pin(struct intel_context *ctx,
 
 	lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
 
+	ringbuf = ctx->engine[engine->id].ringbuf;
 	ret = intel_pin_and_map_ringbuffer_obj(engine->dev, ringbuf);
 	if (ret)
 		goto unpin_map;
 
+	i915_gem_context_reference(ctx);
+
 	ctx->engine[engine->id].lrc_vma = i915_gem_obj_to_ggtt(ctx_obj);
 	intel_lr_context_descriptor_update(ctx, engine);
 	lrc_reg_state[CTX_RING_BUFFER_START+1] = ringbuf->vma->node.start;
@@ -1116,31 +1109,13 @@ static int intel_lr_context_do_pin(struct intel_context *ctx,
 	if (i915.enable_guc_submission)
 		I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
 
-	return ret;
+	return 0;
 
 unpin_map:
 	i915_gem_object_unpin_map(ctx_obj);
 unpin_ctx_obj:
 	i915_gem_object_ggtt_unpin(ctx_obj);
-
-	return ret;
-}
-
-static int intel_lr_context_pin(struct intel_context *ctx,
-				struct intel_engine_cs *engine)
-{
-	int ret = 0;
-
-	if (ctx->engine[engine->id].pin_count++ == 0) {
-		ret = intel_lr_context_do_pin(ctx, engine);
-		if (ret)
-			goto reset_pin_count;
-
-		i915_gem_context_reference(ctx);
-	}
-	return ret;
-
-reset_pin_count:
+err:
 	ctx->engine[engine->id].pin_count = 0;
 	return ret;
 }
@@ -1150,17 +1125,21 @@ void intel_lr_context_unpin(struct intel_context *ctx,
 {
 	struct drm_i915_gem_object *ctx_obj = ctx->engine[engine->id].state;
 
-	WARN_ON(!mutex_is_locked(&ctx->i915->dev->struct_mutex));
-	if (--ctx->engine[engine->id].pin_count == 0) {
-		i915_gem_object_unpin_map(ctx_obj);
-		intel_unpin_ringbuffer_obj(ctx->engine[engine->id].ringbuf);
-		i915_gem_object_ggtt_unpin(ctx_obj);
-		ctx->engine[engine->id].lrc_vma = NULL;
-		ctx->engine[engine->id].lrc_desc = 0;
-		ctx->engine[engine->id].lrc_reg_state = NULL;
+	lockdep_assert_held(&ctx->i915->dev->struct_mutex);
 
-		i915_gem_context_unreference(ctx);
-	}
+	if (--ctx->engine[engine->id].pin_count)
+		return;
+
+	i915_gem_object_unpin_map(ctx_obj);
+	intel_unpin_ringbuffer_obj(ctx->engine[engine->id].ringbuf);
+
+	i915_gem_object_ggtt_unpin(ctx_obj);
+
+	ctx->engine[engine->id].lrc_vma = NULL;
+	ctx->engine[engine->id].lrc_desc = 0;
+	ctx->engine[engine->id].lrc_reg_state = NULL;
+
+	i915_gem_context_unreference(ctx);
 }
 
 static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
@@ -2059,6 +2038,8 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
 		engine->status_page.obj = NULL;
 	}
 
+	intel_lr_context_unpin(dev_priv->kernel_context, engine);
+
 	engine->idle_lite_restore_wa = 0;
 	engine->disable_lite_restore_wa = false;
 	engine->ctx_desc_template = 0;
@@ -2161,11 +2142,10 @@ logical_ring_init(struct drm_device *dev, struct intel_engine_cs *engine)
 		goto error;
 
 	/* As this is the default context, always pin it */
-	ret = intel_lr_context_do_pin(dctx, engine);
+	ret = intel_lr_context_pin(dctx, engine);
 	if (ret) {
-		DRM_ERROR(
-			"Failed to pin and map ringbuffer %s: %d\n",
-			engine->name, ret);
+		DRM_ERROR("Failed to pin context for %s: %d\n",
+			  engine->name, ret);
 		goto error;
 	}
 
@@ -2580,20 +2560,13 @@ void intel_lr_context_free(struct intel_context *ctx)
 	int i;
 
 	for (i = I915_NUM_ENGINES; --i >= 0; ) {
-		struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf;
 		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
 
 		if (!ctx_obj)
 			continue;
 
-		if (ctx == ctx->i915->kernel_context) {
-			intel_unpin_ringbuffer_obj(ringbuf);
-			i915_gem_object_ggtt_unpin(ctx_obj);
-			i915_gem_object_unpin_map(ctx_obj);
-		}
-
 		WARN_ON(ctx->engine[i].pin_count);
-		intel_ringbuffer_free(ringbuf);
+		intel_ringbuffer_free(ctx->engine[i].ringbuf);
 		drm_gem_object_unreference(&ctx_obj->base);
 	}
 }
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 2/3] drm/i915/guc: Keep the previous context pinned until the next one has been completed
  2016-04-15 11:54 [PATCH 0/3] GuC premature LRC unpin Tvrtko Ursulin
  2016-04-15 11:54 ` [PATCH 1/3] drm/i915: Refactor execlists default context pinning Tvrtko Ursulin
@ 2016-04-15 11:54 ` Tvrtko Ursulin
  2016-04-15 12:12   ` Chris Wilson
  2016-04-15 11:54 ` [PATCH 3/3] DO NOT MERGE: drm/i915: Enable GuC submission Tvrtko Ursulin
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 40+ messages in thread
From: Tvrtko Ursulin @ 2016-04-15 11:54 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

In GuC mode submitting requests is only putting them into the
GuC queue with the actual submission to hardware following at
some future point. This makes the per engine last context
tracking insufficient for closing the premature context
unpin race.

Instead we need to make requests track and pin the previous
context on the same engine, and only unpin them when they
themselves are retired. This will ensure contexts remain pinned
in all cases until they are fully saved by the GPU.

v2: Only track contexts.
v3: Limit to GuC for now.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Alex Dai <yu.dai@intel.com>
Cc: Dave Gordon <david.s.gordon@intel.com>
Cc: Nick Hoath <nicholas.hoath@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h  |  4 +++-
 drivers/gpu/drm/i915/i915_gem.c  |  5 +++++
 drivers/gpu/drm/i915/intel_lrc.c | 15 +++++++++++++++
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b9ed1b305beb..80f2d959ed7b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2290,6 +2290,9 @@ struct drm_i915_gem_request {
 	struct intel_context *ctx;
 	struct intel_ringbuffer *ringbuf;
 
+	/** Context preceding this one on the same engine. Can be NULL. */
+	struct intel_context *prev_ctx;
+
 	/** Batch buffer related to this request if any (used for
 	    error state dump only) */
 	struct drm_i915_gem_object *batch_obj;
@@ -2325,7 +2328,6 @@ struct drm_i915_gem_request {
 
 	/** Execlists no. of times this request has been sent to the ELSP */
 	int elsp_submitted;
-
 };
 
 struct drm_i915_gem_request * __must_check
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e9db56c7a31f..18e747132ce5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1413,6 +1413,11 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	list_del_init(&request->list);
 	i915_gem_request_remove_from_client(request);
 
+	if (request->prev_ctx) {
+		intel_lr_context_unpin(request->prev_ctx, request->engine);
+		request->prev_ctx = NULL;
+	}
+
 	i915_gem_request_unreference(request);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 2f49dfae5560..73be9eac8ecc 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -791,6 +791,21 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
 	if (intel_engine_stopped(engine))
 		return 0;
 
+	/*
+	 * Pin the previous context on this engine to ensure it is not
+	 * prematurely unpinned in GuC mode.
+	 * Previous context will be unpinned when this request is retired,
+	 * ensuring the GPU has switched out from the previous context and
+	 * into a new context at that point.
+	 */
+	if (dev_priv->guc.execbuf_client && engine->last_context) {
+		intel_lr_context_pin(engine->last_context, engine);
+		request->prev_ctx = engine->last_context;
+	}
+
+	/*
+	 * Track and pin the last context submitted on an engine.
+	 */
 	if (engine->last_context != request->ctx) {
 		if (engine->last_context)
 			intel_lr_context_unpin(engine->last_context, engine);
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 3/3] DO NOT MERGE: drm/i915: Enable GuC submission
  2016-04-15 11:54 [PATCH 0/3] GuC premature LRC unpin Tvrtko Ursulin
  2016-04-15 11:54 ` [PATCH 1/3] drm/i915: Refactor execlists default context pinning Tvrtko Ursulin
  2016-04-15 11:54 ` [PATCH 2/3] drm/i915/guc: Keep the previous context pinned until the next one has been completed Tvrtko Ursulin
@ 2016-04-15 11:54 ` Tvrtko Ursulin
  2016-04-15 15:24 ` ✗ Fi.CI.BAT: warning for GuC premature LRC unpin Patchwork
  2016-04-19  6:49 ` Premature " Chris Wilson
  4 siblings, 0 replies; 40+ messages in thread
From: Tvrtko Ursulin @ 2016-04-15 11:54 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_params.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 1779f02e6df8..128506347134 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -54,7 +54,7 @@ struct i915_params i915 __read_mostly = {
 	.verbose_state_checks = 1,
 	.nuclear_pageflip = 0,
 	.edp_vswing = 0,
-	.enable_guc_submission = false,
+	.enable_guc_submission = true,
 	.guc_log_level = -1,
 	.enable_dp_mst = true,
 	.inject_load_failure = 0,
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/3] drm/i915/guc: Keep the previous context pinned until the next one has been completed
  2016-04-15 11:54 ` [PATCH 2/3] drm/i915/guc: Keep the previous context pinned until the next one has been completed Tvrtko Ursulin
@ 2016-04-15 12:12   ` Chris Wilson
  2016-04-15 13:16     ` Tvrtko Ursulin
  0 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2016-04-15 12:12 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Fri, Apr 15, 2016 at 12:54:34PM +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> In GuC mode submitting requests is only putting them into the
> GuC queue with the actual submission to hardware following at
> some future point. This makes the per engine last context
> tracking insufficient for closing the premature context
> unpin race.
> 
> Instead we need to make requests track and pin the previous
> context on the same engine, and only unpin them when they
> themselves are retired. This will ensure contexts remain pinned
> in all cases until the are fully saved by the GPU.
> 
> v2: Only track contexts.
> v3: Limit to GuC for now.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Alex Dai <yu.dai@intel.com>
> Cc: Dave Gordon <david.s.gordon@intel.com>
> Cc: Nick Hoath <nicholas.hoath@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h  |  4 +++-
>  drivers/gpu/drm/i915/i915_gem.c  |  5 +++++
>  drivers/gpu/drm/i915/intel_lrc.c | 15 +++++++++++++++
>  3 files changed, 23 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index b9ed1b305beb..80f2d959ed7b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2290,6 +2290,9 @@ struct drm_i915_gem_request {
>  	struct intel_context *ctx;
>  	struct intel_ringbuffer *ringbuf;
>  
> +	/** Context preceding this one on the same engine. Can be NULL. */
> +	struct intel_context *prev_ctx;
> +
>  	/** Batch buffer related to this request if any (used for
>  	    error state dump only) */
>  	struct drm_i915_gem_object *batch_obj;
> @@ -2325,7 +2328,6 @@ struct drm_i915_gem_request {
>  
>  	/** Execlists no. of times this request has been sent to the ELSP */
>  	int elsp_submitted;
> -
>  };
>  
>  struct drm_i915_gem_request * __must_check
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index e9db56c7a31f..18e747132ce5 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1413,6 +1413,11 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>  	list_del_init(&request->list);
>  	i915_gem_request_remove_from_client(request);
>  
> +	if (request->prev_ctx) {
> +		intel_lr_context_unpin(request->prev_ctx, request->engine);
> +		request->prev_ctx = NULL;
> +	}
> +
>  	i915_gem_request_unreference(request);
>  }
>  
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 2f49dfae5560..73be9eac8ecc 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -791,6 +791,21 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
>  	if (intel_engine_stopped(engine))
>  		return 0;
>  
> +	/*
> +	 * Pin the previous context on this engine to ensure it is not
> +	 * prematurely unpinned in GuC mode.
> +	 * Previous context will be unpinned when this request is retired,
> +	 * ensuring the GPU has switched out from the previous context and
> +	 * into a new context at that point.
> +	 */
> +	if (dev_priv->guc.execbuf_client && engine->last_context) {
> +		intel_lr_context_pin(engine->last_context, engine);
> +		request->prev_ctx = engine->last_context;
> +	}

Nak.

We just want,

request->pinned_ctx = engine->last_context;
engine->last_context = request->ctx;

as that works for everyone (and replaces the current last_context dance)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] drm/i915: Refactor execlists default context pinning
  2016-04-15 11:54 ` [PATCH 1/3] drm/i915: Refactor execlists default context pinning Tvrtko Ursulin
@ 2016-04-15 12:16   ` Chris Wilson
  2016-04-15 13:21     ` Tvrtko Ursulin
  0 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2016-04-15 12:16 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Fri, Apr 15, 2016 at 12:54:33PM +0100, Tvrtko Ursulin wrote:
> From: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Refactor pinning and unpinning of contexts, such that the default
> context for an engine is pinned during initialisation and unpinned
> during teardown (pinning of the context handles the reference counting).
> Thus we can eliminate the special case handling of the default context
> that was required to mask that it was not being pinned normally.
> 
> v2:
>   * Rebase on nightly;
>   * put back pin in execlists_context_queue. (Tvrtko Ursulin)

No, we don't want to pin in execlists_context_queue!

We pin to acquire the request and then want to keep that pin for as long
as possible. For execlists, this means only dropping it when the
hardware is finished (which will be in the following request's
retirement).

Since we can handle an error during queue, the pin has to be upfront.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/3] drm/i915/guc: Keep the previous context pinned until the next one has been completed
  2016-04-15 12:12   ` Chris Wilson
@ 2016-04-15 13:16     ` Tvrtko Ursulin
  0 siblings, 0 replies; 40+ messages in thread
From: Tvrtko Ursulin @ 2016-04-15 13:16 UTC (permalink / raw)
  To: Chris Wilson, Intel-gfx, Tvrtko Ursulin, Alex Dai, Dave Gordon,
	Nick Hoath


On 15/04/16 13:12, Chris Wilson wrote:
> On Fri, Apr 15, 2016 at 12:54:34PM +0100, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> In GuC mode submitting requests is only putting them into the
>> GuC queue with the actual submission to hardware following at
>> some future point. This makes the per engine last context
>> tracking insufficient for closing the premature context
>> unpin race.
>>
>> Instead we need to make requests track and pin the previous
>> context on the same engine, and only unpin them when they
>> themselves are retired. This will ensure contexts remain pinned
>> in all cases until the are fully saved by the GPU.
>>
>> v2: Only track contexts.
>> v3: Limit to GuC for now.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Alex Dai <yu.dai@intel.com>
>> Cc: Dave Gordon <david.s.gordon@intel.com>
>> Cc: Nick Hoath <nicholas.hoath@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h  |  4 +++-
>>   drivers/gpu/drm/i915/i915_gem.c  |  5 +++++
>>   drivers/gpu/drm/i915/intel_lrc.c | 15 +++++++++++++++
>>   3 files changed, 23 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index b9ed1b305beb..80f2d959ed7b 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2290,6 +2290,9 @@ struct drm_i915_gem_request {
>>   	struct intel_context *ctx;
>>   	struct intel_ringbuffer *ringbuf;
>>
>> +	/** Context preceding this one on the same engine. Can be NULL. */
>> +	struct intel_context *prev_ctx;
>> +
>>   	/** Batch buffer related to this request if any (used for
>>   	    error state dump only) */
>>   	struct drm_i915_gem_object *batch_obj;
>> @@ -2325,7 +2328,6 @@ struct drm_i915_gem_request {
>>
>>   	/** Execlists no. of times this request has been sent to the ELSP */
>>   	int elsp_submitted;
>> -
>>   };
>>
>>   struct drm_i915_gem_request * __must_check
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index e9db56c7a31f..18e747132ce5 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -1413,6 +1413,11 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>>   	list_del_init(&request->list);
>>   	i915_gem_request_remove_from_client(request);
>>
>> +	if (request->prev_ctx) {
>> +		intel_lr_context_unpin(request->prev_ctx, request->engine);
>> +		request->prev_ctx = NULL;
>> +	}
>> +
>>   	i915_gem_request_unreference(request);
>>   }
>>
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> index 2f49dfae5560..73be9eac8ecc 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -791,6 +791,21 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
>>   	if (intel_engine_stopped(engine))
>>   		return 0;
>>
>> +	/*
>> +	 * Pin the previous context on this engine to ensure it is not
>> +	 * prematurely unpinned in GuC mode.
>> +	 * Previous context will be unpinned when this request is retired,
>> +	 * ensuring the GPU has switched out from the previous context and
>> +	 * into a new context at that point.
>> +	 */
>> +	if (dev_priv->guc.execbuf_client && engine->last_context) {
>> +		intel_lr_context_pin(engine->last_context, engine);
>> +		request->prev_ctx = engine->last_context;
>> +	}
>
> Nak.
>
> We just want,
>
> request->pinned_ctx = engine->last_context;
> engine->last_context = request->ctx;
>
> as that works for everyone (and replaces the current last_context dance)

Not sure if you are against the implementation or the idea.

It is deliberately limited to GuC for now, until the lazy ringbuffer 
unpin happens for all platforms.

Then - engine needs to hold the pin on the last context in case there 
are no subsequent requests to keep it pinned.

In your snippet where do you pin it before assigning to any object?

I could optimize my implementation by not doing the pin-unpin in the 
case when the context changes between current and last, but other than 
that it looks correct to me.

And I don't think pinned_ctx is a good name. Previous context is 
descriptive and self documenting so wins easily IMHO.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] drm/i915: Refactor execlists default context pinning
  2016-04-15 12:16   ` Chris Wilson
@ 2016-04-15 13:21     ` Tvrtko Ursulin
  0 siblings, 0 replies; 40+ messages in thread
From: Tvrtko Ursulin @ 2016-04-15 13:21 UTC (permalink / raw)
  To: Chris Wilson, Intel-gfx, Tvrtko Ursulin


On 15/04/16 13:16, Chris Wilson wrote:
> On Fri, Apr 15, 2016 at 12:54:33PM +0100, Tvrtko Ursulin wrote:
>> From: Chris Wilson <chris@chris-wilson.co.uk>
>>
>> Refactor pinning and unpinning of contexts, such that the default
>> context for an engine is pinned during initialisation and unpinned
>> during teardown (pinning of the context handles the reference counting).
>> Thus we can eliminate the special case handling of the default context
>> that was required to mask that it was not being pinned normally.
>>
>> v2:
>>    * Rebase on nightly;
>>    * put back pin in execlists_context_queue. (Tvrtko Ursulin)
>
> No, we don't want to pin in execlists_context_queue!
>
> We pin to acquire the request and then want to keep that pin for as long
> as possible. For execlists, this means only dropping it when the
> hardware is finished (which will be in the following request's
> retirement).
>
> Since we can handle an error during queue, the pin has to be upfront.

As long as we unpin in both request free _and_ retire worker 
(intel_execlists_retire_requests) I don't see how pinning only in 
alloc_extras work.

So it looked to me your patch was unbalanced in that respect.

I have patches under the "eliminate the execlists retired queue" 
umbrella which will do what you are saying but that has some more 
prerequisites and we agreed to handle the two series separately.

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* ✗ Fi.CI.BAT: warning for GuC premature LRC unpin
  2016-04-15 11:54 [PATCH 0/3] GuC premature LRC unpin Tvrtko Ursulin
                   ` (2 preceding siblings ...)
  2016-04-15 11:54 ` [PATCH 3/3] DO NOT MERGE: drm/i915: Enable GuC submission Tvrtko Ursulin
@ 2016-04-15 15:24 ` Patchwork
  2016-04-19  6:49 ` Premature " Chris Wilson
  4 siblings, 0 replies; 40+ messages in thread
From: Patchwork @ 2016-04-15 15:24 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: GuC premature LRC unpin
URL   : https://patchwork.freedesktop.org/series/5776/
State : warning

== Summary ==

Series 5776v1 GuC premature LRC unpin
http://patchwork.freedesktop.org/api/1.0/series/5776/revisions/1/mbox/

Test gem_busy:
        Subgroup basic-blt:
                skip       -> PASS       (bsw-nuc-2)
Test gem_ctx_switch:
        Subgroup basic-default:
                pass       -> DMESG-WARN (skl-nuci5)
                pass       -> DMESG-WARN (skl-i7k-2)
Test kms_force_connector_basic:
        Subgroup force-load-detect:
                skip       -> PASS       (ivb-t430s)

bdw-nuci7        total:203  pass:191  dwarn:0   dfail:0   fail:0   skip:12 
bdw-ultra        total:203  pass:180  dwarn:0   dfail:0   fail:0   skip:23 
bsw-nuc-2        total:202  pass:163  dwarn:0   dfail:0   fail:0   skip:39 
byt-nuc          total:202  pass:164  dwarn:0   dfail:0   fail:0   skip:38 
hsw-brixbox      total:203  pass:179  dwarn:0   dfail:0   fail:0   skip:24 
hsw-gt2          total:203  pass:184  dwarn:0   dfail:0   fail:0   skip:19 
ilk-hp8440p      total:203  pass:135  dwarn:0   dfail:0   fail:0   skip:68 
ivb-t430s        total:203  pass:175  dwarn:0   dfail:0   fail:0   skip:28 
skl-i7k-2        total:203  pass:177  dwarn:1   dfail:0   fail:0   skip:25 
skl-nuci5        total:203  pass:191  dwarn:1   dfail:0   fail:0   skip:11 
snb-dellxps      total:203  pass:165  dwarn:0   dfail:0   fail:0   skip:38 
snb-x220t        total:203  pass:165  dwarn:0   dfail:0   fail:1   skip:37 

Results at /archive/results/CI_IGT_test/Patchwork_1916/

d9131d62d18ba94fb3ca019f1156c22b5f4ce23c drm-intel-nightly: 2016y-04m-15d-13h-53m-44s UTC integration manifest
cc8b07d DO NOT MERGE: drm/i915: Enable GuC submission
20d4ba8 drm/i915/guc: Keep the previous context pinned until the next one has been completed
9b9448f drm/i915: Refactor execlists default context pinning

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Premature LRC unpin
  2016-04-15 11:54 [PATCH 0/3] GuC premature LRC unpin Tvrtko Ursulin
                   ` (3 preceding siblings ...)
  2016-04-15 15:24 ` ✗ Fi.CI.BAT: warning for GuC premature LRC unpin Patchwork
@ 2016-04-19  6:49 ` Chris Wilson
  2016-04-19  6:49   ` [PATCH 1/9] drm/i915: Assign every HW context a unique ID Chris Wilson
                     ` (8 more replies)
  4 siblings, 9 replies; 40+ messages in thread
From: Chris Wilson @ 2016-04-19  6:49 UTC (permalink / raw)
  To: intel-gfx, Tvrtko Ursulin

So, I spent some time reorganising the patches I had for this into a
usable state and found that we needed some more work to keep Braswell
happy with the earlier unpinning. In particular, it needs a stable ctx
descriptor id to keep igt/gem_ctx_switch from quickly hanging.
-Chris

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 1/9] drm/i915: Assign every HW context a unique ID
  2016-04-19  6:49 ` Premature " Chris Wilson
@ 2016-04-19  6:49   ` Chris Wilson
  2016-04-19  8:57     ` Tvrtko Ursulin
  2016-04-19  6:49   ` [PATCH 2/9] drm/i915: Replace the pinned context address with its " Chris Wilson
                     ` (7 subsequent siblings)
  8 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2016-04-19  6:49 UTC (permalink / raw)
  To: intel-gfx, Tvrtko Ursulin

The hardware tracks contexts and expects all live contexts (those active
on the hardware) to have a unique identifier. This is used by the
hardware to assign pagefaults and the like to a particular context.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |  2 +-
 drivers/gpu/drm/i915/i915_drv.h         |  4 ++++
 drivers/gpu/drm/i915/i915_gem_context.c | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index ffc4734cb2dc..cacbe45b7938 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2014,7 +2014,7 @@ static int i915_context_status(struct seq_file *m, void *unused)
 		    ctx->legacy_hw_ctx.rcs_state == NULL)
 			continue;
 
-		seq_puts(m, "HW context ");
+		seq_printf(m, "HW context %d ", ctx->hw_id);
 		describe_ctx(m, ctx);
 		if (ctx == dev_priv->kernel_context)
 			seq_printf(m, "(kernel context) ");
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index dc3bb5c10957..368fd7090189 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -851,6 +851,7 @@ struct intel_context {
 	struct i915_ctx_hang_stats hang_stats;
 	struct i915_hw_ppgtt *ppgtt;
 
+	unsigned hw_id;
 	unsigned flags;
 #define CONTEXT_NO_ZEROMAP		(1<<0)
 #define CONTEXT_NO_ERROR_CAPTURE	(1<<1)
@@ -1810,6 +1811,9 @@ struct drm_i915_private {
 	DECLARE_HASHTABLE(mm_structs, 7);
 	struct mutex mm_lock;
 
+	struct ida context_hw_ida;
+#define MAX_CONTEXT_HW_ID (1<<20)
+
 	/* Kernel Modesetting */
 
 	struct drm_crtc *plane_to_crtc_mapping[I915_MAX_PIPES];
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index d7d9763a9818..db53d811370d 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -169,6 +169,8 @@ void i915_gem_context_free(struct kref *ctx_ref)
 	if (ctx->legacy_hw_ctx.rcs_state)
 		drm_gem_object_unreference(&ctx->legacy_hw_ctx.rcs_state->base);
 	list_del(&ctx->link);
+
+	ida_simple_remove(&ctx->i915->context_hw_ida, ctx->hw_id);
 	kfree(ctx);
 }
 
@@ -209,6 +211,28 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size)
 	return obj;
 }
 
+static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
+{
+	int ret;
+
+	ret = ida_simple_get(&dev_priv->context_hw_ida,
+			     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
+	if (ret < 0) {
+		/* Contexts are only released when no longer active.
+		 * Flush any pending retires to hopefully release some
+		 * stale contexts and try again.
+		 */
+		i915_gem_retire_requests(dev_priv->dev);
+		ret = ida_simple_get(&dev_priv->context_hw_ida,
+				     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
+		if (ret < 0)
+			return ret;
+	}
+
+	*out = ret;
+	return 0;
+}
+
 static struct intel_context *
 __create_hw_context(struct drm_device *dev,
 		    struct drm_i915_file_private *file_priv)
@@ -225,6 +249,12 @@ __create_hw_context(struct drm_device *dev,
 	list_add_tail(&ctx->link, &dev_priv->context_list);
 	ctx->i915 = dev_priv;
 
+	ret = assign_hw_id(dev_priv, &ctx->hw_id);
+	if (ret) {
+		kfree(ctx);
+		return ERR_PTR(ret);
+	}
+
 	if (dev_priv->hw_context_size) {
 		struct drm_i915_gem_object *obj =
 				i915_gem_alloc_context_obj(dev, dev_priv->hw_context_size);
@@ -375,6 +405,8 @@ int i915_gem_context_init(struct drm_device *dev)
 		}
 	}
 
+	ida_init(&dev_priv->context_hw_ida);
+
 	if (i915.enable_execlists) {
 		/* NB: intentionally left blank. We will allocate our own
 		 * backing objects as we need them, thank you very much */
-- 
2.8.0.rc3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 2/9] drm/i915: Replace the pinned context address with its unique ID
  2016-04-19  6:49 ` Premature " Chris Wilson
  2016-04-19  6:49   ` [PATCH 1/9] drm/i915: Assign every HW context a unique ID Chris Wilson
@ 2016-04-19  6:49   ` Chris Wilson
  2016-04-19  9:03     ` Tvrtko Ursulin
  2016-04-19  6:49   ` [PATCH 3/9] drm/i915: Refactor execlists default context pinning Chris Wilson
                     ` (6 subsequent siblings)
  8 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2016-04-19  6:49 UTC (permalink / raw)
  To: intel-gfx, Tvrtko Ursulin

Rather than reuse the current location of the context in the global GTT,
use its unique ID assigned to it for the lifetime of that context.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 12 +++++-------
 drivers/gpu/drm/i915/intel_lrc.c    | 36 ++++++------------------------------
 drivers/gpu/drm/i915/intel_lrc.h    |  3 ---
 3 files changed, 11 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index cacbe45b7938..bd409bfa6e61 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2056,15 +2056,13 @@ static void i915_dump_lrc_obj(struct seq_file *m,
 	struct drm_i915_gem_object *ctx_obj = ctx->engine[engine->id].state;
 	unsigned long ggtt_offset = 0;
 
+	seq_printf(m, "CONTEXT: %s %u\n", engine->name, ctx->hw_id);
+
 	if (ctx_obj == NULL) {
-		seq_printf(m, "Context on %s with no gem object\n",
-			   engine->name);
+		seq_printf(m, "\tNot allocated\n");
 		return;
 	}
 
-	seq_printf(m, "CONTEXT: %s %u\n", engine->name,
-		   intel_execlists_ctx_id(ctx, engine));
-
 	if (!i915_gem_obj_ggtt_bound(ctx_obj))
 		seq_puts(m, "\tNot bound in GGTT\n");
 	else
@@ -2183,8 +2181,8 @@ static int i915_execlists(struct seq_file *m, void *data)
 
 		seq_printf(m, "\t%d requests in queue\n", count);
 		if (head_req) {
-			seq_printf(m, "\tHead request id: %u\n",
-				   intel_execlists_ctx_id(head_req->ctx, engine));
+			seq_printf(m, "\tHead request context: %u\n",
+				   head_req->ctx->hw_id);
 			seq_printf(m, "\tHead request tail: %u\n",
 				   head_req->tail);
 		}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 72c72bd97568..cec112449b31 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -313,14 +313,12 @@ static void
 intel_lr_context_descriptor_update(struct intel_context *ctx,
 				   struct intel_engine_cs *engine)
 {
-	uint64_t lrca, desc;
+	u64 desc;
 
-	lrca = ctx->engine[engine->id].lrc_vma->node.start +
-	       LRC_PPHWSP_PN * PAGE_SIZE;
-
-	desc = engine->ctx_desc_template;			   /* bits  0-11 */
-	desc |= lrca;					   /* bits 12-31 */
-	desc |= (lrca >> PAGE_SHIFT) << GEN8_CTX_ID_SHIFT; /* bits 32-51 */
+	desc = engine->ctx_desc_template; /* bits  0-11 */
+	desc |= ctx->engine[engine->id].lrc_vma->node.start +
+	       LRC_PPHWSP_PN * PAGE_SIZE; /* bits 12-31 */
+	desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-51 */
 
 	ctx->engine[engine->id].lrc_desc = desc;
 }
@@ -331,28 +329,6 @@ uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
 	return ctx->engine[engine->id].lrc_desc;
 }
 
-/**
- * intel_execlists_ctx_id() - get the Execlists Context ID
- * @ctx: Context to get the ID for
- * @ring: Engine to get the ID for
- *
- * Do not confuse with ctx->id! Unfortunately we have a name overload
- * here: the old context ID we pass to userspace as a handler so that
- * they can refer to a context, and the new context ID we pass to the
- * ELSP so that the GPU can inform us of the context status via
- * interrupts.
- *
- * The context ID is a portion of the context descriptor, so we can
- * just extract the required part from the cached descriptor.
- *
- * Return: 20-bits globally unique context ID.
- */
-u32 intel_execlists_ctx_id(struct intel_context *ctx,
-			   struct intel_engine_cs *engine)
-{
-	return intel_lr_context_descriptor(ctx, engine) >> GEN8_CTX_ID_SHIFT;
-}
-
 static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
 				 struct drm_i915_gem_request *rq1)
 {
@@ -496,7 +472,7 @@ execlists_check_remove_request(struct intel_engine_cs *engine, u32 request_id)
 	if (!head_req)
 		return 0;
 
-	if (unlikely(intel_execlists_ctx_id(head_req->ctx, engine) != request_id))
+	if (unlikely(head_req->ctx->hw_id != request_id))
 		return 0;
 
 	WARN(head_req->elsp_submitted == 0, "Never submitted head request\n");
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 461f1ef9b5c1..b17ab79333aa 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -114,9 +114,6 @@ void intel_lr_context_reset(struct drm_i915_private *dev_priv,
 uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
 				     struct intel_engine_cs *engine);
 
-u32 intel_execlists_ctx_id(struct intel_context *ctx,
-			   struct intel_engine_cs *engine);
-
 /* Execlists */
 int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
 struct i915_execbuffer_params;
-- 
2.8.0.rc3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 3/9] drm/i915: Refactor execlists default context pinning
  2016-04-19  6:49 ` Premature " Chris Wilson
  2016-04-19  6:49   ` [PATCH 1/9] drm/i915: Assign every HW context a unique ID Chris Wilson
  2016-04-19  6:49   ` [PATCH 2/9] drm/i915: Replace the pinned context address with its " Chris Wilson
@ 2016-04-19  6:49   ` Chris Wilson
  2016-04-19  9:16     ` Tvrtko Ursulin
  2016-04-19  6:49   ` [PATCH 4/9] drm/i915: Remove early l3-remap Chris Wilson
                     ` (5 subsequent siblings)
  8 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2016-04-19  6:49 UTC (permalink / raw)
  To: intel-gfx, Tvrtko Ursulin

Refactor pinning and unpinning of contexts, such that the default
context for an engine is pinned during initialisation and unpinned
during teardown (pinning of the context handles the reference counting).
Thus we can eliminate the special case handling of the default context
that was required to mask that it was not being pinned normally.

v2: Rebalance context_queue after rebasing.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |   5 +-
 drivers/gpu/drm/i915/i915_gem_request.c |   6 +-
 drivers/gpu/drm/i915/intel_lrc.c        | 103 ++++++++++++--------------------
 3 files changed, 41 insertions(+), 73 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index bd409bfa6e61..a07c3478f0d8 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2108,9 +2108,8 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
 		return ret;
 
 	list_for_each_entry(ctx, &dev_priv->context_list, link)
-		if (ctx != dev_priv->kernel_context)
-			for_each_engine(engine, dev_priv)
-				i915_dump_lrc_obj(m, ctx, engine);
+		for_each_engine(engine, dev_priv)
+			i915_dump_lrc_obj(m, ctx, engine);
 
 	mutex_unlock(&dev->struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index a6f3e6e53b3b..33aacf1725dd 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -644,10 +644,8 @@ void i915_gem_request_free(struct kref *req_ref)
 		i915_gem_request_remove_from_client(req);
 
 	if (ctx) {
-		if (i915.enable_execlists) {
-			if (ctx != to_i915(req)->kernel_context)
-				intel_lr_context_unpin(ctx, req->engine);
-		}
+		if (i915.enable_execlists)
+			intel_lr_context_unpin(ctx, req->engine);
 
 		i915_gem_context_unreference(ctx);
 	}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index cec112449b31..79ac6f06a502 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -585,9 +585,7 @@ static void execlists_context_queue(struct drm_i915_gem_request *request)
 	struct drm_i915_gem_request *cursor;
 	int num_elements = 0;
 
-	if (request->ctx != to_i915(request)->kernel_context)
-		intel_lr_context_pin(request->ctx, engine);
-
+	intel_lr_context_pin(request->ctx, request->engine);
 	i915_gem_request_reference(request);
 
 	spin_lock_bh(&engine->execlist_lock);
@@ -688,10 +686,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
 			return ret;
 	}
 
-	if (request->ctx != to_i915(request)->kernel_context)
-		ret = intel_lr_context_pin(request->ctx, request->engine);
-
-	return ret;
+	return intel_lr_context_pin(request->ctx, request->engine);
 }
 
 static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
@@ -768,12 +763,8 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
 	if (engine->last_context != request->ctx) {
 		if (engine->last_context)
 			intel_lr_context_unpin(engine->last_context, engine);
-		if (request->ctx != to_i915(request)->kernel_context) {
-			intel_lr_context_pin(request->ctx, engine);
-			engine->last_context = request->ctx;
-		} else {
-			engine->last_context = NULL;
-		}
+		intel_lr_context_pin(request->ctx, engine);
+		engine->last_context = request->ctx;
 	}
 
 	if (dev_priv->guc.execbuf_client)
@@ -996,12 +987,7 @@ void intel_execlists_retire_requests(struct intel_engine_cs *engine)
 	spin_unlock_bh(&engine->execlist_lock);
 
 	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
-		struct intel_context *ctx = req->ctx;
-		struct drm_i915_gem_object *ctx_obj =
-				ctx->engine[engine->id].state;
-
-		if (ctx_obj && (ctx != to_i915(req)->kernel_context))
-			intel_lr_context_unpin(ctx, engine);
+		intel_lr_context_unpin(req->ctx, engine);
 
 		list_del(&req->execlist_link);
 		i915_gem_request_unreference(req);
@@ -1046,22 +1032,25 @@ int logical_ring_flush_all_caches(struct drm_i915_gem_request *req)
 	return 0;
 }
 
-static int intel_lr_context_do_pin(struct intel_context *ctx,
-				   struct intel_engine_cs *engine)
+static int intel_lr_context_pin(struct intel_context *ctx,
+				struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = ctx->i915;
-	struct drm_i915_gem_object *ctx_obj = ctx->engine[engine->id].state;
-	struct intel_ringbuffer *ringbuf = ctx->engine[engine->id].ringbuf;
+	struct drm_i915_gem_object *ctx_obj;
+	struct intel_ringbuffer *ringbuf;
 	void *vaddr;
-	u32 *lrc_reg_state;
+	uint32_t *lrc_reg_state;
 	int ret;
 
-	WARN_ON(!mutex_is_locked(&engine->dev->struct_mutex));
+	lockdep_assert_held(&ctx->i915->dev->struct_mutex);
+	if (ctx->engine[engine->id].pin_count++)
+		return 0;
 
+	ctx_obj = ctx->engine[engine->id].state;
 	ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN,
 			PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
 	if (ret)
-		return ret;
+		goto err;
 
 	vaddr = i915_gem_object_pin_map(ctx_obj);
 	if (IS_ERR(vaddr)) {
@@ -1071,10 +1060,12 @@ static int intel_lr_context_do_pin(struct intel_context *ctx,
 
 	lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
 
+	ringbuf = ctx->engine[engine->id].ringbuf;
 	ret = intel_pin_and_map_ringbuffer_obj(engine->dev, ringbuf);
 	if (ret)
 		goto unpin_map;
 
+	i915_gem_context_reference(ctx);
 	ctx->engine[engine->id].lrc_vma = i915_gem_obj_to_ggtt(ctx_obj);
 	intel_lr_context_descriptor_update(ctx, engine);
 	lrc_reg_state[CTX_RING_BUFFER_START+1] = ringbuf->vma->node.start;
@@ -1085,31 +1076,13 @@ static int intel_lr_context_do_pin(struct intel_context *ctx,
 	if (i915.enable_guc_submission)
 		I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
 
-	return ret;
+	return 0;
 
 unpin_map:
 	i915_gem_object_unpin_map(ctx_obj);
 unpin_ctx_obj:
 	i915_gem_object_ggtt_unpin(ctx_obj);
-
-	return ret;
-}
-
-static int intel_lr_context_pin(struct intel_context *ctx,
-				struct intel_engine_cs *engine)
-{
-	int ret = 0;
-
-	if (ctx->engine[engine->id].pin_count++ == 0) {
-		ret = intel_lr_context_do_pin(ctx, engine);
-		if (ret)
-			goto reset_pin_count;
-
-		i915_gem_context_reference(ctx);
-	}
-	return ret;
-
-reset_pin_count:
+err:
 	ctx->engine[engine->id].pin_count = 0;
 	return ret;
 }
@@ -1119,17 +1092,21 @@ void intel_lr_context_unpin(struct intel_context *ctx,
 {
 	struct drm_i915_gem_object *ctx_obj = ctx->engine[engine->id].state;
 
-	WARN_ON(!mutex_is_locked(&ctx->i915->dev->struct_mutex));
-	if (--ctx->engine[engine->id].pin_count == 0) {
-		i915_gem_object_unpin_map(ctx_obj);
-		intel_unpin_ringbuffer_obj(ctx->engine[engine->id].ringbuf);
-		i915_gem_object_ggtt_unpin(ctx_obj);
-		ctx->engine[engine->id].lrc_vma = NULL;
-		ctx->engine[engine->id].lrc_desc = 0;
-		ctx->engine[engine->id].lrc_reg_state = NULL;
+	lockdep_assert_held(&ctx->i915->dev->struct_mutex);
+	GEM_BUG_ON(ctx->engine[engine->id].pin_count == 0);
+	if (--ctx->engine[engine->id].pin_count)
+		return;
 
-		i915_gem_context_unreference(ctx);
-	}
+	i915_gem_object_unpin_map(ctx_obj);
+	intel_unpin_ringbuffer_obj(ctx->engine[engine->id].ringbuf);
+
+	i915_gem_object_ggtt_unpin(ctx_obj);
+
+	ctx->engine[engine->id].lrc_vma = NULL;
+	ctx->engine[engine->id].lrc_desc = 0;
+	ctx->engine[engine->id].lrc_reg_state = NULL;
+
+	i915_gem_context_unreference(ctx);
 }
 
 static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
@@ -1982,6 +1959,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
 		i915_gem_object_unpin_map(engine->status_page.obj);
 		engine->status_page.obj = NULL;
 	}
+	intel_lr_context_unpin(engine->i915->kernel_context, engine);
 
 	engine->idle_lite_restore_wa = 0;
 	engine->disable_lite_restore_wa = false;
@@ -2104,11 +2082,10 @@ logical_ring_init(struct intel_engine_cs *engine)
 		goto error;
 
 	/* As this is the default context, always pin it */
-	ret = intel_lr_context_do_pin(dctx, engine);
+	ret = intel_lr_context_pin(dctx, engine);
 	if (ret) {
-		DRM_ERROR(
-			"Failed to pin and map ringbuffer %s: %d\n",
-			engine->name, ret);
+		DRM_ERROR("Failed to pin context for %s: %d\n",
+			  engine->name, ret);
 		goto error;
 	}
 
@@ -2505,12 +2482,6 @@ void intel_lr_context_free(struct intel_context *ctx)
 		if (!ctx_obj)
 			continue;
 
-		if (ctx == ctx->i915->kernel_context) {
-			intel_unpin_ringbuffer_obj(ringbuf);
-			i915_gem_object_ggtt_unpin(ctx_obj);
-			i915_gem_object_unpin_map(ctx_obj);
-		}
-
 		WARN_ON(ctx->engine[i].pin_count);
 		intel_ringbuffer_free(ringbuf);
 		drm_gem_object_unreference(&ctx_obj->base);
-- 
2.8.0.rc3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 4/9] drm/i915: Remove early l3-remap
  2016-04-19  6:49 ` Premature " Chris Wilson
                     ` (2 preceding siblings ...)
  2016-04-19  6:49   ` [PATCH 3/9] drm/i915: Refactor execlists default context pinning Chris Wilson
@ 2016-04-19  6:49   ` Chris Wilson
  2016-04-19  9:41     ` Tvrtko Ursulin
  2016-04-19  6:49   ` [PATCH 5/9] drm/i915: Rearrange switch_context to load the aliasing ppgtt on first use Chris Wilson
                     ` (4 subsequent siblings)
  8 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2016-04-19  6:49 UTC (permalink / raw)
  To: intel-gfx, Tvrtko Ursulin

Since we do the l3-remap on context switch, and proceed to do a context
switch immediately after manually doing the l3-remap, we can remove the
redundant manual call.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h         |  1 -
 drivers/gpu/drm/i915/i915_gem.c         | 39 +--------------------------------
 drivers/gpu/drm/i915/i915_gem_context.c | 31 +++++++++++++++++++++++++-
 3 files changed, 31 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 368fd7090189..4c55f480f60b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2983,7 +2983,6 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
 int __must_check i915_gem_init(struct drm_device *dev);
 int i915_gem_init_engines(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
-int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice);
 void i915_gem_init_swizzling(struct drm_device *dev);
 void i915_gem_cleanup_engines(struct drm_device *dev);
 int __must_check i915_gpu_idle(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c9d7dfc8f80c..fd9a36badb45 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4077,35 +4077,6 @@ err:
 	return ret;
 }
 
-int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
-{
-	struct intel_engine_cs *engine = req->engine;
-	u32 *remap_info = to_i915(req)->l3_parity.remap_info[slice];
-	int i, ret;
-
-	if (!HAS_L3_DPF(req) || !remap_info)
-		return 0;
-
-	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
-	if (ret)
-		return ret;
-
-	/*
-	 * Note: We do not worry about the concurrent register cacheline hang
-	 * here because no other code should access these registers other than
-	 * at initialization time.
-	 */
-	for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) {
-		intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i));
-		intel_ring_emit(engine, remap_info[i]);
-	}
-
-	intel_ring_advance(engine);
-
-	return ret;
-}
-
 void i915_gem_init_swizzling(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -4210,7 +4181,7 @@ i915_gem_init_hw(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *engine;
-	int ret, j;
+	int ret;
 
 	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
 		return -EIO;
@@ -4284,14 +4255,6 @@ i915_gem_init_hw(struct drm_device *dev)
 			break;
 		}
 
-		if (engine->id == RCS) {
-			for (j = 0; j < NUM_L3_SLICES(dev); j++) {
-				ret = i915_gem_l3_remap(req, j);
-				if (ret)
-					goto err_request;
-			}
-		}
-
 		ret = i915_ppgtt_init_ring(req);
 		if (ret)
 			goto err_request;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index db53d811370d..6870556a180b 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -691,6 +691,35 @@ needs_pd_load_post(struct intel_context *to, u32 hw_flags)
 	return false;
 }
 
+static int remap_l3(struct drm_i915_gem_request *req, int slice)
+{
+	u32 *remap_info = to_i915(req)->l3_parity.remap_info[slice];
+	struct intel_engine_cs *engine = req->engine;
+	int i, ret;
+
+	if (!remap_info)
+		return 0;
+
+	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2);
+	if (ret)
+		return ret;
+
+	/*
+	 * Note: We do not worry about the concurrent register cacheline hang
+	 * here because no other code should access these registers other than
+	 * at initialization time.
+	 */
+	intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4));
+	for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
+		intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i));
+		intel_ring_emit(engine, remap_info[i]);
+	}
+	intel_ring_emit(engine, MI_NOOP);
+	intel_ring_advance(engine);
+
+	return 0;
+}
+
 static int do_rcs_switch(struct drm_i915_gem_request *req)
 {
 	struct intel_context *to = req->ctx;
@@ -810,7 +839,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 		if (!(to->remap_slice & (1<<i)))
 			continue;
 
-		ret = i915_gem_l3_remap(req, i);
+		ret = remap_l3(req, i);
 		if (ret)
 			return ret;
 
-- 
2.8.0.rc3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 5/9] drm/i915: Rearrange switch_context to load the aliasing ppgtt on first use
  2016-04-19  6:49 ` Premature " Chris Wilson
                     ` (3 preceding siblings ...)
  2016-04-19  6:49   ` [PATCH 4/9] drm/i915: Remove early l3-remap Chris Wilson
@ 2016-04-19  6:49   ` Chris Wilson
  2016-04-19  9:52     ` Tvrtko Ursulin
  2016-04-19  6:49   ` [PATCH 6/9] drm/i915: Move context initialisation to first-use Chris Wilson
                     ` (3 subsequent siblings)
  8 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2016-04-19  6:49 UTC (permalink / raw)
  To: intel-gfx, Tvrtko Ursulin

The code to switch_mm() is already handled by i915_switch_context(), the
only difference required to setup the aliasing ppgtt is that we need to
emit the switch_mm() on the first context, i.e. when transitioning from
engine->last_context == NULL.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h         |  1 -
 drivers/gpu/drm/i915/i915_gem.c         | 28 ---------------------
 drivers/gpu/drm/i915/i915_gem_context.c | 43 +++++++++------------------------
 drivers/gpu/drm/i915/i915_gem_gtt.c     | 13 ----------
 drivers/gpu/drm/i915/i915_gem_gtt.h     |  1 -
 5 files changed, 12 insertions(+), 74 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4c55f480f60b..77becfd5a09d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3125,7 +3125,6 @@ int __must_check i915_gem_context_init(struct drm_device *dev);
 void i915_gem_context_fini(struct drm_device *dev);
 void i915_gem_context_reset(struct drm_device *dev);
 int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
-int i915_gem_context_enable(struct drm_i915_gem_request *req);
 void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
 int i915_switch_context(struct drm_i915_gem_request *req);
 struct intel_context *
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fd9a36badb45..4057c0febccd 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4245,34 +4245,6 @@ i915_gem_init_hw(struct drm_device *dev)
 		}
 	}
 
-	/* Now it is safe to go back round and do everything else: */
-	for_each_engine(engine, dev_priv) {
-		struct drm_i915_gem_request *req;
-
-		req = i915_gem_request_alloc(engine, NULL);
-		if (IS_ERR(req)) {
-			ret = PTR_ERR(req);
-			break;
-		}
-
-		ret = i915_ppgtt_init_ring(req);
-		if (ret)
-			goto err_request;
-
-		ret = i915_gem_context_enable(req);
-		if (ret)
-			goto err_request;
-
-err_request:
-		i915_add_request_no_flush(req);
-		if (ret) {
-			DRM_ERROR("Failed to enable %s, error=%d\n",
-				  engine->name, ret);
-			i915_gem_cleanup_engines(dev);
-			break;
-		}
-	}
-
 out:
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 	return ret;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 6870556a180b..cf84138de4ec 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -471,27 +471,6 @@ void i915_gem_context_fini(struct drm_device *dev)
 	dev_priv->kernel_context = NULL;
 }
 
-int i915_gem_context_enable(struct drm_i915_gem_request *req)
-{
-	struct intel_engine_cs *engine = req->engine;
-	int ret;
-
-	if (i915.enable_execlists) {
-		if (engine->init_context == NULL)
-			return 0;
-
-		ret = engine->init_context(req);
-	} else
-		ret = i915_switch_context(req);
-
-	if (ret) {
-		DRM_ERROR("ring init context: %d\n", ret);
-		return ret;
-	}
-
-	return 0;
-}
-
 static int context_idr_cleanup(int id, void *p, void *data)
 {
 	struct intel_context *ctx = p;
@@ -661,7 +640,7 @@ static bool
 needs_pd_load_pre(struct intel_engine_cs *engine, struct intel_context *to)
 {
 	if (!to->ppgtt)
-		return false;
+		return engine->last_context == NULL;
 
 	if (engine->last_context == to &&
 	    !(intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings))
@@ -724,6 +703,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 {
 	struct intel_context *to = req->ctx;
 	struct intel_engine_cs *engine = req->engine;
+	struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
 	struct intel_context *from;
 	u32 hw_flags;
 	int ret, i;
@@ -765,7 +745,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 		 * Register Immediate commands in Ring Buffer before submitting
 		 * a context."*/
 		trace_switch_mm(engine, to);
-		ret = to->ppgtt->switch_mm(to->ppgtt, req);
+		ret = ppgtt->switch_mm(ppgtt, req);
 		if (ret)
 			goto unpin_out;
 	}
@@ -776,8 +756,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 		 * space. This means we must enforce that a page table load
 		 * occur when this occurs. */
 		hw_flags = MI_RESTORE_INHIBIT;
-	else if (to->ppgtt &&
-		 intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings)
+	else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings)
 		hw_flags = MI_FORCE_RESTORE;
 	else
 		hw_flags = 0;
@@ -822,7 +801,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 	 */
 	if (needs_pd_load_post(to, hw_flags)) {
 		trace_switch_mm(engine, to);
-		ret = to->ppgtt->switch_mm(to->ppgtt, req);
+		ret = ppgtt->switch_mm(to->ppgtt, req);
 		/* The hardware context switch is emitted, but we haven't
 		 * actually changed the state - so it's probably safe to bail
 		 * here. Still, let the user know something dangerous has
@@ -832,8 +811,8 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 			return ret;
 	}
 
-	if (to->ppgtt)
-		to->ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
+	if (ppgtt)
+		ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
 
 	for (i = 0; i < MAX_L3_SLICES; i++) {
 		if (!(to->remap_slice & (1<<i)))
@@ -887,15 +866,17 @@ int i915_switch_context(struct drm_i915_gem_request *req)
 		struct intel_context *to = req->ctx;
 
 		if (needs_pd_load_pre(engine, to)) {
+			struct i915_hw_ppgtt *ppgtt;
 			int ret;
 
+			ppgtt = to->ppgtt ?: to_i915(req)->mm.aliasing_ppgtt;
+
 			trace_switch_mm(engine, to);
-			ret = to->ppgtt->switch_mm(to->ppgtt, req);
+			ret = ppgtt->switch_mm(ppgtt, req);
 			if (ret)
 				return ret;
 
-			/* Doing a PD load always reloads the page dirs */
-			to->ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
+			ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
 		}
 
 		if (to != engine->last_context) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 780e3ad3ca10..50bdba5cb6d2 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2180,19 +2180,6 @@ int i915_ppgtt_init_hw(struct drm_device *dev)
 	return 0;
 }
 
-int i915_ppgtt_init_ring(struct drm_i915_gem_request *req)
-{
-	struct i915_hw_ppgtt *ppgtt = to_i915(req)->mm.aliasing_ppgtt;
-
-	if (i915.enable_execlists)
-		return 0;
-
-	if (!ppgtt)
-		return 0;
-
-	return ppgtt->switch_mm(ppgtt, req);
-}
-
 struct i915_hw_ppgtt *
 i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index d7dd3d8a8758..333a2fc62b43 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -519,7 +519,6 @@ void i915_ggtt_cleanup_hw(struct drm_device *dev);
 
 int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt);
 int i915_ppgtt_init_hw(struct drm_device *dev);
-int i915_ppgtt_init_ring(struct drm_i915_gem_request *req);
 void i915_ppgtt_release(struct kref *kref);
 struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev,
 					struct drm_i915_file_private *fpriv);
-- 
2.8.0.rc3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 6/9] drm/i915: Move context initialisation to first-use
  2016-04-19  6:49 ` Premature " Chris Wilson
                     ` (4 preceding siblings ...)
  2016-04-19  6:49   ` [PATCH 5/9] drm/i915: Rearrange switch_context to load the aliasing ppgtt on first use Chris Wilson
@ 2016-04-19  6:49   ` Chris Wilson
  2016-04-19  9:57     ` Tvrtko Ursulin
  2016-04-19 10:15     ` Tvrtko Ursulin
  2016-04-19  6:49   ` [PATCH 7/9] drm/i915: Move the magical deferred context allocation into the request Chris Wilson
                     ` (2 subsequent siblings)
  8 siblings, 2 replies; 40+ messages in thread
From: Chris Wilson @ 2016-04-19  6:49 UTC (permalink / raw)
  To: intel-gfx, Tvrtko Ursulin

Instead of allocating a new request when allocating a context, use the
request that initiated the allocation to emit the context
initialisation.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h  |  1 +
 drivers/gpu/drm/i915/intel_lrc.c | 39 ++++++++++++++++++---------------------
 2 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 77becfd5a09d..b65e3b16e1b0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -870,6 +870,7 @@ struct intel_context {
 		struct i915_vma *lrc_vma;
 		u64 lrc_desc;
 		uint32_t *lrc_reg_state;
+		bool initialised;
 	} engine[I915_NUM_ENGINES];
 
 	struct list_head link;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 79ac6f06a502..c104015813d3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -669,9 +669,10 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
 
 int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
 {
-	int ret = 0;
+	struct intel_engine_cs *engine = request->engine;
+	int ret;
 
-	request->ringbuf = request->ctx->engine[request->engine->id].ringbuf;
+	request->ringbuf = request->ctx->engine[engine->id].ringbuf;
 
 	if (i915.enable_guc_submission) {
 		/*
@@ -686,7 +687,20 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
 			return ret;
 	}
 
-	return intel_lr_context_pin(request->ctx, request->engine);
+	ret = intel_lr_context_pin(request->ctx, engine);
+	if (ret)
+		return ret;
+
+	if (!request->ctx->engine[engine->id].initialised) {
+		ret = engine->init_context(request);
+		if (ret) {
+			intel_lr_context_unpin(request->ctx, engine);
+			return ret;
+		}
+		request->ctx->engine[engine->id].initialised = true;
+	}
+
+	return 0;
 }
 
 static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
@@ -2576,25 +2590,8 @@ int intel_lr_context_deferred_alloc(struct intel_context *ctx,
 
 	ctx->engine[engine->id].ringbuf = ringbuf;
 	ctx->engine[engine->id].state = ctx_obj;
+	ctx->engine[engine->id].initialised = engine->init_context == NULL;
 
-	if (ctx != ctx->i915->kernel_context && engine->init_context) {
-		struct drm_i915_gem_request *req;
-
-		req = i915_gem_request_alloc(engine, ctx);
-		if (IS_ERR(req)) {
-			ret = PTR_ERR(req);
-			DRM_ERROR("ring create req: %d\n", ret);
-			goto error_ringbuf;
-		}
-
-		ret = engine->init_context(req);
-		i915_add_request_no_flush(req);
-		if (ret) {
-			DRM_ERROR("ring init context: %d\n",
-				ret);
-			goto error_ringbuf;
-		}
-	}
 	return 0;
 
 error_ringbuf:
-- 
2.8.0.rc3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 7/9] drm/i915: Move the magical deferred context allocation into the request
  2016-04-19  6:49 ` Premature " Chris Wilson
                     ` (5 preceding siblings ...)
  2016-04-19  6:49   ` [PATCH 6/9] drm/i915: Move context initialisation to first-use Chris Wilson
@ 2016-04-19  6:49   ` Chris Wilson
  2016-04-19 10:28     ` Tvrtko Ursulin
  2016-04-19  6:49   ` [PATCH 8/9] drm/i915: Track the previous pinned context inside " Chris Wilson
  2016-04-19  6:49   ` [PATCH 9/9] drm/i915: Move releasing of the GEM request from free to retire/cancel Chris Wilson
  8 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2016-04-19  6:49 UTC (permalink / raw)
  To: intel-gfx, Tvrtko Ursulin

We can hide more details of execlists from higher level code by removing
the explicit call to create an execlist context into its first use.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  8 --------
 drivers/gpu/drm/i915/intel_lrc.c           | 21 ++++++++++++++-------
 drivers/gpu/drm/i915/intel_lrc.h           |  2 --
 3 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0d210c383487..3da5978ac0f7 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1080,14 +1080,6 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
 		return ERR_PTR(-EIO);
 	}
 
-	if (i915.enable_execlists && !ctx->engine[engine->id].state) {
-		int ret = intel_lr_context_deferred_alloc(ctx, engine);
-		if (ret) {
-			DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret);
-			return ERR_PTR(ret);
-		}
-	}
-
 	return ctx;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c104015813d3..b0d20af38574 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -227,6 +227,8 @@ enum {
 #define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT	0x17
 #define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT	0x26
 
+static int execlists_context_deferred_alloc(struct intel_context *ctx,
+					    struct intel_engine_cs *engine);
 static int intel_lr_context_pin(struct intel_context *ctx,
 				struct intel_engine_cs *engine);
 
@@ -672,8 +674,6 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
 	struct intel_engine_cs *engine = request->engine;
 	int ret;
 
-	request->ringbuf = request->ctx->engine[engine->id].ringbuf;
-
 	if (i915.enable_guc_submission) {
 		/*
 		 * Check that the GuC has space for the request before
@@ -687,6 +687,14 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
 			return ret;
 	}
 
+	if (request->ctx->engine[engine->id].state == NULL) {
+		ret = execlists_context_deferred_alloc(request->ctx, engine);
+		if (ret)
+			return ret;
+	}
+
+	request->ringbuf = request->ctx->engine[engine->id].ringbuf;
+
 	ret = intel_lr_context_pin(request->ctx, engine);
 	if (ret)
 		return ret;
@@ -2091,7 +2099,7 @@ logical_ring_init(struct intel_engine_cs *engine)
 	if (ret)
 		goto error;
 
-	ret = intel_lr_context_deferred_alloc(dctx, engine);
+	ret = execlists_context_deferred_alloc(dctx, engine);
 	if (ret)
 		goto error;
 
@@ -2541,7 +2549,7 @@ uint32_t intel_lr_context_size(struct intel_engine_cs *engine)
 }
 
 /**
- * intel_lr_context_deferred_alloc() - create the LRC specific bits of a context
+ * execlists_context_deferred_alloc() - create the LRC specific bits of a context
  * @ctx: LR context to create.
  * @ring: engine to be used with the context.
  *
@@ -2553,9 +2561,8 @@ uint32_t intel_lr_context_size(struct intel_engine_cs *engine)
  *
  * Return: non-zero on error.
  */
-
-int intel_lr_context_deferred_alloc(struct intel_context *ctx,
-				    struct intel_engine_cs *engine)
+static int execlists_context_deferred_alloc(struct intel_context *ctx,
+					    struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_object *ctx_obj;
 	uint32_t context_size;
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index b17ab79333aa..8bea937973f6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -102,8 +102,6 @@ static inline void intel_logical_ring_emit_reg(struct intel_ringbuffer *ringbuf,
 
 void intel_lr_context_free(struct intel_context *ctx);
 uint32_t intel_lr_context_size(struct intel_engine_cs *engine);
-int intel_lr_context_deferred_alloc(struct intel_context *ctx,
-				    struct intel_engine_cs *engine);
 void intel_lr_context_unpin(struct intel_context *ctx,
 			    struct intel_engine_cs *engine);
 
-- 
2.8.0.rc3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 8/9] drm/i915: Track the previous pinned context inside the request
  2016-04-19  6:49 ` Premature " Chris Wilson
                     ` (6 preceding siblings ...)
  2016-04-19  6:49   ` [PATCH 7/9] drm/i915: Move the magical deferred context allocation into the request Chris Wilson
@ 2016-04-19  6:49   ` Chris Wilson
  2016-04-19 12:02     ` Tvrtko Ursulin
  2016-04-19  6:49   ` [PATCH 9/9] drm/i915: Move releasing of the GEM request from free to retire/cancel Chris Wilson
  8 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2016-04-19  6:49 UTC (permalink / raw)
  To: intel-gfx, Tvrtko Ursulin

As the contexts are accessed by the hardware until the switch is completed
to a new context, the hardware may still be writing to the context object
after the breadcrumb is visible. We must not unpin/unbind/prune that
object whilst still active and so we keep the previous context pinned until
the following request. If we move this tracking onto the request, we can
simplify the code and enable freeing of the request without the
struct_mutex in subsequent patches.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c |  8 ++++----
 drivers/gpu/drm/i915/i915_gem_request.h | 11 +++++++++++
 drivers/gpu/drm/i915/intel_lrc.c        | 12 +++++-------
 3 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 33aacf1725dd..8d7c415f1896 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -643,12 +643,12 @@ void i915_gem_request_free(struct kref *req_ref)
 	if (req->file_priv)
 		i915_gem_request_remove_from_client(req);
 
-	if (ctx) {
+	if (req->pinned_context) {
 		if (i915.enable_execlists)
-			intel_lr_context_unpin(ctx, req->engine);
-
-		i915_gem_context_unreference(ctx);
+			intel_lr_context_unpin(req->pinned_context,
+					       req->engine);
 	}
 
+	i915_gem_context_unreference(ctx);
 	kmem_cache_free(to_i915(req)->requests, req);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 69a4d4e2c97b..389813cbc19a 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -85,6 +85,17 @@ struct drm_i915_gem_request {
 	struct intel_context *ctx;
 	struct intel_ringbuffer *ringbuf;
 
+	/**
+	 * Context related to the previous request.
+	 * As the contexts are accessed by the hardware until the switch is
+	 * completed to a new context, the hardware may still be writing
+	 * to the context object after the breadcrumb is visible. We must
+	 * not unpin/unbind/prune that object whilst still active and so
+	 * we keep the previous context pinned until the following (this)
+	 * request is retired.
+	 */
+	struct intel_context *pinned_context;
+
 	/** Batch buffer related to this request if any (used for
 	    error state dump only) */
 	struct drm_i915_gem_object *batch_obj;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b0d20af38574..0e55f206e592 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -708,6 +708,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
 		request->ctx->engine[engine->id].initialised = true;
 	}
 
+	request->pinned_context = request->ctx;
 	return 0;
 }
 
@@ -782,12 +783,8 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
 	intel_logical_ring_emit(ringbuf, MI_NOOP);
 	intel_logical_ring_advance(ringbuf);
 
-	if (engine->last_context != request->ctx) {
-		if (engine->last_context)
-			intel_lr_context_unpin(engine->last_context, engine);
-		intel_lr_context_pin(request->ctx, engine);
-		engine->last_context = request->ctx;
-	}
+	request->pinned_context = engine->last_context;
+	engine->last_context = request->ctx;
 
 	if (dev_priv->guc.execbuf_client)
 		i915_guc_submit(dev_priv->guc.execbuf_client, request);
@@ -1009,7 +1006,8 @@ void intel_execlists_retire_requests(struct intel_engine_cs *engine)
 	spin_unlock_bh(&engine->execlist_lock);
 
 	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
-		intel_lr_context_unpin(req->ctx, engine);
+		if (req->pinned_context)
+			intel_lr_context_unpin(req->pinned_context, engine);
 
 		list_del(&req->execlist_link);
 		i915_gem_request_unreference(req);
-- 
2.8.0.rc3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 9/9] drm/i915: Move releasing of the GEM request from free to retire/cancel
  2016-04-19  6:49 ` Premature " Chris Wilson
                     ` (7 preceding siblings ...)
  2016-04-19  6:49   ` [PATCH 8/9] drm/i915: Track the previous pinned context inside " Chris Wilson
@ 2016-04-19  6:49   ` Chris Wilson
  2016-04-19 12:16     ` Tvrtko Ursulin
  8 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2016-04-19  6:49 UTC (permalink / raw)
  To: intel-gfx, Tvrtko Ursulin

If we move the release of the GEM request (i.e. decoupling it from the
various lists used for client and context tracking) after it is complete
(either by the GPU retiring the request, or by the caller cancelling the
request), we can remove the requirement that the final unreference of
the GEM request need to be under the struct_mutex.

v2,v3: Rebalance execlists by moving the context unpinning.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c          |  4 ++--
 drivers/gpu/drm/i915/i915_gem_request.c  | 25 ++++++++++---------------
 drivers/gpu/drm/i915/i915_gem_request.h  | 14 --------------
 drivers/gpu/drm/i915/intel_breadcrumbs.c |  2 +-
 drivers/gpu/drm/i915/intel_display.c     |  2 +-
 drivers/gpu/drm/i915/intel_lrc.c         |  4 ----
 drivers/gpu/drm/i915/intel_pm.c          |  2 +-
 7 files changed, 15 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4057c0febccd..1f5434f7a294 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2542,7 +2542,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 			ret = __i915_wait_request(req[i], true,
 						  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
 						  to_rps_client(file));
-		i915_gem_request_unreference__unlocked(req[i]);
+		i915_gem_request_unreference(req[i]);
 	}
 	return ret;
 
@@ -3548,7 +3548,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 		return 0;
 
 	ret = __i915_wait_request(target, true, NULL, NULL);
-	i915_gem_request_unreference__unlocked(target);
+	i915_gem_request_unreference(target);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 8d7c415f1896..c14dca3d8b87 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -250,6 +250,7 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
 static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 {
 	trace_i915_gem_request_retire(request);
+	list_del_init(&request->list);
 
 	/* We know the GPU must have read the request to have
 	 * sent us the seqno + interrupt, so use the position
@@ -261,9 +262,15 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	 */
 	request->ringbuf->last_retired_head = request->postfix;
 
-	list_del_init(&request->list);
 	i915_gem_request_remove_from_client(request);
 
+	if (request->pinned_context) {
+		if (i915.enable_execlists)
+			intel_lr_context_unpin(request->pinned_context,
+					       request->engine);
+	}
+
+	i915_gem_context_unreference(request->ctx);
 	i915_gem_request_unreference(request);
 }
 
@@ -636,19 +643,7 @@ int i915_wait_request(struct drm_i915_gem_request *req)
 
 void i915_gem_request_free(struct kref *req_ref)
 {
-	struct drm_i915_gem_request *req = container_of(req_ref,
-						 typeof(*req), ref);
-	struct intel_context *ctx = req->ctx;
-
-	if (req->file_priv)
-		i915_gem_request_remove_from_client(req);
-
-	if (req->pinned_context) {
-		if (i915.enable_execlists)
-			intel_lr_context_unpin(req->pinned_context,
-					       req->engine);
-	}
-
-	i915_gem_context_unreference(ctx);
+	struct drm_i915_gem_request *req =
+		container_of(req_ref, typeof(*req), ref);
 	kmem_cache_free(to_i915(req)->requests, req);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 389813cbc19a..48bff7dc4f90 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -170,23 +170,9 @@ i915_gem_request_reference(struct drm_i915_gem_request *req)
 static inline void
 i915_gem_request_unreference(struct drm_i915_gem_request *req)
 {
-	WARN_ON(!mutex_is_locked(&req->engine->dev->struct_mutex));
 	kref_put(&req->ref, i915_gem_request_free);
 }
 
-static inline void
-i915_gem_request_unreference__unlocked(struct drm_i915_gem_request *req)
-{
-	struct drm_device *dev;
-
-	if (!req)
-		return;
-
-	dev = req->engine->dev;
-	if (kref_put_mutex(&req->ref, i915_gem_request_free, &dev->struct_mutex))
-		mutex_unlock(&dev->struct_mutex);
-}
-
 static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
 					   struct drm_i915_gem_request *src)
 {
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 79f175853548..d7d19e17318e 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -379,7 +379,7 @@ static int intel_breadcrumbs_signaler(void *arg)
 			 */
 			intel_engine_remove_wait(engine, &signal->wait);
 
-			i915_gem_request_unreference__unlocked(signal->request);
+			i915_gem_request_unreference(signal->request);
 
 			/* Find the next oldest signal. Note that as we have
 			 * not been holding the lock, another client may
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index caff656417c2..07dfd55fff91 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -11370,7 +11370,7 @@ static void intel_mmio_flip_work_func(struct work_struct *work)
 		WARN_ON(__i915_wait_request(mmio_flip->req,
 					    false, NULL,
 					    &mmio_flip->i915->rps.mmioflips));
-		i915_gem_request_unreference__unlocked(mmio_flip->req);
+		i915_gem_request_unreference(mmio_flip->req);
 	}
 
 	/* For framebuffer backed by dmabuf, wait for fence */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 0e55f206e592..0eaa74fab87b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -587,7 +587,6 @@ static void execlists_context_queue(struct drm_i915_gem_request *request)
 	struct drm_i915_gem_request *cursor;
 	int num_elements = 0;
 
-	intel_lr_context_pin(request->ctx, request->engine);
 	i915_gem_request_reference(request);
 
 	spin_lock_bh(&engine->execlist_lock);
@@ -1006,9 +1005,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *engine)
 	spin_unlock_bh(&engine->execlist_lock);
 
 	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
-		if (req->pinned_context)
-			intel_lr_context_unpin(req->pinned_context, engine);
-
 		list_del(&req->execlist_link);
 		i915_gem_request_unreference(req);
 	}
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index b964c448bc03..565b725cd817 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -7364,7 +7364,7 @@ static void __intel_rps_boost_work(struct work_struct *work)
 	if (!i915_gem_request_completed(req))
 		gen6_rps_boost(to_i915(req), NULL, req->emitted_jiffies);
 
-	i915_gem_request_unreference__unlocked(req);
+	i915_gem_request_unreference(req);
 	kfree(boost);
 }
 
-- 
2.8.0.rc3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/9] drm/i915: Assign every HW context a unique ID
  2016-04-19  6:49   ` [PATCH 1/9] drm/i915: Assign every HW context a unique ID Chris Wilson
@ 2016-04-19  8:57     ` Tvrtko Ursulin
  2016-04-19  9:04       ` Chris Wilson
  0 siblings, 1 reply; 40+ messages in thread
From: Tvrtko Ursulin @ 2016-04-19  8:57 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/04/16 07:49, Chris Wilson wrote:
> The hardware tracks contexts and expects all live contexts (those active
> on the hardware) to have a unique identifier. This is used by the
> hardware to assign pagefaults and the like to a particular context.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c     |  2 +-
>   drivers/gpu/drm/i915/i915_drv.h         |  4 ++++
>   drivers/gpu/drm/i915/i915_gem_context.c | 32 ++++++++++++++++++++++++++++++++
>   3 files changed, 37 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index ffc4734cb2dc..cacbe45b7938 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2014,7 +2014,7 @@ static int i915_context_status(struct seq_file *m, void *unused)
>   		    ctx->legacy_hw_ctx.rcs_state == NULL)
>   			continue;
>
> -		seq_puts(m, "HW context ");
> +		seq_printf(m, "HW context %d ", ctx->hw_id);

Should be %u.

>   		describe_ctx(m, ctx);
>   		if (ctx == dev_priv->kernel_context)
>   			seq_printf(m, "(kernel context) ");
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index dc3bb5c10957..368fd7090189 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -851,6 +851,7 @@ struct intel_context {
>   	struct i915_ctx_hang_stats hang_stats;
>   	struct i915_hw_ppgtt *ppgtt;
>
> +	unsigned hw_id;
>   	unsigned flags;
>   #define CONTEXT_NO_ZEROMAP		(1<<0)
>   #define CONTEXT_NO_ERROR_CAPTURE	(1<<1)
> @@ -1810,6 +1811,9 @@ struct drm_i915_private {
>   	DECLARE_HASHTABLE(mm_structs, 7);
>   	struct mutex mm_lock;
>
> +	struct ida context_hw_ida;
> +#define MAX_CONTEXT_HW_ID (1<<20)

BUILD_BUG_ON somewhere if someone in the future is tempted to increase 
this without realizing ids returned by ida_simple_get are limited to 
0x80000000 ?

> +
>   	/* Kernel Modesetting */
>
>   	struct drm_crtc *plane_to_crtc_mapping[I915_MAX_PIPES];
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index d7d9763a9818..db53d811370d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -169,6 +169,8 @@ void i915_gem_context_free(struct kref *ctx_ref)
>   	if (ctx->legacy_hw_ctx.rcs_state)
>   		drm_gem_object_unreference(&ctx->legacy_hw_ctx.rcs_state->base);
>   	list_del(&ctx->link);
> +
> +	ida_simple_remove(&ctx->i915->context_hw_ida, ctx->hw_id);
>   	kfree(ctx);
>   }
>
> @@ -209,6 +211,28 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size)
>   	return obj;
>   }
>
> +static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
> +{
> +	int ret;
> +
> +	ret = ida_simple_get(&dev_priv->context_hw_ida,
> +			     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
> +	if (ret < 0) {
> +		/* Contexts are only released when no longer active.
> +		 * Flush any pending retires to hopefully release some
> +		 * stale contexts and try again.
> +		 */
> +		i915_gem_retire_requests(dev_priv->dev);
> +		ret = ida_simple_get(&dev_priv->context_hw_ida,
> +				     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
> +		if (ret < 0)
> +			return ret;
> +	}
> +
> +	*out = ret;
> +	return 0;
> +}
> +
>   static struct intel_context *
>   __create_hw_context(struct drm_device *dev,
>   		    struct drm_i915_file_private *file_priv)
> @@ -225,6 +249,12 @@ __create_hw_context(struct drm_device *dev,
>   	list_add_tail(&ctx->link, &dev_priv->context_list);
>   	ctx->i915 = dev_priv;
>
> +	ret = assign_hw_id(dev_priv, &ctx->hw_id);
> +	if (ret) {
> +		kfree(ctx);
> +		return ERR_PTR(ret);
> +	}
> +
>   	if (dev_priv->hw_context_size) {
>   		struct drm_i915_gem_object *obj =
>   				i915_gem_alloc_context_obj(dev, dev_priv->hw_context_size);
> @@ -375,6 +405,8 @@ int i915_gem_context_init(struct drm_device *dev)
>   		}
>   	}
>
> +	ida_init(&dev_priv->context_hw_ida);
> +
>   	if (i915.enable_execlists) {
>   		/* NB: intentionally left blank. We will allocate our own
>   		 * backing objects as we need them, thank you very much */
>

Otherwise looks fine. With the two minors above added:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/9] drm/i915: Replace the pinned context address with its unique ID
  2016-04-19  6:49   ` [PATCH 2/9] drm/i915: Replace the pinned context address with its " Chris Wilson
@ 2016-04-19  9:03     ` Tvrtko Ursulin
  0 siblings, 0 replies; 40+ messages in thread
From: Tvrtko Ursulin @ 2016-04-19  9:03 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/04/16 07:49, Chris Wilson wrote:
> Rather than reuse the current location of the context in the global GTT,
> use its unique ID assigned to it for the lifetime of that context.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c | 12 +++++-------
>   drivers/gpu/drm/i915/intel_lrc.c    | 36 ++++++------------------------------
>   drivers/gpu/drm/i915/intel_lrc.h    |  3 ---
>   3 files changed, 11 insertions(+), 40 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index cacbe45b7938..bd409bfa6e61 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2056,15 +2056,13 @@ static void i915_dump_lrc_obj(struct seq_file *m,
>   	struct drm_i915_gem_object *ctx_obj = ctx->engine[engine->id].state;
>   	unsigned long ggtt_offset = 0;
>
> +	seq_printf(m, "CONTEXT: %s %u\n", engine->name, ctx->hw_id);
> +
>   	if (ctx_obj == NULL) {
> -		seq_printf(m, "Context on %s with no gem object\n",
> -			   engine->name);
> +		seq_printf(m, "\tNot allocated\n");

Extremely minor - seq_puts.

>   		return;
>   	}
>
> -	seq_printf(m, "CONTEXT: %s %u\n", engine->name,
> -		   intel_execlists_ctx_id(ctx, engine));
> -
>   	if (!i915_gem_obj_ggtt_bound(ctx_obj))
>   		seq_puts(m, "\tNot bound in GGTT\n");
>   	else
> @@ -2183,8 +2181,8 @@ static int i915_execlists(struct seq_file *m, void *data)
>
>   		seq_printf(m, "\t%d requests in queue\n", count);
>   		if (head_req) {
> -			seq_printf(m, "\tHead request id: %u\n",
> -				   intel_execlists_ctx_id(head_req->ctx, engine));
> +			seq_printf(m, "\tHead request context: %u\n",
> +				   head_req->ctx->hw_id);
>   			seq_printf(m, "\tHead request tail: %u\n",
>   				   head_req->tail);
>   		}
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 72c72bd97568..cec112449b31 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -313,14 +313,12 @@ static void
>   intel_lr_context_descriptor_update(struct intel_context *ctx,
>   				   struct intel_engine_cs *engine)
>   {
> -	uint64_t lrca, desc;
> +	u64 desc;
>
> -	lrca = ctx->engine[engine->id].lrc_vma->node.start +
> -	       LRC_PPHWSP_PN * PAGE_SIZE;
> -
> -	desc = engine->ctx_desc_template;			   /* bits  0-11 */
> -	desc |= lrca;					   /* bits 12-31 */
> -	desc |= (lrca >> PAGE_SHIFT) << GEN8_CTX_ID_SHIFT; /* bits 32-51 */
> +	desc = engine->ctx_desc_template; /* bits  0-11 */
> +	desc |= ctx->engine[engine->id].lrc_vma->node.start +
> +	       LRC_PPHWSP_PN * PAGE_SIZE; /* bits 12-31 */
> +	desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-51 */
>
>   	ctx->engine[engine->id].lrc_desc = desc;
>   }
> @@ -331,28 +329,6 @@ uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
>   	return ctx->engine[engine->id].lrc_desc;
>   }
>
> -/**
> - * intel_execlists_ctx_id() - get the Execlists Context ID
> - * @ctx: Context to get the ID for
> - * @ring: Engine to get the ID for
> - *
> - * Do not confuse with ctx->id! Unfortunately we have a name overload
> - * here: the old context ID we pass to userspace as a handler so that
> - * they can refer to a context, and the new context ID we pass to the
> - * ELSP so that the GPU can inform us of the context status via
> - * interrupts.
> - *
> - * The context ID is a portion of the context descriptor, so we can
> - * just extract the required part from the cached descriptor.
> - *
> - * Return: 20-bits globally unique context ID.
> - */
> -u32 intel_execlists_ctx_id(struct intel_context *ctx,
> -			   struct intel_engine_cs *engine)
> -{
> -	return intel_lr_context_descriptor(ctx, engine) >> GEN8_CTX_ID_SHIFT;
> -}
> -
>   static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
>   				 struct drm_i915_gem_request *rq1)
>   {
> @@ -496,7 +472,7 @@ execlists_check_remove_request(struct intel_engine_cs *engine, u32 request_id)
>   	if (!head_req)
>   		return 0;
>
> -	if (unlikely(intel_execlists_ctx_id(head_req->ctx, engine) != request_id))
> +	if (unlikely(head_req->ctx->hw_id != request_id))
>   		return 0;
>
>   	WARN(head_req->elsp_submitted == 0, "Never submitted head request\n");
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index 461f1ef9b5c1..b17ab79333aa 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -114,9 +114,6 @@ void intel_lr_context_reset(struct drm_i915_private *dev_priv,
>   uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
>   				     struct intel_engine_cs *engine);
>
> -u32 intel_execlists_ctx_id(struct intel_context *ctx,
> -			   struct intel_engine_cs *engine);
> -
>   /* Execlists */
>   int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
>   struct i915_execbuffer_params;
>

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/9] drm/i915: Assign every HW context a unique ID
  2016-04-19  8:57     ` Tvrtko Ursulin
@ 2016-04-19  9:04       ` Chris Wilson
  2016-04-19  9:20         ` Tvrtko Ursulin
  0 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2016-04-19  9:04 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Tue, Apr 19, 2016 at 09:57:14AM +0100, Tvrtko Ursulin wrote:
> 
> On 19/04/16 07:49, Chris Wilson wrote:
> >The hardware tracks contexts and expects all live contexts (those active
> >on the hardware) to have a unique identifier. This is used by the
> >hardware to assign pagefaults and the like to a particular context.
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >---
> >  drivers/gpu/drm/i915/i915_debugfs.c     |  2 +-
> >  drivers/gpu/drm/i915/i915_drv.h         |  4 ++++
> >  drivers/gpu/drm/i915/i915_gem_context.c | 32 ++++++++++++++++++++++++++++++++
> >  3 files changed, 37 insertions(+), 1 deletion(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> >index ffc4734cb2dc..cacbe45b7938 100644
> >--- a/drivers/gpu/drm/i915/i915_debugfs.c
> >+++ b/drivers/gpu/drm/i915/i915_debugfs.c
> >@@ -2014,7 +2014,7 @@ static int i915_context_status(struct seq_file *m, void *unused)
> >  		    ctx->legacy_hw_ctx.rcs_state == NULL)
> >  			continue;
> >
> >-		seq_puts(m, "HW context ");
> >+		seq_printf(m, "HW context %d ", ctx->hw_id);
> 
> Should be %u.
> 
> >  		describe_ctx(m, ctx);
> >  		if (ctx == dev_priv->kernel_context)
> >  			seq_printf(m, "(kernel context) ");
> >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >index dc3bb5c10957..368fd7090189 100644
> >--- a/drivers/gpu/drm/i915/i915_drv.h
> >+++ b/drivers/gpu/drm/i915/i915_drv.h
> >@@ -851,6 +851,7 @@ struct intel_context {
> >  	struct i915_ctx_hang_stats hang_stats;
> >  	struct i915_hw_ppgtt *ppgtt;
> >
> >+	unsigned hw_id;
> >  	unsigned flags;
> >  #define CONTEXT_NO_ZEROMAP		(1<<0)
> >  #define CONTEXT_NO_ERROR_CAPTURE	(1<<1)
> >@@ -1810,6 +1811,9 @@ struct drm_i915_private {
> >  	DECLARE_HASHTABLE(mm_structs, 7);
> >  	struct mutex mm_lock;
> >
> >+	struct ida context_hw_ida;
> >+#define MAX_CONTEXT_HW_ID (1<<20)
> 
> BUILD_BUG_ON somewhere if someone in the future is tempted to
> increase this without realizing ids returned by ida_simple_get are
> limited to 0x80000000 ?

How do you fancy a BUILD_BUG_ON? (Just not seeing what I should assert.)
The max-value chosen here is lifted from the ctx-descriptor (as you
know). A comment is definitely in order to explain the limit.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 3/9] drm/i915: Refactor execlists default context pinning
  2016-04-19  6:49   ` [PATCH 3/9] drm/i915: Refactor execlists default context pinning Chris Wilson
@ 2016-04-19  9:16     ` Tvrtko Ursulin
  2016-04-19  9:55       ` [PATCH] " Chris Wilson
  0 siblings, 1 reply; 40+ messages in thread
From: Tvrtko Ursulin @ 2016-04-19  9:16 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/04/16 07:49, Chris Wilson wrote:
> Refactor pinning and unpinning of contexts, such that the default
> context for an engine is pinned during initialisation and unpinned
> during teardown (pinning of the context handles the reference counting).
> Thus we can eliminate the special case handling of the default context
> that was required to mask that it was not being pinned normally.
>
> v2: Rebalance context_queue after rebasing.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c     |   5 +-
>   drivers/gpu/drm/i915/i915_gem_request.c |   6 +-
>   drivers/gpu/drm/i915/intel_lrc.c        | 103 ++++++++++++--------------------
>   3 files changed, 41 insertions(+), 73 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index bd409bfa6e61..a07c3478f0d8 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2108,9 +2108,8 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
>   		return ret;
>
>   	list_for_each_entry(ctx, &dev_priv->context_list, link)
> -		if (ctx != dev_priv->kernel_context)
> -			for_each_engine(engine, dev_priv)
> -				i915_dump_lrc_obj(m, ctx, engine);
> +		for_each_engine(engine, dev_priv)
> +			i915_dump_lrc_obj(m, ctx, engine);
>
>   	mutex_unlock(&dev->struct_mutex);
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index a6f3e6e53b3b..33aacf1725dd 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -644,10 +644,8 @@ void i915_gem_request_free(struct kref *req_ref)
>   		i915_gem_request_remove_from_client(req);
>
>   	if (ctx) {
> -		if (i915.enable_execlists) {
> -			if (ctx != to_i915(req)->kernel_context)
> -				intel_lr_context_unpin(ctx, req->engine);
> -		}
> +		if (i915.enable_execlists)
> +			intel_lr_context_unpin(ctx, req->engine);
>
>   		i915_gem_context_unreference(ctx);
>   	}
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index cec112449b31..79ac6f06a502 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -585,9 +585,7 @@ static void execlists_context_queue(struct drm_i915_gem_request *request)
>   	struct drm_i915_gem_request *cursor;
>   	int num_elements = 0;
>
> -	if (request->ctx != to_i915(request)->kernel_context)
> -		intel_lr_context_pin(request->ctx, engine);
> -
> +	intel_lr_context_pin(request->ctx, request->engine);
>   	i915_gem_request_reference(request);
>
>   	spin_lock_bh(&engine->execlist_lock);
> @@ -688,10 +686,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
>   			return ret;
>   	}
>
> -	if (request->ctx != to_i915(request)->kernel_context)
> -		ret = intel_lr_context_pin(request->ctx, request->engine);
> -
> -	return ret;
> +	return intel_lr_context_pin(request->ctx, request->engine);
>   }
>
>   static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
> @@ -768,12 +763,8 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
>   	if (engine->last_context != request->ctx) {
>   		if (engine->last_context)
>   			intel_lr_context_unpin(engine->last_context, engine);
> -		if (request->ctx != to_i915(request)->kernel_context) {
> -			intel_lr_context_pin(request->ctx, engine);
> -			engine->last_context = request->ctx;
> -		} else {
> -			engine->last_context = NULL;
> -		}
> +		intel_lr_context_pin(request->ctx, engine);
> +		engine->last_context = request->ctx;
>   	}
>
>   	if (dev_priv->guc.execbuf_client)
> @@ -996,12 +987,7 @@ void intel_execlists_retire_requests(struct intel_engine_cs *engine)
>   	spin_unlock_bh(&engine->execlist_lock);
>
>   	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
> -		struct intel_context *ctx = req->ctx;
> -		struct drm_i915_gem_object *ctx_obj =
> -				ctx->engine[engine->id].state;
> -
> -		if (ctx_obj && (ctx != to_i915(req)->kernel_context))
> -			intel_lr_context_unpin(ctx, engine);
> +		intel_lr_context_unpin(req->ctx, engine);
>
>   		list_del(&req->execlist_link);
>   		i915_gem_request_unreference(req);
> @@ -1046,22 +1032,25 @@ int logical_ring_flush_all_caches(struct drm_i915_gem_request *req)
>   	return 0;
>   }
>
> -static int intel_lr_context_do_pin(struct intel_context *ctx,
> -				   struct intel_engine_cs *engine)
> +static int intel_lr_context_pin(struct intel_context *ctx,
> +				struct intel_engine_cs *engine)
>   {
>   	struct drm_i915_private *dev_priv = ctx->i915;
> -	struct drm_i915_gem_object *ctx_obj = ctx->engine[engine->id].state;
> -	struct intel_ringbuffer *ringbuf = ctx->engine[engine->id].ringbuf;
> +	struct drm_i915_gem_object *ctx_obj;
> +	struct intel_ringbuffer *ringbuf;
>   	void *vaddr;
> -	u32 *lrc_reg_state;
> +	uint32_t *lrc_reg_state;

I prefer u32, and you preferred u64 in "drm/i915: Replace the pinned 
context address with its unique ID". :) Doesn't matter really, it is too 
late for consistency anyway.

>   	int ret;
>
> -	WARN_ON(!mutex_is_locked(&engine->dev->struct_mutex));
> +	lockdep_assert_held(&ctx->i915->dev->struct_mutex);
> +	if (ctx->engine[engine->id].pin_count++)
> +		return 0;
>
> +	ctx_obj = ctx->engine[engine->id].state;
>   	ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN,
>   			PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
>   	if (ret)
> -		return ret;
> +		goto err;
>
>   	vaddr = i915_gem_object_pin_map(ctx_obj);
>   	if (IS_ERR(vaddr)) {
> @@ -1071,10 +1060,12 @@ static int intel_lr_context_do_pin(struct intel_context *ctx,
>
>   	lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
>
> +	ringbuf = ctx->engine[engine->id].ringbuf;
>   	ret = intel_pin_and_map_ringbuffer_obj(engine->dev, ringbuf);
>   	if (ret)
>   		goto unpin_map;
>
> +	i915_gem_context_reference(ctx);
>   	ctx->engine[engine->id].lrc_vma = i915_gem_obj_to_ggtt(ctx_obj);
>   	intel_lr_context_descriptor_update(ctx, engine);
>   	lrc_reg_state[CTX_RING_BUFFER_START+1] = ringbuf->vma->node.start;
> @@ -1085,31 +1076,13 @@ static int intel_lr_context_do_pin(struct intel_context *ctx,
>   	if (i915.enable_guc_submission)
>   		I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
>
> -	return ret;
> +	return 0;
>
>   unpin_map:
>   	i915_gem_object_unpin_map(ctx_obj);
>   unpin_ctx_obj:
>   	i915_gem_object_ggtt_unpin(ctx_obj);
> -
> -	return ret;
> -}
> -
> -static int intel_lr_context_pin(struct intel_context *ctx,
> -				struct intel_engine_cs *engine)
> -{
> -	int ret = 0;
> -
> -	if (ctx->engine[engine->id].pin_count++ == 0) {
> -		ret = intel_lr_context_do_pin(ctx, engine);
> -		if (ret)
> -			goto reset_pin_count;
> -
> -		i915_gem_context_reference(ctx);
> -	}
> -	return ret;
> -
> -reset_pin_count:
> +err:
>   	ctx->engine[engine->id].pin_count = 0;
>   	return ret;
>   }
> @@ -1119,17 +1092,21 @@ void intel_lr_context_unpin(struct intel_context *ctx,
>   {
>   	struct drm_i915_gem_object *ctx_obj = ctx->engine[engine->id].state;
>
> -	WARN_ON(!mutex_is_locked(&ctx->i915->dev->struct_mutex));
> -	if (--ctx->engine[engine->id].pin_count == 0) {
> -		i915_gem_object_unpin_map(ctx_obj);
> -		intel_unpin_ringbuffer_obj(ctx->engine[engine->id].ringbuf);
> -		i915_gem_object_ggtt_unpin(ctx_obj);
> -		ctx->engine[engine->id].lrc_vma = NULL;
> -		ctx->engine[engine->id].lrc_desc = 0;
> -		ctx->engine[engine->id].lrc_reg_state = NULL;
> +	lockdep_assert_held(&ctx->i915->dev->struct_mutex);
> +	GEM_BUG_ON(ctx->engine[engine->id].pin_count == 0);

I thought it is more readable to leave space between asserts and code. 
And in pin as well if you decide to do it.

> +	if (--ctx->engine[engine->id].pin_count)
> +		return;
>
> -		i915_gem_context_unreference(ctx);
> -	}

Could move ctx_obj assignment just before use here.

> +	i915_gem_object_unpin_map(ctx_obj);
> +	intel_unpin_ringbuffer_obj(ctx->engine[engine->id].ringbuf);
> +
> +	i915_gem_object_ggtt_unpin(ctx_obj);
> +
> +	ctx->engine[engine->id].lrc_vma = NULL;
> +	ctx->engine[engine->id].lrc_desc = 0;
> +	ctx->engine[engine->id].lrc_reg_state = NULL;
> +
> +	i915_gem_context_unreference(ctx);
>   }
>
>   static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
> @@ -1982,6 +1959,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
>   		i915_gem_object_unpin_map(engine->status_page.obj);
>   		engine->status_page.obj = NULL;
>   	}
> +	intel_lr_context_unpin(engine->i915->kernel_context, engine);

I don't see engine->i915 upstream yet unless I am missing something. 
dev_priv is already there on the other hand.

>
>   	engine->idle_lite_restore_wa = 0;
>   	engine->disable_lite_restore_wa = false;
> @@ -2104,11 +2082,10 @@ logical_ring_init(struct intel_engine_cs *engine)
>   		goto error;
>
>   	/* As this is the default context, always pin it */
> -	ret = intel_lr_context_do_pin(dctx, engine);
> +	ret = intel_lr_context_pin(dctx, engine);
>   	if (ret) {
> -		DRM_ERROR(
> -			"Failed to pin and map ringbuffer %s: %d\n",
> -			engine->name, ret);
> +		DRM_ERROR("Failed to pin context for %s: %d\n",
> +			  engine->name, ret);
>   		goto error;
>   	}
>
> @@ -2505,12 +2482,6 @@ void intel_lr_context_free(struct intel_context *ctx)
>   		if (!ctx_obj)
>   			continue;
>
> -		if (ctx == ctx->i915->kernel_context) {
> -			intel_unpin_ringbuffer_obj(ringbuf);
> -			i915_gem_object_ggtt_unpin(ctx_obj);
> -			i915_gem_object_unpin_map(ctx_obj);
> -		}
> -
>   		WARN_ON(ctx->engine[i].pin_count);
>   		intel_ringbuffer_free(ringbuf);
>   		drm_gem_object_unreference(&ctx_obj->base);
>

Just some minors then and correct rebase. Otherwise I think it is correct.

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/9] drm/i915: Assign every HW context a unique ID
  2016-04-19  9:04       ` Chris Wilson
@ 2016-04-19  9:20         ` Tvrtko Ursulin
  0 siblings, 0 replies; 40+ messages in thread
From: Tvrtko Ursulin @ 2016-04-19  9:20 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/04/16 10:04, Chris Wilson wrote:
> On Tue, Apr 19, 2016 at 09:57:14AM +0100, Tvrtko Ursulin wrote:
>>
>> On 19/04/16 07:49, Chris Wilson wrote:
>>> The hardware tracks contexts and expects all live contexts (those active
>>> on the hardware) to have a unique identifier. This is used by the
>>> hardware to assign pagefaults and the like to a particular context.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>   drivers/gpu/drm/i915/i915_debugfs.c     |  2 +-
>>>   drivers/gpu/drm/i915/i915_drv.h         |  4 ++++
>>>   drivers/gpu/drm/i915/i915_gem_context.c | 32 ++++++++++++++++++++++++++++++++
>>>   3 files changed, 37 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
>>> index ffc4734cb2dc..cacbe45b7938 100644
>>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>>> @@ -2014,7 +2014,7 @@ static int i915_context_status(struct seq_file *m, void *unused)
>>>   		    ctx->legacy_hw_ctx.rcs_state == NULL)
>>>   			continue;
>>>
>>> -		seq_puts(m, "HW context ");
>>> +		seq_printf(m, "HW context %d ", ctx->hw_id);
>>
>> Should be %u.
>>
>>>   		describe_ctx(m, ctx);
>>>   		if (ctx == dev_priv->kernel_context)
>>>   			seq_printf(m, "(kernel context) ");
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>> index dc3bb5c10957..368fd7090189 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -851,6 +851,7 @@ struct intel_context {
>>>   	struct i915_ctx_hang_stats hang_stats;
>>>   	struct i915_hw_ppgtt *ppgtt;
>>>
>>> +	unsigned hw_id;
>>>   	unsigned flags;
>>>   #define CONTEXT_NO_ZEROMAP		(1<<0)
>>>   #define CONTEXT_NO_ERROR_CAPTURE	(1<<1)
>>> @@ -1810,6 +1811,9 @@ struct drm_i915_private {
>>>   	DECLARE_HASHTABLE(mm_structs, 7);
>>>   	struct mutex mm_lock;
>>>
>>> +	struct ida context_hw_ida;
>>> +#define MAX_CONTEXT_HW_ID (1<<20)
>>
>> BUILD_BUG_ON somewhere if someone in the future is tempted to
>> increase this without realizing ids returned by ida_simple_get are
>> limited to 0x80000000 ?
>
> How do you fancy a BUILD_BUG_ON? (Just not seeing what I should assert.)
> The max-value chosen here is lifted from the ctx-descriptor (as you
> know). A comment is definitely in order to explain the limit.

I thought BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX) is obvious, no?

Probably best at the ida_simple_init call site?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 4/9] drm/i915: Remove early l3-remap
  2016-04-19  6:49   ` [PATCH 4/9] drm/i915: Remove early l3-remap Chris Wilson
@ 2016-04-19  9:41     ` Tvrtko Ursulin
  2016-04-19 10:07       ` [PATCH 1/3] drm/i915: L3 cache remapping is part of context switching Chris Wilson
  0 siblings, 1 reply; 40+ messages in thread
From: Tvrtko Ursulin @ 2016-04-19  9:41 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/04/16 07:49, Chris Wilson wrote:
> Since we do the l3-remap on context switch, and proceed to do a context
> switch immediately after manually doing the l3-remap, we can remove the
> redundant manual call.

Looks like it should say it is removing the l3-remap on driver load, not 
"manual".

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.h         |  1 -
>   drivers/gpu/drm/i915/i915_gem.c         | 39 +--------------------------------
>   drivers/gpu/drm/i915/i915_gem_context.c | 31 +++++++++++++++++++++++++-
>   3 files changed, 31 insertions(+), 40 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 368fd7090189..4c55f480f60b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2983,7 +2983,6 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
>   int __must_check i915_gem_init(struct drm_device *dev);
>   int i915_gem_init_engines(struct drm_device *dev);
>   int __must_check i915_gem_init_hw(struct drm_device *dev);
> -int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice);
>   void i915_gem_init_swizzling(struct drm_device *dev);
>   void i915_gem_cleanup_engines(struct drm_device *dev);
>   int __must_check i915_gpu_idle(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index c9d7dfc8f80c..fd9a36badb45 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4077,35 +4077,6 @@ err:
>   	return ret;
>   }
>
> -int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
> -{
> -	struct intel_engine_cs *engine = req->engine;
> -	u32 *remap_info = to_i915(req)->l3_parity.remap_info[slice];

Wrong base.

> -	int i, ret;
> -
> -	if (!HAS_L3_DPF(req) || !remap_info)
> -		return 0;
> -
> -	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
> -	if (ret)
> -		return ret;
> -
> -	/*
> -	 * Note: We do not worry about the concurrent register cacheline hang
> -	 * here because no other code should access these registers other than
> -	 * at initialization time.
> -	 */
> -	for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) {
> -		intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
> -		intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i));
> -		intel_ring_emit(engine, remap_info[i]);
> -	}
> -
> -	intel_ring_advance(engine);
> -
> -	return ret;
> -}
> -
>   void i915_gem_init_swizzling(struct drm_device *dev)
>   {
>   	struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -4210,7 +4181,7 @@ i915_gem_init_hw(struct drm_device *dev)
>   {
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	struct intel_engine_cs *engine;
> -	int ret, j;
> +	int ret;
>
>   	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
>   		return -EIO;
> @@ -4284,14 +4255,6 @@ i915_gem_init_hw(struct drm_device *dev)
>   			break;
>   		}
>
> -		if (engine->id == RCS) {
> -			for (j = 0; j < NUM_L3_SLICES(dev); j++) {
> -				ret = i915_gem_l3_remap(req, j);
> -				if (ret)
> -					goto err_request;
> -			}
> -		}
> -
>   		ret = i915_ppgtt_init_ring(req);
>   		if (ret)
>   			goto err_request;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index db53d811370d..6870556a180b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -691,6 +691,35 @@ needs_pd_load_post(struct intel_context *to, u32 hw_flags)
>   	return false;
>   }
>
> +static int remap_l3(struct drm_i915_gem_request *req, int slice)
> +{
> +	u32 *remap_info = to_i915(req)->l3_parity.remap_info[slice];
> +	struct intel_engine_cs *engine = req->engine;
> +	int i, ret;
> +
> +	if (!remap_info)
> +		return 0;

HAS_L3_DPF check is gone. Ok it looks correct from the call site, but 
best would be not to change it when moving around.

> +
> +	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2);

More changes - should you perhaps add a patch which simplifies l3 remap 
to use a single lri ahead of this one?

> +	if (ret)
> +		return ret;
> +
> +	/*
> +	 * Note: We do not worry about the concurrent register cacheline hang
> +	 * here because no other code should access these registers other than
> +	 * at initialization time.
> +	 */
> +	intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4));
> +	for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
> +		intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i));
> +		intel_ring_emit(engine, remap_info[i]);
> +	}
> +	intel_ring_emit(engine, MI_NOOP);
> +	intel_ring_advance(engine);
> +
> +	return 0;
> +}
> +
>   static int do_rcs_switch(struct drm_i915_gem_request *req)
>   {
>   	struct intel_context *to = req->ctx;
> @@ -810,7 +839,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
>   		if (!(to->remap_slice & (1<<i)))
>   			continue;
>
> -		ret = i915_gem_l3_remap(req, i);
> +		ret = remap_l3(req, i);
>   		if (ret)
>   			return ret;
>
>

Otherwise looks safe, although I am not sure why it belongs in this series.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 5/9] drm/i915: Rearrange switch_context to load the aliasing ppgtt on first use
  2016-04-19  6:49   ` [PATCH 5/9] drm/i915: Rearrange switch_context to load the aliasing ppgtt on first use Chris Wilson
@ 2016-04-19  9:52     ` Tvrtko Ursulin
  0 siblings, 0 replies; 40+ messages in thread
From: Tvrtko Ursulin @ 2016-04-19  9:52 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/04/16 07:49, Chris Wilson wrote:
> The code to switch_mm() is already handled by i915_switch_context(), the
> only difference required to setup the aliasing ppgtt is that we need to
> emit the switch_mm() on the first context, i.e. when transitioning from
> engine->last_context == NULL.

Explanation feels a bit short for the amount of change.

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.h         |  1 -
>   drivers/gpu/drm/i915/i915_gem.c         | 28 ---------------------
>   drivers/gpu/drm/i915/i915_gem_context.c | 43 +++++++++------------------------
>   drivers/gpu/drm/i915/i915_gem_gtt.c     | 13 ----------
>   drivers/gpu/drm/i915/i915_gem_gtt.h     |  1 -
>   5 files changed, 12 insertions(+), 74 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 4c55f480f60b..77becfd5a09d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3125,7 +3125,6 @@ int __must_check i915_gem_context_init(struct drm_device *dev);
>   void i915_gem_context_fini(struct drm_device *dev);
>   void i915_gem_context_reset(struct drm_device *dev);
>   int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
> -int i915_gem_context_enable(struct drm_i915_gem_request *req);
>   void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
>   int i915_switch_context(struct drm_i915_gem_request *req);
>   struct intel_context *
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index fd9a36badb45..4057c0febccd 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4245,34 +4245,6 @@ i915_gem_init_hw(struct drm_device *dev)
>   		}
>   	}
>
> -	/* Now it is safe to go back round and do everything else: */
> -	for_each_engine(engine, dev_priv) {
> -		struct drm_i915_gem_request *req;
> -
> -		req = i915_gem_request_alloc(engine, NULL);
> -		if (IS_ERR(req)) {
> -			ret = PTR_ERR(req);
> -			break;
> -		}
> -
> -		ret = i915_ppgtt_init_ring(req);
> -		if (ret)
> -			goto err_request;
> -
> -		ret = i915_gem_context_enable(req);
> -		if (ret)
> -			goto err_request;
> -
> -err_request:
> -		i915_add_request_no_flush(req);
> -		if (ret) {
> -			DRM_ERROR("Failed to enable %s, error=%d\n",
> -				  engine->name, ret);
> -			i915_gem_cleanup_engines(dev);
> -			break;
> -		}
> -	}
> -
>   out:
>   	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>   	return ret;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 6870556a180b..cf84138de4ec 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -471,27 +471,6 @@ void i915_gem_context_fini(struct drm_device *dev)
>   	dev_priv->kernel_context = NULL;
>   }
>
> -int i915_gem_context_enable(struct drm_i915_gem_request *req)
> -{
> -	struct intel_engine_cs *engine = req->engine;
> -	int ret;
> -
> -	if (i915.enable_execlists) {
> -		if (engine->init_context == NULL)
> -			return 0;
> -
> -		ret = engine->init_context(req);
> -	} else
> -		ret = i915_switch_context(req);
> -
> -	if (ret) {
> -		DRM_ERROR("ring init context: %d\n", ret);
> -		return ret;
> -	}
> -
> -	return 0;
> -}
> -

So default context is not switched to at driver load, or before the 
non-default context even, any more? And that is fine?

>   static int context_idr_cleanup(int id, void *p, void *data)
>   {
>   	struct intel_context *ctx = p;
> @@ -661,7 +640,7 @@ static bool
>   needs_pd_load_pre(struct intel_engine_cs *engine, struct intel_context *to)
>   {
>   	if (!to->ppgtt)
> -		return false;
> +		return engine->last_context == NULL;
>
>   	if (engine->last_context == to &&
>   	    !(intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings))
> @@ -724,6 +703,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
>   {
>   	struct intel_context *to = req->ctx;
>   	struct intel_engine_cs *engine = req->engine;
> +	struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
>   	struct intel_context *from;
>   	u32 hw_flags;
>   	int ret, i;
> @@ -765,7 +745,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
>   		 * Register Immediate commands in Ring Buffer before submitting
>   		 * a context."*/
>   		trace_switch_mm(engine, to);
> -		ret = to->ppgtt->switch_mm(to->ppgtt, req);
> +		ret = ppgtt->switch_mm(ppgtt, req);
>   		if (ret)
>   			goto unpin_out;
>   	}
> @@ -776,8 +756,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
>   		 * space. This means we must enforce that a page table load
>   		 * occur when this occurs. */
>   		hw_flags = MI_RESTORE_INHIBIT;
> -	else if (to->ppgtt &&
> -		 intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings)
> +	else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings)
>   		hw_flags = MI_FORCE_RESTORE;
>   	else
>   		hw_flags = 0;
> @@ -822,7 +801,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
>   	 */
>   	if (needs_pd_load_post(to, hw_flags)) {
>   		trace_switch_mm(engine, to);
> -		ret = to->ppgtt->switch_mm(to->ppgtt, req);
> +		ret = ppgtt->switch_mm(to->ppgtt, req);

to->ppgtt should be ppgtt I think.

>   		/* The hardware context switch is emitted, but we haven't
>   		 * actually changed the state - so it's probably safe to bail
>   		 * here. Still, let the user know something dangerous has
> @@ -832,8 +811,8 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
>   			return ret;
>   	}
>
> -	if (to->ppgtt)
> -		to->ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
> +	if (ppgtt)
> +		ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
>
>   	for (i = 0; i < MAX_L3_SLICES; i++) {
>   		if (!(to->remap_slice & (1<<i)))
> @@ -887,15 +866,17 @@ int i915_switch_context(struct drm_i915_gem_request *req)
>   		struct intel_context *to = req->ctx;
>
>   		if (needs_pd_load_pre(engine, to)) {
> +			struct i915_hw_ppgtt *ppgtt;
>   			int ret;
>
> +			ppgtt = to->ppgtt ?: to_i915(req)->mm.aliasing_ppgtt;

Wrong base again.

> +
>   			trace_switch_mm(engine, to);
> -			ret = to->ppgtt->switch_mm(to->ppgtt, req);
> +			ret = ppgtt->switch_mm(ppgtt, req);
>   			if (ret)
>   				return ret;
>
> -			/* Doing a PD load always reloads the page dirs */
> -			to->ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
> +			ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
>   		}
>
>   		if (to != engine->last_context) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 780e3ad3ca10..50bdba5cb6d2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2180,19 +2180,6 @@ int i915_ppgtt_init_hw(struct drm_device *dev)
>   	return 0;
>   }
>
> -int i915_ppgtt_init_ring(struct drm_i915_gem_request *req)
> -{
> -	struct i915_hw_ppgtt *ppgtt = to_i915(req)->mm.aliasing_ppgtt;
> -
> -	if (i915.enable_execlists)
> -		return 0;
> -
> -	if (!ppgtt)
> -		return 0;
> -
> -	return ppgtt->switch_mm(ppgtt, req);
> -}
> -
>   struct i915_hw_ppgtt *
>   i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
>   {
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index d7dd3d8a8758..333a2fc62b43 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -519,7 +519,6 @@ void i915_ggtt_cleanup_hw(struct drm_device *dev);
>
>   int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt);
>   int i915_ppgtt_init_hw(struct drm_device *dev);
> -int i915_ppgtt_init_ring(struct drm_i915_gem_request *req);
>   void i915_ppgtt_release(struct kref *kref);
>   struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev,
>   					struct drm_i915_file_private *fpriv);
>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH] drm/i915: Refactor execlists default context pinning
  2016-04-19  9:16     ` Tvrtko Ursulin
@ 2016-04-19  9:55       ` Chris Wilson
  0 siblings, 0 replies; 40+ messages in thread
From: Chris Wilson @ 2016-04-19  9:55 UTC (permalink / raw)
  To: intel-gfx

Refactor pinning and unpinning of contexts, such that the default
context for an engine is pinned during initialisation and unpinned
during teardown (pinning of the context handles the reference counting).
Thus we can eliminate the special case handling of the default context
that was required to mask that it was not being pinned normally.

v2: Rebalance context_queue after rebasing.
v3: Rebase to -nightly (not 40 patches in)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c |   5 +-
 drivers/gpu/drm/i915/i915_gem.c     |   2 +-
 drivers/gpu/drm/i915/intel_lrc.c    | 107 ++++++++++++++----------------------
 3 files changed, 43 insertions(+), 71 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index f775451bd0b6..e81a7504656e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2095,9 +2095,8 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
 		return ret;
 
 	list_for_each_entry(ctx, &dev_priv->context_list, link)
-		if (ctx != dev_priv->kernel_context)
-			for_each_engine(engine, dev_priv)
-				i915_dump_lrc_obj(m, ctx, engine);
+		for_each_engine(engine, dev_priv)
+			i915_dump_lrc_obj(m, ctx, engine);
 
 	mutex_unlock(&dev->struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6ce2c31b9a81..9ff73bf0e4ea 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2719,7 +2719,7 @@ void i915_gem_request_free(struct kref *req_ref)
 		i915_gem_request_remove_from_client(req);
 
 	if (ctx) {
-		if (i915.enable_execlists && ctx != req->i915->kernel_context)
+		if (i915.enable_execlists)
 			intel_lr_context_unpin(ctx, req->engine);
 
 		i915_gem_context_unreference(ctx);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index dedd82aea386..e064a6ae2d97 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -588,9 +588,7 @@ static void execlists_context_queue(struct drm_i915_gem_request *request)
 	struct drm_i915_gem_request *cursor;
 	int num_elements = 0;
 
-	if (request->ctx != request->i915->kernel_context)
-		intel_lr_context_pin(request->ctx, engine);
-
+	intel_lr_context_pin(request->ctx, request->engine);
 	i915_gem_request_reference(request);
 
 	spin_lock_bh(&engine->execlist_lock);
@@ -691,10 +689,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
 			return ret;
 	}
 
-	if (request->ctx != request->i915->kernel_context)
-		ret = intel_lr_context_pin(request->ctx, request->engine);
-
-	return ret;
+	return intel_lr_context_pin(request->ctx, request->engine);
 }
 
 static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
@@ -774,12 +769,8 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
 	if (engine->last_context != request->ctx) {
 		if (engine->last_context)
 			intel_lr_context_unpin(engine->last_context, engine);
-		if (request->ctx != request->i915->kernel_context) {
-			intel_lr_context_pin(request->ctx, engine);
-			engine->last_context = request->ctx;
-		} else {
-			engine->last_context = NULL;
-		}
+		intel_lr_context_pin(request->ctx, engine);
+		engine->last_context = request->ctx;
 	}
 
 	if (dev_priv->guc.execbuf_client)
@@ -1002,12 +993,7 @@ void intel_execlists_retire_requests(struct intel_engine_cs *engine)
 	spin_unlock_bh(&engine->execlist_lock);
 
 	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
-		struct intel_context *ctx = req->ctx;
-		struct drm_i915_gem_object *ctx_obj =
-				ctx->engine[engine->id].state;
-
-		if (ctx_obj && (ctx != req->i915->kernel_context))
-			intel_lr_context_unpin(ctx, engine);
+		intel_lr_context_unpin(req->ctx, engine);
 
 		list_del(&req->execlist_link);
 		i915_gem_request_unreference(req);
@@ -1052,23 +1038,26 @@ int logical_ring_flush_all_caches(struct drm_i915_gem_request *req)
 	return 0;
 }
 
-static int intel_lr_context_do_pin(struct intel_context *ctx,
-				   struct intel_engine_cs *engine)
+static int intel_lr_context_pin(struct intel_context *ctx,
+				struct intel_engine_cs *engine)
 {
-	struct drm_device *dev = engine->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_object *ctx_obj = ctx->engine[engine->id].state;
-	struct intel_ringbuffer *ringbuf = ctx->engine[engine->id].ringbuf;
+	struct drm_i915_private *dev_priv = ctx->i915;
+	struct drm_i915_gem_object *ctx_obj;
+	struct intel_ringbuffer *ringbuf;
 	void *vaddr;
 	u32 *lrc_reg_state;
 	int ret;
 
-	WARN_ON(!mutex_is_locked(&engine->dev->struct_mutex));
+	lockdep_assert_held(&ctx->i915->dev->struct_mutex);
 
+	if (ctx->engine[engine->id].pin_count++)
+		return 0;
+
+	ctx_obj = ctx->engine[engine->id].state;
 	ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN,
 			PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
 	if (ret)
-		return ret;
+		goto err;
 
 	vaddr = i915_gem_object_pin_map(ctx_obj);
 	if (IS_ERR(vaddr)) {
@@ -1078,10 +1067,12 @@ static int intel_lr_context_do_pin(struct intel_context *ctx,
 
 	lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
 
+	ringbuf = ctx->engine[engine->id].ringbuf;
 	ret = intel_pin_and_map_ringbuffer_obj(engine->dev, ringbuf);
 	if (ret)
 		goto unpin_map;
 
+	i915_gem_context_reference(ctx);
 	ctx->engine[engine->id].lrc_vma = i915_gem_obj_to_ggtt(ctx_obj);
 	intel_lr_context_descriptor_update(ctx, engine);
 	lrc_reg_state[CTX_RING_BUFFER_START+1] = ringbuf->vma->node.start;
@@ -1092,51 +1083,39 @@ static int intel_lr_context_do_pin(struct intel_context *ctx,
 	if (i915.enable_guc_submission)
 		I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
 
-	return ret;
+	return 0;
 
 unpin_map:
 	i915_gem_object_unpin_map(ctx_obj);
 unpin_ctx_obj:
 	i915_gem_object_ggtt_unpin(ctx_obj);
-
+err:
+	ctx->engine[engine->id].pin_count = 0;
 	return ret;
 }
 
-static int intel_lr_context_pin(struct intel_context *ctx,
-				struct intel_engine_cs *engine)
+void intel_lr_context_unpin(struct intel_context *ctx,
+			    struct intel_engine_cs *engine)
 {
-	int ret = 0;
+	struct drm_i915_gem_object *ctx_obj;
 
-	if (ctx->engine[engine->id].pin_count++ == 0) {
-		ret = intel_lr_context_do_pin(ctx, engine);
-		if (ret)
-			goto reset_pin_count;
+	lockdep_assert_held(&ctx->i915->dev->struct_mutex);
+	GEM_BUG_ON(ctx->engine[engine->id].pin_count == 0);
 
-		i915_gem_context_reference(ctx);
-	}
-	return ret;
+	if (--ctx->engine[engine->id].pin_count)
+		return;
 
-reset_pin_count:
-	ctx->engine[engine->id].pin_count = 0;
-	return ret;
-}
+	intel_unpin_ringbuffer_obj(ctx->engine[engine->id].ringbuf);
 
-void intel_lr_context_unpin(struct intel_context *ctx,
-			    struct intel_engine_cs *engine)
-{
-	struct drm_i915_gem_object *ctx_obj = ctx->engine[engine->id].state;
+	ctx_obj = ctx->engine[engine->id].state;
+	i915_gem_object_unpin_map(ctx_obj);
+	i915_gem_object_ggtt_unpin(ctx_obj);
 
-	WARN_ON(!mutex_is_locked(&ctx->i915->dev->struct_mutex));
-	if (--ctx->engine[engine->id].pin_count == 0) {
-		i915_gem_object_unpin_map(ctx_obj);
-		intel_unpin_ringbuffer_obj(ctx->engine[engine->id].ringbuf);
-		i915_gem_object_ggtt_unpin(ctx_obj);
-		ctx->engine[engine->id].lrc_vma = NULL;
-		ctx->engine[engine->id].lrc_desc = 0;
-		ctx->engine[engine->id].lrc_reg_state = NULL;
+	ctx->engine[engine->id].lrc_vma = NULL;
+	ctx->engine[engine->id].lrc_desc = 0;
+	ctx->engine[engine->id].lrc_reg_state = NULL;
 
-		i915_gem_context_unreference(ctx);
-	}
+	i915_gem_context_unreference(ctx);
 }
 
 static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
@@ -2034,6 +2013,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
 		i915_gem_object_unpin_map(engine->status_page.obj);
 		engine->status_page.obj = NULL;
 	}
+	intel_lr_context_unpin(dev_priv->kernel_context, engine);
 
 	engine->idle_lite_restore_wa = 0;
 	engine->disable_lite_restore_wa = false;
@@ -2137,11 +2117,10 @@ logical_ring_init(struct drm_device *dev, struct intel_engine_cs *engine)
 		goto error;
 
 	/* As this is the default context, always pin it */
-	ret = intel_lr_context_do_pin(dctx, engine);
+	ret = intel_lr_context_pin(dctx, engine);
 	if (ret) {
-		DRM_ERROR(
-			"Failed to pin and map ringbuffer %s: %d\n",
-			engine->name, ret);
+		DRM_ERROR("Failed to pin context for %s: %d\n",
+			  engine->name, ret);
 		goto error;
 	}
 
@@ -2562,12 +2541,6 @@ void intel_lr_context_free(struct intel_context *ctx)
 		if (!ctx_obj)
 			continue;
 
-		if (ctx == ctx->i915->kernel_context) {
-			intel_unpin_ringbuffer_obj(ringbuf);
-			i915_gem_object_ggtt_unpin(ctx_obj);
-			i915_gem_object_unpin_map(ctx_obj);
-		}
-
 		WARN_ON(ctx->engine[i].pin_count);
 		intel_ringbuffer_free(ringbuf);
 		drm_gem_object_unreference(&ctx_obj->base);
-- 
2.8.0.rc3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH 6/9] drm/i915: Move context initialisation to first-use
  2016-04-19  6:49   ` [PATCH 6/9] drm/i915: Move context initialisation to first-use Chris Wilson
@ 2016-04-19  9:57     ` Tvrtko Ursulin
  2016-04-19 10:15     ` Tvrtko Ursulin
  1 sibling, 0 replies; 40+ messages in thread
From: Tvrtko Ursulin @ 2016-04-19  9:57 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/04/16 07:49, Chris Wilson wrote:
> Instead of allocating a new request when allocating a context, use the
> request that initiated the allocation to emit the context
> initialisation.

A few words on pros and cons?

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.h  |  1 +
>   drivers/gpu/drm/i915/intel_lrc.c | 39 ++++++++++++++++++---------------------
>   2 files changed, 19 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 77becfd5a09d..b65e3b16e1b0 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -870,6 +870,7 @@ struct intel_context {
>   		struct i915_vma *lrc_vma;
>   		u64 lrc_desc;
>   		uint32_t *lrc_reg_state;
> +		bool initialised;
>   	} engine[I915_NUM_ENGINES];
>
>   	struct list_head link;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 79ac6f06a502..c104015813d3 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -669,9 +669,10 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
>
>   int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
>   {
> -	int ret = 0;
> +	struct intel_engine_cs *engine = request->engine;
> +	int ret;
>
> -	request->ringbuf = request->ctx->engine[request->engine->id].ringbuf;
> +	request->ringbuf = request->ctx->engine[engine->id].ringbuf;
>
>   	if (i915.enable_guc_submission) {
>   		/*
> @@ -686,7 +687,20 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
>   			return ret;
>   	}
>
> -	return intel_lr_context_pin(request->ctx, request->engine);
> +	ret = intel_lr_context_pin(request->ctx, engine);
> +	if (ret)
> +		return ret;
> +
> +	if (!request->ctx->engine[engine->id].initialised) {
> +		ret = engine->init_context(request);
> +		if (ret) {
> +			intel_lr_context_unpin(request->ctx, engine);
> +			return ret;
> +		}
> +		request->ctx->engine[engine->id].initialised = true;
> +	}
> +
> +	return 0;
>   }
>
>   static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
> @@ -2576,25 +2590,8 @@ int intel_lr_context_deferred_alloc(struct intel_context *ctx,
>
>   	ctx->engine[engine->id].ringbuf = ringbuf;
>   	ctx->engine[engine->id].state = ctx_obj;
> +	ctx->engine[engine->id].initialised = engine->init_context == NULL;
>
> -	if (ctx != ctx->i915->kernel_context && engine->init_context) {
> -		struct drm_i915_gem_request *req;
> -
> -		req = i915_gem_request_alloc(engine, ctx);
> -		if (IS_ERR(req)) {
> -			ret = PTR_ERR(req);
> -			DRM_ERROR("ring create req: %d\n", ret);
> -			goto error_ringbuf;
> -		}
> -
> -		ret = engine->init_context(req);
> -		i915_add_request_no_flush(req);
> -		if (ret) {
> -			DRM_ERROR("ring init context: %d\n",
> -				ret);
> -			goto error_ringbuf;
> -		}
> -	}
>   	return 0;
>
>   error_ringbuf:
>

Otherwise looks correct. Just missing an explanation of the motivation.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 1/3] drm/i915: L3 cache remapping is part of context switching
  2016-04-19  9:41     ` Tvrtko Ursulin
@ 2016-04-19 10:07       ` Chris Wilson
  2016-04-19 10:07         ` [PATCH 2/3] drm/i915: Consolidate L3 remapping LRI Chris Wilson
                           ` (2 more replies)
  0 siblings, 3 replies; 40+ messages in thread
From: Chris Wilson @ 2016-04-19 10:07 UTC (permalink / raw)
  To: intel-gfx

Move the i915_gem_l3_remap function such that it is next to the context
switching code, which is where we perform the L3 remap.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c         | 31 -------------------------------
 drivers/gpu/drm/i915/i915_gem_context.c | 31 +++++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9ff73bf0e4ea..59419f10e76a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4729,37 +4729,6 @@ err:
 	return ret;
 }
 
-int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
-{
-	struct intel_engine_cs *engine = req->engine;
-	struct drm_device *dev = engine->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
-	int i, ret;
-
-	if (!HAS_L3_DPF(dev) || !remap_info)
-		return 0;
-
-	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
-	if (ret)
-		return ret;
-
-	/*
-	 * Note: We do not worry about the concurrent register cacheline hang
-	 * here because no other code should access these registers other than
-	 * at initialization time.
-	 */
-	for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) {
-		intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i));
-		intel_ring_emit(engine, remap_info[i]);
-	}
-
-	intel_ring_advance(engine);
-
-	return ret;
-}
-
 void i915_gem_init_swizzling(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 59d66b5bc8ad..68232d384902 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -643,6 +643,37 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
 	return ret;
 }
 
+int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
+{
+	struct intel_engine_cs *engine = req->engine;
+	struct drm_device *dev = engine->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
+	int i, ret;
+
+	if (!HAS_L3_DPF(dev) || !remap_info)
+		return 0;
+
+	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
+	if (ret)
+		return ret;
+
+	/*
+	 * Note: We do not worry about the concurrent register cacheline hang
+	 * here because no other code should access these registers other than
+	 * at initialization time.
+	 */
+	for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) {
+		intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
+		intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i));
+		intel_ring_emit(engine, remap_info[i]);
+	}
+
+	intel_ring_advance(engine);
+
+	return ret;
+}
+
 static inline bool skip_rcs_switch(struct intel_engine_cs *engine,
 				   struct intel_context *to)
 {
-- 
2.8.0.rc3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 2/3] drm/i915: Consolidate L3 remapping LRI
  2016-04-19 10:07       ` [PATCH 1/3] drm/i915: L3 cache remapping is part of context switching Chris Wilson
@ 2016-04-19 10:07         ` Chris Wilson
  2016-04-19 10:21           ` Tvrtko Ursulin
  2016-04-19 10:07         ` [PATCH 3/3] drm/i915: Remove early l3-remap Chris Wilson
  2016-04-19 10:20         ` [PATCH 1/3] drm/i915: L3 cache remapping is part of context switching Tvrtko Ursulin
  2 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2016-04-19 10:07 UTC (permalink / raw)
  To: intel-gfx

We can use a single MI_LOAD_REGISTER_IMM command packet to write all the
L3 remapping registers, shrinking the number of bytes required to emit
the context switch.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_context.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 68232d384902..b47ed8b6f4be 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -645,16 +645,14 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
 
 int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
 {
+	u32 *remap_info = req->i915->l3_parity.remap_info[slice];
 	struct intel_engine_cs *engine = req->engine;
-	struct drm_device *dev = engine->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
 	int i, ret;
 
-	if (!HAS_L3_DPF(dev) || !remap_info)
+	if (!remap_info)
 		return 0;
 
-	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
+	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2);
 	if (ret)
 		return ret;
 
@@ -663,15 +661,15 @@ int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
 	 * here because no other code should access these registers other than
 	 * at initialization time.
 	 */
-	for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) {
-		intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4));
+	for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
 		intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i));
 		intel_ring_emit(engine, remap_info[i]);
 	}
-
+	intel_ring_emit(engine, MI_NOOP);
 	intel_ring_advance(engine);
 
-	return ret;
+	return 0;
 }
 
 static inline bool skip_rcs_switch(struct intel_engine_cs *engine,
-- 
2.8.0.rc3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 3/3] drm/i915: Remove early l3-remap
  2016-04-19 10:07       ` [PATCH 1/3] drm/i915: L3 cache remapping is part of context switching Chris Wilson
  2016-04-19 10:07         ` [PATCH 2/3] drm/i915: Consolidate L3 remapping LRI Chris Wilson
@ 2016-04-19 10:07         ` Chris Wilson
  2016-04-19 10:23           ` Tvrtko Ursulin
  2016-04-19 10:20         ` [PATCH 1/3] drm/i915: L3 cache remapping is part of context switching Tvrtko Ursulin
  2 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2016-04-19 10:07 UTC (permalink / raw)
  To: intel-gfx

Since we do the l3-remap on context switch, we can remove the redundant
early call to set the mapping prior to performing the first context
switch.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h         |  1 -
 drivers/gpu/drm/i915/i915_gem.c         | 10 +---------
 drivers/gpu/drm/i915/i915_gem_context.c |  4 ++--
 3 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index bfcaf98f2ed7..5b2a9f1bcd96 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3155,7 +3155,6 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
 int __must_check i915_gem_init(struct drm_device *dev);
 int i915_gem_init_engines(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
-int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice);
 void i915_gem_init_swizzling(struct drm_device *dev);
 void i915_gem_cleanup_engines(struct drm_device *dev);
 int __must_check i915_gpu_idle(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 59419f10e76a..a0f485a1576b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4833,7 +4833,7 @@ i915_gem_init_hw(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *engine;
-	int ret, j;
+	int ret;
 
 	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
 		return -EIO;
@@ -4915,14 +4915,6 @@ i915_gem_init_hw(struct drm_device *dev)
 			break;
 		}
 
-		if (engine->id == RCS) {
-			for (j = 0; j < NUM_L3_SLICES(dev); j++) {
-				ret = i915_gem_l3_remap(req, j);
-				if (ret)
-					goto err_request;
-			}
-		}
-
 		ret = i915_ppgtt_init_ring(req);
 		if (ret)
 			goto err_request;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index b47ed8b6f4be..e02a22d056cc 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -643,7 +643,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
 	return ret;
 }
 
-int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
+static int remap_l3(struct drm_i915_gem_request *req, int slice)
 {
 	u32 *remap_info = req->i915->l3_parity.remap_info[slice];
 	struct intel_engine_cs *engine = req->engine;
@@ -841,7 +841,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 		if (!(to->remap_slice & (1<<i)))
 			continue;
 
-		ret = i915_gem_l3_remap(req, i);
+		ret = remap_l3(req, i);
 		if (ret)
 			return ret;
 
-- 
2.8.0.rc3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH 6/9] drm/i915: Move context initialisation to first-use
  2016-04-19  6:49   ` [PATCH 6/9] drm/i915: Move context initialisation to first-use Chris Wilson
  2016-04-19  9:57     ` Tvrtko Ursulin
@ 2016-04-19 10:15     ` Tvrtko Ursulin
  2016-04-19 10:55       ` Chris Wilson
  1 sibling, 1 reply; 40+ messages in thread
From: Tvrtko Ursulin @ 2016-04-19 10:15 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/04/16 07:49, Chris Wilson wrote:
> Instead of allocating a new request when allocating a context, use the
> request that initiated the allocation to emit the context
> initialisation.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.h  |  1 +
>   drivers/gpu/drm/i915/intel_lrc.c | 39 ++++++++++++++++++---------------------
>   2 files changed, 19 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 77becfd5a09d..b65e3b16e1b0 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -870,6 +870,7 @@ struct intel_context {
>   		struct i915_vma *lrc_vma;
>   		u64 lrc_desc;
>   		uint32_t *lrc_reg_state;
> +		bool initialised;
>   	} engine[I915_NUM_ENGINES];
>
>   	struct list_head link;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 79ac6f06a502..c104015813d3 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -669,9 +669,10 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
>
>   int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
>   {
> -	int ret = 0;
> +	struct intel_engine_cs *engine = request->engine;
> +	int ret;
>
> -	request->ringbuf = request->ctx->engine[request->engine->id].ringbuf;
> +	request->ringbuf = request->ctx->engine[engine->id].ringbuf;
>
>   	if (i915.enable_guc_submission) {
>   		/*
> @@ -686,7 +687,20 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
>   			return ret;
>   	}
>
> -	return intel_lr_context_pin(request->ctx, request->engine);
> +	ret = intel_lr_context_pin(request->ctx, engine);
> +	if (ret)
> +		return ret;
> +
> +	if (!request->ctx->engine[engine->id].initialised) {
> +		ret = engine->init_context(request);

Actually, the change here is that previously this wasn't done for the kernel 
context. It needs to be explained why we either want that, or why it is fine.

> +		if (ret) {
> +			intel_lr_context_unpin(request->ctx, engine);
> +			return ret;
> +		}
> +		request->ctx->engine[engine->id].initialised = true;
> +	}
> +
> +	return 0;
>   }
>
>   static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
> @@ -2576,25 +2590,8 @@ int intel_lr_context_deferred_alloc(struct intel_context *ctx,
>
>   	ctx->engine[engine->id].ringbuf = ringbuf;
>   	ctx->engine[engine->id].state = ctx_obj;
> +	ctx->engine[engine->id].initialised = engine->init_context == NULL;
>
> -	if (ctx != ctx->i915->kernel_context && engine->init_context) {
> -		struct drm_i915_gem_request *req;
> -
> -		req = i915_gem_request_alloc(engine, ctx);
> -		if (IS_ERR(req)) {
> -			ret = PTR_ERR(req);
> -			DRM_ERROR("ring create req: %d\n", ret);
> -			goto error_ringbuf;
> -		}
> -
> -		ret = engine->init_context(req);
> -		i915_add_request_no_flush(req);
> -		if (ret) {
> -			DRM_ERROR("ring init context: %d\n",
> -				ret);
> -			goto error_ringbuf;
> -		}
> -	}
>   	return 0;
>
>   error_ringbuf:
>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] drm/i915: L3 cache remapping is part of context switching
  2016-04-19 10:07       ` [PATCH 1/3] drm/i915: L3 cache remapping is part of context switching Chris Wilson
  2016-04-19 10:07         ` [PATCH 2/3] drm/i915: Consolidate L3 remapping LRI Chris Wilson
  2016-04-19 10:07         ` [PATCH 3/3] drm/i915: Remove early l3-remap Chris Wilson
@ 2016-04-19 10:20         ` Tvrtko Ursulin
  2 siblings, 0 replies; 40+ messages in thread
From: Tvrtko Ursulin @ 2016-04-19 10:20 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/04/16 11:07, Chris Wilson wrote:
> Move the i915_gem_l3_remap function such that it is next to the context
> switching code, which is where we perform the L3 remap.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem.c         | 31 -------------------------------
>   drivers/gpu/drm/i915/i915_gem_context.c | 31 +++++++++++++++++++++++++++++++
>   2 files changed, 31 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 9ff73bf0e4ea..59419f10e76a 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4729,37 +4729,6 @@ err:
>   	return ret;
>   }
>
> -int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
> -{
> -	struct intel_engine_cs *engine = req->engine;
> -	struct drm_device *dev = engine->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
> -	int i, ret;
> -
> -	if (!HAS_L3_DPF(dev) || !remap_info)
> -		return 0;
> -
> -	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
> -	if (ret)
> -		return ret;
> -
> -	/*
> -	 * Note: We do not worry about the concurrent register cacheline hang
> -	 * here because no other code should access these registers other than
> -	 * at initialization time.
> -	 */
> -	for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) {
> -		intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
> -		intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i));
> -		intel_ring_emit(engine, remap_info[i]);
> -	}
> -
> -	intel_ring_advance(engine);
> -
> -	return ret;
> -}
> -
>   void i915_gem_init_swizzling(struct drm_device *dev)
>   {
>   	struct drm_i915_private *dev_priv = dev->dev_private;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 59d66b5bc8ad..68232d384902 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -643,6 +643,37 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
>   	return ret;
>   }
>
> +int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
> +{
> +	struct intel_engine_cs *engine = req->engine;
> +	struct drm_device *dev = engine->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
> +	int i, ret;
> +
> +	if (!HAS_L3_DPF(dev) || !remap_info)
> +		return 0;
> +
> +	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
> +	if (ret)
> +		return ret;
> +
> +	/*
> +	 * Note: We do not worry about the concurrent register cacheline hang
> +	 * here because no other code should access these registers other than
> +	 * at initialization time.
> +	 */
> +	for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) {
> +		intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
> +		intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i));
> +		intel_ring_emit(engine, remap_info[i]);
> +	}
> +
> +	intel_ring_advance(engine);
> +
> +	return ret;
> +}
> +
>   static inline bool skip_rcs_switch(struct intel_engine_cs *engine,
>   				   struct intel_context *to)
>   {
>

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/3] drm/i915: Consolidate L3 remapping LRI
  2016-04-19 10:07         ` [PATCH 2/3] drm/i915: Consolidate L3 remapping LRI Chris Wilson
@ 2016-04-19 10:21           ` Tvrtko Ursulin
  0 siblings, 0 replies; 40+ messages in thread
From: Tvrtko Ursulin @ 2016-04-19 10:21 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/04/16 11:07, Chris Wilson wrote:
> We can use a single MI_LOAD_REGISTER_IMM command packet to write all the
> L3 remapping registers, shrinking the number of bytes required to emit
> the context switch.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.c | 16 +++++++---------
>   1 file changed, 7 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 68232d384902..b47ed8b6f4be 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -645,16 +645,14 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
>
>   int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
>   {
> +	u32 *remap_info = req->i915->l3_parity.remap_info[slice];
>   	struct intel_engine_cs *engine = req->engine;
> -	struct drm_device *dev = engine->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
>   	int i, ret;
>
> -	if (!HAS_L3_DPF(dev) || !remap_info)
> +	if (!remap_info)
>   		return 0;
>
> -	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
> +	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2);
>   	if (ret)
>   		return ret;
>
> @@ -663,15 +661,15 @@ int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
>   	 * here because no other code should access these registers other than
>   	 * at initialization time.
>   	 */
> -	for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) {
> -		intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
> +	intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4));
> +	for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
>   		intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i));
>   		intel_ring_emit(engine, remap_info[i]);
>   	}
> -
> +	intel_ring_emit(engine, MI_NOOP);
>   	intel_ring_advance(engine);
>
> -	return ret;
> +	return 0;
>   }
>
>   static inline bool skip_rcs_switch(struct intel_engine_cs *engine,
>

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 3/3] drm/i915: Remove early l3-remap
  2016-04-19 10:07         ` [PATCH 3/3] drm/i915: Remove early l3-remap Chris Wilson
@ 2016-04-19 10:23           ` Tvrtko Ursulin
  0 siblings, 0 replies; 40+ messages in thread
From: Tvrtko Ursulin @ 2016-04-19 10:23 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/04/16 11:07, Chris Wilson wrote:
> Since we do the l3-remap on context switch, we can remove the redundant
> early call to set the mapping prior to performing the first context
> switch.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.h         |  1 -
>   drivers/gpu/drm/i915/i915_gem.c         | 10 +---------
>   drivers/gpu/drm/i915/i915_gem_context.c |  4 ++--
>   3 files changed, 3 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index bfcaf98f2ed7..5b2a9f1bcd96 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3155,7 +3155,6 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
>   int __must_check i915_gem_init(struct drm_device *dev);
>   int i915_gem_init_engines(struct drm_device *dev);
>   int __must_check i915_gem_init_hw(struct drm_device *dev);
> -int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice);
>   void i915_gem_init_swizzling(struct drm_device *dev);
>   void i915_gem_cleanup_engines(struct drm_device *dev);
>   int __must_check i915_gpu_idle(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 59419f10e76a..a0f485a1576b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4833,7 +4833,7 @@ i915_gem_init_hw(struct drm_device *dev)
>   {
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	struct intel_engine_cs *engine;
> -	int ret, j;
> +	int ret;
>
>   	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
>   		return -EIO;
> @@ -4915,14 +4915,6 @@ i915_gem_init_hw(struct drm_device *dev)
>   			break;
>   		}
>
> -		if (engine->id == RCS) {
> -			for (j = 0; j < NUM_L3_SLICES(dev); j++) {
> -				ret = i915_gem_l3_remap(req, j);
> -				if (ret)
> -					goto err_request;
> -			}
> -		}
> -
>   		ret = i915_ppgtt_init_ring(req);
>   		if (ret)
>   			goto err_request;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index b47ed8b6f4be..e02a22d056cc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -643,7 +643,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
>   	return ret;
>   }
>
> -int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
> +static int remap_l3(struct drm_i915_gem_request *req, int slice)
>   {
>   	u32 *remap_info = req->i915->l3_parity.remap_info[slice];
>   	struct intel_engine_cs *engine = req->engine;
> @@ -841,7 +841,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
>   		if (!(to->remap_slice & (1<<i)))
>   			continue;
>
> -		ret = i915_gem_l3_remap(req, i);
> +		ret = remap_l3(req, i);
>   		if (ret)
>   			return ret;
>
>

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 7/9] drm/i915: Move the magical deferred context allocation into the request
  2016-04-19  6:49   ` [PATCH 7/9] drm/i915: Move the magical deferred context allocation into the request Chris Wilson
@ 2016-04-19 10:28     ` Tvrtko Ursulin
  0 siblings, 0 replies; 40+ messages in thread
From: Tvrtko Ursulin @ 2016-04-19 10:28 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/04/16 07:49, Chris Wilson wrote:
> We can hide more details of execlists from higher level code by removing
> the explicit call to create an execlist context into its first use.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c |  8 --------
>   drivers/gpu/drm/i915/intel_lrc.c           | 21 ++++++++++++++-------
>   drivers/gpu/drm/i915/intel_lrc.h           |  2 --
>   3 files changed, 14 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 0d210c383487..3da5978ac0f7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1080,14 +1080,6 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
>   		return ERR_PTR(-EIO);
>   	}
>
> -	if (i915.enable_execlists && !ctx->engine[engine->id].state) {
> -		int ret = intel_lr_context_deferred_alloc(ctx, engine);
> -		if (ret) {
> -			DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret);
> -			return ERR_PTR(ret);
> -		}
> -	}
> -
>   	return ctx;
>   }
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index c104015813d3..b0d20af38574 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -227,6 +227,8 @@ enum {
>   #define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT	0x17
>   #define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT	0x26
>
> +static int execlists_context_deferred_alloc(struct intel_context *ctx,
> +					    struct intel_engine_cs *engine);
>   static int intel_lr_context_pin(struct intel_context *ctx,
>   				struct intel_engine_cs *engine);
>
> @@ -672,8 +674,6 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
>   	struct intel_engine_cs *engine = request->engine;
>   	int ret;
>
> -	request->ringbuf = request->ctx->engine[engine->id].ringbuf;
> -
>   	if (i915.enable_guc_submission) {
>   		/*
>   		 * Check that the GuC has space for the request before
> @@ -687,6 +687,14 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
>   			return ret;
>   	}
>
> +	if (request->ctx->engine[engine->id].state == NULL) {
> +		ret = execlists_context_deferred_alloc(request->ctx, engine);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	request->ringbuf = request->ctx->engine[engine->id].ringbuf;
> +
>   	ret = intel_lr_context_pin(request->ctx, engine);
>   	if (ret)
>   		return ret;
> @@ -2091,7 +2099,7 @@ logical_ring_init(struct intel_engine_cs *engine)
>   	if (ret)
>   		goto error;
>
> -	ret = intel_lr_context_deferred_alloc(dctx, engine);
> +	ret = execlists_context_deferred_alloc(dctx, engine);
>   	if (ret)
>   		goto error;
>
> @@ -2541,7 +2549,7 @@ uint32_t intel_lr_context_size(struct intel_engine_cs *engine)
>   }
>
>   /**
> - * intel_lr_context_deferred_alloc() - create the LRC specific bits of a context
> + * execlists_context_deferred_alloc() - create the LRC specific bits of a context
>    * @ctx: LR context to create.
>    * @ring: engine to be used with the context.
>    *
> @@ -2553,9 +2561,8 @@ uint32_t intel_lr_context_size(struct intel_engine_cs *engine)
>    *
>    * Return: non-zero on error.
>    */
> -
> -int intel_lr_context_deferred_alloc(struct intel_context *ctx,
> -				    struct intel_engine_cs *engine)
> +static int execlists_context_deferred_alloc(struct intel_context *ctx,
> +					    struct intel_engine_cs *engine)
>   {
>   	struct drm_i915_gem_object *ctx_obj;
>   	uint32_t context_size;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index b17ab79333aa..8bea937973f6 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -102,8 +102,6 @@ static inline void intel_logical_ring_emit_reg(struct intel_ringbuffer *ringbuf,
>
>   void intel_lr_context_free(struct intel_context *ctx);
>   uint32_t intel_lr_context_size(struct intel_engine_cs *engine);
> -int intel_lr_context_deferred_alloc(struct intel_context *ctx,
> -				    struct intel_engine_cs *engine);
>   void intel_lr_context_unpin(struct intel_context *ctx,
>   			    struct intel_engine_cs *engine);
>
>

This one looks fine to me. (And very desirable to improve the layering 
like this.)

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 6/9] drm/i915: Move context initialisation to first-use
  2016-04-19 10:15     ` Tvrtko Ursulin
@ 2016-04-19 10:55       ` Chris Wilson
  0 siblings, 0 replies; 40+ messages in thread
From: Chris Wilson @ 2016-04-19 10:55 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Tue, Apr 19, 2016 at 11:15:31AM +0100, Tvrtko Ursulin wrote:
> 
> On 19/04/16 07:49, Chris Wilson wrote:
> >Instead of allocating a new request when allocating a context, use the
> >request that initiated the allocation to emit the context
> >initialisation.
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >---
> >  drivers/gpu/drm/i915/i915_drv.h  |  1 +
> >  drivers/gpu/drm/i915/intel_lrc.c | 39 ++++++++++++++++++---------------------
> >  2 files changed, 19 insertions(+), 21 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >index 77becfd5a09d..b65e3b16e1b0 100644
> >--- a/drivers/gpu/drm/i915/i915_drv.h
> >+++ b/drivers/gpu/drm/i915/i915_drv.h
> >@@ -870,6 +870,7 @@ struct intel_context {
> >  		struct i915_vma *lrc_vma;
> >  		u64 lrc_desc;
> >  		uint32_t *lrc_reg_state;
> >+		bool initialised;
> >  	} engine[I915_NUM_ENGINES];
> >
> >  	struct list_head link;
> >diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >index 79ac6f06a502..c104015813d3 100644
> >--- a/drivers/gpu/drm/i915/intel_lrc.c
> >+++ b/drivers/gpu/drm/i915/intel_lrc.c
> >@@ -669,9 +669,10 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
> >
> >  int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
> >  {
> >-	int ret = 0;
> >+	struct intel_engine_cs *engine = request->engine;
> >+	int ret;
> >
> >-	request->ringbuf = request->ctx->engine[request->engine->id].ringbuf;
> >+	request->ringbuf = request->ctx->engine[engine->id].ringbuf;
> >
> >  	if (i915.enable_guc_submission) {
> >  		/*
> >@@ -686,7 +687,20 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
> >  			return ret;
> >  	}
> >
> >-	return intel_lr_context_pin(request->ctx, request->engine);
> >+	ret = intel_lr_context_pin(request->ctx, engine);
> >+	if (ret)
> >+		return ret;
> >+
> >+	if (!request->ctx->engine[engine->id].initialised) {
> >+		ret = engine->init_context(request);
> 
> Actually, change here is that previously this wasn't done for the
> kernel context. Needs to be explained why we either want that, or
> why it is fine.

We never execute the kernel context with execlists. It really is just a
gigantic placeholder for the HWS (and golden context state for GuC).

If we had, we would have noticed that we don't set the wa registers for
it. Hopefully, we would have noticed!
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 8/9] drm/i915: Track the previous pinned context inside the request
  2016-04-19  6:49   ` [PATCH 8/9] drm/i915: Track the previous pinned context inside " Chris Wilson
@ 2016-04-19 12:02     ` Tvrtko Ursulin
  2016-04-19 12:14       ` Chris Wilson
  0 siblings, 1 reply; 40+ messages in thread
From: Tvrtko Ursulin @ 2016-04-19 12:02 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/04/16 07:49, Chris Wilson wrote:
> As the contexts are accessed by the hardware until the switch is completed
> to a new context, the hardware may still be writing to the context object
> after the breadcrumb is visible. We must not unpin/unbind/prune that
> object whilst still active and so we keep the previous context pinned until
> the following request. If we move this tracking onto the request, we can
> simplify the code and enable freeing of the request without the
> struct_mutex in subsequent patches.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_request.c |  8 ++++----
>   drivers/gpu/drm/i915/i915_gem_request.h | 11 +++++++++++
>   drivers/gpu/drm/i915/intel_lrc.c        | 12 +++++-------
>   3 files changed, 20 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 33aacf1725dd..8d7c415f1896 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -643,12 +643,12 @@ void i915_gem_request_free(struct kref *req_ref)
>   	if (req->file_priv)
>   		i915_gem_request_remove_from_client(req);
>
> -	if (ctx) {
> +	if (req->pinned_context) {
>   		if (i915.enable_execlists)
> -			intel_lr_context_unpin(ctx, req->engine);
> -
> -		i915_gem_context_unreference(ctx);
> +			intel_lr_context_unpin(req->pinned_context,
> +					       req->engine);
>   	}
>
> +	i915_gem_context_unreference(ctx);
>   	kmem_cache_free(to_i915(req)->requests, req);
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 69a4d4e2c97b..389813cbc19a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -85,6 +85,17 @@ struct drm_i915_gem_request {
>   	struct intel_context *ctx;
>   	struct intel_ringbuffer *ringbuf;
>
> +	/**
> +	 * Context related to the previous request.
> +	 * As the contexts are accessed by the hardware until the switch is
> +	 * completed to a new context, the hardware may still be writing
> +	 * to the context object after the breadcrumb is visible. We must
> +	 * not unpin/unbind/prune that object whilst still active and so
> +	 * we keep the previous context pinned until the following (this)
> +	 * request is retired.
> +	 */
> +	struct intel_context *pinned_context;
> +
>   	/** Batch buffer related to this request if any (used for
>   	    error state dump only) */
>   	struct drm_i915_gem_object *batch_obj;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index b0d20af38574..0e55f206e592 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -708,6 +708,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
>   		request->ctx->engine[engine->id].initialised = true;
>   	}
>
> +	request->pinned_context = request->ctx;

Add a little bit of comment to the big one above explaining the 
possibility of pinned_context being, not the previous, but the current 
one before submission?

>   	return 0;
>   }
>
> @@ -782,12 +783,8 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
>   	intel_logical_ring_emit(ringbuf, MI_NOOP);
>   	intel_logical_ring_advance(ringbuf);
>
> -	if (engine->last_context != request->ctx) {
> -		if (engine->last_context)
> -			intel_lr_context_unpin(engine->last_context, engine);
> -		intel_lr_context_pin(request->ctx, engine);
> -		engine->last_context = request->ctx;
> -	}
> +	request->pinned_context = engine->last_context;
> +	engine->last_context = request->ctx;

I am not sure if this is very complicated or just very different from my 
approach. Either way after thinking long and hard I cannot fault it. 
Looks like it will work.

>
>   	if (dev_priv->guc.execbuf_client)
>   		i915_guc_submit(dev_priv->guc.execbuf_client, request);
> @@ -1009,7 +1006,8 @@ void intel_execlists_retire_requests(struct intel_engine_cs *engine)
>   	spin_unlock_bh(&engine->execlist_lock);
>
>   	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
> -		intel_lr_context_unpin(req->ctx, engine);
> +		if (req->pinned_context)
> +			intel_lr_context_unpin(req->pinned_context, engine);
>
>   		list_del(&req->execlist_link);
>   		i915_gem_request_unreference(req);
>

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

I suppose you did not see any performance effect since you decided to 
turn it on for both GuC and execlists? (Assuming vma iomap is in place.)

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 8/9] drm/i915: Track the previous pinned context inside the request
  2016-04-19 12:02     ` Tvrtko Ursulin
@ 2016-04-19 12:14       ` Chris Wilson
  0 siblings, 0 replies; 40+ messages in thread
From: Chris Wilson @ 2016-04-19 12:14 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Tue, Apr 19, 2016 at 01:02:26PM +0100, Tvrtko Ursulin wrote:
> On 19/04/16 07:49, Chris Wilson wrote:
> >diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >index b0d20af38574..0e55f206e592 100644
> >--- a/drivers/gpu/drm/i915/intel_lrc.c
> >+++ b/drivers/gpu/drm/i915/intel_lrc.c
> >@@ -708,6 +708,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
> >  		request->ctx->engine[engine->id].initialised = true;
> >  	}
> >
> >+	request->pinned_context = request->ctx;
> 
> Add a little bit of comment to the big one above explaining the
> possibility of pinned_context being, not the previous, but the
> current one before submission?

This was here because we used to be able to cancel the context. Now that
we always go through intel_logical_ring_advance_and_submit, I've dropped
it. It makes me a little nervous because we are not clearly tracking the
pinned_context now.

I also switched to request->previous_context, but I'm undecided as to
whether that is a better name (still worrying over lack of pinned
context tracking).

> >  	return 0;
> >  }
> >
> >@@ -782,12 +783,8 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
> >  	intel_logical_ring_emit(ringbuf, MI_NOOP);
> >  	intel_logical_ring_advance(ringbuf);
> >
> >-	if (engine->last_context != request->ctx) {
> >-		if (engine->last_context)
> >-			intel_lr_context_unpin(engine->last_context, engine);
> >-		intel_lr_context_pin(request->ctx, engine);
> >-		engine->last_context = request->ctx;
> >-	}
> >+	request->pinned_context = engine->last_context;
> >+	engine->last_context = request->ctx;
> 
> I am not sure if this is very complicated or just very different
> from my approach. Either way after thinking long and hard I cannot
> fault it. Looks like it will work.

Subtle enough that I gave it a comment.

> >  	if (dev_priv->guc.execbuf_client)
> >  		i915_guc_submit(dev_priv->guc.execbuf_client, request);
> >@@ -1009,7 +1006,8 @@ void intel_execlists_retire_requests(struct intel_engine_cs *engine)
> >  	spin_unlock_bh(&engine->execlist_lock);
> >
> >  	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
> >-		intel_lr_context_unpin(req->ctx, engine);
> >+		if (req->pinned_context)
> >+			intel_lr_context_unpin(req->pinned_context, engine);
> >
> >  		list_del(&req->execlist_link);
> >  		i915_gem_request_unreference(req);
> >
> 
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> I suppose you did not see any performance effect since you decided
> to turn it on for both GuC and execlists? (Assuming vma iomap is in
> place.)

Context unpinning (with the caching in place) doesn't appear on the
profiles for me to worry about. There are easier low hanging fruit in
the needless locked atomic instructions and the like we do. (Besides
which there are lots of reasons why execlists doesn't yet outperform
legacy...)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 9/9] drm/i915: Move releasing of the GEM request from free to retire/cancel
  2016-04-19  6:49   ` [PATCH 9/9] drm/i915: Move releasing of the GEM request from free to retire/cancel Chris Wilson
@ 2016-04-19 12:16     ` Tvrtko Ursulin
  0 siblings, 0 replies; 40+ messages in thread
From: Tvrtko Ursulin @ 2016-04-19 12:16 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/04/16 07:49, Chris Wilson wrote:
> If we move the release of the GEM request (i.e. decoupling it from the
> various lists used for client and context tracking) after it is complete
> (either by the GPU retiring the request, or by the caller cancelling the
> request), we can remove the requirement that the final unreference of
> the GEM request need to be under the struct_mutex.
> 
> v2,v3: Rebalance execlists by moving the context unpinning.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem.c          |  4 ++--
>   drivers/gpu/drm/i915/i915_gem_request.c  | 25 ++++++++++---------------
>   drivers/gpu/drm/i915/i915_gem_request.h  | 14 --------------
>   drivers/gpu/drm/i915/intel_breadcrumbs.c |  2 +-
>   drivers/gpu/drm/i915/intel_display.c     |  2 +-
>   drivers/gpu/drm/i915/intel_lrc.c         |  4 ----
>   drivers/gpu/drm/i915/intel_pm.c          |  2 +-
>   7 files changed, 15 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 4057c0febccd..1f5434f7a294 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2542,7 +2542,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>   			ret = __i915_wait_request(req[i], true,
>   						  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
>   						  to_rps_client(file));
> -		i915_gem_request_unreference__unlocked(req[i]);
> +		i915_gem_request_unreference(req[i]);
>   	}
>   	return ret;
>   
> @@ -3548,7 +3548,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
>   		return 0;
>   
>   	ret = __i915_wait_request(target, true, NULL, NULL);
> -	i915_gem_request_unreference__unlocked(target);
> +	i915_gem_request_unreference(target);
>   
>   	return ret;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 8d7c415f1896..c14dca3d8b87 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -250,6 +250,7 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
>   static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>   {
>   	trace_i915_gem_request_retire(request);
> +	list_del_init(&request->list);
>   
>   	/* We know the GPU must have read the request to have
>   	 * sent us the seqno + interrupt, so use the position
> @@ -261,9 +262,15 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>   	 */
>   	request->ringbuf->last_retired_head = request->postfix;
>   
> -	list_del_init(&request->list);
>   	i915_gem_request_remove_from_client(request);
>   
> +	if (request->pinned_context) {
> +		if (i915.enable_execlists)
> +			intel_lr_context_unpin(request->pinned_context,
> +					       request->engine);
> +	}
> +
> +	i915_gem_context_unreference(request->ctx);
>   	i915_gem_request_unreference(request);
>   }
>   
> @@ -636,19 +643,7 @@ int i915_wait_request(struct drm_i915_gem_request *req)
>   
>   void i915_gem_request_free(struct kref *req_ref)
>   {
> -	struct drm_i915_gem_request *req = container_of(req_ref,
> -						 typeof(*req), ref);
> -	struct intel_context *ctx = req->ctx;
> -
> -	if (req->file_priv)
> -		i915_gem_request_remove_from_client(req);
> -
> -	if (req->pinned_context) {
> -		if (i915.enable_execlists)
> -			intel_lr_context_unpin(req->pinned_context,
> -					       req->engine);
> -	}
> -
> -	i915_gem_context_unreference(ctx);
> +	struct drm_i915_gem_request *req =
> +		container_of(req_ref, typeof(*req), ref);
>   	kmem_cache_free(to_i915(req)->requests, req);
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 389813cbc19a..48bff7dc4f90 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -170,23 +170,9 @@ i915_gem_request_reference(struct drm_i915_gem_request *req)
>   static inline void
>   i915_gem_request_unreference(struct drm_i915_gem_request *req)
>   {
> -	WARN_ON(!mutex_is_locked(&req->engine->dev->struct_mutex));
>   	kref_put(&req->ref, i915_gem_request_free);
>   }
>   
> -static inline void
> -i915_gem_request_unreference__unlocked(struct drm_i915_gem_request *req)
> -{
> -	struct drm_device *dev;
> -
> -	if (!req)
> -		return;
> -
> -	dev = req->engine->dev;
> -	if (kref_put_mutex(&req->ref, i915_gem_request_free, &dev->struct_mutex))
> -		mutex_unlock(&dev->struct_mutex);
> -}
> -
>   static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
>   					   struct drm_i915_gem_request *src)
>   {
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 79f175853548..d7d19e17318e 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c

! :)

> @@ -379,7 +379,7 @@ static int intel_breadcrumbs_signaler(void *arg)
>   			 */
>   			intel_engine_remove_wait(engine, &signal->wait);
>   
> -			i915_gem_request_unreference__unlocked(signal->request);
> +			i915_gem_request_unreference(signal->request);
>   
>   			/* Find the next oldest signal. Note that as we have
>   			 * not been holding the lock, another client may
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index caff656417c2..07dfd55fff91 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -11370,7 +11370,7 @@ static void intel_mmio_flip_work_func(struct work_struct *work)
>   		WARN_ON(__i915_wait_request(mmio_flip->req,
>   					    false, NULL,
>   					    &mmio_flip->i915->rps.mmioflips));
> -		i915_gem_request_unreference__unlocked(mmio_flip->req);
> +		i915_gem_request_unreference(mmio_flip->req);
>   	}
>   
>   	/* For framebuffer backed by dmabuf, wait for fence */
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 0e55f206e592..0eaa74fab87b 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -587,7 +587,6 @@ static void execlists_context_queue(struct drm_i915_gem_request *request)
>   	struct drm_i915_gem_request *cursor;
>   	int num_elements = 0;
>   
> -	intel_lr_context_pin(request->ctx, request->engine);

I would prefer this step to be a separate patch from the
retire/free reorg.

Also, in my testing, another patch was required before this
step to make it work. Basically:

From 848df37ef90dfb7c7adbf59d155c39d8e4521b8e Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Fri, 15 Apr 2016 16:13:26 +0100
Subject: [PATCH 6/7] drm/i915: Store LRC hardware id in the context

This way in the following patch we can disconnect requests
from contexts.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  | 3 +++
 drivers/gpu/drm/i915/intel_lrc.c | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 79c23108cc35..d85c03425c54 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2334,6 +2334,9 @@ struct drm_i915_gem_request {
 
        /** Execlists no. of times this request has been sent to the ELSP */
        int elsp_submitted;
+
+       /** Execlists context hardware id. */
+       unsigned ctx_hw_id;
 };
 
 struct drm_i915_gem_request * __must_check
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8f8da1b01458..871f399f84f6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -475,7 +475,7 @@ execlists_check_remove_request(struct intel_engine_cs *engine, u32 request_id)
        if (!head_req)
                return 0;
 
-       if (unlikely(head_req->ctx->hw_id != request_id))
+       if (unlikely(head_req->ctx_hw_id != request_id))
                return 0;
 
        WARN(head_req->elsp_submitted == 0, "Never submitted head request\n");
@@ -614,6 +614,7 @@ static void execlists_context_queue(struct drm_i915_gem_request *request)
        }
 
        list_add_tail(&request->execlist_link, &engine->execlist_queue);
+       request->ctx_hw_id = request->ctx->hw_id;
        if (num_elements == 0)
                execlists_context_unqueue(engine);
 
-- 
1.9.1

Otherwise I was hitting requests with unpinned LRCs on the
execlists queue so check_remove was failing to spot progress.

I had a theory on why that was happening but I am not sure
about it any longer. Will try to stress it a bit with your
series and see if I can hit the problem.

But either way I think moving the extra execlist pin/unpin
removal out of this patch, into a separate one, would be better.

>   	i915_gem_request_reference(request);
>   
>   	spin_lock_bh(&engine->execlist_lock);
> @@ -1006,9 +1005,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *engine)
>   	spin_unlock_bh(&engine->execlist_lock);
>   
>   	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
> -		if (req->pinned_context)
> -			intel_lr_context_unpin(req->pinned_context, engine);
> -
>   		list_del(&req->execlist_link);
>   		i915_gem_request_unreference(req);
>   	}
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index b964c448bc03..565b725cd817 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -7364,7 +7364,7 @@ static void __intel_rps_boost_work(struct work_struct *work)
>   	if (!i915_gem_request_completed(req))
>   		gen6_rps_boost(to_i915(req), NULL, req->emitted_jiffies);
>   
> -	i915_gem_request_unreference__unlocked(req);
> +	i915_gem_request_unreference(req);
>   	kfree(boost);
>   }
>   
> 

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

end of thread, other threads:[~2016-04-19 12:16 UTC | newest]

Thread overview: 40+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-15 11:54 [PATCH 0/3] GuC premature LRC unpin Tvrtko Ursulin
2016-04-15 11:54 ` [PATCH 1/3] drm/i915: Refactor execlists default context pinning Tvrtko Ursulin
2016-04-15 12:16   ` Chris Wilson
2016-04-15 13:21     ` Tvrtko Ursulin
2016-04-15 11:54 ` [PATCH 2/3] drm/i915/guc: Keep the previous context pinned until the next one has been completed Tvrtko Ursulin
2016-04-15 12:12   ` Chris Wilson
2016-04-15 13:16     ` Tvrtko Ursulin
2016-04-15 11:54 ` [PATCH 3/3] DO NOT MERGE: drm/i915: Enable GuC submission Tvrtko Ursulin
2016-04-15 15:24 ` ✗ Fi.CI.BAT: warning for GuC premature LRC unpin Patchwork
2016-04-19  6:49 ` Premature " Chris Wilson
2016-04-19  6:49   ` [PATCH 1/9] drm/i915: Assign every HW context a unique ID Chris Wilson
2016-04-19  8:57     ` Tvrtko Ursulin
2016-04-19  9:04       ` Chris Wilson
2016-04-19  9:20         ` Tvrtko Ursulin
2016-04-19  6:49   ` [PATCH 2/9] drm/i915: Replace the pinned context address with its " Chris Wilson
2016-04-19  9:03     ` Tvrtko Ursulin
2016-04-19  6:49   ` [PATCH 3/9] drm/i915: Refactor execlists default context pinning Chris Wilson
2016-04-19  9:16     ` Tvrtko Ursulin
2016-04-19  9:55       ` [PATCH] " Chris Wilson
2016-04-19  6:49   ` [PATCH 4/9] drm/i915: Remove early l3-remap Chris Wilson
2016-04-19  9:41     ` Tvrtko Ursulin
2016-04-19 10:07       ` [PATCH 1/3] drm/i915: L3 cache remapping is part of context switching Chris Wilson
2016-04-19 10:07         ` [PATCH 2/3] drm/i915: Consolidate L3 remapping LRI Chris Wilson
2016-04-19 10:21           ` Tvrtko Ursulin
2016-04-19 10:07         ` [PATCH 3/3] drm/i915: Remove early l3-remap Chris Wilson
2016-04-19 10:23           ` Tvrtko Ursulin
2016-04-19 10:20         ` [PATCH 1/3] drm/i915: L3 cache remapping is part of context switching Tvrtko Ursulin
2016-04-19  6:49   ` [PATCH 5/9] drm/i915: Rearrange switch_context to load the aliasing ppgtt on first use Chris Wilson
2016-04-19  9:52     ` Tvrtko Ursulin
2016-04-19  6:49   ` [PATCH 6/9] drm/i915: Move context initialisation to first-use Chris Wilson
2016-04-19  9:57     ` Tvrtko Ursulin
2016-04-19 10:15     ` Tvrtko Ursulin
2016-04-19 10:55       ` Chris Wilson
2016-04-19  6:49   ` [PATCH 7/9] drm/i915: Move the magical deferred context allocation into the request Chris Wilson
2016-04-19 10:28     ` Tvrtko Ursulin
2016-04-19  6:49   ` [PATCH 8/9] drm/i915: Track the previous pinned context inside " Chris Wilson
2016-04-19 12:02     ` Tvrtko Ursulin
2016-04-19 12:14       ` Chris Wilson
2016-04-19  6:49   ` [PATCH 9/9] drm/i915: Move releasing of the GEM request from free to retire/cancel Chris Wilson
2016-04-19 12:16     ` Tvrtko Ursulin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.