* [PATCH 01/25] drm/i915: Move verify_wm_state() to heap
@ 2019-02-19 12:21 Chris Wilson
From: Chris Wilson @ 2019-02-19 12:21 UTC (permalink / raw)
  To: intel-gfx

The stack usage exceeded 1024 bytes, prompting warnings on conservative
setups, so move the temporary allocation for the HW readback onto the
heap.
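
As an illustration of the pattern (a sketch with hypothetical names,
not the exact i915 code):

	struct hw_state *hw;

	hw = kzalloc(sizeof(*hw), GFP_KERNEL); /* ~1KiB off the stack */
	if (!hw)
		return;

	read_hw_state(hw); /* HW readback fills the heap buffer instead */
	compare_with_sw_state(hw);

	kfree(hw);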

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_display.c | 48 ++++++++++++++++++----------
 1 file changed, 31 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3b4a6eeb4573..415d8968f2c5 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12227,12 +12227,15 @@ static void verify_wm_state(struct drm_crtc *crtc,
 			    struct drm_crtc_state *new_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
-	struct skl_ddb_allocation hw_ddb, *sw_ddb;
-	struct skl_pipe_wm hw_wm, *sw_wm;
-	struct skl_plane_wm *hw_plane_wm, *sw_plane_wm;
+	struct skl_hw_state {
+		struct skl_ddb_entry ddb_y[I915_MAX_PLANES];
+		struct skl_ddb_entry ddb_uv[I915_MAX_PLANES];
+		struct skl_ddb_allocation ddb;
+		struct skl_pipe_wm wm;
+	} *hw;
+	struct skl_ddb_allocation *sw_ddb;
+	struct skl_pipe_wm *sw_wm;
 	struct skl_ddb_entry *hw_ddb_entry, *sw_ddb_entry;
-	struct skl_ddb_entry hw_ddb_y[I915_MAX_PLANES];
-	struct skl_ddb_entry hw_ddb_uv[I915_MAX_PLANES];
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	const enum pipe pipe = intel_crtc->pipe;
 	int plane, level, max_level = ilk_wm_max_level(dev_priv);
@@ -12240,22 +12243,29 @@ static void verify_wm_state(struct drm_crtc *crtc,
 	if (INTEL_GEN(dev_priv) < 9 || !new_state->active)
 		return;
 
-	skl_pipe_wm_get_hw_state(intel_crtc, &hw_wm);
+	hw = kzalloc(sizeof(*hw), GFP_KERNEL);
+	if (!hw)
+		return;
+
+	skl_pipe_wm_get_hw_state(intel_crtc, &hw->wm);
 	sw_wm = &to_intel_crtc_state(new_state)->wm.skl.optimal;
 
-	skl_pipe_ddb_get_hw_state(intel_crtc, hw_ddb_y, hw_ddb_uv);
+	skl_pipe_ddb_get_hw_state(intel_crtc, hw->ddb_y, hw->ddb_uv);
 
-	skl_ddb_get_hw_state(dev_priv, &hw_ddb);
+	skl_ddb_get_hw_state(dev_priv, &hw->ddb);
 	sw_ddb = &dev_priv->wm.skl_hw.ddb;
 
-	if (INTEL_GEN(dev_priv) >= 11)
-		if (hw_ddb.enabled_slices != sw_ddb->enabled_slices)
-			DRM_ERROR("mismatch in DBUF Slices (expected %u, got %u)\n",
-				  sw_ddb->enabled_slices,
-				  hw_ddb.enabled_slices);
+	if (INTEL_GEN(dev_priv) >= 11 &&
+	    hw->ddb.enabled_slices != sw_ddb->enabled_slices)
+		DRM_ERROR("mismatch in DBUF Slices (expected %u, got %u)\n",
+			  sw_ddb->enabled_slices,
+			  hw->ddb.enabled_slices);
+
 	/* planes */
 	for_each_universal_plane(dev_priv, pipe, plane) {
-		hw_plane_wm = &hw_wm.planes[plane];
+		struct skl_plane_wm *hw_plane_wm, *sw_plane_wm;
+
+		hw_plane_wm = &hw->wm.planes[plane];
 		sw_plane_wm = &sw_wm->planes[plane];
 
 		/* Watermarks */
@@ -12287,7 +12297,7 @@ static void verify_wm_state(struct drm_crtc *crtc,
 		}
 
 		/* DDB */
-		hw_ddb_entry = &hw_ddb_y[plane];
+		hw_ddb_entry = &hw->ddb_y[plane];
 		sw_ddb_entry = &to_intel_crtc_state(new_state)->wm.skl.plane_ddb_y[plane];
 
 		if (!skl_ddb_entry_equal(hw_ddb_entry, sw_ddb_entry)) {
@@ -12305,7 +12315,9 @@ static void verify_wm_state(struct drm_crtc *crtc,
 	 * once the plane becomes visible, we can skip this check
 	 */
 	if (1) {
-		hw_plane_wm = &hw_wm.planes[PLANE_CURSOR];
+		struct skl_plane_wm *hw_plane_wm, *sw_plane_wm;
+
+		hw_plane_wm = &hw->wm.planes[PLANE_CURSOR];
 		sw_plane_wm = &sw_wm->planes[PLANE_CURSOR];
 
 		/* Watermarks */
@@ -12337,7 +12349,7 @@ static void verify_wm_state(struct drm_crtc *crtc,
 		}
 
 		/* DDB */
-		hw_ddb_entry = &hw_ddb_y[PLANE_CURSOR];
+		hw_ddb_entry = &hw->ddb_y[PLANE_CURSOR];
 		sw_ddb_entry = &to_intel_crtc_state(new_state)->wm.skl.plane_ddb_y[PLANE_CURSOR];
 
 		if (!skl_ddb_entry_equal(hw_ddb_entry, sw_ddb_entry)) {
@@ -12347,6 +12359,8 @@ static void verify_wm_state(struct drm_crtc *crtc,
 				  hw_ddb_entry->start, hw_ddb_entry->end);
 		}
 	}
+
+	kfree(hw);
 }
 
 static void
-- 
2.20.1


* [PATCH 02/25] drm/i915: Use time based guilty context banning
From: Chris Wilson @ 2019-02-19 12:21 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

Currently, we accumulate a ban score each time a context hangs the GPU,
offset against the number of requests it submits, and if that score
exceeds a certain threshold, we ban that context from submitting any
more requests (cancelling any work in flight). In contrast, we use a
simple timer on the file: if we see more than 9 hangs less than 60s
apart in total across all of its contexts, we ban the client from
creating any more contexts. This leads to a confusing situation where
the file may be banned before the context, so let's use a simple timer
scheme for each.

If the context submits 3 hanging requests within a 120s period, declare
it forbidden to ever send more requests.

This has the advantage of not being easy to repair by simply sending
empty requests, but has the disadvantage that if the context is idle
then it is forgiven. However, if the context is idle, it is not
disrupting the system, but a hog can evade the request counting and
cause much more severe disruption to the system.

Updating ban_score from request retirement is dubious, as retirement is
purposely not in sync with request submission (i.e. we try to batch
retirement to reduce overhead and avoid latency on submission), which
leads to surprising situations where we can forgive a hang immediately
because a backlog of requests from before the hang is retired
afterwards.
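
The scheme boils down to remembering the timestamps of the last couple
of hangs per context and banning on the next hang inside the window; a
minimal sketch of the idea outside the driver (hypothetical names,
seconds instead of jiffies):

	#define MAX_FAST_HANGS 2	/* ban on the 3rd hang in the window */
	#define FAST_HANG_PERIOD 120	/* seconds */

	struct ctx {
		time_t hang_timestamp[MAX_FAST_HANGS];
	};

	/* Returns true if this hang makes 3 within FAST_HANG_PERIOD. */
	static bool hang_bans_ctx(struct ctx *ctx, time_t now)
	{
		time_t prev = ctx->hang_timestamp[0];
		int i;

		/* Slide the window: drop the oldest stamp, record now. */
		for (i = 0; i < MAX_FAST_HANGS - 1; i++)
			ctx->hang_timestamp[i] = ctx->hang_timestamp[i + 1];
		ctx->hang_timestamp[i] = now;

		return now < prev + FAST_HANG_PERIOD;
	}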

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c |  4 ++++
 drivers/gpu/drm/i915/i915_gem_context.h |  9 +++----
 drivers/gpu/drm/i915/i915_gpu_error.c   | 31 +++++++------------------
 drivers/gpu/drm/i915/i915_gpu_error.h   |  3 ---
 drivers/gpu/drm/i915/i915_request.c     |  2 --
 drivers/gpu/drm/i915/i915_reset.c       | 29 +++++++++++++----------
 6 files changed, 34 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index da21c843fed8..7541c6f961b3 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -355,6 +355,7 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	struct i915_gem_context *ctx;
 	unsigned int n;
 	int ret;
+	int i;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (ctx == NULL)
@@ -407,6 +408,9 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	ctx->desc_template =
 		default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt);
 
+	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
+		ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
+
 	return ctx;
 
 err_pid:
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 071108d34ae0..dc6c58f38cfa 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -209,10 +209,11 @@ struct i915_gem_context {
 	 */
 	atomic_t active_count;
 
-#define CONTEXT_SCORE_GUILTY		10
-#define CONTEXT_SCORE_BAN_THRESHOLD	40
-	/** ban_score: Accumulated score of all hangs caused by this context. */
-	atomic_t ban_score;
+	/**
+	 * @hang_timestamp: The last time(s) this context caused a GPU hang
+	 */
+	unsigned long hang_timestamp[2];
+#define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */
 
 	/** remap_slice: Bitmask of cache lines that need remapping */
 	u8 remap_slice;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 9a65341fec09..3f6eddb6f6de 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -434,11 +434,6 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m,
 			   ee->instdone.row[slice][subslice]);
 }
 
-static const char *bannable(const struct drm_i915_error_context *ctx)
-{
-	return ctx->bannable ? "" : " (unbannable)";
-}
-
 static void error_print_request(struct drm_i915_error_state_buf *m,
 				const char *prefix,
 				const struct drm_i915_error_request *erq,
@@ -447,9 +442,8 @@ static void error_print_request(struct drm_i915_error_state_buf *m,
 	if (!erq->seqno)
 		return;
 
-	err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x%s%s, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n",
-		   prefix, erq->pid, erq->ban_score,
-		   erq->context, erq->seqno,
+	err_printf(m, "%s pid %d, seqno %8x:%08x%s%s, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n",
+		   prefix, erq->pid, erq->context, erq->seqno,
 		   test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
 			    &erq->flags) ? "!" : "",
 		   test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
@@ -463,10 +457,9 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
 				const char *header,
 				const struct drm_i915_error_context *ctx)
 {
-	err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d%s guilty %d active %d\n",
+	err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, guilty %d active %d\n",
 		   header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id,
-		   ctx->sched_attr.priority, ctx->ban_score, bannable(ctx),
-		   ctx->guilty, ctx->active);
+		   ctx->sched_attr.priority, ctx->guilty, ctx->active);
 }
 
 static void error_print_engine(struct drm_i915_error_state_buf *m,
@@ -688,12 +681,10 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 		if (!error->engine[i].context.pid)
 			continue;
 
-		err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n",
+		err_printf(m, "Active process (on ring %s): %s [%d]\n",
 			   engine_name(m->i915, i),
 			   error->engine[i].context.comm,
-			   error->engine[i].context.pid,
-			   error->engine[i].context.ban_score,
-			   bannable(&error->engine[i].context));
+			   error->engine[i].context.pid);
 	}
 	err_printf(m, "Reset count: %u\n", error->reset_count);
 	err_printf(m, "Suspend count: %u\n", error->suspend_count);
@@ -779,13 +770,11 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 		if (obj) {
 			err_puts(m, m->i915->engine[i]->name);
 			if (ee->context.pid)
-				err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d%s)",
+				err_printf(m, " (submitted by %s [%d], ctx %d [%d])",
 					   ee->context.comm,
 					   ee->context.pid,
 					   ee->context.handle,
-					   ee->context.hw_id,
-					   ee->context.ban_score,
-					   bannable(&ee->context));
+					   ee->context.hw_id);
 			err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
 				   upper_32_bits(obj->gtt_offset),
 				   lower_32_bits(obj->gtt_offset));
@@ -1301,8 +1290,6 @@ static void record_request(struct i915_request *request,
 	erq->flags = request->fence.flags;
 	erq->context = ctx->hw_id;
 	erq->sched_attr = request->sched.attr;
-	erq->ban_score = atomic_read(&ctx->ban_score);
-	erq->seqno = request->global_seqno;
 	erq->jiffies = request->emitted_jiffies;
 	erq->start = i915_ggtt_offset(request->ring->vma);
 	erq->head = request->head;
@@ -1396,8 +1383,6 @@ static void record_context(struct drm_i915_error_context *e,
 	e->handle = ctx->user_handle;
 	e->hw_id = ctx->hw_id;
 	e->sched_attr = ctx->sched;
-	e->ban_score = atomic_read(&ctx->ban_score);
-	e->bannable = i915_gem_context_is_bannable(ctx);
 	e->guilty = atomic_read(&ctx->guilty_count);
 	e->active = atomic_read(&ctx->active_count);
 }
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index afa3adb28f02..94eaf8ab9051 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -122,10 +122,8 @@ struct i915_gpu_state {
 			pid_t pid;
 			u32 handle;
 			u32 hw_id;
-			int ban_score;
 			int active;
 			int guilty;
-			bool bannable;
 			struct i915_sched_attr sched_attr;
 		} context;
 
@@ -149,7 +147,6 @@ struct i915_gpu_state {
 			long jiffies;
 			pid_t pid;
 			u32 context;
-			int ban_score;
 			u32 seqno;
 			u32 start;
 			u32 head;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 5ab4e1c01618..a61e3a4fc9dc 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -290,8 +290,6 @@ static void i915_request_retire(struct i915_request *request)
 
 	i915_request_remove_from_client(request);
 
-	/* Retirement decays the ban score as it is a sign of ctx progress */
-	atomic_dec_if_positive(&request->gem_context->ban_score);
 	intel_context_unpin(request->hw_context);
 
 	__retire_engine_upto(request->engine, request);
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index 4df4c674223d..732f763a3999 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -59,24 +59,29 @@ static void client_mark_guilty(struct drm_i915_file_private *file_priv,
 
 static bool context_mark_guilty(struct i915_gem_context *ctx)
 {
-	unsigned int score;
-	bool banned, bannable;
+	unsigned long prev_hang;
+	bool banned;
+	int i;
 
 	atomic_inc(&ctx->guilty_count);
 
-	bannable = i915_gem_context_is_bannable(ctx);
-	score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score);
-	banned = (!i915_gem_context_is_recoverable(ctx) ||
-		  score >= CONTEXT_SCORE_BAN_THRESHOLD);
-
-	/* Cool contexts don't accumulate client ban score */
-	if (!bannable)
+	/* Cool contexts are too cool to be banned! (Used for reset testing.) */
+	if (!i915_gem_context_is_bannable(ctx))
 		return false;
 
+	/* Record the timestamp for the last N hangs */
+	prev_hang = ctx->hang_timestamp[0];
+	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp) - 1; i++)
+		ctx->hang_timestamp[i] = ctx->hang_timestamp[i + 1];
+	ctx->hang_timestamp[i] = jiffies;
+
+	/* If we have hung N+1 times in rapid succession, we ban the context! */
+	banned = !i915_gem_context_is_recoverable(ctx);
+	if (time_before(jiffies, prev_hang + CONTEXT_FAST_HANG_JIFFIES))
+		banned = true;
 	if (banned) {
-		DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n",
-				 ctx->name, atomic_read(&ctx->guilty_count),
-				 score);
+		DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n",
+				 ctx->name, atomic_read(&ctx->guilty_count));
 		i915_gem_context_set_banned(ctx);
 	}
 
-- 
2.20.1


* [PATCH 03/25] drm/i915: Prevent user context creation while wedged
From: Chris Wilson @ 2019-02-19 12:21 UTC (permalink / raw)
  To: intel-gfx

Introduce a new ABI method for detecting a wedged driver by reporting
-EIO from DRM_IOCTL_I915_GEM_CONTEXT_CREATE.
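
From userspace the new failure mode can be probed via the existing
uapi; a rough sketch (error handling trimmed):

	#include <errno.h>
	#include <stdbool.h>
	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	static bool driver_is_wedged(int fd)
	{
		struct drm_i915_gem_context_create create = {};
		struct drm_i915_gem_context_destroy destroy = {};

		if (ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create) == 0) {
			/* Not wedged; destroy the probe context again. */
			destroy.ctx_id = create.ctx_id;
			ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
			return false;
		}

		return errno == EIO;
	}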

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 7541c6f961b3..7337aa01c361 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -802,18 +802,23 @@ static bool client_is_banned(struct drm_i915_file_private *file_priv)
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct drm_i915_gem_context_create *args = data;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct i915_gem_context *ctx;
 	int ret;
 
-	if (!DRIVER_CAPS(dev_priv)->has_logical_contexts)
+	if (!DRIVER_CAPS(i915)->has_logical_contexts)
 		return -ENODEV;
 
 	if (args->pad != 0)
 		return -EINVAL;
 
+	if (i915_terminally_wedged(&i915->gpu_error)) {
+		DRM_DEBUG("driver is wedged; banning new ctx!\n");
+		return -EIO;
+	}
+
 	if (client_is_banned(file_priv)) {
 		DRM_DEBUG("client %s[%d] banned from creating ctx\n",
 			  current->comm,
@@ -826,7 +831,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		return ret;
 
-	ctx = i915_gem_create_context(dev_priv, file_priv);
+	ctx = i915_gem_create_context(i915, file_priv);
 	mutex_unlock(&dev->struct_mutex);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
-- 
2.20.1


* [PATCH 04/25] drm/i915: Avoid reset lock in writing fence registers
From: Chris Wilson @ 2019-02-19 12:21 UTC (permalink / raw)
  To: intel-gfx

The idea of taking the reset lock around writing the fence register was
to serialise against the mmio write we also perform during the reset,
where those registers get clobbered. However, the lock is overkill, as
write tearing between reset and fence_update() is harmless; the final
value of the fence register is the same. A race between revoke_fences()
and fence_update() is also harmless at this point, as on the fault path
(where serialisation is necessary) we acquire the reset lock in the
upper layer to coordinate ourselves.

The danger of acquiring the reset lock again in fence_update() is that
we may recurse from the shrinker along the i915_gem_fault() path.

<4> [125.739646] ============================================
<4> [125.739652] WARNING: possible recursive locking detected
<4> [125.739659] 5.0.0-rc6-ga6e4cbf00557-drmtip_223+ #1 Tainted: G     U
<4> [125.739666] --------------------------------------------
<4> [125.739672] gem_mmap_gtt/1017 is trying to acquire lock:
<4> [125.739679] 00000000a730190a (&dev_priv->gpu_error.reset_backoff_srcu){+.+.}, at: i915_reset_trylock+0x0/0x310 [i915]
<4> [125.739848]
but task is already holding lock:
<4> [125.739854] 00000000a730190a (&dev_priv->gpu_error.reset_backoff_srcu){+.+.}, at: i915_reset_trylock+0x192/0x310 [i915]
<4> [125.739918]
other info that might help us debug this:
<4> [125.739925]  Possible unsafe locking scenario:

<4> [125.739930]        CPU0
<4> [125.739934]        ----
<4> [125.739937]   lock(&dev_priv->gpu_error.reset_backoff_srcu);
<4> [125.739944]   lock(&dev_priv->gpu_error.reset_backoff_srcu);
<4> [125.739950]
 *** DEADLOCK ***

<4> [125.739958]  May be due to missing lock nesting notation

<4> [125.739966] 5 locks held by gem_mmap_gtt/1017:
<4> [125.739972]  #0: 00000000471f682c (&mm->mmap_sem){++++}, at: __do_page_fault+0x133/0x500
<4> [125.739987]  #1: 0000000026542685 (&dev->struct_mutex){+.+.}, at: i915_gem_fault+0x1f6/0x860 [i915]
<4> [125.740061]  #2: 00000000a730190a (&dev_priv->gpu_error.reset_backoff_srcu){+.+.}, at: i915_reset_trylock+0x192/0x310 [i915]
<4> [125.740126]  #3: 00000000c828eb4f (fs_reclaim){+.+.}, at: fs_reclaim_acquire.part.25+0x0/0x30
<4> [125.740140]  #4: 000000002d360d65 (shrinker_rwsem){++++}, at: shrink_slab+0x1cb/0x2c0
<4> [125.740151]
stack backtrace:
<4> [125.740159] CPU: 1 PID: 1017 Comm: gem_mmap_gtt Tainted: G     U            5.0.0-rc6-ga6e4cbf00557-drmtip_223+ #1
<4> [125.740170] Hardware name: Dell Inc.                 OptiPlex 745                 /0GW726, BIOS 2.3.1  05/21/2007
<4> [125.740180] Call Trace:
<4> [125.740189]  dump_stack+0x67/0x9b
<4> [125.740199]  __lock_acquire+0xc75/0x1b00
<4> [125.740209]  ? arch_tlb_finish_mmu+0x2a/0xa0
<4> [125.740216]  ? tlb_finish_mmu+0x1a/0x30
<4> [125.740222]  ? zap_page_range_single+0xe2/0x130
<4> [125.740230]  ? lock_acquire+0xa6/0x1c0
<4> [125.740237]  lock_acquire+0xa6/0x1c0
<4> [125.740296]  ? i915_clear_error_registers+0x280/0x280 [i915]
<4> [125.740357]  i915_reset_trylock+0x44/0x310 [i915]
<4> [125.740417]  ? i915_clear_error_registers+0x280/0x280 [i915]
<4> [125.740426]  ? lockdep_hardirqs_on+0xe0/0x1b0
<4> [125.740434]  ? _raw_spin_unlock_irqrestore+0x39/0x60
<4> [125.740499]  fence_update+0x218/0x470 [i915]
<4> [125.740571]  i915_vma_unbind+0xa6/0x550 [i915]
<4> [125.740640]  i915_gem_object_unbind+0xfa/0x190 [i915]
<4> [125.740711]  i915_gem_shrink+0x2dc/0x590 [i915]
<4> [125.740722]  ? ___preempt_schedule+0x16/0x18
<4> [125.740792]  ? i915_gem_shrinker_scan+0xc9/0x130 [i915]
<4> [125.740861]  i915_gem_shrinker_scan+0xc9/0x130 [i915]
<4> [125.740870]  do_shrink_slab+0x143/0x3f0
<4> [125.740878]  shrink_slab+0x228/0x2c0
<4> [125.740886]  shrink_node+0x167/0x450
<4> [125.740894]  do_try_to_free_pages+0xc4/0x340
<4> [125.740902]  try_to_free_pages+0xdc/0x2e0
<4> [125.740911]  __alloc_pages_nodemask+0x662/0x1110
<4> [125.740921]  ? reacquire_held_locks+0xb5/0x1b0
<4> [125.740928]  ? reacquire_held_locks+0xb5/0x1b0
<4> [125.740986]  ? i915_reset_trylock+0x192/0x310 [i915]
<4> [125.741045]  ? i915_memcpy_init_early+0x30/0x30 [i915]
<4> [125.741054]  pte_alloc_one+0x12/0x70
<4> [125.741060]  __pte_alloc+0x11/0xf0
<4> [125.741067]  apply_to_page_range+0x37e/0x440
<4> [125.741127]  remap_io_mapping+0x6c/0x100 [i915]
<4> [125.741196]  i915_gem_fault+0x5a9/0x860 [i915]
<4> [125.741204]  ? ptlock_alloc+0x15/0x30
<4> [125.741212]  __do_fault+0x2c/0xb0
<4> [125.741218]  __handle_mm_fault+0x8ee/0xfa0
<4> [125.741227]  handle_mm_fault+0x196/0x3a0
<4> [125.741235]  __do_page_fault+0x246/0x500
<4> [125.741243]  ? page_fault+0x8/0x30
<4> [125.741250]  page_fault+0x1e/0x30
<4> [125.741256] RIP: 0033:0x55d0cc456e12
<4> [125.741264] Code: b0 df ff ff 89 c2 8b 85 70 df ff ff 01 c2 8b 85 70 df ff ff 48 98 48 8d 0c 85 00 00 00 00 48 8b 85 e0 df ff ff 48 01 c8 f7 d2 <89> 10 83 85 70 df ff ff 01 81 bd 70 df ff ff ff 03 00 00 7e be 48
<4> [125.741280] RSP: 002b:00007ffc1bab7ab0 EFLAGS: 00010206
<4> [125.741287] RAX: 00007fc787cb6000 RBX: 0000000000000000 RCX: 0000000000000000
<4> [125.741295] RDX: 00000000ffffffff RSI: 0000000000005401 RDI: 0000000000000002
<4> [125.741303] RBP: 00007ffc1bab9b70 R08: 00007ffc1bab7920 R09: 000000000000001b
<4> [125.741310] R10: 7165722074736554 R11: 0000000000000246 R12: 000055d0cc454a80
<4> [125.741318] R13: 00007ffc1bab9f60 R14: 0000000000000000 R15: 0000000000000000

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_fence_reg.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 1ec1417cf8b4..65624b8e4d15 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -270,24 +270,16 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 		return 0;
 	}
 
-	ret = i915_reset_trylock(fence->i915);
-	if (ret < 0)
-		goto out_rpm;
-
+	WRITE_ONCE(fence->vma, vma);
 	fence_write(fence, vma);
-	fence->vma = vma;
 
 	if (vma) {
 		vma->fence = fence;
 		list_move_tail(&fence->link, &fence->i915->mm.fence_list);
 	}
 
-	i915_reset_unlock(fence->i915, ret);
-	ret = 0;
-
-out_rpm:
 	intel_runtime_pm_put(fence->i915, wakeref);
-	return ret;
+	return 0;
 }
 
 /**
-- 
2.20.1


* [PATCH 05/25] drm/i915: Reorder struct_mutex-vs-reset_lock in i915_gem_fault()
From: Chris Wilson @ 2019-02-19 12:21 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

Annoyingly, struct_mutex was not entirely eliminated from the reset
pathway; for reasons of its own, intel_display_resume() requires
struct_mutex to prepare the planes it already captured. To avoid the
immediate problem of a deadlock between struct_mutex and the reset
srcu, we have to acquire the reset_lock before struct_mutex in
i915_gem_fault(). Now any wait underneath struct_mutex will result in
us having to forcibly reset all in-flight rendering, which is less
than ideal.
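
The resulting lock order in i915_gem_fault(), summarising the diff
below (arguments elided):

	intel_runtime_pm_get();
	i915_reset_trylock();			/* reset srcu first, */
	i915_mutex_lock_interruptible();	/* ...then struct_mutex */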

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b421bc7a2e26..b3129cb0a04d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1834,9 +1834,15 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 
 	wakeref = intel_runtime_pm_get(dev_priv);
 
+	srcu = i915_reset_trylock(dev_priv);
+	if (srcu < 0) {
+		ret = srcu;
+		goto err_rpm;
+	}
+
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
-		goto err_rpm;
+		goto err_reset;
 
 	/* Access to snoopable pages through the GTT is incoherent. */
 	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
@@ -1885,12 +1891,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 	if (ret)
 		goto err_unpin;
 
-	srcu = i915_reset_trylock(dev_priv);
-	if (srcu < 0) {
-		ret = srcu;
-		goto err_fence;
-	}
-
 	/* Finally, remap it using the new GTT offset */
 	ret = remap_io_mapping(area,
 			       area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
@@ -1898,7 +1898,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 			       min_t(u64, vma->size, area->vm_end - area->vm_start),
 			       &ggtt->iomap);
 	if (ret)
-		goto err_reset;
+		goto err_fence;
 
 	/* Mark as being mmapped into userspace for later revocation */
 	assert_rpm_wakelock_held(dev_priv);
@@ -1908,14 +1908,14 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 
 	i915_vma_set_ggtt_write(vma);
 
-err_reset:
-	i915_reset_unlock(dev_priv, srcu);
 err_fence:
 	i915_vma_unpin_fence(vma);
 err_unpin:
 	__i915_vma_unpin(vma);
 err_unlock:
 	mutex_unlock(&dev->struct_mutex);
+err_reset:
+	i915_reset_unlock(dev_priv, srcu);
 err_rpm:
 	intel_runtime_pm_put(dev_priv, wakeref);
 	i915_gem_object_unpin_pages(obj);
-- 
2.20.1


* [PATCH 06/25] drm/i915: Trim i915_do_reset() to minimum delays
From: Chris Wilson @ 2019-02-19 12:21 UTC (permalink / raw)
  To: intel-gfx

Remove the "safety-factor" in our udelays for i915_do_reset(). If we are
told to hold the line high for 20us, do it only for 20us plus the tiny
bit of udelay latency.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_reset.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index 732f763a3999..358ab1d51570 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -167,12 +167,12 @@ static int i915_do_reset(struct drm_i915_private *i915,
 
 	/* Assert reset for at least 20 usec, and wait for acknowledgement. */
 	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
-	udelay(50);
+	udelay(20);
 	err = wait_for_atomic(i915_in_reset(pdev), 50);
 
 	/* Clear the reset request. */
 	pci_write_config_byte(pdev, I915_GDRST, 0);
-	udelay(50);
+	udelay(20);
 	if (!err)
 		err = wait_for_atomic(!i915_in_reset(pdev), 50);
 
-- 
2.20.1


* [PATCH 07/25] drm/i915: Trim delays for wedging
From: Chris Wilson @ 2019-02-19 12:21 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

CI still reports the occasional multi-second delay for resets, in
particular along the wedge+recovery paths. As the likely, and unbounded,
delay here is from sync_rcu, use the expedited variant instead.

Testcase: igt/gem_eio/unwedge-stress
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_reset.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index 358ab1d51570..9f6da5e4b007 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -835,7 +835,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915)
 	 * either this call here to intel_engine_write_global_seqno, or the one
 	 * in nop_submit_request.
 	 */
-	synchronize_rcu();
+	synchronize_rcu_expedited();
 
 	/* Mark all executing requests as skipped */
 	for_each_engine(engine, i915, id)
-- 
2.20.1


* [PATCH 08/25] drm/i915/pmu: Always sample an active ringbuffer
From: Chris Wilson @ 2019-02-19 12:21 UTC (permalink / raw)
  To: intel-gfx

As we no longer have a precise indication of requests queued to an
engine, make no presumptions and just sample the ring registers to see
if the engine is busy.
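
In outline, the per-engine sample reduces to a couple of register
reads (a condensed restatement of the logic in the diff below):

	val = I915_READ_FW(RING_CTL(engine->mmio_base));
	if (val == 0)	/* powerwell off => engine idle, sample 0 */
		continue;

	/* A ring waiting on a semaphore/event still counts as busy. */
	busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
	if (!busy)
		busy = !(I915_READ_FW(RING_MI_MODE(engine->mmio_base)) &
			 MODE_IDLE);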

v2: Report busy while the ring is idling on a semaphore/event.
v3: Give the struct a name!
v4: Always 0 outside the powerwell; trusting the powerwell is
accurate enough for our sampling pmu.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_pmu.c         | 60 ++++++++++---------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
 2 files changed, 24 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 13d70b90dd0f..21adad72bd86 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -148,14 +148,6 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
 	spin_unlock_irq(&i915->pmu.lock);
 }
 
-static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
-{
-	if (!fw)
-		intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
-
-	return true;
-}
-
 static void
 add_sample(struct i915_pmu_sample *sample, u32 val)
 {
@@ -168,49 +160,43 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 	intel_wakeref_t wakeref;
-	bool fw = false;
 
 	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
 		return;
 
-	if (!dev_priv->gt.awake)
-		return;
-
-	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
+	wakeref = 0;
+	if (READ_ONCE(dev_priv->gt.awake))
+		wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
 	if (!wakeref)
 		return;
 
 	for_each_engine(engine, dev_priv, id) {
-		u32 current_seqno = intel_engine_get_seqno(engine);
-		u32 last_seqno = intel_engine_last_submit(engine);
+		struct intel_engine_pmu *pmu = &engine->pmu;
+		bool busy;
 		u32 val;
 
-		val = !i915_seqno_passed(current_seqno, last_seqno);
-
-		if (val)
-			add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
-				   period_ns);
-
-		if (val && (engine->pmu.enable &
-		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
-			fw = grab_forcewake(dev_priv, fw);
-
-			val = I915_READ_FW(RING_CTL(engine->mmio_base));
-		} else {
-			val = 0;
-		}
+		val = I915_READ_FW(RING_CTL(engine->mmio_base));
+		if (val == 0) /* powerwell off => engine idle */
+			continue;
 
 		if (val & RING_WAIT)
-			add_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
-				   period_ns);
-
+			add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
 		if (val & RING_WAIT_SEMAPHORE)
-			add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
-				   period_ns);
-	}
+			add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
 
-	if (fw)
-		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+		/*
+		 * MI_MODE reports IDLE if the ring is waiting, but we regard
+		 * this as being busy instead, as the engine is busy with the
+		 * user request.
+		 */
+		busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
+		if (!busy) {
+			val = I915_READ_FW(RING_MI_MODE(engine->mmio_base));
+			busy = !(val & MODE_IDLE);
+		}
+		if (busy)
+			add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
+	}
 
 	intel_runtime_pm_put(dev_priv, wakeref);
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 710ffb221775..5d45ad4ecca9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -392,7 +392,7 @@ struct intel_engine_cs {
 		bool irq_armed;
 	} breadcrumbs;
 
-	struct {
+	struct intel_engine_pmu {
 		/**
 		 * @enable: Bitmask of enable sample events on this engine.
 		 *
-- 
2.20.1


* [PATCH 09/25] drm/i915: Replace global_seqno with a hangcheck heartbeat seqno
From: Chris Wilson @ 2019-02-19 12:21 UTC (permalink / raw)
  To: intel-gfx

To determine whether an engine is 'stuck', we simply check whether or
not it is still on the same seqno for several seconds. To keep this
simple mechanism intact over the loss of a global seqno, we can simply
add a new global heartbeat seqno instead. As we cannot know the sequence
in which requests will then be completed, we use a primitive random
number generator instead (with a cycle long enough to not matter over an
interval of a few thousand requests between hangcheck samples).
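
The generator here is the kernel's next_pseudo_random32(), a linear
congruential generator with full 2^32 period; each breadcrumb writes
the next value into the HWSP, as sketched here (constants as in the
kernel helper):

	static inline u32 next_pseudo_random32(u32 seed)
	{
		return seed * 1664525 + 1013904223; /* full-period LCG */
	}

	/* Advance the expected heartbeat value for the next breadcrumb. */
	static u32 next_hangcheck_seqno(u32 *state)
	{
		*state = next_pseudo_random32(*state);
		return *state;
	}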

The alternative to using a dedicated seqno on every request is to issue
a heartbeat request and query its progress through the system. Sadly
this requires us to reduce the scope of struct_mutex so that we can
issue requests without requiring that BKL.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |  7 ++---
 drivers/gpu/drm/i915/intel_engine_cs.c  |  5 ++--
 drivers/gpu/drm/i915/intel_hangcheck.c  |  6 ++---
 drivers/gpu/drm/i915/intel_lrc.c        | 15 +++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.c | 36 +++++++++++++++++++++++--
 drivers/gpu/drm/i915/intel_ringbuffer.h | 19 ++++++++++++-
 6 files changed, 77 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 2aeea977283f..279ee54edcad 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1295,7 +1295,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	with_intel_runtime_pm(dev_priv, wakeref) {
 		for_each_engine(engine, dev_priv, id) {
 			acthd[id] = intel_engine_get_active_head(engine);
-			seqno[id] = intel_engine_get_seqno(engine);
+			seqno[id] = intel_engine_get_hangcheck_seqno(engine);
 		}
 
 		intel_engine_get_instdone(dev_priv->engine[RCS], &instdone);
@@ -1315,8 +1315,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	for_each_engine(engine, dev_priv, id) {
 		seq_printf(m, "%s:\n", engine->name);
 		seq_printf(m, "\tseqno = %x [current %x, last %x], %dms ago\n",
-			   engine->hangcheck.seqno, seqno[id],
-			   intel_engine_last_submit(engine),
+			   engine->hangcheck.last_seqno,
+			   seqno[id],
+			   engine->hangcheck.next_seqno,
 			   jiffies_to_msecs(jiffies -
 					    engine->hangcheck.action_timestamp));
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 2547e2e51db8..26cfb24caa5f 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1499,10 +1499,11 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	if (i915_terminally_wedged(&engine->i915->gpu_error))
 		drm_printf(m, "*** WEDGED ***\n");
 
-	drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n",
+	drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x/%x [%d ms]\n",
 		   intel_engine_get_seqno(engine),
 		   intel_engine_last_submit(engine),
-		   engine->hangcheck.seqno,
+		   engine->hangcheck.last_seqno,
+		   engine->hangcheck.next_seqno,
 		   jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
 	drm_printf(m, "\tReset count: %d (global %d)\n",
 		   i915_reset_engine_count(error, engine),
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index a219c796e56d..e04b2560369e 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -133,21 +133,21 @@ static void hangcheck_load_sample(struct intel_engine_cs *engine,
 				  struct hangcheck *hc)
 {
 	hc->acthd = intel_engine_get_active_head(engine);
-	hc->seqno = intel_engine_get_seqno(engine);
+	hc->seqno = intel_engine_get_hangcheck_seqno(engine);
 }
 
 static void hangcheck_store_sample(struct intel_engine_cs *engine,
 				   const struct hangcheck *hc)
 {
 	engine->hangcheck.acthd = hc->acthd;
-	engine->hangcheck.seqno = hc->seqno;
+	engine->hangcheck.last_seqno = hc->seqno;
 }
 
 static enum intel_engine_hangcheck_action
 hangcheck_get_action(struct intel_engine_cs *engine,
 		     const struct hangcheck *hc)
 {
-	if (engine->hangcheck.seqno != hc->seqno)
+	if (engine->hangcheck.last_seqno != hc->seqno)
 		return ENGINE_ACTIVE_SEQNO;
 
 	if (intel_engine_is_idle(engine))
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 34a0866959c5..c134b3ca2df3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -178,6 +178,12 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
 		I915_GEM_HWS_INDEX_ADDR);
 }
 
+static inline u32 intel_hws_hangcheck_address(struct intel_engine_cs *engine)
+{
+	return (i915_ggtt_offset(engine->status_page.vma) +
+		I915_GEM_HWS_HANGCHECK_ADDR);
+}
+
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 {
 	return rb_entry(rb, struct i915_priolist, node);
@@ -2206,6 +2212,10 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 				  request->fence.seqno,
 				  request->timeline->hwsp_offset);
 
+	cs = gen8_emit_ggtt_write(cs,
+				  intel_engine_next_hangcheck_seqno(request->engine),
+				  intel_hws_hangcheck_address(request->engine));
+
 	cs = gen8_emit_ggtt_write(cs,
 				  request->global_seqno,
 				  intel_hws_seqno_address(request->engine));
@@ -2230,6 +2240,11 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 				      PIPE_CONTROL_FLUSH_ENABLE |
 				      PIPE_CONTROL_CS_STALL);
 
+	cs = gen8_emit_ggtt_write_rcs(cs,
+				      intel_engine_next_hangcheck_seqno(request->engine),
+				      intel_hws_hangcheck_address(request->engine),
+				      PIPE_CONTROL_CS_STALL);
+
 	cs = gen8_emit_ggtt_write_rcs(cs,
 				      request->global_seqno,
 				      intel_hws_seqno_address(request->engine),
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 7f841dba87b3..870184bbd169 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -43,6 +43,12 @@
  */
 #define LEGACY_REQUEST_SIZE 200
 
+static inline u32 hws_hangcheck_address(struct intel_engine_cs *engine)
+{
+	return (i915_ggtt_offset(engine->status_page.vma) +
+		I915_GEM_HWS_HANGCHECK_ADDR);
+}
+
 static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
 {
 	return (i915_ggtt_offset(engine->status_page.vma) +
@@ -316,6 +322,11 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT;
 	*cs++ = rq->fence.seqno;
 
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = PIPE_CONTROL_QW_WRITE;
+	*cs++ = hws_hangcheck_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
+	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
 	*cs++ = GFX_OP_PIPE_CONTROL(4);
 	*cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
 	*cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
@@ -422,6 +433,11 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = rq->timeline->hwsp_offset;
 	*cs++ = rq->fence.seqno;
 
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
+	*cs++ = hws_hangcheck_address(rq->engine);
+	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
 	*cs++ = GFX_OP_PIPE_CONTROL(4);
 	*cs++ = (PIPE_CONTROL_QW_WRITE |
 		 PIPE_CONTROL_GLOBAL_GTT_IVB |
@@ -447,12 +463,15 @@ static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
 	*cs++ = rq->fence.seqno;
 
+	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT;
+	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
 	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
 	*cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;
 	*cs++ = rq->global_seqno;
 
 	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
@@ -472,6 +491,10 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
 	*cs++ = rq->fence.seqno;
 
+	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT;
+	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
 	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
 	*cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;
 	*cs++ = rq->global_seqno;
@@ -487,6 +510,7 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = 0;
 
 	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
@@ -930,11 +954,16 @@ static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = I915_GEM_HWS_SEQNO_ADDR;
 	*cs++ = rq->fence.seqno;
 
+	*cs++ = MI_STORE_DWORD_INDEX;
+	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR;
+	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
 	*cs++ = MI_STORE_DWORD_INDEX;
 	*cs++ = I915_GEM_HWS_INDEX_ADDR;
 	*cs++ = rq->global_seqno;
 
 	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
@@ -956,6 +985,10 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = I915_GEM_HWS_SEQNO_ADDR;
 	*cs++ = rq->fence.seqno;
 
+	*cs++ = MI_STORE_DWORD_INDEX;
+	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR;
+	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
 	BUILD_BUG_ON(GEN5_WA_STORES < 1);
 	for (i = 0; i < GEN5_WA_STORES; i++) {
 		*cs++ = MI_STORE_DWORD_INDEX;
@@ -964,7 +997,6 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	}
 
 	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 5d45ad4ecca9..2869aaa9d225 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -6,6 +6,7 @@
 
 #include <linux/hashtable.h>
 #include <linux/irq_work.h>
+#include <linux/random.h>
 #include <linux/seqlock.h>
 
 #include "i915_gem_batch_pool.h"
@@ -119,7 +120,8 @@ struct intel_instdone {
 
 struct intel_engine_hangcheck {
 	u64 acthd;
-	u32 seqno;
+	u32 last_seqno;
+	u32 next_seqno;
 	unsigned long action_timestamp;
 	struct intel_instdone instdone;
 };
@@ -726,6 +728,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
 #define I915_GEM_HWS_INDEX_ADDR		(I915_GEM_HWS_INDEX * sizeof(u32))
 #define I915_GEM_HWS_PREEMPT		0x32
 #define I915_GEM_HWS_PREEMPT_ADDR	(I915_GEM_HWS_PREEMPT * sizeof(u32))
+#define I915_GEM_HWS_HANGCHECK		0x34
+#define I915_GEM_HWS_HANGCHECK_ADDR	(I915_GEM_HWS_HANGCHECK * sizeof(u32))
 #define I915_GEM_HWS_SEQNO		0x40
 #define I915_GEM_HWS_SEQNO_ADDR		(I915_GEM_HWS_SEQNO * sizeof(u32))
 #define I915_GEM_HWS_SCRATCH		0x80
@@ -1086,4 +1090,17 @@ static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
 
 #endif
 
+static inline u32
+intel_engine_next_hangcheck_seqno(struct intel_engine_cs *engine)
+{
+	return engine->hangcheck.next_seqno =
+		next_pseudo_random32(engine->hangcheck.next_seqno);
+}
+
+static inline u32
+intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine)
+{
+	return intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK);
+}
+
 #endif /* _INTEL_RINGBUFFER_H_ */
-- 
2.20.1


* [PATCH 10/25] drm/i915: Remove access to global seqno in the HWSP
From: Chris Wilson @ 2019-02-19 12:22 UTC (permalink / raw)
  To: intel-gfx

Stop accessing the HWSP to read the global seqno, and stop tracking the
mirror in the engine's execution timeline -- it is unused.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gpu_error.c         |  4 --
 drivers/gpu/drm/i915/i915_gpu_error.h         |  3 --
 drivers/gpu/drm/i915/i915_request.c           | 27 +++++--------
 drivers/gpu/drm/i915/i915_reset.c             |  1 -
 drivers/gpu/drm/i915/intel_engine_cs.c        | 14 +------
 drivers/gpu/drm/i915/intel_lrc.c              | 21 +++-------
 drivers/gpu/drm/i915/intel_ringbuffer.c       |  7 +---
 drivers/gpu/drm/i915/intel_ringbuffer.h       | 40 -------------------
 drivers/gpu/drm/i915/selftests/i915_request.c |  3 +-
 drivers/gpu/drm/i915/selftests/mock_engine.c  |  3 --
 10 files changed, 19 insertions(+), 104 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 3f6eddb6f6de..061a767e3bed 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -526,8 +526,6 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
 				   ee->vm_info.pp_dir_base);
 		}
 	}
-	err_printf(m, "  seqno: 0x%08x\n", ee->seqno);
-	err_printf(m, "  last_seqno: 0x%08x\n", ee->last_seqno);
 	err_printf(m, "  ring->head: 0x%08x\n", ee->cpu_ring_head);
 	err_printf(m, "  ring->tail: 0x%08x\n", ee->cpu_ring_tail);
 	err_printf(m, "  hangcheck timestamp: %dms (%lu%s)\n",
@@ -1216,8 +1214,6 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
 
 	ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base));
 	ee->acthd = intel_engine_get_active_head(engine);
-	ee->seqno = intel_engine_get_seqno(engine);
-	ee->last_seqno = intel_engine_last_submit(engine);
 	ee->start = I915_READ_START(engine);
 	ee->head = I915_READ_HEAD(engine);
 	ee->tail = I915_READ_TAIL(engine);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 94eaf8ab9051..19ac102afaff 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -94,8 +94,6 @@ struct i915_gpu_state {
 		u32 cpu_ring_head;
 		u32 cpu_ring_tail;
 
-		u32 last_seqno;
-
 		/* Register state */
 		u32 start;
 		u32 tail;
@@ -108,7 +106,6 @@ struct i915_gpu_state {
 		u32 bbstate;
 		u32 instpm;
 		u32 instps;
-		u32 seqno;
 		u64 bbaddr;
 		u64 acthd;
 		u32 fault_reg;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index a61e3a4fc9dc..c7db71e57af1 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -179,12 +179,11 @@ static void free_capture_list(struct i915_request *request)
 static void __retire_engine_request(struct intel_engine_cs *engine,
 				    struct i915_request *rq)
 {
-	GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d:%d\n",
+	GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n",
 		  __func__, engine->name,
 		  rq->fence.context, rq->fence.seqno,
 		  rq->global_seqno,
-		  hwsp_seqno(rq),
-		  intel_engine_get_seqno(engine));
+		  hwsp_seqno(rq));
 
 	GEM_BUG_ON(!i915_request_completed(rq));
 
@@ -243,12 +242,11 @@ static void i915_request_retire(struct i915_request *request)
 {
 	struct i915_active_request *active, *next;
 
-	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",
+	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n",
 		  request->engine->name,
 		  request->fence.context, request->fence.seqno,
 		  request->global_seqno,
-		  hwsp_seqno(request),
-		  intel_engine_get_seqno(request->engine));
+		  hwsp_seqno(request));
 
 	lockdep_assert_held(&request->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
@@ -305,12 +303,11 @@ void i915_request_retire_upto(struct i915_request *rq)
 	struct intel_ring *ring = rq->ring;
 	struct i915_request *tmp;
 
-	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",
+	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n",
 		  rq->engine->name,
 		  rq->fence.context, rq->fence.seqno,
 		  rq->global_seqno,
-		  hwsp_seqno(rq),
-		  intel_engine_get_seqno(rq->engine));
+		  hwsp_seqno(rq));
 
 	lockdep_assert_held(&rq->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_request_completed(rq));
@@ -354,12 +351,11 @@ void __i915_request_submit(struct i915_request *request)
 	struct intel_engine_cs *engine = request->engine;
 	u32 seqno;
 
-	GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d:%d\n",
+	GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n",
 		  engine->name,
 		  request->fence.context, request->fence.seqno,
 		  engine->timeline.seqno + 1,
-		  hwsp_seqno(request),
-		  intel_engine_get_seqno(engine));
+		  hwsp_seqno(request));
 
 	GEM_BUG_ON(!irqs_disabled());
 	lockdep_assert_held(&engine->timeline.lock);
@@ -371,7 +367,6 @@ void __i915_request_submit(struct i915_request *request)
 
 	seqno = next_global_seqno(&engine->timeline);
 	GEM_BUG_ON(!seqno);
-	GEM_BUG_ON(intel_engine_signaled(engine, seqno));
 
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
@@ -409,12 +404,11 @@ void __i915_request_unsubmit(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
 
-	GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d:%d\n",
+	GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n",
 		  engine->name,
 		  request->fence.context, request->fence.seqno,
 		  request->global_seqno,
-		  hwsp_seqno(request),
-		  intel_engine_get_seqno(engine));
+		  hwsp_seqno(request));
 
 	GEM_BUG_ON(!irqs_disabled());
 	lockdep_assert_held(&engine->timeline.lock);
@@ -425,7 +419,6 @@ void __i915_request_unsubmit(struct i915_request *request)
 	 */
 	GEM_BUG_ON(!request->global_seqno);
 	GEM_BUG_ON(request->global_seqno != engine->timeline.seqno);
-	GEM_BUG_ON(intel_engine_has_completed(engine, request->global_seqno));
 	engine->timeline.seqno--;
 
 	/* We may be recursing from the signal callback of another i915 fence */
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index 9f6da5e4b007..6e5813988637 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -788,7 +788,6 @@ static void nop_submit_request(struct i915_request *request)
 	spin_lock_irqsave(&engine->timeline.lock, flags);
 	__i915_request_submit(request);
 	i915_request_mark_complete(request);
-	intel_engine_write_global_seqno(engine, request->global_seqno);
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 
 	intel_engine_queue_breadcrumbs(engine);
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 26cfb24caa5f..ebff5c768655 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -455,12 +455,6 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
 	return err;
 }
 
-void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno)
-{
-	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
-	GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno);
-}
-
 static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
 {
 	i915_gem_batch_pool_init(&engine->batch_pool, engine);
@@ -1013,10 +1007,6 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 	if (i915_terminally_wedged(&dev_priv->gpu_error))
 		return true;
 
-	/* Any inflight/incomplete requests? */
-	if (!intel_engine_signaled(engine, intel_engine_last_submit(engine)))
-		return false;
-
 	/* Waiting to drain ELSP? */
 	if (READ_ONCE(engine->execlists.active)) {
 		struct tasklet_struct *t = &engine->execlists.tasklet;
@@ -1499,9 +1489,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	if (i915_terminally_wedged(&engine->i915->gpu_error))
 		drm_printf(m, "*** WEDGED ***\n");
 
-	drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x/%x [%d ms]\n",
-		   intel_engine_get_seqno(engine),
-		   intel_engine_last_submit(engine),
+	drm_printf(m, "\tHangcheck %x:%x [%d ms]\n",
 		   engine->hangcheck.last_seqno,
 		   engine->hangcheck.next_seqno,
 		   jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c134b3ca2df3..a820f32f0d70 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -528,13 +528,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 			desc = execlists_update_context(rq);
 			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
 
-			GEM_TRACE("%s in[%d]:  ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",
+			GEM_TRACE("%s in[%d]:  ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n",
 				  engine->name, n,
 				  port[n].context_id, count,
 				  rq->global_seqno,
 				  rq->fence.context, rq->fence.seqno,
 				  hwsp_seqno(rq),
-				  intel_engine_get_seqno(engine),
 				  rq_prio(rq));
 		} else {
 			GEM_BUG_ON(!n);
@@ -840,13 +839,12 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
 	while (num_ports-- && port_isset(port)) {
 		struct i915_request *rq = port_request(port);
 
-		GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d:%d)\n",
+		GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n",
 			  rq->engine->name,
 			  (unsigned int)(port - execlists->port),
 			  rq->global_seqno,
 			  rq->fence.context, rq->fence.seqno,
-			  hwsp_seqno(rq),
-			  intel_engine_get_seqno(rq->engine));
+			  hwsp_seqno(rq));
 
 		GEM_BUG_ON(!execlists->active);
 		execlists_context_schedule_out(rq,
@@ -902,8 +900,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	struct rb_node *rb;
 	unsigned long flags;
 
-	GEM_TRACE("%s current %d\n",
-		  engine->name, intel_engine_get_seqno(engine));
+	GEM_TRACE("%s\n", engine->name);
 
 	/*
 	 * Before we call engine->cancel_requests(), we should have exclusive
@@ -952,10 +949,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 			kmem_cache_free(engine->i915->priorities, p);
 	}
 
-	intel_write_status_page(engine,
-				I915_GEM_HWS_INDEX,
-				intel_engine_last_submit(engine));
-
 	/* Remaining _unready_ requests will be nop'ed when submitted */
 
 	execlists->queue_priority_hint = INT_MIN;
@@ -1071,14 +1064,13 @@ static void process_csb(struct intel_engine_cs *engine)
 						EXECLISTS_ACTIVE_USER));
 
 		rq = port_unpack(port, &count);
-		GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",
+		GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n",
 			  engine->name,
 			  port->context_id, count,
 			  rq ? rq->global_seqno : 0,
 			  rq ? rq->fence.context : 0,
 			  rq ? rq->fence.seqno : 0,
 			  rq ? hwsp_seqno(rq) : 0,
-			  intel_engine_get_seqno(engine),
 			  rq ? rq_prio(rq) : 0);
 
 		/* Check the context/desc id for this event matches */
@@ -1946,10 +1938,9 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	/* Following the reset, we need to reload the CSB read/write pointers */
 	reset_csb_pointers(&engine->execlists);
 
-	GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n",
+	GEM_TRACE("%s seqno=%d, stalled? %s\n",
 		  engine->name,
 		  rq ? rq->global_seqno : 0,
-		  intel_engine_get_seqno(engine),
 		  yesno(stalled));
 	if (!rq)
 		goto out_unlock;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 870184bbd169..2d59e2990448 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -782,10 +782,9 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled)
 		}
 	}
 
-	GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n",
+	GEM_TRACE("%s seqno=%d, stalled? %s\n",
 		  engine->name,
 		  rq ? rq->global_seqno : 0,
-		  intel_engine_get_seqno(engine),
 		  yesno(stalled));
 	/*
 	 * The guilty request will get skipped on a hung engine.
@@ -924,10 +923,6 @@ static void cancel_requests(struct intel_engine_cs *engine)
 		i915_request_mark_complete(request);
 	}
 
-	intel_write_status_page(engine,
-				I915_GEM_HWS_INDEX,
-				intel_engine_last_submit(engine));
-
 	/* Remaining _unready_ requests will be nop'ed when submitted */
 
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2869aaa9d225..551b3daa741c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -848,8 +848,6 @@ __intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
 	return (head - tail - CACHELINE_BYTES) & (size - 1);
 }
 
-void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno);
-
 int intel_engine_setup_common(struct intel_engine_cs *engine);
 int intel_engine_init_common(struct intel_engine_cs *engine);
 void intel_engine_cleanup_common(struct intel_engine_cs *engine);
@@ -867,44 +865,6 @@ void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);
 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
 u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);
 
-static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
-{
-	/*
-	 * We are only peeking at the tail of the submit queue (and not the
-	 * queue itself) in order to gain a hint as to the current active
-	 * state of the engine. Callers are not expected to be taking
-	 * engine->timeline->lock, nor are they expected to be concerned
-	 * wtih serialising this hint with anything, so document it as
-	 * a hint and nothing more.
-	 */
-	return READ_ONCE(engine->timeline.seqno);
-}
-
-static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
-{
-	return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
-}
-
-static inline bool intel_engine_signaled(struct intel_engine_cs *engine,
-					 u32 seqno)
-{
-	return i915_seqno_passed(intel_engine_get_seqno(engine), seqno);
-}
-
-static inline bool intel_engine_has_completed(struct intel_engine_cs *engine,
-					      u32 seqno)
-{
-	GEM_BUG_ON(!seqno);
-	return intel_engine_signaled(engine, seqno);
-}
-
-static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
-					    u32 seqno)
-{
-	GEM_BUG_ON(!seqno);
-	return intel_engine_signaled(engine, seqno - 1);
-}
-
 void intel_engine_get_instdone(struct intel_engine_cs *engine,
 			       struct intel_instdone *instdone);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 6733dc5b6b4c..074d393f4a02 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -226,8 +226,7 @@ static int igt_request_rewind(void *arg)
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
-		pr_err("timed out waiting for high priority request, vip.seqno=%d, current seqno=%d\n",
-		       vip->global_seqno, intel_engine_get_seqno(i915->engine[RCS]));
+		pr_err("timed out waiting for high priority request\n");
 		goto err;
 	}
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 08f0cab02e0f..79bf27606ab8 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -86,8 +86,6 @@ static struct mock_request *first_request(struct mock_engine *engine)
 static void advance(struct mock_request *request)
 {
 	list_del_init(&request->link);
-	intel_engine_write_global_seqno(request->base.engine,
-					request->base.global_seqno);
 	i915_request_mark_complete(&request->base);
 	GEM_BUG_ON(!i915_request_completed(&request->base));
 
@@ -278,7 +276,6 @@ void mock_engine_flush(struct intel_engine_cs *engine)
 
 void mock_engine_reset(struct intel_engine_cs *engine)
 {
-	intel_engine_write_global_seqno(engine, 0);
 }
 
 void mock_engine_free(struct intel_engine_cs *engine)
-- 
2.20.1


* [PATCH 11/25] drm/i915: Remove i915_request.global_seqno
  2019-02-19 12:21 [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Chris Wilson
                   ` (8 preceding siblings ...)
  2019-02-19 12:22 ` [PATCH 10/25] drm/i915: Remove access to global seqno in the HWSP Chris Wilson
@ 2019-02-19 12:22 ` Chris Wilson
  2019-02-19 12:22 ` [PATCH 12/25] drm/i915/selftests: Exercise resetting during non-user payloads Chris Wilson
                   ` (18 subsequent siblings)
  28 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-02-19 12:22 UTC (permalink / raw)
  To: intel-gfx

Having weaned the interrupt handling off a single global execution
queue, we no longer need to emit a global_seqno. Note that we still have
a few assumptions about execution order along engine timelines, but this
removes the most obvious artefact!
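
For illustration only (a simplified sketch, not a hunk from this patch):
with global_seqno gone, completion is judged purely against the
per-timeline seqno snapshotted from the HWSP, along the lines of:

	static inline u32 hwsp_seqno(const struct i915_request *rq)
	{
		/* breadcrumb last written by the GPU to this timeline's HWSP */
		return READ_ONCE(*rq->hwsp_seqno);
	}

	static inline bool i915_request_completed(const struct i915_request *rq)
	{
		/* wrap-safe comparison against the request's own fence seqno */
		return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno);
	}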

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gpu_error.c         | 34 ++-----------
 drivers/gpu/drm/i915/i915_gpu_error.h         |  2 -
 drivers/gpu/drm/i915/i915_request.c           | 34 ++-----------
 drivers/gpu/drm/i915/i915_request.h           | 32 ------------
 drivers/gpu/drm/i915/i915_trace.h             | 25 +++-------
 drivers/gpu/drm/i915/intel_engine_cs.c        |  5 +-
 drivers/gpu/drm/i915/intel_guc_submission.c   |  2 +-
 drivers/gpu/drm/i915/intel_lrc.c              | 34 ++-----------
 drivers/gpu/drm/i915/intel_ringbuffer.c       | 50 +++----------------
 drivers/gpu/drm/i915/intel_ringbuffer.h       |  2 -
 .../gpu/drm/i915/selftests/intel_hangcheck.c  |  5 +-
 drivers/gpu/drm/i915/selftests/mock_engine.c  |  1 -
 12 files changed, 32 insertions(+), 194 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 061a767e3bed..fa86c60fb56c 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -380,19 +380,16 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
 	err_printf(m, "%s [%d]:\n", name, count);
 
 	while (count--) {
-		err_printf(m, "    %08x_%08x %8u %02x %02x %02x",
+		err_printf(m, "    %08x_%08x %8u %02x %02x",
 			   upper_32_bits(err->gtt_offset),
 			   lower_32_bits(err->gtt_offset),
 			   err->size,
 			   err->read_domains,
-			   err->write_domain,
-			   err->wseqno);
+			   err->write_domain);
 		err_puts(m, tiling_flag(err->tiling));
 		err_puts(m, dirty_flag(err->dirty));
 		err_puts(m, purgeable_flag(err->purgeable));
 		err_puts(m, err->userptr ? " userptr" : "");
-		err_puts(m, err->engine != -1 ? " " : "");
-		err_puts(m, engine_name(m->i915, err->engine));
 		err_puts(m, i915_cache_level_str(m->i915, err->cache_level));
 
 		if (err->name)
@@ -1048,27 +1045,6 @@ i915_error_object_create(struct drm_i915_private *i915,
 	return dst;
 }
 
-/* The error capture is special as tries to run underneath the normal
- * locking rules - so we use the raw version of the i915_active_request lookup.
- */
-static inline u32
-__active_get_seqno(struct i915_active_request *active)
-{
-	struct i915_request *request;
-
-	request = __i915_active_request_peek(active);
-	return request ? request->global_seqno : 0;
-}
-
-static inline int
-__active_get_engine_id(struct i915_active_request *active)
-{
-	struct i915_request *request;
-
-	request = __i915_active_request_peek(active);
-	return request ? request->engine->id : -1;
-}
-
 static void capture_bo(struct drm_i915_error_buffer *err,
 		       struct i915_vma *vma)
 {
@@ -1077,9 +1053,6 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->size = obj->base.size;
 	err->name = obj->base.name;
 
-	err->wseqno = __active_get_seqno(&obj->frontbuffer_write);
-	err->engine = __active_get_engine_id(&obj->frontbuffer_write);
-
 	err->gtt_offset = vma->node.start;
 	err->read_domains = obj->read_domains;
 	err->write_domain = obj->write_domain;
@@ -1284,7 +1257,8 @@ static void record_request(struct i915_request *request,
 	struct i915_gem_context *ctx = request->gem_context;
 
 	erq->flags = request->fence.flags;
-	erq->context = ctx->hw_id;
+	erq->context = request->fence.context;
+	erq->seqno = request->fence.seqno;
 	erq->sched_attr = request->sched.attr;
 	erq->jiffies = request->emitted_jiffies;
 	erq->start = i915_ggtt_offset(request->ring->vma);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 19ac102afaff..8c1569c1830d 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -164,7 +164,6 @@ struct i915_gpu_state {
 	struct drm_i915_error_buffer {
 		u32 size;
 		u32 name;
-		u32 wseqno;
 		u64 gtt_offset;
 		u32 read_domains;
 		u32 write_domain;
@@ -173,7 +172,6 @@ struct i915_gpu_state {
 		u32 dirty:1;
 		u32 purgeable:1;
 		u32 userptr:1;
-		s32 engine:4;
 		u32 cache_level:3;
 	} *active_bo[I915_NUM_ENGINES], *pinned_bo;
 	u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c7db71e57af1..04424932d479 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -179,10 +179,9 @@ static void free_capture_list(struct i915_request *request)
 static void __retire_engine_request(struct intel_engine_cs *engine,
 				    struct i915_request *rq)
 {
-	GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n",
+	GEM_TRACE("%s(%s) fence %llx:%lld, current %d\n",
 		  __func__, engine->name,
 		  rq->fence.context, rq->fence.seqno,
-		  rq->global_seqno,
 		  hwsp_seqno(rq));
 
 	GEM_BUG_ON(!i915_request_completed(rq));
@@ -242,10 +241,9 @@ static void i915_request_retire(struct i915_request *request)
 {
 	struct i915_active_request *active, *next;
 
-	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n",
+	GEM_TRACE("%s fence %llx:%lld, current %d\n",
 		  request->engine->name,
 		  request->fence.context, request->fence.seqno,
-		  request->global_seqno,
 		  hwsp_seqno(request));
 
 	lockdep_assert_held(&request->i915->drm.struct_mutex);
@@ -303,10 +301,9 @@ void i915_request_retire_upto(struct i915_request *rq)
 	struct intel_ring *ring = rq->ring;
 	struct i915_request *tmp;
 
-	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n",
+	GEM_TRACE("%s fence %llx:%lld, current %d\n",
 		  rq->engine->name,
 		  rq->fence.context, rq->fence.seqno,
-		  rq->global_seqno,
 		  hwsp_seqno(rq));
 
 	lockdep_assert_held(&rq->i915->drm.struct_mutex);
@@ -339,22 +336,13 @@ static void move_to_timeline(struct i915_request *request,
 	spin_unlock(&request->timeline->lock);
 }
 
-static u32 next_global_seqno(struct i915_timeline *tl)
-{
-	if (!++tl->seqno)
-		++tl->seqno;
-	return tl->seqno;
-}
-
 void __i915_request_submit(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
-	u32 seqno;
 
-	GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n",
+	GEM_TRACE("%s fence %llx:%lld -> current %d\n",
 		  engine->name,
 		  request->fence.context, request->fence.seqno,
-		  engine->timeline.seqno + 1,
 		  hwsp_seqno(request));
 
 	GEM_BUG_ON(!irqs_disabled());
@@ -363,16 +351,10 @@ void __i915_request_submit(struct i915_request *request)
 	if (i915_gem_context_is_banned(request->gem_context))
 		i915_request_skip(request, -EIO);
 
-	GEM_BUG_ON(request->global_seqno);
-
-	seqno = next_global_seqno(&engine->timeline);
-	GEM_BUG_ON(!seqno);
-
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
 	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
-	request->global_seqno = seqno;
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
 	    !i915_request_enable_breadcrumb(request))
 		intel_engine_queue_breadcrumbs(engine);
@@ -404,10 +386,9 @@ void __i915_request_unsubmit(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
 
-	GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n",
+	GEM_TRACE("%s fence %llx:%lld, current %d\n",
 		  engine->name,
 		  request->fence.context, request->fence.seqno,
-		  request->global_seqno,
 		  hwsp_seqno(request));
 
 	GEM_BUG_ON(!irqs_disabled());
@@ -417,13 +398,9 @@ void __i915_request_unsubmit(struct i915_request *request)
 	 * Only unwind in reverse order, required so that the per-context list
 	 * is kept in seqno/ring order.
 	 */
-	GEM_BUG_ON(!request->global_seqno);
-	GEM_BUG_ON(request->global_seqno != engine->timeline.seqno);
-	engine->timeline.seqno--;
 
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
-	request->global_seqno = 0;
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
 		i915_request_cancel_breadcrumb(request);
 	GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
@@ -636,7 +613,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	i915_sched_node_init(&rq->sched);
 
 	/* No zalloc, must clear what we need by hand */
-	rq->global_seqno = 0;
 	rq->file_priv = NULL;
 	rq->batch = NULL;
 	rq->capture_list = NULL;
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 40f3e8dcbdd5..1e127c1c53fa 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -147,14 +147,6 @@ struct i915_request {
 	 */
 	const u32 *hwsp_seqno;
 
-	/**
-	 * GEM sequence number associated with this request on the
-	 * global execution timeline. It is zero when the request is not
-	 * on the HW queue (i.e. not on the engine timeline list).
-	 * Its value is guarded by the timeline spinlock.
-	 */
-	u32 global_seqno;
-
 	/** Position in the ring of the start of the request */
 	u32 head;
 
@@ -247,30 +239,6 @@ i915_request_put(struct i915_request *rq)
 	dma_fence_put(&rq->fence);
 }
 
-/**
- * i915_request_global_seqno - report the current global seqno
- * @request - the request
- *
- * A request is assigned a global seqno only when it is on the hardware
- * execution queue. The global seqno can be used to maintain a list of
- * requests on the same engine in retirement order, for example for
- * constructing a priority queue for waiting. Prior to its execution, or
- * if it is subsequently removed in the event of preemption, its global
- * seqno is zero. As both insertion and removal from the execution queue
- * may operate in IRQ context, it is not guarded by the usual struct_mutex
- * BKL. Instead those relying on the global seqno must be prepared for its
- * value to change between reads. Only when the request is complete can
- * the global seqno be stable (due to the memory barriers on submitting
- * the commands to the hardware to write the breadcrumb, if the HWS shows
- * that it has passed the global seqno and the global seqno is unchanged
- * after the read, it is indeed complete).
- */
-static inline u32
-i915_request_global_seqno(const struct i915_request *request)
-{
-	return READ_ONCE(request->global_seqno);
-}
-
 int i915_request_await_object(struct i915_request *to,
 			      struct drm_i915_gem_object *obj,
 			      bool write);
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 0d9cedb892b0..12893304c8f8 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -708,7 +708,6 @@ DECLARE_EVENT_CLASS(i915_request,
 			     __field(u16, class)
 			     __field(u16, instance)
 			     __field(u32, seqno)
-			     __field(u32, global)
 			     ),
 
 	    TP_fast_assign(
@@ -718,13 +717,11 @@ DECLARE_EVENT_CLASS(i915_request,
 			   __entry->instance = rq->engine->instance;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
-			   __entry->global = rq->global_seqno;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, global=%u",
+	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno,
-		      __entry->global)
+		      __entry->hw_id, __entry->ctx, __entry->seqno)
 );
 
 DEFINE_EVENT(i915_request, i915_request_add,
@@ -754,7 +751,6 @@ TRACE_EVENT(i915_request_in,
 			     __field(u16, class)
 			     __field(u16, instance)
 			     __field(u32, seqno)
-			     __field(u32, global_seqno)
 			     __field(u32, port)
 			     __field(u32, prio)
 			    ),
@@ -766,15 +762,14 @@ TRACE_EVENT(i915_request_in,
 			   __entry->instance = rq->engine->instance;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
-			   __entry->global_seqno = rq->global_seqno;
 			   __entry->prio = rq->sched.attr.priority;
 			   __entry->port = port;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, global=%u, port=%u",
+	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, port=%u",
 		      __entry->dev, __entry->class, __entry->instance,
 		      __entry->hw_id, __entry->ctx, __entry->seqno,
-		      __entry->prio, __entry->global_seqno, __entry->port)
+		      __entry->prio, __entry->port)
 );
 
 TRACE_EVENT(i915_request_out,
@@ -788,7 +783,6 @@ TRACE_EVENT(i915_request_out,
 			     __field(u16, class)
 			     __field(u16, instance)
 			     __field(u32, seqno)
-			     __field(u32, global_seqno)
 			     __field(u32, completed)
 			    ),
 
@@ -799,14 +793,13 @@ TRACE_EVENT(i915_request_out,
 			   __entry->instance = rq->engine->instance;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
-			   __entry->global_seqno = rq->global_seqno;
 			   __entry->completed = i915_request_completed(rq);
 			   ),
 
-		    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, global=%u, completed?=%u",
+		    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, completed?=%u",
 			      __entry->dev, __entry->class, __entry->instance,
 			      __entry->hw_id, __entry->ctx, __entry->seqno,
-			      __entry->global_seqno, __entry->completed)
+			      __entry->completed)
 );
 
 #else
@@ -849,7 +842,6 @@ TRACE_EVENT(i915_request_wait_begin,
 			     __field(u16, class)
 			     __field(u16, instance)
 			     __field(u32, seqno)
-			     __field(u32, global)
 			     __field(unsigned int, flags)
 			     ),
 
@@ -866,14 +858,13 @@ TRACE_EVENT(i915_request_wait_begin,
 			   __entry->instance = rq->engine->instance;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
-			   __entry->global = rq->global_seqno;
 			   __entry->flags = flags;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, global=%u, blocking=%u, flags=0x%x",
+	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, blocking=%u, flags=0x%x",
 		      __entry->dev, __entry->class, __entry->instance,
 		      __entry->hw_id, __entry->ctx, __entry->seqno,
-		      __entry->global, !!(__entry->flags & I915_WAIT_LOCKED),
+		      !!(__entry->flags & I915_WAIT_LOCKED),
 		      __entry->flags)
 );
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index ebff5c768655..45ba3a71d17b 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1273,15 +1273,14 @@ static void print_request(struct drm_printer *m,
 
 	x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf));
 
-	drm_printf(m, "%s%x%s%s [%llx:%llx]%s @ %dms: %s\n",
+	drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n",
 		   prefix,
-		   rq->global_seqno,
+		   rq->fence.context, rq->fence.seqno,
 		   i915_request_completed(rq) ? "!" :
 		   i915_request_started(rq) ? "*" :
 		   "",
 		   test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
 			    &rq->fence.flags) ?  "+" : "",
-		   rq->fence.context, rq->fence.seqno,
 		   buf,
 		   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
 		   name);
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 8bc8aa54aa35..20cbceeabeae 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -535,7 +535,7 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 	spin_lock(&client->wq_lock);
 
 	guc_wq_item_append(client, engine->guc_id, ctx_desc,
-			   ring_tail, rq->global_seqno);
+			   ring_tail, rq->fence.seqno);
 	guc_ring_doorbell(client);
 
 	client->submissions[engine->id] += 1;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a820f32f0d70..d638ce9de089 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -172,12 +172,6 @@ static void execlists_init_reg_state(u32 *reg_state,
 				     struct intel_engine_cs *engine,
 				     struct intel_ring *ring);
 
-static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
-{
-	return (i915_ggtt_offset(engine->status_page.vma) +
-		I915_GEM_HWS_INDEX_ADDR);
-}
-
 static inline u32 intel_hws_hangcheck_address(struct intel_engine_cs *engine)
 {
 	return (i915_ggtt_offset(engine->status_page.vma) +
@@ -528,10 +522,9 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 			desc = execlists_update_context(rq);
 			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
 
-			GEM_TRACE("%s in[%d]:  ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n",
+			GEM_TRACE("%s in[%d]:  ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
 				  engine->name, n,
 				  port[n].context_id, count,
-				  rq->global_seqno,
 				  rq->fence.context, rq->fence.seqno,
 				  hwsp_seqno(rq),
 				  rq_prio(rq));
@@ -839,10 +832,9 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
 	while (num_ports-- && port_isset(port)) {
 		struct i915_request *rq = port_request(port);
 
-		GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n",
+		GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n",
 			  rq->engine->name,
 			  (unsigned int)(port - execlists->port),
-			  rq->global_seqno,
 			  rq->fence.context, rq->fence.seqno,
 			  hwsp_seqno(rq));
 
@@ -924,8 +916,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 
 	/* Mark all executing requests as skipped. */
 	list_for_each_entry(rq, &engine->timeline.requests, link) {
-		GEM_BUG_ON(!rq->global_seqno);
-
 		if (!i915_request_signaled(rq))
 			dma_fence_set_error(&rq->fence, -EIO);
 
@@ -1064,10 +1054,9 @@ static void process_csb(struct intel_engine_cs *engine)
 						EXECLISTS_ACTIVE_USER));
 
 		rq = port_unpack(port, &count);
-		GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n",
+		GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
 			  engine->name,
 			  port->context_id, count,
-			  rq ? rq->global_seqno : 0,
 			  rq ? rq->fence.context : 0,
 			  rq ? rq->fence.seqno : 0,
 			  rq ? hwsp_seqno(rq) : 0,
@@ -1938,10 +1927,7 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	/* Following the reset, we need to reload the CSB read/write pointers */
 	reset_csb_pointers(&engine->execlists);
 
-	GEM_TRACE("%s seqno=%d, stalled? %s\n",
-		  engine->name,
-		  rq ? rq->global_seqno : 0,
-		  yesno(stalled));
+	GEM_TRACE("%s stalled? %s\n", engine->name, yesno(stalled));
 	if (!rq)
 		goto out_unlock;
 
@@ -2196,9 +2182,6 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
 
 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 {
-	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
-	BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
-
 	cs = gen8_emit_ggtt_write(cs,
 				  request->fence.seqno,
 				  request->timeline->hwsp_offset);
@@ -2207,10 +2190,6 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 				  intel_engine_next_hangcheck_seqno(request->engine),
 				  intel_hws_hangcheck_address(request->engine));
 
-	cs = gen8_emit_ggtt_write(cs,
-				  request->global_seqno,
-				  intel_hws_seqno_address(request->engine));
-
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 
@@ -2236,11 +2215,6 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 				      intel_hws_hangcheck_address(request->engine),
 				      PIPE_CONTROL_CS_STALL);
 
-	cs = gen8_emit_ggtt_write_rcs(cs,
-				      request->global_seqno,
-				      intel_hws_seqno_address(request->engine),
-				      PIPE_CONTROL_CS_STALL);
-
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 2d59e2990448..1b96b0960adc 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -49,12 +49,6 @@ static inline u32 hws_hangcheck_address(struct intel_engine_cs *engine)
 		I915_GEM_HWS_HANGCHECK_ADDR);
 }
 
-static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
-{
-	return (i915_ggtt_offset(engine->status_page.vma) +
-		I915_GEM_HWS_INDEX_ADDR);
-}
-
 unsigned int intel_ring_update_space(struct intel_ring *ring)
 {
 	unsigned int space;
@@ -327,11 +321,6 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = hws_hangcheck_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
 	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
 
-	*cs++ = GFX_OP_PIPE_CONTROL(4);
-	*cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
-	*cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
-	*cs++ = rq->global_seqno;
-
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_NOOP;
 
@@ -438,13 +427,6 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = hws_hangcheck_address(rq->engine);
 	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
 
-	*cs++ = GFX_OP_PIPE_CONTROL(4);
-	*cs++ = (PIPE_CONTROL_QW_WRITE |
-		 PIPE_CONTROL_GLOBAL_GTT_IVB |
-		 PIPE_CONTROL_CS_STALL);
-	*cs++ = intel_hws_seqno_address(rq->engine);
-	*cs++ = rq->global_seqno;
-
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_NOOP;
 
@@ -467,11 +449,8 @@ static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT;
 	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
 
-	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
-	*cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;
-	*cs++ = rq->global_seqno;
-
 	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
@@ -495,10 +474,6 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT;
 	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
 
-	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
-	*cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;
-	*cs++ = rq->global_seqno;
-
 	for (i = 0; i < GEN7_XCS_WA; i++) {
 		*cs++ = MI_STORE_DWORD_INDEX;
 		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
@@ -510,7 +485,6 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = 0;
 
 	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
@@ -782,10 +756,8 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled)
 		}
 	}
 
-	GEM_TRACE("%s seqno=%d, stalled? %s\n",
-		  engine->name,
-		  rq ? rq->global_seqno : 0,
-		  yesno(stalled));
+	GEM_TRACE("%s stalled? %s\n", engine->name, yesno(stalled));
+
 	/*
 	 * The guilty request will get skipped on a hung engine.
 	 *
@@ -915,8 +887,6 @@ static void cancel_requests(struct intel_engine_cs *engine)
 
 	/* Mark all submitted requests as skipped. */
 	list_for_each_entry(request, &engine->timeline.requests, link) {
-		GEM_BUG_ON(!request->global_seqno);
-
 		if (!i915_request_signaled(request))
 			dma_fence_set_error(&request->fence, -EIO);
 
@@ -953,12 +923,7 @@ static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR;
 	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
 
-	*cs++ = MI_STORE_DWORD_INDEX;
-	*cs++ = I915_GEM_HWS_INDEX_ADDR;
-	*cs++ = rq->global_seqno;
-
 	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
@@ -976,10 +941,6 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 
 	*cs++ = MI_FLUSH;
 
-	*cs++ = MI_STORE_DWORD_INDEX;
-	*cs++ = I915_GEM_HWS_SEQNO_ADDR;
-	*cs++ = rq->fence.seqno;
-
 	*cs++ = MI_STORE_DWORD_INDEX;
 	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR;
 	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
@@ -987,11 +948,12 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	BUILD_BUG_ON(GEN5_WA_STORES < 1);
 	for (i = 0; i < GEN5_WA_STORES; i++) {
 		*cs++ = MI_STORE_DWORD_INDEX;
-		*cs++ = I915_GEM_HWS_INDEX_ADDR;
-		*cs++ = rq->global_seqno;
+		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
+		*cs++ = rq->fence.seqno;
 	}
 
 	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 551b3daa741c..de8dba7565b0 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -724,8 +724,6 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
  *
  * The area from dword 0x30 to 0x3ff is available for driver usage.
  */
-#define I915_GEM_HWS_INDEX		0x30
-#define I915_GEM_HWS_INDEX_ADDR		(I915_GEM_HWS_INDEX * sizeof(u32))
 #define I915_GEM_HWS_PREEMPT		0x32
 #define I915_GEM_HWS_PREEMPT_ADDR	(I915_GEM_HWS_PREEMPT * sizeof(u32))
 #define I915_GEM_HWS_HANGCHECK		0x34
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index c32bc31192ae..e975450d78a1 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -571,11 +571,10 @@ static int active_request_put(struct i915_request *rq)
 		return 0;
 
 	if (i915_request_wait(rq, 0, 5 * HZ) < 0) {
-		GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld, seqno %d.\n",
+		GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld\n",
 			  rq->engine->name,
 			  rq->fence.context,
-			  rq->fence.seqno,
-			  i915_request_global_seqno(rq));
+			  rq->fence.seqno);
 		GEM_TRACE_DUMP();
 
 		i915_gem_set_wedged(rq->i915);
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 79bf27606ab8..6f3fb803c747 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -196,7 +196,6 @@ static void mock_submit_request(struct i915_request *request)
 	unsigned long flags;
 
 	i915_request_submit(request);
-	GEM_BUG_ON(!request->global_seqno);
 
 	spin_lock_irqsave(&engine->hw_lock, flags);
 	list_add_tail(&mock->link, &engine->hw_queue);
-- 
2.20.1


* [PATCH 12/25] drm/i915/selftests: Exercise resetting during non-user payloads
  2019-02-19 12:21 [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Chris Wilson
                   ` (9 preceding siblings ...)
  2019-02-19 12:22 ` [PATCH 11/25] drm/i915: Remove i915_request.global_seqno Chris Wilson
@ 2019-02-19 12:22 ` Chris Wilson
  2019-02-19 12:22 ` [PATCH 13/25] drm/i915: Reduce the RPS shock Chris Wilson
                   ` (17 subsequent siblings)
  28 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-02-19 12:22 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

In selftests/live_hangcheck, we have a lot of tests for resetting simple
spinners, but nothing quite prepared us for how the GPU reacted to
triggering a reset outside of the safe spinner. These two subtests fill
the ring with plain old empty, non-spinning requests, and then trigger
a reset. Without a user-payload to blame, these requests will exercise
the 'non-started' paths and mostly be replayed verbatim.
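
As a usage note (commands are illustrative, not taken from the patch),
the new subtests join the live_hangcheck group and can be run via the
module parameter or the IGT wrapper:

	# assumes a kernel built with CONFIG_DRM_I915_SELFTEST=y
	modprobe i915 live_selftests=-1		# run all live selftests
	./i915_selftest --run-subtest live_hangcheck	# or via igt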

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 .../gpu/drm/i915/selftests/intel_hangcheck.c  | 218 ++++++++++++++++++
 1 file changed, 218 insertions(+)

diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index e975450d78a1..3045035569e1 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -415,6 +415,222 @@ static bool wait_for_idle(struct intel_engine_cs *engine)
 	return wait_for(intel_engine_is_idle(engine), IGT_IDLE_TIMEOUT) == 0;
 }
 
+static int igt_reset_nop(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	struct i915_gem_context *ctx;
+	unsigned int reset_count, count;
+	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
+	struct drm_file *file;
+	IGT_TIMEOUT(end_time);
+	int err = 0;
+
+	/* Check that we can reset during non-user portions of requests */
+
+	file = mock_file(i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	ctx = live_context(i915, file);
+	mutex_unlock(&i915->drm.struct_mutex);
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto out;
+	}
+
+	i915_gem_context_clear_bannable(ctx);
+	wakeref = intel_runtime_pm_get(i915);
+	reset_count = i915_reset_count(&i915->gpu_error);
+	count = 0;
+	do {
+		mutex_lock(&i915->drm.struct_mutex);
+		for_each_engine(engine, i915, id) {
+			int i;
+
+			for (i = 0; i < 16; i++) {
+				struct i915_request *rq;
+
+				rq = i915_request_alloc(engine, ctx);
+				if (IS_ERR(rq)) {
+					err = PTR_ERR(rq);
+					break;
+				}
+
+				i915_request_add(rq);
+			}
+		}
+		mutex_unlock(&i915->drm.struct_mutex);
+
+		igt_global_reset_lock(i915);
+		i915_reset(i915, ALL_ENGINES, NULL);
+		igt_global_reset_unlock(i915);
+		if (i915_terminally_wedged(&i915->gpu_error)) {
+			err = -EIO;
+			break;
+		}
+
+		if (i915_reset_count(&i915->gpu_error) !=
+		    reset_count + ++count) {
+			pr_err("Full GPU reset not recorded!\n");
+			err = -EINVAL;
+			break;
+		}
+
+		if (!i915_reset_flush(i915)) {
+			struct drm_printer p =
+				drm_info_printer(i915->drm.dev);
+
+			pr_err("%s failed to idle after reset\n",
+			       engine->name);
+			intel_engine_dump(engine, &p,
+					  "%s\n", engine->name);
+
+			err = -EIO;
+			break;
+		}
+
+		err = igt_flush_test(i915, 0);
+		if (err)
+			break;
+	} while (time_before(jiffies, end_time));
+	pr_info("%s: %d resets\n", __func__, count);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	err = igt_flush_test(i915, I915_WAIT_LOCKED);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	intel_runtime_pm_put(i915, wakeref);
+
+out:
+	mock_file_free(i915, file);
+	if (i915_terminally_wedged(&i915->gpu_error))
+		err = -EIO;
+	return err;
+}
+
+static int igt_reset_nop_engine(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	struct i915_gem_context *ctx;
+	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
+	struct drm_file *file;
+	int err = 0;
+
+	/* Check that we can engine-reset during non-user portions */
+
+	if (!intel_has_reset_engine(i915))
+		return 0;
+
+	file = mock_file(i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	ctx = live_context(i915, file);
+	mutex_unlock(&i915->drm.struct_mutex);
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto out;
+	}
+
+	i915_gem_context_clear_bannable(ctx);
+	wakeref = intel_runtime_pm_get(i915);
+	for_each_engine(engine, i915, id) {
+		unsigned int reset_count, reset_engine_count;
+		unsigned int count;
+		IGT_TIMEOUT(end_time);
+
+		reset_count = i915_reset_count(&i915->gpu_error);
+		reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
+							     engine);
+		count = 0;
+
+		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+		do {
+			int i;
+
+			if (!wait_for_idle(engine)) {
+				pr_err("%s failed to idle before reset\n",
+				       engine->name);
+				err = -EIO;
+				break;
+			}
+
+			mutex_lock(&i915->drm.struct_mutex);
+			for (i = 0; i < 16; i++) {
+				struct i915_request *rq;
+
+				rq = i915_request_alloc(engine, ctx);
+				if (IS_ERR(rq)) {
+					err = PTR_ERR(rq);
+					break;
+				}
+
+				i915_request_add(rq);
+			}
+			mutex_unlock(&i915->drm.struct_mutex);
+
+			err = i915_reset_engine(engine, NULL);
+			if (err) {
+				pr_err("i915_reset_engine failed\n");
+				break;
+			}
+
+			if (i915_reset_count(&i915->gpu_error) != reset_count) {
+				pr_err("Full GPU reset recorded! (engine reset expected)\n");
+				err = -EINVAL;
+				break;
+			}
+
+			if (i915_reset_engine_count(&i915->gpu_error, engine) !=
+			    reset_engine_count + ++count) {
+				pr_err("%s engine reset not recorded!\n",
+				       engine->name);
+				err = -EINVAL;
+				break;
+			}
+
+			if (!i915_reset_flush(i915)) {
+				struct drm_printer p =
+					drm_info_printer(i915->drm.dev);
+
+				pr_err("%s failed to idle after reset\n",
+				       engine->name);
+				intel_engine_dump(engine, &p,
+						  "%s\n", engine->name);
+
+				err = -EIO;
+				break;
+			}
+		} while (time_before(jiffies, end_time));
+		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+		pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
+
+		if (err)
+			break;
+
+		err = igt_flush_test(i915, 0);
+		if (err)
+			break;
+	}
+
+	mutex_lock(&i915->drm.struct_mutex);
+	err = igt_flush_test(i915, I915_WAIT_LOCKED);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	intel_runtime_pm_put(i915, wakeref);
+out:
+	mock_file_free(i915, file);
+	if (i915_terminally_wedged(&i915->gpu_error))
+		err = -EIO;
+	return err;
+}
+
 static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
 {
 	struct intel_engine_cs *engine;
@@ -1646,6 +1862,8 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(igt_global_reset), /* attempt to recover GPU first */
 		SUBTEST(igt_wedged_reset),
 		SUBTEST(igt_hang_sanitycheck),
+		SUBTEST(igt_reset_nop),
+		SUBTEST(igt_reset_nop_engine),
 		SUBTEST(igt_reset_idle_engine),
 		SUBTEST(igt_reset_active_engine),
 		SUBTEST(igt_reset_engines),
-- 
2.20.1


* [PATCH 13/25] drm/i915: Reduce the RPS shock
  2019-02-19 12:21 [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Chris Wilson
                   ` (10 preceding siblings ...)
  2019-02-19 12:22 ` [PATCH 12/25] drm/i915/selftests: Exercise resetting during non-user payloads Chris Wilson
@ 2019-02-19 12:22 ` Chris Wilson
  2019-02-19 21:00   ` Lyude Paul
  2019-02-20 15:14   ` Mika Kuoppala
  2019-02-19 12:22 ` [PATCH 14/25] drm/i915/pmu: Use GT parked for estimating RC6 while asleep Chris Wilson
                   ` (16 subsequent siblings)
  28 siblings, 2 replies; 43+ messages in thread
From: Chris Wilson @ 2019-02-19 12:22 UTC (permalink / raw)
  To: intel-gfx; +Cc: Michel Thierry, Eero Tamminen

Limit deboosting and boosting to keep ourselves at the extremes
when in the respective power modes (i.e. slowly decrease frequencies
while in the HIGH_POWER zone and slowly increase frequencies while
in the LOW_POWER zone). On idle, we will hit the timeout and drop
to the next level quickly, and conversely if busy we expect to
hit a waitboost and rapidly switch into max power.
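
The clamp itself is small (the sketch below mirrors the hunk in this
patch); successive down events in HIGH_POWER now stay at -1 rather than
compounding through last_adj (-1, -2, -4, ...), and likewise for up
events in LOW_POWER:

	if ((adj < 0 && rps->power.mode == HIGH_POWER) ||
	    (adj > 0 && rps->power.mode == LOW_POWER))
		rps->last_adj = 0; /* do not let the adjustment compound */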

This should improve the user experience by keeping the GPU clocks higher
than they ostensibly should be (based on simple busyness) by switching
into the INTERACTIVE mode (due to waiting for pageflips) and increasing
clocks via waitboosting. This will incur some additional power; our
saving grace should be rc6 and powergating, which keep the extra current
draw in check.

Food for future thought would be deadline scheduling: if we know certain
contexts (high priority compositors) absolutely must hit the next vblank
then we can raise the frequencies ahead of time. Part of this is covered
by per-context frequencies, where userspace is given control over the
frequency range they want the GPU to execute at (for largely the same
problem as this, where the workload is very latency sensitive but at the
EI level appears mostly idle). Indeed, the per-context series does
extend the modeset boosting to include a frequency range tweak which
seems applicable to solving this jittery UX behaviour.

Reported-by: Lyude Paul <lyude@redhat.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109408
References: 0d55babc8392 ("drm/i915: Drop stray clearing of rps->last_adj")
References: 60548c554be2 ("drm/i915: Interactive RPS mode")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lyude Paul <lyude@redhat.com>
Cc: Eero Tamminen <eero.t.tamminen@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>

Quoting Lyude Paul:
> Before reverting 0d55babc8392754352f1058866dd4182ae587d11: [4.20]
>
> 35 measurements [of gnome-shell animations]
> Average: 33.65657142857143 FPS
> FPS observed: 20.8 - 46.87 FPS
> Percentage under 60 FPS: 100.0%
> Percentage under 55 FPS: 100.0%
> Percentage under 50 FPS: 100.0%
> Percentage under 45 FPS: 97.14285714285714%
> Percentage under 40 FPS: 97.14285714285714%
> Percentage under 35 FPS: 45.714285714285715%
> Percentage under 30 FPS: 11.428571428571429%
> Percentage under 25 FPS: 2.857142857142857%
>
> After reverting: [4.19 behaviour]
>
> 30 measurements
> Average: 49.833666666666666 FPS
> FPS observed: 33.85 - 60.0 FPS
> Percentage under 60 FPS: 86.66666666666667%
> Percentage under 55 FPS: 70.0%
> Percentage under 50 FPS: 53.333333333333336%
> Percentage under 45 FPS: 20.0%
> Percentage under 40 FPS: 6.666666666666667%
> Percentage under 35 FPS: 6.666666666666667%
> Percentage under 30 FPS: 0%
> Percentage under 25 FPS: 0%
>
> Patched:
> 42 measurements
> Average: 46.05428571428571 FPS
> FPS observed: 1.82 - 59.98 FPS
> Percentage under 60 FPS: 88.09523809523809%
> Percentage under 55 FPS: 61.904761904761905%
> Percentage under 50 FPS: 45.23809523809524%
> Percentage under 45 FPS: 35.714285714285715%
> Percentage under 40 FPS: 33.33333333333333%
> Percentage under 35 FPS: 19.047619047619047%
> Percentage under 30 FPS: 7.142857142857142%
> Percentage under 25 FPS: 4.761904761904762%

Tested-by: Lyude Paul <lyude@redhat.com>
---
 drivers/gpu/drm/i915/i915_irq.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 92bb32ed27fb..7c7e84e86c6a 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1288,6 +1288,18 @@ static void gen6_pm_rps_work(struct work_struct *work)
 
 	rps->last_adj = adj;
 
+	/*
+	 * Limit deboosting and boosting to keep ourselves at the extremes
+	 * when in the respective power modes (i.e. slowly decrease frequencies
+	 * while in the HIGH_POWER zone and slowly increase frequencies while
+	 * in the LOW_POWER zone). On idle, we will hit the timeout and drop
+	 * to the next level quickly, and conversely if busy we expect to
+	 * hit a waitboost and rapidly switch into max power.
+	 */
+	if ((adj < 0 && rps->power.mode == HIGH_POWER) ||
+	    (adj > 0 && rps->power.mode == LOW_POWER))
+		rps->last_adj = 0;
+
 	/* sysfs frequency interfaces may have snuck in while servicing the
 	 * interrupt
 	 */
-- 
2.20.1


* [PATCH 14/25] drm/i915/pmu: Use GT parked for estimating RC6 while asleep
  2019-02-19 12:21 [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Chris Wilson
                   ` (11 preceding siblings ...)
  2019-02-19 12:22 ` [PATCH 13/25] drm/i915: Reduce the RPS shock Chris Wilson
@ 2019-02-19 12:22 ` Chris Wilson
  2019-02-19 12:22 ` [PATCH 15/25] drm/i915: Skip scanning for signalers if we are already inflight Chris Wilson
                   ` (15 subsequent siblings)
  28 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-02-19 12:22 UTC (permalink / raw)
  To: intel-gfx

As we track when we put the GT device to sleep upon idling, we can use
the parking callback to sample the current rc6 counters and record the
timestamp, from which we can estimate the rc6 residency accrued while
asleep.
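
The estimate amounts to assuming the GT spends the entire parked period
in rc6; a simplified sketch (names follow the patch, locking elided):

	static u64 estimate_rc6(const struct i915_pmu *pmu)
	{
		unsigned long asleep = jiffies - pmu->sleep_timestamp;

		/* last real sample at park + time asleep counted as rc6 */
		return pmu->sample[__I915_SAMPLE_RC6].cur +
		       jiffies_to_nsecs(asleep);
	}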

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_pmu.c | 83 ++++++++++++++++++++-------------
 drivers/gpu/drm/i915/i915_pmu.h |  4 +-
 2 files changed, 52 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 21adad72bd86..206f47d12533 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -110,17 +110,49 @@ static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
 	return enable;
 }
 
+static u64 __get_rc6(struct drm_i915_private *i915)
+{
+	u64 val;
+
+	val = intel_rc6_residency_ns(i915,
+				     IS_VALLEYVIEW(i915) ?
+				     VLV_GT_RENDER_RC6 :
+				     GEN6_GT_GFX_RC6);
+
+	if (HAS_RC6p(i915))
+		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+
+	if (HAS_RC6pp(i915))
+		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+
+	return val;
+}
+
 void i915_pmu_gt_parked(struct drm_i915_private *i915)
 {
+	u64 val;
+
 	if (!i915->pmu.base.event_init)
 		return;
 
+	val = 0;
+	if (i915->pmu.sample[__I915_SAMPLE_RC6].cur)
+		val = __get_rc6(i915);
+
 	spin_lock_irq(&i915->pmu.lock);
+
+	if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
+		i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
+		i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
+	}
+	i915->pmu.sleep_timestamp = jiffies;
+
 	/*
 	 * Signal sampling timer to stop if only engine events are enabled and
 	 * GPU went idle.
 	 */
 	i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
+
 	spin_unlock_irq(&i915->pmu.lock);
 }
 
@@ -141,10 +173,23 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
 		return;
 
 	spin_lock_irq(&i915->pmu.lock);
+
 	/*
 	 * Re-enable sampling timer when GPU goes active.
 	 */
 	__i915_pmu_maybe_start_timer(i915);
+
+	/* Estimate how long we slept and accumulate that into rc6 counters */
+	if (i915->pmu.sample[__I915_SAMPLE_RC6].cur) {
+		u64 val;
+
+		val = jiffies - i915->pmu.sleep_timestamp;
+		val = jiffies_to_nsecs(val);
+		val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
+
+		i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
+	}
+
 	spin_unlock_irq(&i915->pmu.lock);
 }
 
@@ -411,24 +456,6 @@ static int i915_pmu_event_init(struct perf_event *event)
 	return 0;
 }
 
-static u64 __get_rc6(struct drm_i915_private *i915)
-{
-	u64 val;
-
-	val = intel_rc6_residency_ns(i915,
-				     IS_VALLEYVIEW(i915) ?
-				     VLV_GT_RENDER_RC6 :
-				     GEN6_GT_GFX_RC6);
-
-	if (HAS_RC6p(i915))
-		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
-
-	if (HAS_RC6pp(i915))
-		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
-
-	return val;
-}
-
 static u64 get_rc6(struct drm_i915_private *i915)
 {
 #if IS_ENABLED(CONFIG_PM)
@@ -436,7 +463,9 @@ static u64 get_rc6(struct drm_i915_private *i915)
 	unsigned long flags;
 	u64 val;
 
-	wakeref = intel_runtime_pm_get_if_in_use(i915);
+	wakeref = 0;
+	if (READ_ONCE(i915->gt.awake))
+		wakeref = intel_runtime_pm_get_if_in_use(i915);
 	if (wakeref) {
 		val = __get_rc6(i915);
 		intel_runtime_pm_put(i915, wakeref);
@@ -458,9 +487,6 @@ static u64 get_rc6(struct drm_i915_private *i915)
 
 		spin_unlock_irqrestore(&i915->pmu.lock, flags);
 	} else {
-		struct pci_dev *pdev = i915->drm.pdev;
-		struct device *kdev = &pdev->dev;
-
 		/*
 		 * We are runtime suspended.
 		 *
@@ -469,7 +495,6 @@ static u64 get_rc6(struct drm_i915_private *i915)
 		 * counter value.
 		 */
 		spin_lock_irqsave(&i915->pmu.lock, flags);
-		spin_lock(&kdev->power.lock);
 
 		/*
 		 * After the above branch intel_runtime_pm_get_if_in_use failed
@@ -482,15 +507,8 @@ static u64 get_rc6(struct drm_i915_private *i915)
 		 * suspended and if not we cannot do better than report the last
 		 * known RC6 value.
 		 */
-		if (kdev->power.runtime_status == RPM_SUSPENDED) {
-			if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
-				i915->pmu.suspended_jiffies_last =
-						  kdev->power.suspended_jiffies;
-
-			val = kdev->power.suspended_jiffies -
-			      i915->pmu.suspended_jiffies_last;
-			val += jiffies - kdev->power.accounting_timestamp;
-
+		if (!READ_ONCE(i915->gt.awake)) {
+			val = jiffies - i915->pmu.sleep_timestamp;
 			val = jiffies_to_nsecs(val);
 			val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
 
@@ -501,7 +519,6 @@ static u64 get_rc6(struct drm_i915_private *i915)
 			val = i915->pmu.sample[__I915_SAMPLE_RC6].cur;
 		}
 
-		spin_unlock(&kdev->power.lock);
 		spin_unlock_irqrestore(&i915->pmu.lock, flags);
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index b3728c5f13e7..6fa0240a1704 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -97,9 +97,9 @@ struct i915_pmu {
 	 */
 	struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
 	/**
-	 * @suspended_jiffies_last: Cached suspend time from PM core.
+	 * @sleep_timestamp: Last time GT parked for RC6 estimation.
 	 */
-	unsigned long suspended_jiffies_last;
+	unsigned long sleep_timestamp;
 	/**
 	 * @i915_attr: Memory block holding device attributes.
 	 */
-- 
2.20.1


* [PATCH 15/25] drm/i915: Skip scanning for signalers if we are already inflight
  2019-02-19 12:21 [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Chris Wilson
                   ` (12 preceding siblings ...)
  2019-02-19 12:22 ` [PATCH 14/25] drm/i915/pmu: Use GT parked for estimating RC6 while asleep Chris Wilson
@ 2019-02-19 12:22 ` Chris Wilson
  2019-02-19 12:22 ` [PATCH 16/25] drm/i915/execlists: Suppress mere WAIT preemption Chris Wilson
                   ` (14 subsequent siblings)
  28 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-02-19 12:22 UTC (permalink / raw)
  To: intel-gfx

When a request has its priority changed, we traverse the graph of all of
its signalers to raise their priorities to match (priority inheritance).
If the request has already started executing its payload, we know that
all of its signalers must have signaled and we do not need to process
our list of signalers.
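
In the dependency walk this becomes a one-line prune (simplified from
the hunk below): a node whose payload has started can have no
unsignaled signalers, so its subtree needs no visit:

	list_for_each_entry(dep, &dfs, dfs_link) {
		struct i915_sched_node *node = dep->signaler;

		if (node_started(node))
			continue; /* running => all signalers fired */

		/* ... otherwise accumulate node's signalers onto the dfs ... */
	}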

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_scheduler.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 8bc042551692..38efefd22dce 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -18,6 +18,11 @@ node_to_request(const struct i915_sched_node *node)
 	return container_of(node, const struct i915_request, sched);
 }
 
+static inline bool node_started(const struct i915_sched_node *node)
+{
+	return i915_request_started(node_to_request(node));
+}
+
 static inline bool node_signaled(const struct i915_sched_node *node)
 {
 	return i915_request_completed(node_to_request(node));
@@ -301,6 +306,10 @@ static void __i915_schedule(struct i915_request *rq,
 	list_for_each_entry(dep, &dfs, dfs_link) {
 		struct i915_sched_node *node = dep->signaler;
 
+		/* If we are already flying, we know we have no signalers */
+		if (node_started(node))
+			continue;
+
 		/*
 		 * Within an engine, there can be no cycle, but we may
 		 * refer to the same dependency chain multiple times
-- 
2.20.1


* [PATCH 16/25] drm/i915/execlists: Suppress mere WAIT preemption
  2019-02-19 12:21 [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Chris Wilson
                   ` (13 preceding siblings ...)
  2019-02-19 12:22 ` [PATCH 15/25] drm/i915: Skip scanning for signalers if we are already inflight Chris Wilson
@ 2019-02-19 12:22 ` Chris Wilson
  2019-02-19 12:22 ` [PATCH 17/25] drm/i915/execlists: Suppress redundant preemption Chris Wilson
                   ` (13 subsequent siblings)
  28 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-02-19 12:22 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

WAIT is occasionally suppressed by virtue of preempted requests being
promoted to NEWCLIENT if they have not already received that boost. Make
this consistent for all WAIT boosts: they are not allowed to preempt
executing contexts and are merely granted the right to be at the front of
the queue for the next execution slot. This is in keeping with the desire
that the WAIT boost be a minor tweak that does not give excessive
promotion to its user, which would open us up to trivial abuse.

The problem with the inconsistent WAIT preemption becomes more apparent
as the preemption is propagated across the engines, where one engine may
preempt and another not, while we may be relying on the exact execution
order being consistent across engines (e.g. using HW semaphores to
coordinate parallel execution).
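
The mechanism is just a priority mask: treat the executing request as if
it already owns any bit that a WAIT boost could confer, so a queued
request whose only advantage is the WAIT boost never compares strictly
greater. A minimal standalone sketch (illustrative, not the driver code
itself):

	#define PRIORITY_WAIT	(1 << 0)	/* boost given to waiters */
	#define NO_PREEMPTION	PRIORITY_WAIT

	static bool need_preempt_sketch(int queue_prio, int active_prio)
	{
		/* only a strictly higher priority may preempt */
		return queue_prio > (active_prio | NO_PREEMPTION);
	}

The waiter is still moved to the front of the queue, but has to wait for
the next natural context switch instead of forcing a preempt-to-idle
cycle.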

v2: Also protect GuC submission from false preemption loops.
v3: Build bug safeguards and better debug messages for st.
v4: Do the priority bumping in unsubmit (i.e. on preemption/reset
unwind), applying it earlier during submit causes out-of-order execution
combined with execute fences.
v5: Call sw_fence_fini for our dummy request (Matthew)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> #v3
Cc: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/i915_request.c         |  15 ++
 drivers/gpu/drm/i915/i915_scheduler.c       |   1 -
 drivers/gpu/drm/i915/i915_scheduler.h       |   2 +
 drivers/gpu/drm/i915/intel_guc_submission.c |   2 +-
 drivers/gpu/drm/i915/intel_lrc.c            |   9 +-
 drivers/gpu/drm/i915/selftests/intel_lrc.c  | 163 ++++++++++++++++++++
 6 files changed, 189 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 04424932d479..2762796a1a54 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -353,11 +353,14 @@ void __i915_request_submit(struct i915_request *request)
 
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+
 	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
 	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
+
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
 	    !i915_request_enable_breadcrumb(request))
 		intel_engine_queue_breadcrumbs(engine);
+
 	spin_unlock(&request->lock);
 
 	engine->emit_fini_breadcrumb(request,
@@ -401,10 +404,22 @@ void __i915_request_unsubmit(struct i915_request *request)
 
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+
+	/*
+	 * As we do not allow WAIT to preempt inflight requests,
+	 * once we have executed a request, along with triggering
+	 * any execution callbacks, we must preserve its ordering
+	 * within the non-preemptible FIFO.
+	 */
+	BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
+	request->sched.attr.priority |= __NO_PREEMPTION;
+
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
 		i915_request_cancel_breadcrumb(request);
+
 	GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
 	clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
+
 	spin_unlock(&request->lock);
 
 	/* Transfer back from the global per-engine timeline to per-context */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 38efefd22dce..9fb96ff57a29 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -322,7 +322,6 @@ static void __i915_schedule(struct i915_request *rq,
 			if (node_signaled(p->signaler))
 				continue;
 
-			GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority);
 			if (prio > READ_ONCE(p->signaler->attr.priority))
 				list_move_tail(&p->dfs_link, &dfs);
 		}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index dbe9cb7ecd82..54bd6c89817e 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -33,6 +33,8 @@ enum {
 #define I915_PRIORITY_WAIT	((u8)BIT(0))
 #define I915_PRIORITY_NEWCLIENT	((u8)BIT(1))
 
+#define __NO_PREEMPTION (I915_PRIORITY_WAIT)
+
 struct i915_sched_attr {
 	/**
 	 * @priority: execution and service priority
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 20cbceeabeae..a2846ea1e62c 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -720,7 +720,7 @@ static inline int rq_prio(const struct i915_request *rq)
 
 static inline int port_prio(const struct execlist_port *port)
 {
-	return rq_prio(port_request(port));
+	return rq_prio(port_request(port)) | __NO_PREEMPTION;
 }
 
 static bool __guc_dequeue(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d638ce9de089..ed53de5335c3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -188,6 +188,12 @@ static inline int rq_prio(const struct i915_request *rq)
 	return rq->sched.attr.priority;
 }
 
+static int effective_prio(const struct i915_request *rq)
+{
+	/* Restrict mere WAIT boosts from triggering preemption */
+	return rq_prio(rq) | __NO_PREEMPTION;
+}
+
 static int queue_prio(const struct intel_engine_execlists *execlists)
 {
 	struct i915_priolist *p;
@@ -208,7 +214,7 @@ static int queue_prio(const struct intel_engine_execlists *execlists)
 static inline bool need_preempt(const struct intel_engine_cs *engine,
 				const struct i915_request *rq)
 {
-	const int last_prio = rq_prio(rq);
+	int last_prio;
 
 	if (!intel_engine_has_preemption(engine))
 		return false;
@@ -228,6 +234,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 	 * preempt. If that hint is stale or we may be trying to preempt
 	 * ourselves, ignore the request.
 	 */
+	last_prio = effective_prio(rq);
 	if (!__execlists_need_preempt(engine->execlists.queue_priority_hint,
 				      last_prio))
 		return false;
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 58144e024751..5c1f7e7ca25f 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -407,6 +407,168 @@ static int live_suppress_self_preempt(void *arg)
 	goto err_client_b;
 }
 
+static int __i915_sw_fence_call
+dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+	return NOTIFY_DONE;
+}
+
+static struct i915_request *dummy_request(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+
+	rq = kmalloc(sizeof(*rq), GFP_KERNEL | __GFP_ZERO);
+	if (!rq)
+		return NULL;
+
+	INIT_LIST_HEAD(&rq->active_list);
+	rq->engine = engine;
+
+	i915_sched_node_init(&rq->sched);
+
+	/* mark this request as permanently incomplete */
+	rq->fence.seqno = 1;
+	BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
+	rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
+	GEM_BUG_ON(i915_request_completed(rq));
+
+	i915_sw_fence_init(&rq->submit, dummy_notify);
+	i915_sw_fence_commit(&rq->submit);
+
+	return rq;
+}
+
+static void dummy_request_free(struct i915_request *dummy)
+{
+	i915_request_mark_complete(dummy);
+	i915_sched_node_fini(dummy->engine->i915, &dummy->sched);
+	i915_sw_fence_fini(&dummy->submit);
+
+	dma_fence_free(&dummy->fence);
+}
+
+static int live_suppress_wait_preempt(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct preempt_client client[4];
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
+	int err = -ENOMEM;
+	int i;
+
+	/*
+	 * Waiters are given a little priority nudge, but not enough
+	 * to actually cause any preemption. Double check that we do
+	 * not needlessly generate preempt-to-idle cycles.
+	 */
+
+	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(i915);
+
+	if (preempt_client_init(i915, &client[0])) /* ELSP[0] */
+		goto err_unlock;
+	if (preempt_client_init(i915, &client[1])) /* ELSP[1] */
+		goto err_client_0;
+	if (preempt_client_init(i915, &client[2])) /* head of queue */
+		goto err_client_1;
+	if (preempt_client_init(i915, &client[3])) /* bystander */
+		goto err_client_2;
+
+	for_each_engine(engine, i915, id) {
+		int depth;
+
+		if (!engine->emit_init_breadcrumb)
+			continue;
+
+		for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
+			struct i915_request *rq[ARRAY_SIZE(client)];
+			struct i915_request *dummy;
+
+			engine->execlists.preempt_hang.count = 0;
+
+			dummy = dummy_request(engine);
+			if (!dummy)
+				goto err_client_3;
+
+			for (i = 0; i < ARRAY_SIZE(client); i++) {
+				rq[i] = igt_spinner_create_request(&client[i].spin,
+								   client[i].ctx, engine,
+								   MI_NOOP);
+				if (IS_ERR(rq[i])) {
+					err = PTR_ERR(rq[i]);
+					goto err_wedged;
+				}
+
+				/* Disable NEWCLIENT promotion */
+				__i915_active_request_set(&rq[i]->timeline->last_request,
+							  dummy);
+				i915_request_add(rq[i]);
+			}
+
+			dummy_request_free(dummy);
+
+			GEM_BUG_ON(i915_request_completed(rq[0]));
+			if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
+				pr_err("%s: First client failed to start\n",
+				       engine->name);
+				goto err_wedged;
+			}
+			GEM_BUG_ON(!i915_request_started(rq[0]));
+
+			if (i915_request_wait(rq[depth],
+					      I915_WAIT_LOCKED |
+					      I915_WAIT_PRIORITY,
+					      1) != -ETIME) {
+				pr_err("%s: Waiter depth:%d completed!\n",
+				       engine->name, depth);
+				goto err_wedged;
+			}
+
+			for (i = 0; i < ARRAY_SIZE(client); i++)
+				igt_spinner_end(&client[i].spin);
+
+			if (igt_flush_test(i915, I915_WAIT_LOCKED))
+				goto err_wedged;
+
+			if (engine->execlists.preempt_hang.count) {
+				pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
+				       engine->name,
+				       engine->execlists.preempt_hang.count,
+				       depth);
+				err = -EINVAL;
+				goto err_client_3;
+			}
+		}
+	}
+
+	err = 0;
+err_client_3:
+	preempt_client_fini(&client[3]);
+err_client_2:
+	preempt_client_fini(&client[2]);
+err_client_1:
+	preempt_client_fini(&client[1]);
+err_client_0:
+	preempt_client_fini(&client[0]);
+err_unlock:
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+	intel_runtime_pm_put(i915, wakeref);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+
+err_wedged:
+	for (i = 0; i < ARRAY_SIZE(client); i++)
+		igt_spinner_end(&client[i].spin);
+	i915_gem_set_wedged(i915);
+	err = -EIO;
+	goto err_client_3;
+}
+
 static int live_chain_preempt(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
@@ -887,6 +1049,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_preempt),
 		SUBTEST(live_late_preempt),
 		SUBTEST(live_suppress_self_preempt),
+		SUBTEST(live_suppress_wait_preempt),
 		SUBTEST(live_chain_preempt),
 		SUBTEST(live_preempt_hang),
 		SUBTEST(live_preempt_smoke),
-- 
2.20.1


* [PATCH 17/25] drm/i915/execlists: Suppress redundant preemption
  2019-02-19 12:21 [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Chris Wilson
                   ` (14 preceding siblings ...)
  2019-02-19 12:22 ` [PATCH 16/25] drm/i915/execlists: Suppress mere WAIT preemption Chris Wilson
@ 2019-02-19 12:22 ` Chris Wilson
  2019-02-19 12:22 ` [PATCH 18/25] drm/i915: Make request allocation caches global Chris Wilson
                   ` (12 subsequent siblings)
  28 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-02-19 12:22 UTC (permalink / raw)
  To: intel-gfx

On unwinding the active request we give it a small boost (limited to the
internal priority levels) to prevent it from being gazumped a second
time. However, this means that it can be promoted above the request that
triggered the preemption, causing a preempt-to-idle cycle for no net
change. We can avoid this if we take the boost into account when checking
whether the preemption request is valid.
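
A worked example (values from this series, where NEWCLIENT == BIT(1) and
priorities compare numerically): an executing request at priority 0 is,
on unwind, bumped to 0 | NEWCLIENT == 2 but requeued at the *tail* of
that level. A pending request already at priority 2 would legitimately
run before it, so we treat the active request as having effective
priority (prio | ACTIVE_PRIORITY) - 1 == 1: a preemption hint of 2 is
still valid, while anything lower is suppressed.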

v2: After preemption the active request will be after the preemptee if
they end up with equal priority.

v3: Tvrtko pointed out that this, the existing logic, makes
I915_PRIORITY_WAIT non-preemptible. Document this interesting quirk!

v4: Prove Tvrtko was right about WAIT being non-preemptible and test it.
v5: Except not all priorities were made equal, and the WAIT not preempting
is only if we start off as !NEWCLIENT.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 38 ++++++++++++++++++++++++++++----
 1 file changed, 34 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ed53de5335c3..5a3bbf4ded35 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -164,6 +164,8 @@
 #define WA_TAIL_DWORDS 2
 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
 
+#define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT)
+
 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 					    struct intel_engine_cs *engine,
 					    struct intel_context *ce);
@@ -190,8 +192,30 @@ static inline int rq_prio(const struct i915_request *rq)
 
 static int effective_prio(const struct i915_request *rq)
 {
+	int prio = rq_prio(rq);
+
+	/*
+	 * On unwinding the active request, we give it a priority bump
+	 * equivalent to a freshly submitted request. This protects it from
+	 * being gazumped again, but it would be preferable if we didn't
+	 * let it be gazumped in the first place!
+	 *
+	 * See __unwind_incomplete_requests()
+	 */
+	if (~prio & ACTIVE_PRIORITY && __i915_request_has_started(rq)) {
+		/*
+		 * After preemption, we insert the active request at the
+		 * end of the new priority level. This means that we will be
+		 * _lower_ priority than the preemptee all things equal (and
+		 * so the preemption is valid), so adjust our comparison
+		 * accordingly.
+		 */
+		prio |= ACTIVE_PRIORITY;
+		prio--;
+	}
+
 	/* Restrict mere WAIT boosts from triggering preemption */
-	return rq_prio(rq) | __NO_PREEMPTION;
+	return prio | __NO_PREEMPTION;
 }
 
 static int queue_prio(const struct intel_engine_execlists *execlists)
@@ -359,7 +383,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 {
 	struct i915_request *rq, *rn, *active = NULL;
 	struct list_head *uninitialized_var(pl);
-	int prio = I915_PRIORITY_INVALID | I915_PRIORITY_NEWCLIENT;
+	int prio = I915_PRIORITY_INVALID | ACTIVE_PRIORITY;
 
 	lockdep_assert_held(&engine->timeline.lock);
 
@@ -390,9 +414,15 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	 * The active request is now effectively the start of a new client
 	 * stream, so give it the equivalent small priority bump to prevent
 	 * it being gazumped a second time by another peer.
+	 *
+	 * One consequence of this preemption boost is that we may jump
+	 * over lesser priorities (such as I915_PRIORITY_WAIT), effectively
+	 * making those priorities non-preemptible. They will be moved forward
+	 * in the priority queue, but they will not gain immediate access to
+	 * the GPU.
 	 */
-	if (!(prio & I915_PRIORITY_NEWCLIENT)) {
-		prio |= I915_PRIORITY_NEWCLIENT;
+	if (~prio & ACTIVE_PRIORITY && __i915_request_has_started(active)) {
+		prio |= ACTIVE_PRIORITY;
 		active->sched.attr.priority = prio;
 		list_move_tail(&active->sched.link,
 			       i915_sched_lookup_priolist(engine, prio));
-- 
2.20.1


* [PATCH 18/25] drm/i915: Make request allocation caches global
  2019-02-19 12:21 [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Chris Wilson
                   ` (15 preceding siblings ...)
  2019-02-19 12:22 ` [PATCH 17/25] drm/i915/execlists: Suppress redundant preemption Chris Wilson
@ 2019-02-19 12:22 ` Chris Wilson
  2019-02-19 12:22 ` [PATCH 19/25] drm/i915: Introduce i915_timeline.mutex Chris Wilson
                   ` (11 subsequent siblings)
  28 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-02-19 12:22 UTC (permalink / raw)
  To: intel-gfx

As kmem_caches share the same properties (size, allocation/free
behaviour) for all potential devices, we can use global caches. While
this potentially has worse fragmentation behaviour (one can argue that
different devices would have different activity lifetimes, but you can
also argue that activity is temporal across the system), it is the
default behaviour of the system at large to amalgamate matching caches.

The benefit for us is much reduced pointer dancing along the frequent
allocation paths.

v2: Defer shrinking until after a global grace period for futureproofing
multiple consumers of the slab caches, similar to the current strategy
for avoiding shrinking too early.
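
The deferred shrink of v2 follows a simple epoch pattern; a compact
restatement of the i915_globals.c logic in this patch:

	/* unpark: mark activity and invalidate any pending shrink */
	atomic_inc(&epoch);
	atomic_inc(&active);

	/* park: when the last user idles, queue work after a grace period */
	if (atomic_dec_and_test(&active)) {
		wrk->epoch = atomic_inc_return(&epoch);
		queue_rcu_work(system_wq, &wrk->work);
	}

	/* worker: shrink only if nothing woke up in the meantime */
	if (wrk->epoch == atomic_read(&epoch))
		i915_globals_shrink();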

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/i915_active.c            |   7 +-
 drivers/gpu/drm/i915/i915_active.h            |   1 +
 drivers/gpu/drm/i915/i915_drv.h               |   3 -
 drivers/gpu/drm/i915/i915_gem.c               |  34 +-----
 drivers/gpu/drm/i915/i915_globals.c           | 113 ++++++++++++++++++
 drivers/gpu/drm/i915/i915_globals.h           |  15 +++
 drivers/gpu/drm/i915/i915_pci.c               |   8 +-
 drivers/gpu/drm/i915/i915_request.c           |  53 ++++++--
 drivers/gpu/drm/i915/i915_request.h           |  10 ++
 drivers/gpu/drm/i915/i915_scheduler.c         |  66 +++++++---
 drivers/gpu/drm/i915/i915_scheduler.h         |  34 +++++-
 drivers/gpu/drm/i915/intel_guc_submission.c   |   3 +-
 drivers/gpu/drm/i915/intel_lrc.c              |   6 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h       |  17 ---
 drivers/gpu/drm/i915/selftests/intel_lrc.c    |   2 +-
 drivers/gpu/drm/i915/selftests/mock_engine.c  |  45 ++++---
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  26 ----
 drivers/gpu/drm/i915/selftests/mock_request.c |  12 +-
 drivers/gpu/drm/i915/selftests/mock_request.h |   7 --
 20 files changed, 313 insertions(+), 150 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_globals.c
 create mode 100644 drivers/gpu/drm/i915/i915_globals.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 1787e1299b1b..a1d834068765 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -77,6 +77,7 @@ i915-y += \
 	  i915_gem_tiling.o \
 	  i915_gem_userptr.o \
 	  i915_gemfs.o \
+	  i915_globals.o \
 	  i915_query.o \
 	  i915_request.o \
 	  i915_scheduler.o \
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index db7bb5bd5add..d9f6471ac16c 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -294,7 +294,12 @@ int __init i915_global_active_init(void)
 	return 0;
 }
 
-void __exit i915_global_active_exit(void)
+void i915_global_active_shrink(void)
+{
+	kmem_cache_shrink(global.slab_cache);
+}
+
+void i915_global_active_exit(void)
 {
 	kmem_cache_destroy(global.slab_cache);
 }
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 12b5c1d287d1..5fbd9102384b 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -420,6 +420,7 @@ static inline void i915_active_fini(struct i915_active *ref) { }
 #endif
 
 int i915_global_active_init(void);
+void i915_global_active_shrink(void);
 void i915_global_active_exit(void);
 
 #endif /* _I915_ACTIVE_H_ */
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5c8d0489a1cd..abf1a90b6975 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1468,9 +1468,6 @@ struct drm_i915_private {
 	struct kmem_cache *objects;
 	struct kmem_cache *vmas;
 	struct kmem_cache *luts;
-	struct kmem_cache *requests;
-	struct kmem_cache *dependencies;
-	struct kmem_cache *priorities;
 
 	const struct intel_device_info __info; /* Use INTEL_INFO() to access. */
 	struct intel_runtime_info __runtime; /* Use RUNTIME_INFO() to access. */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b3129cb0a04d..652754864edc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -42,6 +42,7 @@
 #include "i915_drv.h"
 #include "i915_gem_clflush.h"
 #include "i915_gemfs.h"
+#include "i915_globals.h"
 #include "i915_reset.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
@@ -187,6 +188,8 @@ void i915_gem_unpark(struct drm_i915_private *i915)
 	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
 		i915->gt.epoch = 1;
 
+	i915_globals_unpark();
+
 	intel_enable_gt_powersave(i915);
 	i915_update_gfx_val(i915);
 	if (INTEL_GEN(i915) >= 6)
@@ -2892,12 +2895,11 @@ static void shrink_caches(struct drm_i915_private *i915)
 	 * filled slabs to prioritise allocating from the mostly full slabs,
 	 * with the aim of reducing fragmentation.
 	 */
-	kmem_cache_shrink(i915->priorities);
-	kmem_cache_shrink(i915->dependencies);
-	kmem_cache_shrink(i915->requests);
 	kmem_cache_shrink(i915->luts);
 	kmem_cache_shrink(i915->vmas);
 	kmem_cache_shrink(i915->objects);
+
+	i915_globals_park();
 }
 
 struct sleep_rcu_work {
@@ -5234,23 +5236,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
 	if (!dev_priv->luts)
 		goto err_vmas;
 
-	dev_priv->requests = KMEM_CACHE(i915_request,
-					SLAB_HWCACHE_ALIGN |
-					SLAB_RECLAIM_ACCOUNT |
-					SLAB_TYPESAFE_BY_RCU);
-	if (!dev_priv->requests)
-		goto err_luts;
-
-	dev_priv->dependencies = KMEM_CACHE(i915_dependency,
-					    SLAB_HWCACHE_ALIGN |
-					    SLAB_RECLAIM_ACCOUNT);
-	if (!dev_priv->dependencies)
-		goto err_requests;
-
-	dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
-	if (!dev_priv->priorities)
-		goto err_dependencies;
-
 	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
 	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
 
@@ -5275,12 +5260,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
 
 	return 0;
 
-err_dependencies:
-	kmem_cache_destroy(dev_priv->dependencies);
-err_requests:
-	kmem_cache_destroy(dev_priv->requests);
-err_luts:
-	kmem_cache_destroy(dev_priv->luts);
 err_vmas:
 	kmem_cache_destroy(dev_priv->vmas);
 err_objects:
@@ -5298,9 +5277,6 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
 
 	cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
 
-	kmem_cache_destroy(dev_priv->priorities);
-	kmem_cache_destroy(dev_priv->dependencies);
-	kmem_cache_destroy(dev_priv->requests);
 	kmem_cache_destroy(dev_priv->luts);
 	kmem_cache_destroy(dev_priv->vmas);
 	kmem_cache_destroy(dev_priv->objects);
diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c
new file mode 100644
index 000000000000..7fd1b3945a04
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_globals.c
@@ -0,0 +1,113 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+
+#include "i915_active.h"
+#include "i915_globals.h"
+#include "i915_request.h"
+#include "i915_scheduler.h"
+
+int __init i915_globals_init(void)
+{
+	int err;
+
+	err = i915_global_active_init();
+	if (err)
+		return err;
+
+	err = i915_global_request_init();
+	if (err)
+		goto err_active;
+
+	err = i915_global_scheduler_init();
+	if (err)
+		goto err_request;
+
+	return 0;
+
+err_request:
+	i915_global_request_exit();
+err_active:
+	i915_global_active_exit();
+	return err;
+}
+
+static void i915_globals_shrink(void)
+{
+	/*
+	 * kmem_cache_shrink() discards empty slabs and reorders partially
+	 * filled slabs to prioritise allocating from the mostly full slabs,
+	 * with the aim of reducing fragmentation.
+	 */
+	i915_global_active_shrink();
+	i915_global_request_shrink();
+	i915_global_scheduler_shrink();
+}
+
+static atomic_t active;
+static atomic_t epoch;
+struct park_work {
+	struct rcu_work work;
+	int epoch;
+};
+
+static void __i915_globals_park(struct work_struct *work)
+{
+	struct park_work *wrk = container_of(work, typeof(*wrk), work.work);
+
+	/* Confirm nothing woke up in the last grace period */
+	if (wrk->epoch == atomic_read(&epoch))
+		i915_globals_shrink();
+
+	kfree(wrk);
+}
+
+void i915_globals_park(void)
+{
+	struct park_work *wrk;
+
+	/*
+	 * Defer shrinking the global slab caches (and other work) until
+	 * after a RCU grace period has completed with no activity. This
+	 * is to try and reduce the latency impact on the consumers caused
+	 * by us shrinking the caches the same time as they are trying to
+	 * allocate, with the assumption being that if we idle long enough
+	 * for an RCU grace period to elapse since the last use, it is likely
+	 * to be longer until we need the caches again.
+	 */
+	if (!atomic_dec_and_test(&active))
+		return;
+
+	wrk = kmalloc(sizeof(*wrk), GFP_KERNEL);
+	if (!wrk)
+		return;
+
+	wrk->epoch = atomic_inc_return(&epoch);
+	INIT_RCU_WORK(&wrk->work, __i915_globals_park);
+	queue_rcu_work(system_wq, &wrk->work);
+}
+
+void i915_globals_unpark(void)
+{
+	atomic_inc(&epoch);
+	atomic_inc(&active);
+}
+
+void __exit i915_globals_exit(void)
+{
+	/* Flush any residual park_work */
+	rcu_barrier();
+	flush_scheduled_work();
+
+	i915_global_scheduler_exit();
+	i915_global_request_exit();
+	i915_global_active_exit();
+
+	/* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
+	rcu_barrier();
+}
diff --git a/drivers/gpu/drm/i915/i915_globals.h b/drivers/gpu/drm/i915/i915_globals.h
new file mode 100644
index 000000000000..e468f0413a73
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_globals.h
@@ -0,0 +1,15 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef _I915_GLOBALS_H_
+#define _I915_GLOBALS_H_
+
+int i915_globals_init(void);
+void i915_globals_park(void);
+void i915_globals_unpark(void);
+void i915_globals_exit(void);
+
+#endif /* _I915_GLOBALS_H_ */
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index c4d6b8da9b03..a9211c370cd1 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -28,8 +28,8 @@
 
 #include <drm/drm_drv.h>
 
-#include "i915_active.h"
 #include "i915_drv.h"
+#include "i915_globals.h"
 #include "i915_selftest.h"
 
 #define PLATFORM(x) .platform = (x), .platform_mask = BIT(x)
@@ -802,7 +802,9 @@ static int __init i915_init(void)
 	bool use_kms = true;
 	int err;
 
-	i915_global_active_init();
+	err = i915_globals_init();
+	if (err)
+		return err;
 
 	err = i915_mock_selftests();
 	if (err)
@@ -835,7 +837,7 @@ static void __exit i915_exit(void)
 		return;
 
 	pci_unregister_driver(&i915_pci_driver);
-	i915_global_active_exit();
+	i915_globals_exit();
 }
 
 module_init(i915_init);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 2762796a1a54..7de4abbbc226 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -32,6 +32,11 @@
 #include "i915_active.h"
 #include "i915_reset.h"
 
+static struct i915_global_request {
+	struct kmem_cache *slab_requests;
+	struct kmem_cache *slab_dependencies;
+} global;
+
 static const char *i915_fence_get_driver_name(struct dma_fence *fence)
 {
 	return "i915";
@@ -86,7 +91,7 @@ static void i915_fence_release(struct dma_fence *fence)
 	 */
 	i915_sw_fence_fini(&rq->submit);
 
-	kmem_cache_free(rq->i915->requests, rq);
+	kmem_cache_free(global.slab_requests, rq);
 }
 
 const struct dma_fence_ops i915_fence_ops = {
@@ -292,7 +297,7 @@ static void i915_request_retire(struct i915_request *request)
 
 	unreserve_gt(request->i915);
 
-	i915_sched_node_fini(request->i915, &request->sched);
+	i915_sched_node_fini(&request->sched);
 	i915_request_put(request);
 }
 
@@ -506,7 +511,7 @@ i915_request_alloc_slow(struct intel_context *ce)
 	ring_retire_requests(ring);
 
 out:
-	return kmem_cache_alloc(ce->gem_context->i915->requests, GFP_KERNEL);
+	return kmem_cache_alloc(global.slab_requests, GFP_KERNEL);
 }
 
 static int add_timeline_barrier(struct i915_request *rq)
@@ -593,7 +598,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 *
 	 * Do not use kmem_cache_zalloc() here!
 	 */
-	rq = kmem_cache_alloc(i915->requests,
+	rq = kmem_cache_alloc(global.slab_requests,
 			      GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
 	if (unlikely(!rq)) {
 		rq = i915_request_alloc_slow(ce);
@@ -680,7 +685,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
 	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
 
-	kmem_cache_free(i915->requests, rq);
+	kmem_cache_free(global.slab_requests, rq);
 err_unreserve:
 	unreserve_gt(i915);
 	intel_context_unpin(ce);
@@ -699,9 +704,7 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
 		return 0;
 
 	if (to->engine->schedule) {
-		ret = i915_sched_node_add_dependency(to->i915,
-						     &to->sched,
-						     &from->sched);
+		ret = i915_sched_node_add_dependency(&to->sched, &from->sched);
 		if (ret < 0)
 			return ret;
 	}
@@ -1189,3 +1192,37 @@ void i915_retire_requests(struct drm_i915_private *i915)
 #include "selftests/mock_request.c"
 #include "selftests/i915_request.c"
 #endif
+
+int __init i915_global_request_init(void)
+{
+	global.slab_requests = KMEM_CACHE(i915_request,
+					  SLAB_HWCACHE_ALIGN |
+					  SLAB_RECLAIM_ACCOUNT |
+					  SLAB_TYPESAFE_BY_RCU);
+	if (!global.slab_requests)
+		return -ENOMEM;
+
+	global.slab_dependencies = KMEM_CACHE(i915_dependency,
+					      SLAB_HWCACHE_ALIGN |
+					      SLAB_RECLAIM_ACCOUNT);
+	if (!global.slab_dependencies)
+		goto err_requests;
+
+	return 0;
+
+err_requests:
+	kmem_cache_destroy(global.slab_requests);
+	return -ENOMEM;
+}
+
+void i915_global_request_shrink(void)
+{
+	kmem_cache_shrink(global.slab_dependencies);
+	kmem_cache_shrink(global.slab_requests);
+}
+
+void i915_global_request_exit(void)
+{
+	kmem_cache_destroy(global.slab_dependencies);
+	kmem_cache_destroy(global.slab_requests);
+}
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 1e127c1c53fa..be3ded6bcf56 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -29,6 +29,7 @@
 
 #include "i915_gem.h"
 #include "i915_scheduler.h"
+#include "i915_selftest.h"
 #include "i915_sw_fence.h"
 
 #include <uapi/drm/i915_drm.h>
@@ -196,6 +197,11 @@ struct i915_request {
 	struct drm_i915_file_private *file_priv;
 	/** file_priv list entry for this request */
 	struct list_head client_link;
+
+	I915_SELFTEST_DECLARE(struct {
+		struct list_head link;
+		unsigned long delay;
+	} mock;)
 };
 
 #define I915_FENCE_GFP (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
@@ -371,4 +377,8 @@ static inline void i915_request_mark_complete(struct i915_request *rq)
 
 void i915_retire_requests(struct drm_i915_private *i915);
 
+int i915_global_request_init(void);
+void i915_global_request_shrink(void);
+void i915_global_request_exit(void);
+
 #endif /* I915_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 9fb96ff57a29..50018ad30233 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -10,6 +10,11 @@
 #include "i915_request.h"
 #include "i915_scheduler.h"
 
+static struct i915_global_scheduler {
+	struct kmem_cache *slab_dependencies;
+	struct kmem_cache *slab_priorities;
+} global;
+
 static DEFINE_SPINLOCK(schedule_lock);
 
 static const struct i915_request *
@@ -37,16 +42,15 @@ void i915_sched_node_init(struct i915_sched_node *node)
 }
 
 static struct i915_dependency *
-i915_dependency_alloc(struct drm_i915_private *i915)
+i915_dependency_alloc(void)
 {
-	return kmem_cache_alloc(i915->dependencies, GFP_KERNEL);
+	return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL);
 }
 
 static void
-i915_dependency_free(struct drm_i915_private *i915,
-		     struct i915_dependency *dep)
+i915_dependency_free(struct i915_dependency *dep)
 {
-	kmem_cache_free(i915->dependencies, dep);
+	kmem_cache_free(global.slab_dependencies, dep);
 }
 
 bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
@@ -73,25 +77,23 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 	return ret;
 }
 
-int i915_sched_node_add_dependency(struct drm_i915_private *i915,
-				   struct i915_sched_node *node,
+int i915_sched_node_add_dependency(struct i915_sched_node *node,
 				   struct i915_sched_node *signal)
 {
 	struct i915_dependency *dep;
 
-	dep = i915_dependency_alloc(i915);
+	dep = i915_dependency_alloc();
 	if (!dep)
 		return -ENOMEM;
 
 	if (!__i915_sched_node_add_dependency(node, signal, dep,
 					      I915_DEPENDENCY_ALLOC))
-		i915_dependency_free(i915, dep);
+		i915_dependency_free(dep);
 
 	return 0;
 }
 
-void i915_sched_node_fini(struct drm_i915_private *i915,
-			  struct i915_sched_node *node)
+void i915_sched_node_fini(struct i915_sched_node *node)
 {
 	struct i915_dependency *dep, *tmp;
 
@@ -111,7 +113,7 @@ void i915_sched_node_fini(struct drm_i915_private *i915,
 
 		list_del(&dep->wait_link);
 		if (dep->flags & I915_DEPENDENCY_ALLOC)
-			i915_dependency_free(i915, dep);
+			i915_dependency_free(dep);
 	}
 
 	/* Remove ourselves from everyone who depends upon us */
@@ -121,7 +123,7 @@ void i915_sched_node_fini(struct drm_i915_private *i915,
 
 		list_del(&dep->signal_link);
 		if (dep->flags & I915_DEPENDENCY_ALLOC)
-			i915_dependency_free(i915, dep);
+			i915_dependency_free(dep);
 	}
 
 	spin_unlock(&schedule_lock);
@@ -198,7 +200,7 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
 	if (prio == I915_PRIORITY_NORMAL) {
 		p = &execlists->default_priolist;
 	} else {
-		p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC);
+		p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC);
 		/* Convert an allocation failure to a priority bump */
 		if (unlikely(!p)) {
 			prio = I915_PRIORITY_NORMAL; /* recurses just once */
@@ -423,3 +425,39 @@ void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
 
 	spin_unlock_bh(&schedule_lock);
 }
+
+void __i915_priolist_free(struct i915_priolist *p)
+{
+	kmem_cache_free(global.slab_priorities, p);
+}
+
+int __init i915_global_scheduler_init(void)
+{
+	global.slab_dependencies = KMEM_CACHE(i915_dependency,
+					      SLAB_HWCACHE_ALIGN);
+	if (!global.slab_dependencies)
+		return -ENOMEM;
+
+	global.slab_priorities = KMEM_CACHE(i915_priolist,
+					    SLAB_HWCACHE_ALIGN);
+	if (!global.slab_priorities)
+		goto err_priorities;
+
+	return 0;
+
+err_priorities:
+	kmem_cache_destroy(global.slab_priorities);
+	return -ENOMEM;
+}
+
+void i915_global_scheduler_shrink(void)
+{
+	kmem_cache_shrink(global.slab_dependencies);
+	kmem_cache_shrink(global.slab_priorities);
+}
+
+void i915_global_scheduler_exit(void)
+{
+	kmem_cache_destroy(global.slab_dependencies);
+	kmem_cache_destroy(global.slab_priorities);
+}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 54bd6c89817e..5196ce07b6c2 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -85,6 +85,23 @@ struct i915_dependency {
 #define I915_DEPENDENCY_ALLOC BIT(0)
 };
 
+struct i915_priolist {
+	struct list_head requests[I915_PRIORITY_COUNT];
+	struct rb_node node;
+	unsigned long used;
+	int priority;
+};
+
+#define priolist_for_each_request(it, plist, idx) \
+	for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
+		list_for_each_entry(it, &(plist)->requests[idx], sched.link)
+
+#define priolist_for_each_request_consume(it, n, plist, idx) \
+	for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
+		list_for_each_entry_safe(it, n, \
+					 &(plist)->requests[idx - 1], \
+					 sched.link)
+
 void i915_sched_node_init(struct i915_sched_node *node);
 
 bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
@@ -92,12 +109,10 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 				      struct i915_dependency *dep,
 				      unsigned long flags);
 
-int i915_sched_node_add_dependency(struct drm_i915_private *i915,
-				   struct i915_sched_node *node,
+int i915_sched_node_add_dependency(struct i915_sched_node *node,
 				   struct i915_sched_node *signal);
 
-void i915_sched_node_fini(struct drm_i915_private *i915,
-			  struct i915_sched_node *node);
+void i915_sched_node_fini(struct i915_sched_node *node);
 
 void i915_schedule(struct i915_request *request,
 		   const struct i915_sched_attr *attr);
@@ -107,4 +122,15 @@ void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump);
 struct list_head *
 i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
 
+void __i915_priolist_free(struct i915_priolist *p);
+static inline void i915_priolist_free(struct i915_priolist *p)
+{
+	if (p->priority != I915_PRIORITY_NORMAL)
+		__i915_priolist_free(p);
+}
+
+int i915_global_scheduler_init(void);
+void i915_global_scheduler_shrink(void);
+void i915_global_scheduler_exit(void);
+
 #endif /* _I915_SCHEDULER_H_ */
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index a2846ea1e62c..56ba2fcbabe6 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -781,8 +781,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
-		if (p->priority != I915_PRIORITY_NORMAL)
-			kmem_cache_free(engine->i915->priorities, p);
+		i915_priolist_free(p);
 	}
 done:
 	execlists->queue_priority_hint =
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5a3bbf4ded35..d1695037f9ca 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -818,8 +818,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
-		if (p->priority != I915_PRIORITY_NORMAL)
-			kmem_cache_free(engine->i915->priorities, p);
+		i915_priolist_free(p);
 	}
 
 done:
@@ -972,8 +971,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
-		if (p->priority != I915_PRIORITY_NORMAL)
-			kmem_cache_free(engine->i915->priorities, p);
+		i915_priolist_free(p);
 	}
 
 	/* Remaining _unready_ requests will be nop'ed when submitted */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index de8dba7565b0..5284f243931a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -187,23 +187,6 @@ enum intel_engine_id {
 #define _VECS(n) (VECS + (n))
 };
 
-struct i915_priolist {
-	struct list_head requests[I915_PRIORITY_COUNT];
-	struct rb_node node;
-	unsigned long used;
-	int priority;
-};
-
-#define priolist_for_each_request(it, plist, idx) \
-	for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
-		list_for_each_entry(it, &(plist)->requests[idx], sched.link)
-
-#define priolist_for_each_request_consume(it, n, plist, idx) \
-	for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
-		list_for_each_entry_safe(it, n, \
-					 &(plist)->requests[idx - 1], \
-					 sched.link)
-
 struct st_preempt_hang {
 	struct completion completion;
 	unsigned int count;
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 5c1f7e7ca25f..7dde177b0f85 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -441,7 +441,7 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
 static void dummy_request_free(struct i915_request *dummy)
 {
 	i915_request_mark_complete(dummy);
-	i915_sched_node_fini(dummy->engine->i915, &dummy->sched);
+	i915_sched_node_fini(&dummy->sched);
 	i915_sw_fence_fini(&dummy->submit);
 
 	dma_fence_free(&dummy->fence);
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 6f3fb803c747..ec1ae948954c 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -76,26 +76,26 @@ static void mock_ring_free(struct intel_ring *base)
 	kfree(ring);
 }
 
-static struct mock_request *first_request(struct mock_engine *engine)
+static struct i915_request *first_request(struct mock_engine *engine)
 {
 	return list_first_entry_or_null(&engine->hw_queue,
-					struct mock_request,
-					link);
+					struct i915_request,
+					mock.link);
 }
 
-static void advance(struct mock_request *request)
+static void advance(struct i915_request *request)
 {
-	list_del_init(&request->link);
-	i915_request_mark_complete(&request->base);
-	GEM_BUG_ON(!i915_request_completed(&request->base));
+	list_del_init(&request->mock.link);
+	i915_request_mark_complete(request);
+	GEM_BUG_ON(!i915_request_completed(request));
 
-	intel_engine_queue_breadcrumbs(request->base.engine);
+	intel_engine_queue_breadcrumbs(request->engine);
 }
 
 static void hw_delay_complete(struct timer_list *t)
 {
 	struct mock_engine *engine = from_timer(engine, t, hw_delay);
-	struct mock_request *request;
+	struct i915_request *request;
 	unsigned long flags;
 
 	spin_lock_irqsave(&engine->hw_lock, flags);
@@ -110,8 +110,9 @@ static void hw_delay_complete(struct timer_list *t)
 	 * requeue the timer for the next delayed request.
 	 */
 	while ((request = first_request(engine))) {
-		if (request->delay) {
-			mod_timer(&engine->hw_delay, jiffies + request->delay);
+		if (request->mock.delay) {
+			mod_timer(&engine->hw_delay,
+				  jiffies + request->mock.delay);
 			break;
 		}
 
@@ -169,10 +170,8 @@ mock_context_pin(struct intel_engine_cs *engine,
 
 static int mock_request_alloc(struct i915_request *request)
 {
-	struct mock_request *mock = container_of(request, typeof(*mock), base);
-
-	INIT_LIST_HEAD(&mock->link);
-	mock->delay = 0;
+	INIT_LIST_HEAD(&request->mock.link);
+	request->mock.delay = 0;
 
 	return 0;
 }
@@ -190,7 +189,6 @@ static u32 *mock_emit_breadcrumb(struct i915_request *request, u32 *cs)
 
 static void mock_submit_request(struct i915_request *request)
 {
-	struct mock_request *mock = container_of(request, typeof(*mock), base);
 	struct mock_engine *engine =
 		container_of(request->engine, typeof(*engine), base);
 	unsigned long flags;
@@ -198,12 +196,13 @@ static void mock_submit_request(struct i915_request *request)
 	i915_request_submit(request);
 
 	spin_lock_irqsave(&engine->hw_lock, flags);
-	list_add_tail(&mock->link, &engine->hw_queue);
-	if (mock->link.prev == &engine->hw_queue) {
-		if (mock->delay)
-			mod_timer(&engine->hw_delay, jiffies + mock->delay);
+	list_add_tail(&request->mock.link, &engine->hw_queue);
+	if (list_is_first(&request->mock.link, &engine->hw_queue)) {
+		if (request->mock.delay)
+			mod_timer(&engine->hw_delay,
+				  jiffies + request->mock.delay);
 		else
-			advance(mock);
+			advance(request);
 	}
 	spin_unlock_irqrestore(&engine->hw_lock, flags);
 }
@@ -263,12 +262,12 @@ void mock_engine_flush(struct intel_engine_cs *engine)
 {
 	struct mock_engine *mock =
 		container_of(engine, typeof(*mock), base);
-	struct mock_request *request, *rn;
+	struct i915_request *request, *rn;
 
 	del_timer_sync(&mock->hw_delay);
 
 	spin_lock_irq(&mock->hw_lock);
-	list_for_each_entry_safe(request, rn, &mock->hw_queue, link)
+	list_for_each_entry_safe(request, rn, &mock->hw_queue, mock.link)
 		advance(request);
 	spin_unlock_irq(&mock->hw_lock);
 }
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index fc516a2970f4..5a98caba6d69 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -79,9 +79,6 @@ static void mock_device_release(struct drm_device *dev)
 
 	destroy_workqueue(i915->wq);
 
-	kmem_cache_destroy(i915->priorities);
-	kmem_cache_destroy(i915->dependencies);
-	kmem_cache_destroy(i915->requests);
 	kmem_cache_destroy(i915->vmas);
 	kmem_cache_destroy(i915->objects);
 
@@ -211,23 +208,6 @@ struct drm_i915_private *mock_gem_device(void)
 	if (!i915->vmas)
 		goto err_objects;
 
-	i915->requests = KMEM_CACHE(mock_request,
-				    SLAB_HWCACHE_ALIGN |
-				    SLAB_RECLAIM_ACCOUNT |
-				    SLAB_TYPESAFE_BY_RCU);
-	if (!i915->requests)
-		goto err_vmas;
-
-	i915->dependencies = KMEM_CACHE(i915_dependency,
-					SLAB_HWCACHE_ALIGN |
-					SLAB_RECLAIM_ACCOUNT);
-	if (!i915->dependencies)
-		goto err_requests;
-
-	i915->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
-	if (!i915->priorities)
-		goto err_dependencies;
-
 	i915_timelines_init(i915);
 
 	INIT_LIST_HEAD(&i915->gt.active_rings);
@@ -257,12 +237,6 @@ struct drm_i915_private *mock_gem_device(void)
 err_unlock:
 	mutex_unlock(&i915->drm.struct_mutex);
 	i915_timelines_fini(i915);
-	kmem_cache_destroy(i915->priorities);
-err_dependencies:
-	kmem_cache_destroy(i915->dependencies);
-err_requests:
-	kmem_cache_destroy(i915->requests);
-err_vmas:
 	kmem_cache_destroy(i915->vmas);
 err_objects:
 	kmem_cache_destroy(i915->objects);
diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c
index 0dc29e242597..d1a7c9608712 100644
--- a/drivers/gpu/drm/i915/selftests/mock_request.c
+++ b/drivers/gpu/drm/i915/selftests/mock_request.c
@@ -31,29 +31,25 @@ mock_request(struct intel_engine_cs *engine,
 	     unsigned long delay)
 {
 	struct i915_request *request;
-	struct mock_request *mock;
 
 	/* NB the i915->requests slab cache is enlarged to fit mock_request */
 	request = i915_request_alloc(engine, context);
 	if (IS_ERR(request))
 		return NULL;
 
-	mock = container_of(request, typeof(*mock), base);
-	mock->delay = delay;
-
-	return &mock->base;
+	request->mock.delay = delay;
+	return request;
 }
 
 bool mock_cancel_request(struct i915_request *request)
 {
-	struct mock_request *mock = container_of(request, typeof(*mock), base);
 	struct mock_engine *engine =
 		container_of(request->engine, typeof(*engine), base);
 	bool was_queued;
 
 	spin_lock_irq(&engine->hw_lock);
-	was_queued = !list_empty(&mock->link);
-	list_del_init(&mock->link);
+	was_queued = !list_empty(&request->mock.link);
+	list_del_init(&request->mock.link);
 	spin_unlock_irq(&engine->hw_lock);
 
 	if (was_queued)
diff --git a/drivers/gpu/drm/i915/selftests/mock_request.h b/drivers/gpu/drm/i915/selftests/mock_request.h
index 995fb728380c..4acf0211df20 100644
--- a/drivers/gpu/drm/i915/selftests/mock_request.h
+++ b/drivers/gpu/drm/i915/selftests/mock_request.h
@@ -29,13 +29,6 @@
 
 #include "../i915_request.h"
 
-struct mock_request {
-	struct i915_request base;
-
-	struct list_head link;
-	unsigned long delay;
-};
-
 struct i915_request *
 mock_request(struct intel_engine_cs *engine,
 	     struct i915_gem_context *context,
-- 
2.20.1


* [PATCH 19/25] drm/i915: Introduce i915_timeline.mutex
  2019-02-19 12:21 [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Chris Wilson
                   ` (16 preceding siblings ...)
  2019-02-19 12:22 ` [PATCH 18/25] drm/i915: Make request allocation caches global Chris Wilson
@ 2019-02-19 12:22 ` Chris Wilson
  2019-02-19 12:22 ` [PATCH 20/25] drm/i915: Keep timeline HWSP allocated until idle across the system Chris Wilson
                   ` (10 subsequent siblings)
  28 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-02-19 12:22 UTC (permalink / raw)
  To: intel-gfx

A simple mutex used for guarding the flow of requests in and out of the
timeline. In the short term, it will be used only to guard the addition
of requests into the timeline, taken on alloc and released on commit so
that only one caller can construct a request into the timeline
(important as the seqno and ring pointers must be serialised). This will
be used by observers to ensure that the seqno/hwsp is stable. Later,
when we have reduced retiring to only operate on a single timeline at a
time, we can then use the mutex as the sole guard required for retiring.
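
The calling pattern, as implemented by this patch:

	rq = i915_request_alloc(engine, ctx);	/* takes timeline->mutex */
	if (IS_ERR(rq))
		return PTR_ERR(rq);	/* mutex already dropped on error */

	/* ... construct the request ... */

	i915_request_add(rq);			/* drops timeline->mutex */

so request construction is serialised per timeline without relying on
struct_mutex.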

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c            | 6 +++++-
 drivers/gpu/drm/i915/i915_timeline.c           | 1 +
 drivers/gpu/drm/i915/i915_timeline.h           | 2 ++
 drivers/gpu/drm/i915/selftests/i915_request.c  | 2 --
 drivers/gpu/drm/i915/selftests/mock_timeline.c | 1 +
 5 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 7de4abbbc226..40053232b794 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -562,6 +562,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 		return ERR_CAST(ce);
 
 	reserve_gt(i915);
+	mutex_lock(&ce->ring->timeline->mutex);
 
 	/* Move our oldest request to the slab-cache (if not in use!) */
 	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
@@ -687,6 +688,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 
 	kmem_cache_free(global.slab_requests, rq);
 err_unreserve:
+	mutex_unlock(&ce->ring->timeline->mutex);
 	unreserve_gt(i915);
 	intel_context_unpin(ce);
 	return ERR_PTR(ret);
@@ -879,7 +881,7 @@ void i915_request_add(struct i915_request *request)
 	GEM_TRACE("%s fence %llx:%lld\n",
 		  engine->name, request->fence.context, request->fence.seqno);
 
-	lockdep_assert_held(&request->i915->drm.struct_mutex);
+	lockdep_assert_held(&request->timeline->mutex);
 	trace_i915_request_add(request);
 
 	/*
@@ -990,6 +992,8 @@ void i915_request_add(struct i915_request *request)
 	 */
 	if (prev && i915_request_completed(prev))
 		i915_request_retire_upto(prev);
+
+	mutex_unlock(&request->timeline->mutex);
 }
 
 static unsigned long local_clock_us(unsigned int *cpu)
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index b2202d2e58a2..87a80558da28 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -162,6 +162,7 @@ int i915_timeline_init(struct drm_i915_private *i915,
 	timeline->fence_context = dma_fence_context_alloc(1);
 
 	spin_lock_init(&timeline->lock);
+	mutex_init(&timeline->mutex);
 
 	INIT_ACTIVE_REQUEST(&timeline->barrier);
 	INIT_ACTIVE_REQUEST(&timeline->last_request);
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 7bec7d2e45bf..36c3849f7108 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -44,6 +44,8 @@ struct i915_timeline {
 #define TIMELINE_CLIENT 0 /* default subclass */
 #define TIMELINE_ENGINE 1
 
+	struct mutex mutex; /* protects the flow of requests */
+
 	unsigned int pin_count;
 	const u32 *hwsp_seqno;
 	struct i915_vma *hwsp_ggtt;
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 074d393f4a02..afa66f51c239 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -141,14 +141,12 @@ static int igt_fence_wait(void *arg)
 		err = -ENOMEM;
 		goto out_locked;
 	}
-	mutex_unlock(&i915->drm.struct_mutex); /* safe as we are single user */
 
 	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
 		pr_err("fence wait success before submit (expected timeout)!\n");
 		goto out_device;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
 	i915_request_add(request);
 	mutex_unlock(&i915->drm.struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index d2de9ece2118..416d85233263 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -14,6 +14,7 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 	timeline->fence_context = context;
 
 	spin_lock_init(&timeline->lock);
+	mutex_init(&timeline->mutex);
 
 	INIT_ACTIVE_REQUEST(&timeline->barrier);
 	INIT_ACTIVE_REQUEST(&timeline->last_request);
-- 
2.20.1


* [PATCH 20/25] drm/i915: Keep timeline HWSP allocated until idle across the system
  2019-02-19 12:21 [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Chris Wilson
                   ` (17 preceding siblings ...)
  2019-02-19 12:22 ` [PATCH 19/25] drm/i915: Introduce i915_timeline.mutex Chris Wilson
@ 2019-02-19 12:22 ` Chris Wilson
  2019-02-19 12:22 ` [PATCH 21/25] drm/i915: Compute the global scheduler caps Chris Wilson
                   ` (9 subsequent siblings)
  28 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-02-19 12:22 UTC (permalink / raw)
  To: intel-gfx

In preparation for enabling HW semaphores, we need to keep the in-flight
timeline HWSP alive until its use across the entire system has completed,
as any other timeline active on the GPU may still refer back to the
already retired timeline. We have to delay both the recycling of
available cachelines and the unpinning of the old HWSP until the next
idle point.

An easy option would be to simply keep all used HWSP until the system as
a whole was idle, i.e. we could release them all at once on parking.
However, on a busy system, we may never see a global idle point,
essentially meaning the resource will be leaked until we are forced to
do a GC pass. We already employ a fine-grained idle detection mechanism
for vma, which we can reuse here so that each cacheline can be freed
immediately after the last request using it is retired.
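
Conceptually, each cacheline of the HWSP page gains its own activity
tracker; a rough sketch of the idea (not the exact layout of
i915_timeline_cacheline in this patch):

	struct i915_timeline_cacheline {
		struct i915_active active;	/* tracks every reader */
		struct i915_timeline_hwsp *hwsp;
		void *vaddr;	/* cacheline index packed into low bits, see v6 */
	};

The cacheline is returned to the free pool only once i915_active reports
that every request which sampled a seqno through it has retired.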

v3: Keep track of the activity of each cacheline.
v4: cacheline_free() on canceling the seqno tracking
v5: Finally with a testcase to exercise wraparound
v6: Pack cacheline into empty bits of page-aligned vaddr

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> #v5
---
 drivers/gpu/drm/i915/i915_request.c           |  31 +-
 drivers/gpu/drm/i915/i915_request.h           |  11 +
 drivers/gpu/drm/i915/i915_timeline.c          | 278 ++++++++++++++++--
 drivers/gpu/drm/i915/i915_timeline.h          |  10 +-
 .../gpu/drm/i915/selftests/i915_timeline.c    | 113 +++++++
 5 files changed, 404 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 40053232b794..6367c1ca6b5f 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -325,11 +325,6 @@ void i915_request_retire_upto(struct i915_request *rq)
 	} while (tmp != rq);
 }
 
-static u32 timeline_get_seqno(struct i915_timeline *tl)
-{
-	return tl->seqno += 1 + tl->has_initial_breadcrumb;
-}
-
 static void move_to_timeline(struct i915_request *request,
 			     struct i915_timeline *timeline)
 {
@@ -532,8 +527,10 @@ struct i915_request *
 i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 {
 	struct drm_i915_private *i915 = engine->i915;
-	struct i915_request *rq;
 	struct intel_context *ce;
+	struct i915_timeline *tl;
+	struct i915_request *rq;
+	u32 seqno;
 	int ret;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
@@ -609,24 +606,27 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 		}
 	}
 
-	rq->rcustate = get_state_synchronize_rcu();
-
 	INIT_LIST_HEAD(&rq->active_list);
+
+	tl = ce->ring->timeline;
+	ret = i915_timeline_get_seqno(tl, rq, &seqno);
+	if (ret)
+		goto err_free;
+
 	rq->i915 = i915;
 	rq->engine = engine;
 	rq->gem_context = ctx;
 	rq->hw_context = ce;
 	rq->ring = ce->ring;
-	rq->timeline = ce->ring->timeline;
+	rq->timeline = tl;
 	GEM_BUG_ON(rq->timeline == &engine->timeline);
-	rq->hwsp_seqno = rq->timeline->hwsp_seqno;
+	rq->hwsp_seqno = tl->hwsp_seqno;
+	rq->hwsp_cacheline = tl->hwsp_cacheline;
+	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
 
 	spin_lock_init(&rq->lock);
-	dma_fence_init(&rq->fence,
-		       &i915_fence_ops,
-		       &rq->lock,
-		       rq->timeline->fence_context,
-		       timeline_get_seqno(rq->timeline));
+	dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
+		       tl->fence_context, seqno);
 
 	/* We bump the ref for the fence chain */
 	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
@@ -686,6 +686,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
 	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
 
+err_free:
 	kmem_cache_free(global.slab_requests, rq);
 err_unreserve:
 	mutex_unlock(&ce->ring->timeline->mutex);
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index be3ded6bcf56..ea1e6f0ade53 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -38,6 +38,7 @@ struct drm_file;
 struct drm_i915_gem_object;
 struct i915_request;
 struct i915_timeline;
+struct i915_timeline_cacheline;
 
 struct i915_capture_list {
 	struct i915_capture_list *next;
@@ -148,6 +149,16 @@ struct i915_request {
 	 */
 	const u32 *hwsp_seqno;
 
+	/*
+	 * If we need to access the timeline's seqno for this request in
+	 * another request, we need to keep a read reference to this associated
+	 * cacheline, so that we do not free and recycle it before the foreign
+	 * observers have completed. Hence, we keep a pointer to the cacheline
+	 * inside the timeline's HWSP vma, but it is only valid while this
+	 * request has not completed, and it is guarded by the timeline mutex.
+	 */
+	struct i915_timeline_cacheline *hwsp_cacheline;
+
 	/** Position in the ring of the start of the request */
 	u32 head;
 
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 87a80558da28..e3f2617cf02c 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -6,19 +6,29 @@
 
 #include "i915_drv.h"
 
-#include "i915_timeline.h"
+#include "i915_active.h"
 #include "i915_syncmap.h"
+#include "i915_timeline.h"
 
 struct i915_timeline_hwsp {
-	struct i915_vma *vma;
+	struct i915_gt_timelines *gt;
 	struct list_head free_link;
+	struct i915_vma *vma;
 	u64 free_bitmap;
 };
 
-static inline struct i915_timeline_hwsp *
-i915_timeline_hwsp(const struct i915_timeline *tl)
+struct i915_timeline_cacheline {
+	struct i915_active active;
+	struct i915_timeline_hwsp *hwsp;
+	unsigned long vaddr;
+#define CACHELINE_MASK GENMASK(5, 0)
+#define CACHELINE_FREE BIT(6)
+};
+
+static inline struct drm_i915_private *
+hwsp_to_i915(struct i915_timeline_hwsp *hwsp)
 {
-	return tl->hwsp_ggtt->private;
+	return container_of(hwsp->gt, struct drm_i915_private, gt.timelines);
 }
 
 static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915)
@@ -71,6 +81,7 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
 		vma->private = hwsp;
 		hwsp->vma = vma;
 		hwsp->free_bitmap = ~0ull;
+		hwsp->gt = gt;
 
 		spin_lock(&gt->hwsp_lock);
 		list_add(&hwsp->free_link, &gt->hwsp_free_list);
@@ -88,14 +99,9 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
 	return hwsp->vma;
 }
 
-static void hwsp_free(struct i915_timeline *timeline)
+static void __idle_hwsp_free(struct i915_timeline_hwsp *hwsp, int cacheline)
 {
-	struct i915_gt_timelines *gt = &timeline->i915->gt.timelines;
-	struct i915_timeline_hwsp *hwsp;
-
-	hwsp = i915_timeline_hwsp(timeline);
-	if (!hwsp) /* leave global HWSP alone! */
-		return;
+	struct i915_gt_timelines *gt = hwsp->gt;
 
 	spin_lock(&gt->hwsp_lock);
 
@@ -103,7 +109,8 @@ static void hwsp_free(struct i915_timeline *timeline)
 	if (!hwsp->free_bitmap)
 		list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);
 
-	hwsp->free_bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
+	GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap));
+	hwsp->free_bitmap |= BIT_ULL(cacheline);
 
 	/* And if no one is left using it, give the page back to the system */
 	if (hwsp->free_bitmap == ~0ull) {
@@ -115,6 +122,77 @@ static void hwsp_free(struct i915_timeline *timeline)
 	spin_unlock(&gt->hwsp_lock);
 }
 
+static void __idle_cacheline_free(struct i915_timeline_cacheline *cl)
+{
+	GEM_BUG_ON(!i915_active_is_idle(&cl->active));
+
+	i915_gem_object_unpin_map(cl->hwsp->vma->obj);
+	i915_vma_put(cl->hwsp->vma);
+	__idle_hwsp_free(cl->hwsp, cl->vaddr & CACHELINE_MASK);
+
+	i915_active_fini(&cl->active);
+	kfree(cl);
+}
+
+static void __cacheline_retire(struct i915_active *active)
+{
+	struct i915_timeline_cacheline *cl =
+		container_of(active, typeof(*cl), active);
+
+	i915_vma_unpin(cl->hwsp->vma);
+	if (cl->vaddr & CACHELINE_FREE)
+		__idle_cacheline_free(cl);
+}
+
+static struct i915_timeline_cacheline *
+cacheline_alloc(struct i915_timeline_hwsp *hwsp, unsigned int cacheline)
+{
+	struct i915_timeline_cacheline *cl;
+	void *vaddr;
+
+	GEM_BUG_ON(cacheline > CACHELINE_MASK);
+
+	cl = kmalloc(sizeof(*cl), GFP_KERNEL);
+	if (!cl)
+		return ERR_PTR(-ENOMEM);
+
+	vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB);
+	if (IS_ERR(vaddr)) {
+		kfree(cl);
+		return ERR_CAST(vaddr);
+	}
+
+	i915_vma_get(hwsp->vma);
+	cl->hwsp = hwsp;
+	cl->vaddr = (unsigned long)vaddr;
+	cl->vaddr |= cacheline;
+
+	i915_active_init(hwsp_to_i915(hwsp), &cl->active, __cacheline_retire);
+
+	return cl;
+}
+
+static void cacheline_acquire(struct i915_timeline_cacheline *cl)
+{
+	if (cl && i915_active_acquire(&cl->active))
+		__i915_vma_pin(cl->hwsp->vma);
+}
+
+static void cacheline_release(struct i915_timeline_cacheline *cl)
+{
+	if (cl)
+		i915_active_release(&cl->active);
+}
+
+static void cacheline_free(struct i915_timeline_cacheline *cl)
+{
+	GEM_BUG_ON(cl->vaddr & CACHELINE_FREE);
+	cl->vaddr |= CACHELINE_FREE;
+
+	if (i915_active_is_idle(&cl->active))
+		__idle_cacheline_free(cl);
+}
+
 int i915_timeline_init(struct drm_i915_private *i915,
 		       struct i915_timeline *timeline,
 		       const char *name,
@@ -136,29 +214,40 @@ int i915_timeline_init(struct drm_i915_private *i915,
 	timeline->name = name;
 	timeline->pin_count = 0;
 	timeline->has_initial_breadcrumb = !hwsp;
+	timeline->hwsp_cacheline = NULL;
 
-	timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
 	if (!hwsp) {
+		struct i915_timeline_cacheline *cl;
 		unsigned int cacheline;
 
 		hwsp = hwsp_alloc(timeline, &cacheline);
 		if (IS_ERR(hwsp))
 			return PTR_ERR(hwsp);
 
+		cl = cacheline_alloc(hwsp->private, cacheline);
+		if (IS_ERR(cl)) {
+			__idle_hwsp_free(hwsp->private, cacheline);
+			return PTR_ERR(cl);
+		}
+
+		timeline->hwsp_cacheline = cl;
 		timeline->hwsp_offset = cacheline * CACHELINE_BYTES;
-	}
-	timeline->hwsp_ggtt = i915_vma_get(hwsp);
 
-	vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
-	if (IS_ERR(vaddr)) {
-		hwsp_free(timeline);
-		i915_vma_put(hwsp);
-		return PTR_ERR(vaddr);
+		vaddr = (void *)(cl->vaddr & PAGE_MASK);
+	} else {
+		timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
+
+		vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
+		if (IS_ERR(vaddr))
+			return PTR_ERR(vaddr);
 	}
 
 	timeline->hwsp_seqno =
 		memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);
 
+	timeline->hwsp_ggtt = i915_vma_get(hwsp);
+	GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);
+
 	timeline->fence_context = dma_fence_context_alloc(1);
 
 	spin_lock_init(&timeline->lock);
@@ -240,9 +329,12 @@ void i915_timeline_fini(struct i915_timeline *timeline)
 	GEM_BUG_ON(i915_active_request_isset(&timeline->barrier));
 
 	i915_syncmap_free(&timeline->sync);
-	hwsp_free(timeline);
 
-	i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
+	if (timeline->hwsp_cacheline)
+		cacheline_free(timeline->hwsp_cacheline);
+	else
+		i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
+
 	i915_vma_put(timeline->hwsp_ggtt);
 }
 
@@ -285,6 +377,7 @@ int i915_timeline_pin(struct i915_timeline *tl)
 		i915_ggtt_offset(tl->hwsp_ggtt) +
 		offset_in_page(tl->hwsp_offset);
 
+	cacheline_acquire(tl->hwsp_cacheline);
 	timeline_add_to_active(tl);
 
 	return 0;
@@ -294,6 +387,144 @@ int i915_timeline_pin(struct i915_timeline *tl)
 	return err;
 }
 
+static u32 timeline_advance(struct i915_timeline *tl)
+{
+	GEM_BUG_ON(!tl->pin_count);
+	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);
+
+	return tl->seqno += 1 + tl->has_initial_breadcrumb;
+}
+
+static void timeline_rollback(struct i915_timeline *tl)
+{
+	tl->seqno -= 1 + tl->has_initial_breadcrumb;
+}
+
+static noinline int
+__i915_timeline_get_seqno(struct i915_timeline *tl,
+			  struct i915_request *rq,
+			  u32 *seqno)
+{
+	struct i915_timeline_cacheline *cl;
+	unsigned int cacheline;
+	struct i915_vma *vma;
+	void *vaddr;
+	int err;
+
+	/*
+	 * If there is an outstanding GPU reference to this cacheline,
+	 * such as it being sampled by a HW semaphore on another timeline,
+	 * we cannot wrap around our seqno value (the HW semaphore does
+	 * a strict greater-than-or-equals compare, not i915_seqno_passed).
+	 * So if the cacheline is still busy, we must detach ourselves
+	 * from it and leave it inflight alongside its users.
+	 *
+	 * However, if nobody is watching and we can guarantee that nobody
+	 * will, we could simply reuse the same cacheline.
+	 *
+	 * if (i915_active_request_is_signaled(&tl->last_request) &&
+	 *     i915_active_is_signaled(&tl->hwsp_cacheline->active))
+	 *	return 0;
+	 *
+	 * That seems unlikely for a busy timeline that needed to wrap in
+	 * the first place, so just replace the cacheline.
+	 */
+
+	vma = hwsp_alloc(tl, &cacheline);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_rollback;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	if (err) {
+		__idle_hwsp_free(vma->private, cacheline);
+		goto err_rollback;
+	}
+
+	cl = cacheline_alloc(vma->private, cacheline);
+	if (IS_ERR(cl)) {
+		err = PTR_ERR(cl);
+		__idle_hwsp_free(vma->private, cacheline);
+		goto err_unpin;
+	}
+	GEM_BUG_ON(cl->hwsp->vma != vma);
+
+	/*
+	 * Attach the old cacheline to the current request, so that we only
+	 * free it after the current request is retired, which ensures that
+	 * all writes into the cacheline from previous requests are complete.
+	 */
+	err = i915_active_ref(&tl->hwsp_cacheline->active,
+			      tl->fence_context, rq);
+	if (err)
+		goto err_cacheline;
+
+	cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */
+	cacheline_free(tl->hwsp_cacheline);
+
+	i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */
+	i915_vma_put(tl->hwsp_ggtt);
+
+	tl->hwsp_ggtt = i915_vma_get(vma);
+
+	vaddr = (void *)(cl->vaddr & PAGE_MASK);
+	tl->hwsp_offset = cacheline * CACHELINE_BYTES;
+	tl->hwsp_seqno =
+		memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);
+
+	tl->hwsp_offset += i915_ggtt_offset(vma);
+
+	cacheline_acquire(cl);
+	tl->hwsp_cacheline = cl;
+
+	*seqno = timeline_advance(tl);
+	GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
+	return 0;
+
+err_cacheline:
+	cacheline_free(cl);
+err_unpin:
+	i915_vma_unpin(vma);
+err_rollback:
+	timeline_rollback(tl);
+	return err;
+}
+
+int i915_timeline_get_seqno(struct i915_timeline *tl,
+			    struct i915_request *rq,
+			    u32 *seqno)
+{
+	*seqno = timeline_advance(tl);
+
+	/* Replace the HWSP on wraparound for HW semaphores */
+	if (unlikely(!*seqno && tl->hwsp_cacheline))
+		return __i915_timeline_get_seqno(tl, rq, seqno);
+
+	return 0;
+}
+
+static int cacheline_ref(struct i915_timeline_cacheline *cl,
+			 struct i915_request *rq)
+{
+	return i915_active_ref(&cl->active, rq->fence.context, rq);
+}
+
+int i915_timeline_read_lock(struct i915_request *from, struct i915_request *to)
+{
+	int err;
+
+	GEM_BUG_ON(to->timeline == from->timeline);
+
+	mutex_lock_nested(&from->timeline->mutex, SINGLE_DEPTH_NESTING);
+	err = i915_request_completed(from);
+	if (!err)
+		err = cacheline_ref(from->hwsp_cacheline, to);
+	mutex_unlock(&from->timeline->mutex);
+
+	return err;
+}
+
 void i915_timeline_unpin(struct i915_timeline *tl)
 {
 	GEM_BUG_ON(!tl->pin_count);
@@ -301,6 +532,7 @@ void i915_timeline_unpin(struct i915_timeline *tl)
 		return;
 
 	timeline_remove_from_active(tl);
+	cacheline_release(tl->hwsp_cacheline);
 
 	/*
 	 * Since this timeline is idle, all barriers upon which we were waiting
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 36c3849f7108..5a674260f5b9 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -34,7 +34,7 @@
 #include "i915_utils.h"
 
 struct i915_vma;
-struct i915_timeline_hwsp;
+struct i915_timeline_cacheline;
 
 struct i915_timeline {
 	u64 fence_context;
@@ -51,6 +51,8 @@ struct i915_timeline {
 	struct i915_vma *hwsp_ggtt;
 	u32 hwsp_offset;
 
+	struct i915_timeline_cacheline *hwsp_cacheline;
+
 	bool has_initial_breadcrumb;
 
 	/**
@@ -162,8 +164,14 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
 }
 
 int i915_timeline_pin(struct i915_timeline *tl);
+int i915_timeline_get_seqno(struct i915_timeline *tl,
+			    struct i915_request *rq,
+			    u32 *seqno);
 void i915_timeline_unpin(struct i915_timeline *tl);
 
+int i915_timeline_read_lock(struct i915_request *from,
+			    struct i915_request *until);
+
 void i915_timelines_init(struct drm_i915_private *i915);
 void i915_timelines_park(struct drm_i915_private *i915);
 void i915_timelines_fini(struct drm_i915_private *i915);
diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c
index 12ea69b1a1e5..844701759ffc 100644
--- a/drivers/gpu/drm/i915/selftests/i915_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c
@@ -641,6 +641,118 @@ static int live_hwsp_alternate(void *arg)
 #undef NUM_TIMELINES
 }
 
+static int live_hwsp_wrap(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	struct i915_timeline *tl;
+	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
+	int err = 0;
+
+	/*
+	 * Across a seqno wrap, we need to keep the old cacheline alive for
+	 * foreign GPU references.
+	 */
+
+	mutex_lock(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(i915);
+
+	tl = i915_timeline_create(i915, __func__, NULL);
+	if (IS_ERR(tl)) {
+		err = PTR_ERR(tl);
+		goto out_rpm;
+	}
+	if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
+		goto out_free;
+
+	err = i915_timeline_pin(tl);
+	if (err)
+		goto out_free;
+
+	for_each_engine(engine, i915, id) {
+		const u32 *hwsp_seqno[2];
+		struct i915_request *rq;
+		u32 seqno[2];
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		rq = i915_request_alloc(engine, i915->kernel_context);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto out;
+		}
+
+		tl->seqno = -4u;
+
+		err = i915_timeline_get_seqno(tl, rq, &seqno[0]);
+		if (err) {
+			i915_request_add(rq);
+			goto out;
+		}
+		pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
+			 seqno[0], tl->hwsp_offset);
+
+		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
+		if (err) {
+			i915_request_add(rq);
+			goto out;
+		}
+		hwsp_seqno[0] = tl->hwsp_seqno;
+
+		err = i915_timeline_get_seqno(tl, rq, &seqno[1]);
+		if (err) {
+			i915_request_add(rq);
+			goto out;
+		}
+		pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
+			 seqno[1], tl->hwsp_offset);
+
+		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
+		if (err) {
+			i915_request_add(rq);
+			goto out;
+		}
+		hwsp_seqno[1] = tl->hwsp_seqno;
+
+		/* With wrap should come a new hwsp */
+		GEM_BUG_ON(seqno[1] >= seqno[0]);
+		GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);
+
+		i915_request_add(rq);
+
+		if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
+			pr_err("Wait for timeline writes timed out!\n");
+			err = -EIO;
+			goto out;
+		}
+
+		if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) {
+			pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
+			       *hwsp_seqno[0], *hwsp_seqno[1],
+			       seqno[0], seqno[1]);
+			err = -EINVAL;
+			goto out;
+		}
+
+		i915_retire_requests(i915); /* recycle HWSP */
+	}
+
+out:
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	i915_timeline_unpin(tl);
+out_free:
+	i915_timeline_put(tl);
+out_rpm:
+	intel_runtime_pm_put(i915, wakeref);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	return err;
+}
+
 static int live_hwsp_recycle(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
@@ -723,6 +835,7 @@ int i915_timeline_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_hwsp_recycle),
 		SUBTEST(live_hwsp_engine),
 		SUBTEST(live_hwsp_alternate),
+		SUBTEST(live_hwsp_wrap),
 	};
 
 	return i915_subtests(tests, i915);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

* [PATCH 21/25] drm/i915: Compute the global scheduler caps
  2019-02-19 12:21 [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Chris Wilson
                   ` (18 preceding siblings ...)
  2019-02-19 12:22 ` [PATCH 20/25] drm/i915: Keep timeline HWSP allocated until idle across the system Chris Wilson
@ 2019-02-19 12:22 ` Chris Wilson
  2019-02-19 12:22 ` [PATCH 22/25] drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+ Chris Wilson
                   ` (8 subsequent siblings)
  28 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-02-19 12:22 UTC (permalink / raw)
  To: intel-gfx

Do a pass over all the engines upon starting to determine the global
scheduler capability flags (those that are agreed upon by all).
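
A toy illustration of the reduction (the flag values here are invented;
only caps agreed upon by every engine survive the enabled & ~disabled
intersection):

#include <stdint.h>
#include <stdio.h>

#define CAP_ENABLED	(1u << 0)
#define CAP_PREEMPTION	(1u << 2)

int main(void)
{
	/* per-engine caps: rcs supports preemption, vcs does not */
	const uint32_t engines[] = {
		CAP_ENABLED | CAP_PREEMPTION,
		CAP_ENABLED,
	};
	uint32_t enabled = 0, disabled = 0, caps;
	unsigned int i;

	for (i = 0; i < 2; i++) {
		enabled |= engines[i];
		disabled |= ~engines[i];
	}

	caps = enabled & ~disabled;
	if (!(caps & CAP_ENABLED))	/* no scheduler at all? */
		caps = 0;

	printf("global caps: %#x\n", caps);	/* 0x1: preemption dropped */
	return 0;
}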

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c         |  2 ++
 drivers/gpu/drm/i915/intel_engine_cs.c  | 39 +++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.c        |  6 ----
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 ++
 4 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 652754864edc..ccc5e03e0e9f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4698,6 +4698,8 @@ static int __i915_gem_restart_engines(void *data)
 		}
 	}
 
+	intel_engines_set_scheduler_caps(i915);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 45ba3a71d17b..8c22f1b6c891 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -608,6 +608,45 @@ int intel_engine_setup_common(struct intel_engine_cs *engine)
 	return err;
 }
 
+void intel_engines_set_scheduler_caps(struct drm_i915_private *i915)
+{
+	static const struct {
+		u8 engine;
+		u8 sched;
+	} map[] = {
+#define MAP(x, y) { ilog2(I915_ENGINE_HAS_##x), ilog2(I915_SCHEDULER_CAP_##y) }
+		MAP(PREEMPTION, PREEMPTION),
+#undef MAP
+	};
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	u32 enabled, disabled;
+
+	enabled = 0;
+	disabled = 0;
+	for_each_engine(engine, i915, id) { /* all engines must agree! */
+		int i;
+
+		if (engine->schedule)
+			enabled |= (I915_SCHEDULER_CAP_ENABLED |
+				    I915_SCHEDULER_CAP_PRIORITY);
+		else
+			disabled |= (I915_SCHEDULER_CAP_ENABLED |
+				     I915_SCHEDULER_CAP_PRIORITY);
+
+		for (i = 0; i < ARRAY_SIZE(map); i++) {
+			if (engine->flags & BIT(map[i].engine))
+				enabled |= BIT(map[i].sched);
+			else
+				disabled |= BIT(map[i].sched);
+		}
+	}
+
+	i915->caps.scheduler = enabled & ~disabled;
+	if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_ENABLED))
+		i915->caps.scheduler = 0;
+}
+
 static void __intel_context_unpin(struct i915_gem_context *ctx,
 				  struct intel_engine_cs *engine)
 {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d1695037f9ca..76c8fd5cd229 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2327,12 +2327,6 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
 	if (engine->i915->preempt_context)
 		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
-
-	engine->i915->caps.scheduler =
-		I915_SCHEDULER_CAP_ENABLED |
-		I915_SCHEDULER_CAP_PRIORITY;
-	if (intel_engine_has_preemption(engine))
-		engine->i915->caps.scheduler |= I915_SCHEDULER_CAP_PREEMPTION;
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 5284f243931a..b8ec7e40a59b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -576,6 +576,8 @@ intel_engine_has_preemption(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
 }
 
+void intel_engines_set_scheduler_caps(struct drm_i915_private *i915);
+
 static inline bool __execlists_need_preempt(int prio, int last)
 {
 	/*
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

* [PATCH 22/25] drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+
  2019-02-19 12:21 [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Chris Wilson
                   ` (19 preceding siblings ...)
  2019-02-19 12:22 ` [PATCH 21/25] drm/i915: Compute the global scheduler caps Chris Wilson
@ 2019-02-19 12:22 ` Chris Wilson
  2019-02-19 12:22 ` [PATCH 23/25] drm/i915: Prioritise non-busywait semaphore workloads Chris Wilson
                   ` (7 subsequent siblings)
  28 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-02-19 12:22 UTC (permalink / raw)
  To: intel-gfx

Having introduced per-context seqnos, we now have a means to identify
progress across the system without fear of the rollback that befell the
global_seqno. That is, we can program a MI_SEMAPHORE_WAIT operation in
advance of submission, safe in the knowledge that our target seqno and
address are stable.

However, since we are telling the GPU to busy-spin on the target address
until it matches the signaling seqno, we only want to do so when we are
sure that the busy-spin will complete quickly. To achieve this we only
submit the request to HW once the signaler is itself executing (modulo
preemption causing us to wait longer), and only for requests of default
priority and above (so that idle priority tasks never themselves hog the
GPU waiting for others).

As might reasonably be expected, HW semaphores excel in inter-engine
synchronisation microbenchmarks (where the reduced latency / increased
throughput more than offset the power cost of spinning on a second ring)
and show a significant improvement for single clients that use multiple
engines (typically media players), without regressing multiple clients
that can saturate the system.
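
The wraparound hazard handled in emit_semaphore_wait() below is easiest
to see with concrete numbers. A minimal userspace illustration, assuming
32-bit seqnos, with seqno_passed() mimicking the kernel's
i915_seqno_passed():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* wrap-safe comparison, as done on the CPU side */
static bool seqno_passed(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) >= 0;
}

int main(void)
{
	uint32_t target = 0xffffffffu;	/* a waiter armed pre-wrap */
	uint32_t hwsp = 2;		/* HWSP value after wrapping */

	/* the CPU tolerates the wrap... */
	printf("seqno_passed: %d\n", seqno_passed(hwsp, target));	/* 1 */
	/* ...but MI_SEMAPHORE_SAD_GTE_SDD is a plain unsigned compare */
	printf("hwsp >= target: %d\n", hwsp >= target);			/* 0 */
	return 0;
}

Hence the HWSP swap on wraparound: waiters armed against the old values
keep polling the old cacheline, which still holds the large pre-wrap
seqno, rather than a location that has restarted from 1.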

v3: Drop the older NEQ branch, now we pin the signaler's HWSP anyway.
v4: Tell the world and include it as part of scheduler caps.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c           |   2 +-
 drivers/gpu/drm/i915/i915_request.c       | 137 +++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_request.h       |   1 +
 drivers/gpu/drm/i915/i915_sw_fence.c      |   4 +-
 drivers/gpu/drm/i915/i915_sw_fence.h      |   3 +
 drivers/gpu/drm/i915/intel_engine_cs.c    |   1 +
 drivers/gpu/drm/i915/intel_gpu_commands.h |   5 +
 drivers/gpu/drm/i915/intel_lrc.c          |   1 +
 drivers/gpu/drm/i915/intel_ringbuffer.h   |   7 ++
 include/uapi/drm/i915_drm.h               |   1 +
 10 files changed, 157 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 6630212f2faf..20894e373ee7 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -351,7 +351,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 		value = min_t(int, INTEL_PPGTT(dev_priv), I915_GEM_PPGTT_FULL);
 		break;
 	case I915_PARAM_HAS_SEMAPHORES:
-		value = 0;
+		value = !!(dev_priv->caps.scheduler & I915_SCHEDULER_CAP_SEMAPHORES);
 		break;
 	case I915_PARAM_HAS_SECURE_BATCHES:
 		value = capable(CAP_SYS_ADMIN);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 6367c1ca6b5f..439816b48eb6 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -22,8 +22,9 @@
  *
  */
 
-#include <linux/prefetch.h>
 #include <linux/dma-fence-array.h>
+#include <linux/irq_work.h>
+#include <linux/prefetch.h>
 #include <linux/sched.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/signal.h>
@@ -32,9 +33,16 @@
 #include "i915_active.h"
 #include "i915_reset.h"
 
+struct execute_cb {
+	struct list_head link;
+	struct irq_work work;
+	struct i915_sw_fence *fence;
+};
+
 static struct i915_global_request {
 	struct kmem_cache *slab_requests;
 	struct kmem_cache *slab_dependencies;
+	struct kmem_cache *slab_execute_cbs;
 } global;
 
 static const char *i915_fence_get_driver_name(struct dma_fence *fence)
@@ -325,6 +333,69 @@ void i915_request_retire_upto(struct i915_request *rq)
 	} while (tmp != rq);
 }
 
+static void irq_execute_cb(struct irq_work *wrk)
+{
+	struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
+
+	i915_sw_fence_complete(cb->fence);
+	kmem_cache_free(global.slab_execute_cbs, cb);
+}
+
+static void __notify_execute_cb(struct i915_request *rq)
+{
+	struct execute_cb *cb;
+
+	lockdep_assert_held(&rq->lock);
+
+	if (list_empty(&rq->execute_cb))
+		return;
+
+	list_for_each_entry(cb, &rq->execute_cb, link)
+		irq_work_queue(&cb->work);
+
+	/*
+	 * XXX Rollback on __i915_request_unsubmit()
+	 *
+	 * In the future, perhaps when we have an active time-slicing scheduler,
+	 * it will be interesting to unsubmit parallel execution and remove
+	 * busywaits from the GPU until their master is restarted. This is
+	 * quite hairy: we have to carefully roll back the fence and do a
+	 * preempt-to-idle cycle on the target engine, all the while the
+	 * master execute_cb may refire.
+	 */
+	INIT_LIST_HEAD(&rq->execute_cb);
+}
+
+static int
+i915_request_await_execution(struct i915_request *rq,
+			     struct i915_request *signal,
+			     gfp_t gfp)
+{
+	struct execute_cb *cb;
+
+	if (i915_request_is_active(signal))
+		return 0;
+
+	cb = kmem_cache_alloc(global.slab_execute_cbs, gfp);
+	if (!cb)
+		return -ENOMEM;
+
+	cb->fence = &rq->submit;
+	i915_sw_fence_await(cb->fence);
+	init_irq_work(&cb->work, irq_execute_cb);
+
+	spin_lock_irq(&signal->lock);
+	if (i915_request_is_active(signal)) {
+		i915_sw_fence_complete(cb->fence);
+		kmem_cache_free(global.slab_execute_cbs, cb);
+	} else {
+		list_add_tail(&cb->link, &signal->execute_cb);
+	}
+	spin_unlock_irq(&signal->lock);
+
+	return 0;
+}
+
 static void move_to_timeline(struct i915_request *request,
 			     struct i915_timeline *timeline)
 {
@@ -361,6 +432,8 @@ void __i915_request_submit(struct i915_request *request)
 	    !i915_request_enable_breadcrumb(request))
 		intel_engine_queue_breadcrumbs(engine);
 
+	__notify_execute_cb(request);
+
 	spin_unlock(&request->lock);
 
 	engine->emit_fini_breadcrumb(request,
@@ -607,6 +680,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	}
 
 	INIT_LIST_HEAD(&rq->active_list);
+	INIT_LIST_HEAD(&rq->execute_cb);
 
 	tl = ce->ring->timeline;
 	ret = i915_timeline_get_seqno(tl, rq, &seqno);
@@ -695,6 +769,51 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	return ERR_PTR(ret);
 }
 
+static int
+emit_semaphore_wait(struct i915_request *to,
+		    struct i915_request *from,
+		    gfp_t gfp)
+{
+	u32 *cs;
+	int err;
+
+	GEM_BUG_ON(!from->timeline->has_initial_breadcrumb);
+	GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
+
+	/* We need to pin the signaler's HWSP until we are finished reading. */
+	err = i915_timeline_read_lock(from, to);
+	if (err)
+		return err;
+
+	/* Only submit our spinner after the signaler is running! */
+	err = i915_request_await_execution(to, from, gfp);
+	if (err)
+		return err;
+
+	cs = intel_ring_begin(to, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	/*
+	 * Using greater-than-or-equal here means we have to worry
+	 * about seqno wraparound. To sidestep that issue, we swap
+	 * the timeline HWSP upon wrapping, so that no one listening
+	 * for the old (pre-wrap) values sees much smaller (post-wrap)
+	 * values than expected (and so waits forever).
+	 */
+	*cs++ = MI_SEMAPHORE_WAIT |
+		MI_SEMAPHORE_GLOBAL_GTT |
+		MI_SEMAPHORE_POLL |
+		MI_SEMAPHORE_SAD_GTE_SDD;
+	*cs++ = from->fence.seqno;
+	*cs++ = from->timeline->hwsp_offset;
+	*cs++ = 0;
+
+	intel_ring_advance(to, cs);
+	return 0;
+}
+
 static int
 i915_request_await_request(struct i915_request *to, struct i915_request *from)
 {
@@ -716,6 +835,9 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
 		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
 						       &from->submit,
 						       I915_FENCE_GFP);
+	} else if (intel_engine_has_semaphores(to->engine) &&
+		   to->gem_context->sched.priority >= I915_PRIORITY_NORMAL) {
+		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
 	} else {
 		ret = i915_sw_fence_await_dma_fence(&to->submit,
 						    &from->fence, 0,
@@ -1207,14 +1329,23 @@ int __init i915_global_request_init(void)
 	if (!global.slab_requests)
 		return -ENOMEM;
 
+	global.slab_execute_cbs = KMEM_CACHE(execute_cb,
+					     SLAB_HWCACHE_ALIGN |
+					     SLAB_RECLAIM_ACCOUNT |
+					     SLAB_TYPESAFE_BY_RCU);
+	if (!global.slab_execute_cbs)
+		goto err_requests;
+
 	global.slab_dependencies = KMEM_CACHE(i915_dependency,
 					      SLAB_HWCACHE_ALIGN |
 					      SLAB_RECLAIM_ACCOUNT);
 	if (!global.slab_dependencies)
-		goto err_requests;
+		goto err_execute_cbs;
 
 	return 0;
 
+err_execute_cbs:
+	kmem_cache_destroy(global.slab_execute_cbs);
 err_requests:
 	kmem_cache_destroy(global.slab_requests);
 	return -ENOMEM;
@@ -1223,11 +1354,13 @@ int __init i915_global_request_init(void)
 void i915_global_request_shrink(void)
 {
 	kmem_cache_shrink(global.slab_dependencies);
+	kmem_cache_shrink(global.slab_execute_cbs);
 	kmem_cache_shrink(global.slab_requests);
 }
 
 void i915_global_request_exit(void)
 {
 	kmem_cache_destroy(global.slab_dependencies);
+	kmem_cache_destroy(global.slab_execute_cbs);
 	kmem_cache_destroy(global.slab_requests);
 }
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index ea1e6f0ade53..3e0d62d35226 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -129,6 +129,7 @@ struct i915_request {
 	 */
 	struct i915_sw_fence submit;
 	wait_queue_entry_t submitq;
+	struct list_head execute_cb;
 
 	/*
 	 * A list of everyone we wait upon, and everyone who waits upon us.
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 7c58b049ecb5..8d1400d378d7 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -192,7 +192,7 @@ static void __i915_sw_fence_complete(struct i915_sw_fence *fence,
 	__i915_sw_fence_notify(fence, FENCE_FREE);
 }
 
-static void i915_sw_fence_complete(struct i915_sw_fence *fence)
+void i915_sw_fence_complete(struct i915_sw_fence *fence)
 {
 	debug_fence_assert(fence);
 
@@ -202,7 +202,7 @@ static void i915_sw_fence_complete(struct i915_sw_fence *fence)
 	__i915_sw_fence_complete(fence, NULL);
 }
 
-static void i915_sw_fence_await(struct i915_sw_fence *fence)
+void i915_sw_fence_await(struct i915_sw_fence *fence)
 {
 	debug_fence_assert(fence);
 	WARN_ON(atomic_inc_return(&fence->pending) <= 1);
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
index 0e055ea0179f..6dec9e1d1102 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -79,6 +79,9 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 				    unsigned long timeout,
 				    gfp_t gfp);
 
+void i915_sw_fence_await(struct i915_sw_fence *fence);
+void i915_sw_fence_complete(struct i915_sw_fence *fence);
+
 static inline bool i915_sw_fence_signaled(const struct i915_sw_fence *fence)
 {
 	return atomic_read(&fence->pending) <= 0;
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 8c22f1b6c891..d0cb6c48c494 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -616,6 +616,7 @@ void intel_engines_set_scheduler_caps(struct drm_i915_private *i915)
 	} map[] = {
 #define MAP(x, y) { ilog2(I915_ENGINE_HAS_##x), ilog2(I915_SCHEDULER_CAP_##y) }
 		MAP(PREEMPTION, PREEMPTION),
+		MAP(SEMAPHORES, SEMAPHORES),
 #undef MAP
 	};
 	struct intel_engine_cs *engine;
diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/intel_gpu_commands.h
index b96a31bc1080..0efaadd3bc32 100644
--- a/drivers/gpu/drm/i915/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/intel_gpu_commands.h
@@ -106,7 +106,12 @@
 #define   MI_SEMAPHORE_TARGET(engine)	((engine)<<15)
 #define MI_SEMAPHORE_WAIT	MI_INSTR(0x1c, 2) /* GEN8+ */
 #define   MI_SEMAPHORE_POLL		(1<<15)
+#define   MI_SEMAPHORE_SAD_GT_SDD	(0<<12)
 #define   MI_SEMAPHORE_SAD_GTE_SDD	(1<<12)
+#define   MI_SEMAPHORE_SAD_LT_SDD	(2<<12)
+#define   MI_SEMAPHORE_SAD_LTE_SDD	(3<<12)
+#define   MI_SEMAPHORE_SAD_EQ_SDD	(4<<12)
+#define   MI_SEMAPHORE_SAD_NEQ_SDD	(5<<12)
 #define MI_STORE_DWORD_IMM	MI_INSTR(0x20, 1)
 #define MI_STORE_DWORD_IMM_GEN4	MI_INSTR(0x20, 2)
 #define   MI_MEM_VIRTUAL	(1 << 22) /* 945,g33,965 */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 76c8fd5cd229..75881162235d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2324,6 +2324,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->park = NULL;
 	engine->unpark = NULL;
 
+	engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
 	if (engine->i915->preempt_context)
 		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index b8ec7e40a59b..2e7264119ec4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -499,6 +499,7 @@ struct intel_engine_cs {
 #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
+#define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
 	unsigned int flags;
 
 	/*
@@ -576,6 +577,12 @@ intel_engine_has_preemption(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
 }
 
+static inline bool
+intel_engine_has_semaphores(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_HAS_SEMAPHORES;
+}
+
 void intel_engines_set_scheduler_caps(struct drm_i915_private *i915);
 
 static inline bool __execlists_need_preempt(int prio, int last)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 8304a7f1ec3f..b10eea3f6d24 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -479,6 +479,7 @@ typedef struct drm_i915_irq_wait {
 #define   I915_SCHEDULER_CAP_ENABLED	(1ul << 0)
 #define   I915_SCHEDULER_CAP_PRIORITY	(1ul << 1)
 #define   I915_SCHEDULER_CAP_PREEMPTION	(1ul << 2)
+#define   I915_SCHEDULER_CAP_SEMAPHORES	(1ul << 3)
 
 #define I915_PARAM_HUC_STATUS		 42
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

* [PATCH 23/25] drm/i915: Prioritise non-busywait semaphore workloads
  2019-02-19 12:21 [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Chris Wilson
                   ` (20 preceding siblings ...)
  2019-02-19 12:22 ` [PATCH 22/25] drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+ Chris Wilson
@ 2019-02-19 12:22 ` Chris Wilson
  2019-02-19 12:22 ` [PATCH 24/25] drm/i915/execlists: Skip direct submission if only lite-restore Chris Wilson
                   ` (6 subsequent siblings)
  28 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-02-19 12:22 UTC (permalink / raw)
  To: intel-gfx

We don't want to busywait on the GPU if we have other work to do. If we
give non-busywaiting workloads higher (initial) priority than workloads
that require a busywait, we will prioritise work that is ready to run
immediately. We then also have to be careful that we don't give earlier
semaphores an accidental boost because later work doesn't wait on other
rings; hence we keep a history of semaphore usage along the dependency
chain.
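
The history is a per-node bitmask that each dependent shifts one step
deeper, so a request can tell whether any of its ancestors emitted a
busywait. A simplified sketch of the bookkeeping (the driver
additionally skips signalers that have already started executing):

#include <stdio.h>

#define HAS_SEMAPHORE 0x1ul

struct node {
	unsigned long semaphore;
};

static void add_dependency(struct node *waiter, const struct node *signal)
{
	/* propagate any semaphore use in the chain, one level deeper */
	waiter->semaphore |= signal->semaphore << 1;
}

int main(void)
{
	struct node spinner = { HAS_SEMAPHORE };	/* emits a busywait */
	struct node child = { 0 }, clean = { 0 };

	add_dependency(&child, &spinner);

	/* only requests with a zero history get the NOSEMAPHORE boost */
	printf("spinner=%#lx child=%#lx clean=%#lx\n",
	       spinner.semaphore, child.semaphore, clean.semaphore);
	return 0;
}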

Testcase: igt/gem_exec_schedule/semaphore
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c   | 16 ++++++++++++++++
 drivers/gpu/drm/i915/i915_scheduler.c |  5 +++++
 drivers/gpu/drm/i915/i915_scheduler.h |  9 ++++++---
 drivers/gpu/drm/i915/intel_lrc.c      |  2 +-
 4 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 439816b48eb6..8279f7b04194 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -811,6 +811,7 @@ emit_semaphore_wait(struct i915_request *to,
 	*cs++ = 0;
 
 	intel_ring_advance(to, cs);
+	to->sched.semaphore |= I915_SCHED_HAS_SEMAPHORE;
 	return 0;
 }
 
@@ -1081,6 +1082,21 @@ void i915_request_add(struct i915_request *request)
 	if (engine->schedule) {
 		struct i915_sched_attr attr = request->gem_context->sched;
 
+		/*
+		 * Boost actual workloads past semaphores!
+		 *
+		 * With semaphores we spin on one engine waiting for another,
+		 * simply to reduce the latency of starting our work when
+		 * the signaler completes. However, if there is any other
+		 * work that we could be doing on this engine instead, that
+		 * is better utilisation and will reduce the overall duration
+		 * of the current work. To avoid PI boosting a semaphore
+		 * far in the distance past useful work, we keep a history
+		 * of any semaphore use along our dependency chain.
+		 */
+		if (!request->sched.semaphore)
+			attr.priority |= I915_PRIORITY_NOSEMAPHORE;
+
 		/*
 		 * Boost priorities to new clients (new request flows).
 		 *
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 50018ad30233..fd684b9ed108 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -39,6 +39,7 @@ void i915_sched_node_init(struct i915_sched_node *node)
 	INIT_LIST_HEAD(&node->waiters_list);
 	INIT_LIST_HEAD(&node->link);
 	node->attr.priority = I915_PRIORITY_INVALID;
+	node->semaphore = 0;
 }
 
 static struct i915_dependency *
@@ -69,6 +70,10 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 		dep->signaler = signal;
 		dep->flags = flags;
 
+		/* Keep track of whether anyone on this chain has a semaphore */
+		if (signal->semaphore && !node_started(signal))
+			node->semaphore |= signal->semaphore << 1;
+
 		ret = true;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 5196ce07b6c2..24c2c027fd2c 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -24,14 +24,15 @@ enum {
 	I915_PRIORITY_INVALID = INT_MIN
 };
 
-#define I915_USER_PRIORITY_SHIFT 2
+#define I915_USER_PRIORITY_SHIFT 3
 #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
 
 #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
 #define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1)
 
-#define I915_PRIORITY_WAIT	((u8)BIT(0))
-#define I915_PRIORITY_NEWCLIENT	((u8)BIT(1))
+#define I915_PRIORITY_WAIT		((u8)BIT(0))
+#define I915_PRIORITY_NEWCLIENT		((u8)BIT(1))
+#define I915_PRIORITY_NOSEMAPHORE	((u8)BIT(2))
 
 #define __NO_PREEMPTION (I915_PRIORITY_WAIT)
 
@@ -74,6 +75,8 @@ struct i915_sched_node {
 	struct list_head waiters_list; /* those after us, they depend upon us */
 	struct list_head link;
 	struct i915_sched_attr attr;
+	unsigned long semaphore;
+#define I915_SCHED_HAS_SEMAPHORE	BIT(0)
 };
 
 struct i915_dependency {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 75881162235d..5e9cff19a6b6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -164,7 +164,7 @@
 #define WA_TAIL_DWORDS 2
 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
 
-#define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT)
+#define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT | I915_PRIORITY_NOSEMAPHORE)
 
 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 					    struct intel_engine_cs *engine,
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

* [PATCH 24/25] drm/i915/execlists: Skip direct submission if only lite-restore
  2019-02-19 12:21 [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Chris Wilson
                   ` (21 preceding siblings ...)
  2019-02-19 12:22 ` [PATCH 23/25] drm/i915: Prioritise non-busywait semaphore workloads Chris Wilson
@ 2019-02-19 12:22 ` Chris Wilson
  2019-02-19 12:22 ` [PATCH 25/25] drm/i915: Use __ffs() in for_each_priolist for more compact code Chris Wilson
                   ` (5 subsequent siblings)
  28 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-02-19 12:22 UTC (permalink / raw)
  To: intel-gfx

If we are resubmitting the active context, simply skip the submission,
as performing the submission from the interrupt handler has higher
throughput than continually provoking lite-restores. If, however, we
find ourselves with a new client, we check whether we can dequeue into
the second port or need to resolve preemption.
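
A toy model of the resulting submit_queue() decision (simplified: the
driver compares hw_context pointers against the first ELSP port, and
these names are invented):

#include <stdbool.h>
#include <stdio.h>

struct request {
	int context;
	int prio;
};

static bool kick_tasklet(const struct request *active,
			 const struct request *rq,
			 int queue_priority_hint)
{
	/* same context already on HW: a lite-restore the irq can handle */
	if (active && active->context == rq->context)
		return false;
	return rq->prio > queue_priority_hint;
}

int main(void)
{
	const struct request active = { .context = 1, .prio = 0 };
	const struct request same = { .context = 1, .prio = 5 };
	const struct request other = { .context = 2, .prio = 5 };

	printf("same ctx:  %d\n", kick_tasklet(&active, &same, 0));	/* 0 */
	printf("other ctx: %d\n", kick_tasklet(&active, &other, 0));	/* 1 */
	return 0;
}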

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5e9cff19a6b6..e2e219156fbf 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1205,10 +1205,22 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)
 		tasklet_hi_schedule(&execlists->tasklet);
 }
 
-static void submit_queue(struct intel_engine_cs *engine, int prio)
+static bool inflight(const struct intel_engine_execlists *execlists,
+		     const struct i915_request *rq)
 {
-	if (prio > engine->execlists.queue_priority_hint) {
-		engine->execlists.queue_priority_hint = prio;
+	const struct i915_request *active = port_request(execlists->port);
+
+	return active && active->hw_context == rq->hw_context;
+}
+
+static void submit_queue(struct intel_engine_cs *engine,
+			 const struct i915_request *rq)
+{
+	struct intel_engine_execlists *execlists = &engine->execlists;
+
+	if (rq_prio(rq) > execlists->queue_priority_hint &&
+	    !inflight(execlists, rq)) {
+		execlists->queue_priority_hint = rq_prio(rq);
 		__submit_queue_imm(engine);
 	}
 }
@@ -1226,7 +1238,7 @@ static void execlists_submit_request(struct i915_request *request)
 	GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 	GEM_BUG_ON(list_empty(&request->sched.link));
 
-	submit_queue(engine, rq_prio(request));
+	submit_queue(engine, request);
 
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

* [PATCH 25/25] drm/i915: Use __ffs() in for_each_priolist for more compact code
  2019-02-19 12:21 [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Chris Wilson
                   ` (22 preceding siblings ...)
  2019-02-19 12:22 ` [PATCH 24/25] drm/i915/execlists: Skip direct submission if only lite-restore Chris Wilson
@ 2019-02-19 12:22 ` Chris Wilson
  2019-02-19 12:56 ` [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Ville Syrjälä
                   ` (4 subsequent siblings)
  28 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-02-19 12:22 UTC (permalink / raw)
  To: intel-gfx

Gcc generates slightly more compact code if we use __ffs(), subtracting
one from the index once rather than at each use:

__execlists_submission_tasklet              2867    2847     -20
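
For reference, the two helpers differ only in indexing: ffs() is 1-based
(and defined for zero input), while __ffs() is 0-based and undefined for
zero, which is why the rewritten macro tests (plist)->used before
calling it. A userspace sketch, with __builtin_ctz() standing in for the
kernel's __ffs():

#include <stdio.h>
#include <strings.h>	/* ffs() */

int main(void)
{
	unsigned int used = 0x28;	/* bits 3 and 5 set */

	printf("ffs:   %d (1-based, subtract one per use)\n", ffs(used));
	printf("__ffs: %d (0-based, index usable directly)\n",
	       __builtin_ctz(used));
	return 0;
}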

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_scheduler.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 24c2c027fd2c..068a6750540f 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -100,9 +100,11 @@ struct i915_priolist {
 		list_for_each_entry(it, &(plist)->requests[idx], sched.link)
 
 #define priolist_for_each_request_consume(it, n, plist, idx) \
-	for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
+	for (; \
+	     (plist)->used ? (idx = __ffs((plist)->used)), 1 : 0; \
+	     (plist)->used &= ~BIT(idx)) \
 		list_for_each_entry_safe(it, n, \
-					 &(plist)->requests[idx - 1], \
+					 &(plist)->requests[idx], \
 					 sched.link)
 
 void i915_sched_node_init(struct i915_sched_node *node);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

* Re: [PATCH 07/25] drm/i915: Trim delays for wedging
  2019-02-19 12:21 ` [PATCH 07/25] drm/i915: Trim delays for wedging Chris Wilson
@ 2019-02-19 12:48   ` Mika Kuoppala
  0 siblings, 0 replies; 43+ messages in thread
From: Mika Kuoppala @ 2019-02-19 12:48 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> CI still reports the occasional multi-second delay for resets, in
> particular along the wedge+recovery paths. As the likely, and unbounded,
> delay here is from sync_rcu, use the expedited variant instead.
>
> Testcase: igt/gem_eio/unwedge-stress
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/i915_reset.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
> index 358ab1d51570..9f6da5e4b007 100644
> --- a/drivers/gpu/drm/i915/i915_reset.c
> +++ b/drivers/gpu/drm/i915/i915_reset.c
> @@ -835,7 +835,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915)
>  	 * either this call here to intel_engine_write_global_seqno, or the one
>  	 * in nop_submit_request.
>  	 */
> -	synchronize_rcu();
> +	synchronize_rcu_expedited();
>  
>  	/* Mark all executing requests as skipped */
>  	for_each_engine(engine, i915, id)
> -- 
> 2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

* Re: [PATCH 01/25] drm/i915: Move verify_wm_state() to heap
  2019-02-19 12:21 [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Chris Wilson
                   ` (23 preceding siblings ...)
  2019-02-19 12:22 ` [PATCH 25/25] drm/i915: Use __ffs() in for_each_priolist for more compact code Chris Wilson
@ 2019-02-19 12:56 ` Ville Syrjälä
  2019-02-19 13:05 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/25] " Patchwork
                   ` (3 subsequent siblings)
  28 siblings, 0 replies; 43+ messages in thread
From: Ville Syrjälä @ 2019-02-19 12:56 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue, Feb 19, 2019 at 12:21:51PM +0000, Chris Wilson wrote:
> The stack usage exceeded 1024 bytes prompting warnings on conservative
> setups, so move the temporary allocation for HW readback onto the heap.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>

Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/intel_display.c | 48 ++++++++++++++++++----------
>  1 file changed, 31 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 3b4a6eeb4573..415d8968f2c5 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -12227,12 +12227,15 @@ static void verify_wm_state(struct drm_crtc *crtc,
>  			    struct drm_crtc_state *new_state)
>  {
>  	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
> -	struct skl_ddb_allocation hw_ddb, *sw_ddb;
> -	struct skl_pipe_wm hw_wm, *sw_wm;
> -	struct skl_plane_wm *hw_plane_wm, *sw_plane_wm;
> +	struct skl_hw_state {
> +		struct skl_ddb_entry ddb_y[I915_MAX_PLANES];
> +		struct skl_ddb_entry ddb_uv[I915_MAX_PLANES];
> +		struct skl_ddb_allocation ddb;
> +		struct skl_pipe_wm wm;
> +	} *hw;
> +	struct skl_ddb_allocation *sw_ddb;
> +	struct skl_pipe_wm *sw_wm;
>  	struct skl_ddb_entry *hw_ddb_entry, *sw_ddb_entry;
> -	struct skl_ddb_entry hw_ddb_y[I915_MAX_PLANES];
> -	struct skl_ddb_entry hw_ddb_uv[I915_MAX_PLANES];
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>  	const enum pipe pipe = intel_crtc->pipe;
>  	int plane, level, max_level = ilk_wm_max_level(dev_priv);
> @@ -12240,22 +12243,29 @@ static void verify_wm_state(struct drm_crtc *crtc,
>  	if (INTEL_GEN(dev_priv) < 9 || !new_state->active)
>  		return;
>  
> -	skl_pipe_wm_get_hw_state(intel_crtc, &hw_wm);
> +	hw = kzalloc(sizeof(*hw), GFP_KERNEL);
> +	if (!hw)
> +		return;
> +
> +	skl_pipe_wm_get_hw_state(intel_crtc, &hw->wm);
>  	sw_wm = &to_intel_crtc_state(new_state)->wm.skl.optimal;
>  
> -	skl_pipe_ddb_get_hw_state(intel_crtc, hw_ddb_y, hw_ddb_uv);
> +	skl_pipe_ddb_get_hw_state(intel_crtc, hw->ddb_y, hw->ddb_uv);
>  
> -	skl_ddb_get_hw_state(dev_priv, &hw_ddb);
> +	skl_ddb_get_hw_state(dev_priv, &hw->ddb);
>  	sw_ddb = &dev_priv->wm.skl_hw.ddb;
>  
> -	if (INTEL_GEN(dev_priv) >= 11)
> -		if (hw_ddb.enabled_slices != sw_ddb->enabled_slices)
> -			DRM_ERROR("mismatch in DBUF Slices (expected %u, got %u)\n",
> -				  sw_ddb->enabled_slices,
> -				  hw_ddb.enabled_slices);
> +	if (INTEL_GEN(dev_priv) >= 11 &&
> +	    hw->ddb.enabled_slices != sw_ddb->enabled_slices)
> +		DRM_ERROR("mismatch in DBUF Slices (expected %u, got %u)\n",
> +			  sw_ddb->enabled_slices,
> +			  hw->ddb.enabled_slices);
> +
>  	/* planes */
>  	for_each_universal_plane(dev_priv, pipe, plane) {
> -		hw_plane_wm = &hw_wm.planes[plane];
> +		struct skl_plane_wm *hw_plane_wm, *sw_plane_wm;
> +
> +		hw_plane_wm = &hw->wm.planes[plane];
>  		sw_plane_wm = &sw_wm->planes[plane];
>  
>  		/* Watermarks */
> @@ -12287,7 +12297,7 @@ static void verify_wm_state(struct drm_crtc *crtc,
>  		}
>  
>  		/* DDB */
> -		hw_ddb_entry = &hw_ddb_y[plane];
> +		hw_ddb_entry = &hw->ddb_y[plane];
>  		sw_ddb_entry = &to_intel_crtc_state(new_state)->wm.skl.plane_ddb_y[plane];
>  
>  		if (!skl_ddb_entry_equal(hw_ddb_entry, sw_ddb_entry)) {
> @@ -12305,7 +12315,9 @@ static void verify_wm_state(struct drm_crtc *crtc,
>  	 * once the plane becomes visible, we can skip this check
>  	 */
>  	if (1) {
> -		hw_plane_wm = &hw_wm.planes[PLANE_CURSOR];
> +		struct skl_plane_wm *hw_plane_wm, *sw_plane_wm;
> +
> +		hw_plane_wm = &hw->wm.planes[PLANE_CURSOR];
>  		sw_plane_wm = &sw_wm->planes[PLANE_CURSOR];
>  
>  		/* Watermarks */
> @@ -12337,7 +12349,7 @@ static void verify_wm_state(struct drm_crtc *crtc,
>  		}
>  
>  		/* DDB */
> -		hw_ddb_entry = &hw_ddb_y[PLANE_CURSOR];
> +		hw_ddb_entry = &hw->ddb_y[PLANE_CURSOR];
>  		sw_ddb_entry = &to_intel_crtc_state(new_state)->wm.skl.plane_ddb_y[PLANE_CURSOR];
>  
>  		if (!skl_ddb_entry_equal(hw_ddb_entry, sw_ddb_entry)) {
> @@ -12347,6 +12359,8 @@ static void verify_wm_state(struct drm_crtc *crtc,
>  				  hw_ddb_entry->start, hw_ddb_entry->end);
>  		}
>  	}
> +
> +	kfree(hw);
>  }
>  
>  static void
> -- 
> 2.20.1

-- 
Ville Syrjälä
Intel
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/25] drm/i915: Move verify_wm_state() to heap
  2019-02-19 12:21 [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Chris Wilson
                   ` (24 preceding siblings ...)
  2019-02-19 12:56 ` [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Ville Syrjälä
@ 2019-02-19 13:05 ` Patchwork
  2019-02-19 13:15 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (2 subsequent siblings)
  28 siblings, 0 replies; 43+ messages in thread
From: Patchwork @ 2019-02-19 13:05 UTC (permalink / raw)
  To: intel-gfx

== Series Details ==

Series: series starting with [01/25] drm/i915: Move verify_wm_state() to heap
URL   : https://patchwork.freedesktop.org/series/56895/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
297cf65379fb drm/i915: Move verify_wm_state() to heap
5d4b26b32831 drm/i915: Use time based guilty context banning
85102762750a drm/i915: Prevent user context creation while wedged
89678d92f27e drm/i915: Avoid reset lock in writing fence registers
-:23: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#23: 
<4> [125.739679] 00000000a730190a (&dev_priv->gpu_error.reset_backoff_srcu){+.+.}, at: i915_reset_trylock+0x0/0x310 [i915]

total: 0 errors, 1 warnings, 0 checks, 26 lines checked
3ea835086530 drm/i915: Reorder struct_mutex-vs-reset_lock in i915_gem_fault()
fb5e6eae6d83 drm/i915: Trim i915_do_reset() to minimum delays
-:21: CHECK:USLEEP_RANGE: usleep_range is preferred over udelay; see Documentation/timers/timers-howto.txt
#21: FILE: drivers/gpu/drm/i915/i915_reset.c:170:
+	udelay(20);

-:27: CHECK:USLEEP_RANGE: usleep_range is preferred over udelay; see Documentation/timers/timers-howto.txt
#27: FILE: drivers/gpu/drm/i915/i915_reset.c:175:
+	udelay(20);

total: 0 errors, 0 warnings, 2 checks, 14 lines checked
d24387682d04 drm/i915: Trim delays for wedging
ffa0c4796c50 drm/i915/pmu: Always sample an active ringbuffer
b591a28bf74f drm/i915: Replace global_seqno with a hangcheck heartbeat seqno
435800fa983d drm/i915: Remove access to global seqno in the HWSP
25c28490b7ed drm/i915: Remove i915_request.global_seqno
b3d7cdcd0ead drm/i915/selftests: Exercise resetting during non-user payloads
-:35: WARNING:LINE_SPACING: Missing a blank line after declarations
#35: FILE: drivers/gpu/drm/i915/selftests/intel_hangcheck.c:427:
+	struct drm_file *file;
+	IGT_TIMEOUT(end_time);

-:154: WARNING:LINE_SPACING: Missing a blank line after declarations
#154: FILE: drivers/gpu/drm/i915/selftests/intel_hangcheck.c:546:
+		unsigned int count;
+		IGT_TIMEOUT(end_time);

total: 0 errors, 2 warnings, 0 checks, 230 lines checked
28bacfa762ca drm/i915: Reduce the RPS shock
-:32: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 0d55babc8392 ("drm/i915: Drop stray clearing of rps->last_adj")'
#32: 
References: 0d55babc8392 ("drm/i915: Drop stray clearing of rps->last_adj")

-:33: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 60548c554be2 ("drm/i915: Interactive RPS mode")'
#33: 
References: 60548c554be2 ("drm/i915: Interactive RPS mode")

total: 2 errors, 0 warnings, 0 checks, 18 lines checked
daf35758baa7 drm/i915/pmu: Use GT parked for estimating RC6 while asleep
f4dd099b254f drm/i915: Skip scanning for signalers if we are already inflight
c75163d88a44 drm/i915/execlists: Suppress mere WAIT preemption
8421bba99b2f drm/i915/execlists: Suppress redundant preemption
d3469eadd959 drm/i915: Make request allocation caches global
-:162: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#162: 
new file mode 100644

-:167: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#167: FILE: drivers/gpu/drm/i915/i915_globals.c:1:
+/*

-:286: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#286: FILE: drivers/gpu/drm/i915/i915_globals.h:1:
+/*

-:627: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'plist' - possible side-effects?
#627: FILE: drivers/gpu/drm/i915/i915_scheduler.h:95:
+#define priolist_for_each_request(it, plist, idx) \
+	for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
+		list_for_each_entry(it, &(plist)->requests[idx], sched.link)

-:627: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'idx' - possible side-effects?
#627: FILE: drivers/gpu/drm/i915/i915_scheduler.h:95:
+#define priolist_for_each_request(it, plist, idx) \
+	for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
+		list_for_each_entry(it, &(plist)->requests[idx], sched.link)

-:631: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'plist' - possible side-effects?
#631: FILE: drivers/gpu/drm/i915/i915_scheduler.h:99:
+#define priolist_for_each_request_consume(it, n, plist, idx) \
+	for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
+		list_for_each_entry_safe(it, n, \
+					 &(plist)->requests[idx - 1], \
+					 sched.link)

-:631: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'idx' - possible side-effects?
#631: FILE: drivers/gpu/drm/i915/i915_scheduler.h:99:
+#define priolist_for_each_request_consume(it, n, plist, idx) \
+	for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
+		list_for_each_entry_safe(it, n, \
+					 &(plist)->requests[idx - 1], \
+					 sched.link)

total: 0 errors, 3 warnings, 4 checks, 808 lines checked
4ace7a1a1640 drm/i915: Introduce i915_timeline.mutex
f9cf6b93dd12 drm/i915: Keep timeline HWSP allocated until idle across the system
5c7d5f3e9173 drm/i915: Compute the global scheduler caps
21059fb9f4ad drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+
-:334: CHECK:SPACING: spaces preferred around that '<<' (ctx:VxV)
#334: FILE: drivers/gpu/drm/i915/intel_gpu_commands.h:109:
+#define   MI_SEMAPHORE_SAD_GT_SDD	(0<<12)
                                  	  ^

-:336: CHECK:SPACING: spaces preferred around that '<<' (ctx:VxV)
#336: FILE: drivers/gpu/drm/i915/intel_gpu_commands.h:111:
+#define   MI_SEMAPHORE_SAD_LT_SDD	(2<<12)
                                  	  ^

-:337: CHECK:SPACING: spaces preferred around that '<<' (ctx:VxV)
#337: FILE: drivers/gpu/drm/i915/intel_gpu_commands.h:112:
+#define   MI_SEMAPHORE_SAD_LTE_SDD	(3<<12)
                                   	  ^

-:338: CHECK:SPACING: spaces preferred around that '<<' (ctx:VxV)
#338: FILE: drivers/gpu/drm/i915/intel_gpu_commands.h:113:
+#define   MI_SEMAPHORE_SAD_EQ_SDD	(4<<12)
                                  	  ^

-:339: CHECK:SPACING: spaces preferred around that '<<' (ctx:VxV)
#339: FILE: drivers/gpu/drm/i915/intel_gpu_commands.h:114:
+#define   MI_SEMAPHORE_SAD_NEQ_SDD	(5<<12)
                                   	  ^

total: 0 errors, 0 warnings, 5 checks, 299 lines checked
c2fa0a72ef2f drm/i915: Prioritise non-busywait semaphore workloads
507ffad016de drm/i915/execlists: Skip direct submission if only lite-restore
ac98d5de17a0 drm/i915: Use __ffs() in for_each_priolist for more compact code
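
As an aside, the MACRO_ARG_REUSE checks above flag the usual macro
hazard: an argument expression with side effects is evaluated once per
reuse. The iterators here are safe because they are only ever passed
plain variables; a minimal illustration of the failure mode checkpatch
guards against (not code from this series):

#define SQUARE(x) ((x) * (x))

int main(void)
{
	int i = 2;
	int v = SQUARE(i++);	/* expands to ((i++) * (i++)): i is
				 * modified twice with no sequence point
				 * in between, undefined behaviour */
	return v;
}

Which is why these are reported as CHECKs rather than errors.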


* Re: [PATCH 03/25] drm/i915: Prevent user context creation while wedged
  2019-02-19 12:21 ` [PATCH 03/25] drm/i915: Prevent user context creation while wedged Chris Wilson
@ 2019-02-19 13:07   ` Mika Kuoppala
  2019-02-19 13:11     ` Chris Wilson
  2019-02-19 13:20     ` Chris Wilson
  0 siblings, 2 replies; 43+ messages in thread
From: Mika Kuoppala @ 2019-02-19 13:07 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Introduce a new ABI method for detecting a wedged driver by reporting
> -EIO from DRM_IOCTL_I915_GEM_CONTEXT_CREATE.

Need more in the 'why' department. What is lacking in
the method of submitting and noticing the wedged state?

Also detecting banned from wedged is not trivial.

I am trying to figure out what the userspace
need and flow are wrt this.

If we are to have object parameters, should we
convey this type of information with a status query?

-Mika

>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem_context.c | 11 ++++++++---
>  1 file changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 7541c6f961b3..7337aa01c361 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -802,18 +802,23 @@ static bool client_is_banned(struct drm_i915_file_private *file_priv)
>  int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>  				  struct drm_file *file)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> +	struct drm_i915_private *i915 = to_i915(dev);
>  	struct drm_i915_gem_context_create *args = data;
>  	struct drm_i915_file_private *file_priv = file->driver_priv;
>  	struct i915_gem_context *ctx;
>  	int ret;
>  
> -	if (!DRIVER_CAPS(dev_priv)->has_logical_contexts)
> +	if (!DRIVER_CAPS(i915)->has_logical_contexts)
>  		return -ENODEV;
>  
>  	if (args->pad != 0)
>  		return -EINVAL;
>  
> +	if (i915_terminally_wedged(&i915->gpu_error)) {
> +		DRM_DEBUG("driver is wedged; banning new ctx!\n");
> +		return -EIO;
> +	}
> +
>  	if (client_is_banned(file_priv)) {
>  		DRM_DEBUG("client %s[%d] banned from creating ctx\n",
>  			  current->comm,
> @@ -826,7 +831,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>  	if (ret)
>  		return ret;
>  
> -	ctx = i915_gem_create_context(dev_priv, file_priv);
> +	ctx = i915_gem_create_context(i915, file_priv);
>  	mutex_unlock(&dev->struct_mutex);
>  	if (IS_ERR(ctx))
>  		return PTR_ERR(ctx);
> -- 
> 2.20.1

* Re: [PATCH 03/25] drm/i915: Prevent user context creation while wedged
  2019-02-19 13:07   ` Mika Kuoppala
@ 2019-02-19 13:11     ` Chris Wilson
  2019-02-19 13:31       ` Mika Kuoppala
  2019-02-19 13:20     ` Chris Wilson
  1 sibling, 1 reply; 43+ messages in thread
From: Chris Wilson @ 2019-02-19 13:11 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2019-02-19 13:07:08)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Introduce a new ABI method for detecting a wedged driver by reporting
> > -EIO from DRM_IOCTL_I915_GEM_CONTEXT_CREATE.
> 
> Need more in the 'why' department. What is lacking in
> the method of submitting and noticing the wedged state?

This fits in with

https://lists.freedesktop.org/archives/mesa-dev/2019-February/215469.html

where we are recreating the context after receiving the -EIO from
execbuf, and so this allows us to actually break that loop if the
driver is wedged.
 
> Also detecting banned from wedged is not trivial.

We are not detecting banned per se, just reporting that the driver is
wedged.
 
> I am trying to figure out what the userspace
> need and flow are wrt this.
> 
> If we are to have object parameters, should we
> convey this type of information with a status query?

It's not an object property, it's the status of the driver. Currently the
indication is throttle returning -EIO, but that would look mighty
strange in the above recreate_context().
-Chris
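
To make the flow concrete, the userspace side of the loop being broken
here could look roughly like the sketch below; recreate_context() is a
made-up stand-in for the Mesa logic, not its actual code:

#include <errno.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int recreate_context(int fd, uint32_t *ctx_id)
{
	/* pad must stay zero or the ioctl returns -EINVAL */
	struct drm_i915_gem_context_create create = { 0 };

	if (ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create)) {
		if (errno == EIO)
			return -EIO;	/* wedged: stop retrying */
		return -errno;
	}

	*ctx_id = create.ctx_id;	/* fresh context; resubmit */
	return 0;
}

With the -EIO check, a wedged driver terminates the recreate loop
instead of bouncing between execbuf and context creation forever.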

* ✗ Fi.CI.SPARSE: warning for series starting with [01/25] drm/i915: Move verify_wm_state() to heap
  2019-02-19 12:21 [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Chris Wilson
                   ` (25 preceding siblings ...)
  2019-02-19 13:05 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/25] " Patchwork
@ 2019-02-19 13:15 ` Patchwork
  2019-02-19 13:31 ` ✓ Fi.CI.BAT: success " Patchwork
  2019-02-19 17:14 ` ✗ Fi.CI.IGT: failure " Patchwork
  28 siblings, 0 replies; 43+ messages in thread
From: Patchwork @ 2019-02-19 13:15 UTC (permalink / raw)
  To: intel-gfx

== Series Details ==

Series: series starting with [01/25] drm/i915: Move verify_wm_state() to heap
URL   : https://patchwork.freedesktop.org/series/56895/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915: Move verify_wm_state() to heap
Okay!

Commit: drm/i915: Use time based guilty context banning
Okay!

Commit: drm/i915: Prevent user context creation while wedged
Okay!

Commit: drm/i915: Avoid reset lock in writing fence registers
Okay!

Commit: drm/i915: Reorder struct_mutex-vs-reset_lock in i915_gem_fault()
-O:drivers/gpu/drm/i915/i915_gem.c:1898:32: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:1898:32: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem.c:1898:32: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem.c:1898:32: warning: expression using sizeof(void)

Commit: drm/i915: Trim i915_do_reset() to minimum delays
Okay!

Commit: drm/i915: Trim delays for wedging
Okay!

Commit: drm/i915/pmu: Always sample an active ringbuffer
Okay!

Commit: drm/i915: Replace global_seqno with a hangcheck heartbeat seqno
Okay!

Commit: drm/i915: Remove access to global seqno in the HWSP
Okay!

Commit: drm/i915: Remove i915_request.global_seqno
Okay!

Commit: drm/i915/selftests: Exercise resetting during non-user payloads
Okay!

Commit: drm/i915: Reduce the RPS shock
Okay!

Commit: drm/i915/pmu: Use GT parked for estimating RC6 while asleep
Okay!

Commit: drm/i915: Skip scanning for signalers if we are already inflight
Okay!

Commit: drm/i915/execlists: Suppress mere WAIT preemption
Okay!

Commit: drm/i915/execlists: Suppress redundant preemption
Okay!

Commit: drm/i915: Make request allocation caches global
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3567:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3564:16: warning: expression using sizeof(void)

Commit: drm/i915: Introduce i915_timeline.mutex
Okay!

Commit: drm/i915: Keep timeline HWSP allocated until idle across the system
Okay!

Commit: drm/i915: Compute the global scheduler caps
Okay!

Commit: drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+
-O:drivers/gpu/drm/i915/i915_drv.c:351:25: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:351:25: warning: expression using sizeof(void)

Commit: drm/i915: Prioritise non-busywait semaphore workloads
Okay!

Commit: drm/i915/execlists: Skip direct submission if only lite-restore
Okay!

Commit: drm/i915: Use __ffs() in for_each_priolist for more compact code
Okay!


* Re: [PATCH 03/25] drm/i915: Prevent user context creation while wedged
  2019-02-19 13:07   ` Mika Kuoppala
  2019-02-19 13:11     ` Chris Wilson
@ 2019-02-19 13:20     ` Chris Wilson
  1 sibling, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-02-19 13:20 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2019-02-19 13:07:08)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Introduce a new ABI method for detecting a wedged driver by reporting
> > -EIO from DRM_IOCTL_I915_GEM_CONTEXT_CREATE.
> 
> Need more in the 'why' department. What is lacking in
> the method of submitting and noticing the wedged state?
> 
> Also detecting banned from wedged is not trivial.

The biggest problem is our transient wedging, where we use
i915_gem_set_wedged() to cancel inflight rendering to avoid a deadlock
during reset. That is an issue here as we may submit and see -EIO even
though the driver isn't strictly terminally wedged and may reset.

We've even seen that once in practice,
https://bugs.freedesktop.org/show_bug.cgi?id=109580.

Hmm, a pre-existing problem requiring a more general solution. I'm thinking
along the lines of

	if (i915_terminally_wedged())
		return i915_reset_backoff() ? -EAGAIN : -EIO;

Joy. /o\

ret = i915_reset_backoff();

static int i915_reset_backoff()
{
	unsigned long flags = READ_ONCE(error->flags);

	if (!(flags & WEDGED))
		return 0;
	
	if (flags & BACKOFF)
		return -EAGAIN;

	return -EIO;
}

Seems eerily familiar, like a full circle.
-Chris
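
Fleshed out, that second sketch might look like the following, assuming
the existing I915_WEDGED and I915_RESET_BACKOFF bits in gpu_error.flags;
untested, illustration only:

static int i915_reset_backoff(const struct i915_gpu_error *error)
{
	const unsigned long flags = READ_ONCE(error->flags);

	if (!test_bit(I915_WEDGED, &flags))
		return 0;	/* healthy (or already recovered) */

	if (test_bit(I915_RESET_BACKOFF, &flags))
		return -EAGAIN;	/* reset still in progress: retry */

	return -EIO;		/* terminally wedged */
}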

* ✓ Fi.CI.BAT: success for series starting with [01/25] drm/i915: Move verify_wm_state() to heap
  2019-02-19 12:21 [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Chris Wilson
                   ` (26 preceding siblings ...)
  2019-02-19 13:15 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2019-02-19 13:31 ` Patchwork
  2019-02-19 17:14 ` ✗ Fi.CI.IGT: failure " Patchwork
  28 siblings, 0 replies; 43+ messages in thread
From: Patchwork @ 2019-02-19 13:31 UTC (permalink / raw)
  To: intel-gfx

== Series Details ==

Series: series starting with [01/25] drm/i915: Move verify_wm_state() to heap
URL   : https://patchwork.freedesktop.org/series/56895/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5630 -> Patchwork_12254
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/56895/revisions/1/mbox/

Known issues
------------

  Here are the changes found in Patchwork_12254 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@kms_pipe_crc_basic@nonblocking-crc-pipe-a-frame-sequence:
    - fi-byt-clapper:     PASS -> FAIL [fdo#103191] / [fdo#107362]

  * igt@kms_pipe_crc_basic@read-crc-pipe-a:
    - fi-byt-clapper:     PASS -> FAIL [fdo#107362]

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
    - fi-blb-e6850:       PASS -> INCOMPLETE [fdo#107718]

  
#### Possible fixes ####

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    - fi-byt-clapper:     INCOMPLETE [fdo#102657] -> PASS

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#102657]: https://bugs.freedesktop.org/show_bug.cgi?id=102657
  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#105998]: https://bugs.freedesktop.org/show_bug.cgi?id=105998
  [fdo#107362]: https://bugs.freedesktop.org/show_bug.cgi?id=107362
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109276]: https://bugs.freedesktop.org/show_bug.cgi?id=109276
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [fdo#109284]: https://bugs.freedesktop.org/show_bug.cgi?id=109284
  [fdo#109285]: https://bugs.freedesktop.org/show_bug.cgi?id=109285
  [fdo#109289]: https://bugs.freedesktop.org/show_bug.cgi?id=109289
  [fdo#109294]: https://bugs.freedesktop.org/show_bug.cgi?id=109294
  [fdo#109315]: https://bugs.freedesktop.org/show_bug.cgi?id=109315
  [fdo#109527]: https://bugs.freedesktop.org/show_bug.cgi?id=109527
  [fdo#109528]: https://bugs.freedesktop.org/show_bug.cgi?id=109528
  [fdo#109530]: https://bugs.freedesktop.org/show_bug.cgi?id=109530


Participating hosts (44 -> 40)
------------------------------

  Additional (2): fi-icl-y fi-bdw-5557u 
  Missing    (6): fi-kbl-soraka fi-ilk-m540 fi-byt-squawks fi-icl-u2 fi-bsw-cyan fi-kbl-7500u 


Build changes
-------------

    * Linux: CI_DRM_5630 -> Patchwork_12254

  CI_DRM_5630: 82d591391bfcd9cfe2eeac149c49a678b571cd62 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4837: 368e76156f752e6ed6ac32ed9f400567aef7d3fc @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12254: ac98d5de17a0e8c2c3ec2f60da205af5112019d6 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

ac98d5de17a0 drm/i915: Use __ffs() in for_each_priolist for more compact code
507ffad016de drm/i915/execlists: Skip direct submission if only lite-restore
c2fa0a72ef2f drm/i915: Prioritise non-busywait semaphore workloads
21059fb9f4ad drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+
5c7d5f3e9173 drm/i915: Compute the global scheduler caps
f9cf6b93dd12 drm/i915: Keep timeline HWSP allocated until idle across the system
4ace7a1a1640 drm/i915: Introduce i915_timeline.mutex
d3469eadd959 drm/i915: Make request allocation caches global
8421bba99b2f drm/i915/execlists: Suppress redundant preemption
c75163d88a44 drm/i915/execlists: Suppress mere WAIT preemption
f4dd099b254f drm/i915: Skip scanning for signalers if we are already inflight
daf35758baa7 drm/i915/pmu: Use GT parked for estimating RC6 while asleep
28bacfa762ca drm/i915: Reduce the RPS shock
b3d7cdcd0ead drm/i915/selftests: Exercise resetting during non-user payloads
25c28490b7ed drm/i915: Remove i915_request.global_seqno
435800fa983d drm/i915: Remove access to global seqno in the HWSP
b591a28bf74f drm/i915: Replace global_seqno with a hangcheck heartbeat seqno
ffa0c4796c50 drm/i915/pmu: Always sample an active ringbuffer
d24387682d04 drm/i915: Trim delays for wedging
fb5e6eae6d83 drm/i915: Trim i915_do_reset() to minimum delays
3ea835086530 drm/i915: Reorder struct_mutex-vs-reset_lock in i915_gem_fault()
89678d92f27e drm/i915: Avoid reset lock in writing fence registers
85102762750a drm/i915: Prevent user context creation while wedged
5d4b26b32831 drm/i915: Use time based guilty context banning
297cf65379fb drm/i915: Move verify_wm_state() to heap

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12254/

* Re: [PATCH 03/25] drm/i915: Prevent user context creation while wedged
  2019-02-19 13:11     ` Chris Wilson
@ 2019-02-19 13:31       ` Mika Kuoppala
  0 siblings, 0 replies; 43+ messages in thread
From: Mika Kuoppala @ 2019-02-19 13:31 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2019-02-19 13:07:08)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > Introduce a new ABI method for detecting a wedged driver by reporting
>> > -EIO from DRM_IOCTL_I915_GEM_CONTEXT_CREATE.
>> 
>> Need more in the 'why' department. What is lacking in
>> the method of submitting and noticing the wedged state?
>
> This fits in with
>
> https://lists.freedesktop.org/archives/mesa-dev/2019-February/215469.html
>
> where we are recreating the contexting the context after receiving the
> -EIO from execbuf and so this allows us to actually break that loop if
> the driver is wedged.
>  
>> Also detecting banned from wedged is not trivial.
>
> We are not detecting banned per-se, just saying that if the driver is
> wedged.

Yes. I meant that you will now get -EIO from both a wedged
driver and a banned client and you can't tell them apart.
Not that you would need to :O

Add that mesa commit as a reference? (even though I didn't notice
it setting unrecoverable as it boldly promised :)

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

-Mika

* ✗ Fi.CI.IGT: failure for series starting with [01/25] drm/i915: Move verify_wm_state() to heap
  2019-02-19 12:21 [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Chris Wilson
                   ` (27 preceding siblings ...)
  2019-02-19 13:31 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2019-02-19 17:14 ` Patchwork
  2019-02-19 17:16   ` Chris Wilson
  28 siblings, 1 reply; 43+ messages in thread
From: Patchwork @ 2019-02-19 17:14 UTC (permalink / raw)
  To: intel-gfx

== Series Details ==

Series: series starting with [01/25] drm/i915: Move verify_wm_state() to heap
URL   : https://patchwork.freedesktop.org/series/56895/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_5630_full -> Patchwork_12254_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_12254_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_12254_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_12254_full:

### IGT changes ###

#### Possible regressions ####

  * igt@gem_exec_schedule@preempt-queue-contexts-chain-bsd2:
    - shard-kbl:          PASS -> FAIL +4

  * igt@gem_exec_schedule@preempt-queue-contexts-chain-vebox:
    - shard-kbl:          NOTRUN -> FAIL

  * igt@perf_pmu@rc6:
    - shard-snb:          PASS -> FAIL

  * igt@perf_pmu@rc6-runtime-pm-long:
    - shard-apl:          PASS -> FAIL +6
    - shard-iclb:         PASS -> FAIL +4
    - shard-glk:          PASS -> FAIL +6
    - shard-hsw:          PASS -> FAIL +2

  
Known issues
------------

  Here are the changes found in Patchwork_12254_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_big:
    - shard-hsw:          PASS -> TIMEOUT [fdo#107937]

  * igt@gem_exec_nop@signal-all:
    - shard-glk:          PASS -> DMESG-WARN [fdo#105763] / [fdo#106538]

  * igt@kms_busy@extended-pageflip-modeset-hang-oldfb-render-a:
    - shard-kbl:          NOTRUN -> DMESG-WARN [fdo#107956]

  * igt@kms_ccs@pipe-a-crc-primary-basic:
    - shard-iclb:         NOTRUN -> FAIL [fdo#107725]

  * igt@kms_color@pipe-b-degamma:
    - shard-iclb:         NOTRUN -> DMESG-FAIL [fdo#109624]

  * igt@kms_cursor_crc@cursor-256x256-sliding:
    - shard-apl:          PASS -> FAIL [fdo#103232]

  * igt@kms_cursor_crc@cursor-64x21-random:
    - shard-apl:          NOTRUN -> FAIL [fdo#103232]

  * igt@kms_cursor_legacy@cursor-vs-flip-atomic:
    - shard-hsw:          PASS -> FAIL [fdo#103355]

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-mmap-wc:
    - shard-apl:          PASS -> FAIL [fdo#103167]

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-move:
    - shard-glk:          PASS -> FAIL [fdo#103167] +1

  * igt@kms_frontbuffer_tracking@psr-1p-primscrn-spr-indfb-draw-mmap-gtt:
    - shard-iclb:         PASS -> FAIL [fdo#103167] +1

  * igt@kms_plane@plane-position-covered-pipe-b-planes:
    - shard-glk:          PASS -> FAIL [fdo#103166] +1

  * igt@kms_plane_alpha_blend@pipe-a-alpha-opaque-fb:
    - shard-apl:          NOTRUN -> FAIL [fdo#108145]
    - shard-kbl:          NOTRUN -> FAIL [fdo#108145]

  * igt@kms_plane_multiple@atomic-pipe-b-tiling-y:
    - shard-apl:          PASS -> FAIL [fdo#103166] +3

  * igt@kms_rotation_crc@multiplane-rotation:
    - shard-kbl:          NOTRUN -> FAIL [fdo#109016]

  * igt@kms_rotation_crc@multiplane-rotation-cropping-bottom:
    - shard-kbl:          PASS -> DMESG-FAIL [fdo#105763]

  * igt@kms_vblank@pipe-b-ts-continuation-modeset:
    - shard-apl:          PASS -> FAIL [fdo#104894] +1

  * igt@pm_rpm@system-suspend-execbuf:
    - shard-iclb:         PASS -> DMESG-WARN [fdo#107724]

  
#### Possible fixes ####

  * igt@gem_busy@extended-semaphore-blt:
    - shard-iclb:         {SKIP} [fdo#109275] -> PASS +2
    - shard-kbl:          {SKIP} [fdo#109271] -> PASS +5
    - shard-glk:          {SKIP} [fdo#109271] -> PASS +3

  * igt@gem_busy@extended-semaphore-vebox:
    - shard-apl:          {SKIP} [fdo#109271] -> PASS +3

  * igt@i915_selftest@live_workarounds:
    - shard-iclb:         DMESG-FAIL [fdo#108954] -> PASS

  * igt@i915_suspend@fence-restore-tiled2untiled:
    - shard-iclb:         INCOMPLETE [fdo#107713] -> PASS

  * igt@kms_atomic_transition@plane-all-modeset-transition:
    - shard-apl:          INCOMPLETE [fdo#103927] -> PASS

  * igt@kms_busy@extended-modeset-hang-newfb-render-b:
    - shard-snb:          DMESG-WARN [fdo#107956] -> PASS

  * igt@kms_busy@extended-modeset-hang-newfb-with-reset-render-b:
    - shard-iclb:         DMESG-WARN [fdo#107956] -> PASS

  * igt@kms_busy@extended-modeset-hang-newfb-with-reset-render-c:
    - shard-kbl:          DMESG-WARN [fdo#107956] -> PASS

  * igt@kms_color@pipe-b-legacy-gamma:
    - shard-apl:          FAIL [fdo#104782] -> PASS

  * igt@kms_cursor_crc@cursor-128x42-random:
    - shard-apl:          FAIL [fdo#103232] -> PASS

  * igt@kms_cursor_crc@cursor-256x256-suspend:
    - shard-apl:          FAIL [fdo#103191] / [fdo#103232] -> PASS

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-move:
    - shard-iclb:         FAIL [fdo#103167] -> PASS +3

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-cur-indfb-draw-blt:
    - shard-glk:          FAIL [fdo#103167] -> PASS +4

  * igt@kms_plane@pixel-format-pipe-a-planes-source-clamping:
    - shard-apl:          FAIL [fdo#108948] -> PASS

  * igt@kms_plane_multiple@atomic-pipe-a-tiling-yf:
    - shard-iclb:         FAIL [fdo#103166] -> PASS +1

  * igt@kms_rotation_crc@multiplane-rotation-cropping-top:
    - shard-kbl:          FAIL [fdo#109016] -> PASS

  * igt@kms_setmode@basic:
    - shard-hsw:          FAIL [fdo#99912] -> PASS

  * igt@kms_universal_plane@universal-plane-pipe-b-functional:
    - shard-glk:          FAIL [fdo#103166] -> PASS +1

  * igt@pm_rpm@legacy-planes:
    - shard-iclb:         DMESG-WARN [fdo#107724] -> PASS +1

  * igt@pm_rps@waitboost:
    - shard-apl:          FAIL [fdo#102250] -> PASS

  
#### Warnings ####

  * igt@kms_cursor_crc@cursor-64x64-suspend:
    - shard-iclb:         FAIL [fdo#103232] -> INCOMPLETE [fdo#107713]

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#102250]: https://bugs.freedesktop.org/show_bug.cgi?id=102250
  [fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
  [fdo#103355]: https://bugs.freedesktop.org/show_bug.cgi?id=103355
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#104782]: https://bugs.freedesktop.org/show_bug.cgi?id=104782
  [fdo#104894]: https://bugs.freedesktop.org/show_bug.cgi?id=104894
  [fdo#105763]: https://bugs.freedesktop.org/show_bug.cgi?id=105763
  [fdo#106538]: https://bugs.freedesktop.org/show_bug.cgi?id=106538
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#107725]: https://bugs.freedesktop.org/show_bug.cgi?id=107725
  [fdo#107937]: https://bugs.freedesktop.org/show_bug.cgi?id=107937
  [fdo#107956]: https://bugs.freedesktop.org/show_bug.cgi?id=107956
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#108739]: https://bugs.freedesktop.org/show_bug.cgi?id=108739
  [fdo#108948]: https://bugs.freedesktop.org/show_bug.cgi?id=108948
  [fdo#108954]: https://bugs.freedesktop.org/show_bug.cgi?id=108954
  [fdo#109016]: https://bugs.freedesktop.org/show_bug.cgi?id=109016
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109274]: https://bugs.freedesktop.org/show_bug.cgi?id=109274
  [fdo#109275]: https://bugs.freedesktop.org/show_bug.cgi?id=109275
  [fdo#109276]: https://bugs.freedesktop.org/show_bug.cgi?id=109276
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [fdo#109280]: https://bugs.freedesktop.org/show_bug.cgi?id=109280
  [fdo#109281]: https://bugs.freedesktop.org/show_bug.cgi?id=109281
  [fdo#109284]: https://bugs.freedesktop.org/show_bug.cgi?id=109284
  [fdo#109287]: https://bugs.freedesktop.org/show_bug.cgi?id=109287
  [fdo#109290]: https://bugs.freedesktop.org/show_bug.cgi?id=109290
  [fdo#109291]: https://bugs.freedesktop.org/show_bug.cgi?id=109291
  [fdo#109315]: https://bugs.freedesktop.org/show_bug.cgi?id=109315
  [fdo#109593]: https://bugs.freedesktop.org/show_bug.cgi?id=109593
  [fdo#109624]: https://bugs.freedesktop.org/show_bug.cgi?id=109624
  [fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912


Participating hosts (7 -> 6)
------------------------------

  Missing    (1): shard-skl 


Build changes
-------------

    * Linux: CI_DRM_5630 -> Patchwork_12254

  CI_DRM_5630: 82d591391bfcd9cfe2eeac149c49a678b571cd62 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4837: 368e76156f752e6ed6ac32ed9f400567aef7d3fc @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12254: ac98d5de17a0e8c2c3ec2f60da205af5112019d6 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12254/

* Re: ✗ Fi.CI.IGT: failure for series starting with [01/25] drm/i915: Move verify_wm_state() to heap
  2019-02-19 17:14 ` ✗ Fi.CI.IGT: failure " Patchwork
@ 2019-02-19 17:16   ` Chris Wilson
  0 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-02-19 17:16 UTC (permalink / raw)
  To: Patchwork, intel-gfx

Quoting Patchwork (2019-02-19 17:14:26)
> #### Possible regressions ####
> 
>   * igt@gem_exec_schedule@preempt-queue-contexts-chain-bsd2:
>     - shard-kbl:          PASS -> FAIL +4
> 
>   * igt@gem_exec_schedule@preempt-queue-contexts-chain-vebox:
>     - shard-kbl:          NOTRUN -> FAIL

I don't recall those failing before. Might wait until after
gem_exec_schedule gets its fixup before investigating.
-Chris

* Re: [PATCH 13/25] drm/i915: Reduce the RPS shock
  2019-02-19 12:22 ` [PATCH 13/25] drm/i915: Reduce the RPS shock Chris Wilson
@ 2019-02-19 21:00   ` Lyude Paul
  2019-02-20 12:05     ` Chris Wilson
  2019-02-20 15:14   ` Mika Kuoppala
  1 sibling, 1 reply; 43+ messages in thread
From: Lyude Paul @ 2019-02-19 21:00 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Michel Thierry, Eero Tamminen

Should this maybe be CC'd for stable for v4.20? If so, I've already got a
working port of this patch that I can send to you (I've been running it on my
laptop for a while now, and it seems to work fine)

On Tue, 2019-02-19 at 12:22 +0000, Chris Wilson wrote:
> Limit deboosting and boosting to keep ourselves at the extremes
> when in the respective power modes (i.e. slowly decrease frequencies
> while in the HIGH_POWER zone and slowly increase frequencies while
> in the LOW_POWER zone). On idle, we will hit the timeout and drop
> to the next level quickly, and conversely if busy we expect to
> hit a waitboost and rapidly switch into max power.
> 
> This should improve the UX experience by keeping the GPU clocks higher
> than they ostensibly should be (based on simple busyness) by switching
> into the INTERACTIVE mode (due to waiting for pageflips) and increasing
> clocks via waitboosting. This will incur some additional power, our
> saving grace should be rc6 and powergating to keep the extra current
> draw in check.
> 
> Food for future thought would be deadline scheduling? If we know certain
> contexts (high priority compositors) absolutely must hit the next vblank
> then we can raise the frequencies ahead of time. Part of this is covered
> by per-context frequencies, where userspace is given control over the
> frequency range they want the GPU to execute at (for largely the same
> problem as this, where the workload is very latency sensitive but at the
> EI level appears mostly idle). Indeed, the per-context series does
> extend the modeset boosting to include a frequency range tweak which
> seems applicable to solving this jittery UX behaviour.
> 
> Reported-by: Lyude Paul <lyude@redhat.com>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109408
> References: 0d55babc8392 ("drm/i915: Drop stray clearing of rps->last_adj")
> References: 60548c554be2 ("drm/i915: Interactive RPS mode")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Lyude Paul <lyude@redhat.com>
> Cc: Eero Tamminen <eero.t.tamminen@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> 
> Quoting Lyude Paul:
> > Before reverting 0d55babc8392754352f1058866dd4182ae587d11: [4.20]
> > 
> > 35 measurements [of gnome-shell animations]
> > Average: 33.65657142857143 FPS
> > FPS observed: 20.8 - 46.87 FPS
> > Percentage under 60 FPS: 100.0%
> > Percentage under 55 FPS: 100.0%
> > Percentage under 50 FPS: 100.0%
> > Percentage under 45 FPS: 97.14285714285714%
> > Percentage under 40 FPS: 97.14285714285714%
> > Percentage under 35 FPS: 45.714285714285715%
> > Percentage under 30 FPS: 11.428571428571429%
> > Percentage under 25 FPS: 2.857142857142857%
> > 
> > After reverting: [4.19 behaviour]
> > 
> > 30 measurements
> > Average: 49.833666666666666 FPS
> > FPS observed: 33.85 - 60.0 FPS
> > Percentage under 60 FPS: 86.66666666666667%
> > Percentage under 55 FPS: 70.0%
> > Percentage under 50 FPS: 53.333333333333336%
> > Percentage under 45 FPS: 20.0%
> > Percentage under 40 FPS: 6.666666666666667%
> > Percentage under 35 FPS: 6.666666666666667%
> > Percentage under 30 FPS: 0%
> > Percentage under 25 FPS: 0%
> > 
> > Patched:
> > 42 measurements
> > Average: 46.05428571428571 FPS
> > FPS observed: 1.82 - 59.98 FPS
> > Percentage under 60 FPS: 88.09523809523809%
> > Percentage under 55 FPS: 61.904761904761905%
> > Percentage under 50 FPS: 45.23809523809524%
> > Percentage under 45 FPS: 35.714285714285715%
> > Percentage under 40 FPS: 33.33333333333333%
> > Percentage under 35 FPS: 19.047619047619047%
> > Percentage under 30 FPS: 7.142857142857142%
> > Percentage under 25 FPS: 4.761904761904762%
> 
> Tested-by: Lyude Paul <lyude@redhat.com>
> ---
>  drivers/gpu/drm/i915/i915_irq.c | 12 ++++++++++++
>  1 file changed, 12 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c
> b/drivers/gpu/drm/i915/i915_irq.c
> index 92bb32ed27fb..7c7e84e86c6a 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1288,6 +1288,18 @@ static void gen6_pm_rps_work(struct work_struct
> *work)
>  
>  	rps->last_adj = adj;
>  
> +	/*
> +	 * Limit deboosting and boosting to keep ourselves at the extremes
> +	 * when in the respective power modes (i.e. slowly decrease
> frequencies
> +	 * while in the HIGH_POWER zone and slowly increase frequencies while
> +	 * in the LOW_POWER zone). On idle, we will hit the timeout and drop
> +	 * to the next level quickly, and conversely if busy we expect to
> +	 * hit a waitboost and rapidly switch into max power.
> +	 */
> +	if ((adj < 0 && rps->power.mode == HIGH_POWER) ||
> +	    (adj > 0 && rps->power.mode == LOW_POWER))
> +		rps->last_adj = 0;
> +
>  	/* sysfs frequency interfaces may have snuck in while servicing the
>  	 * interrupt
>  	 */
-- 
Cheers,
	Lyude Paul
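
A stand-alone restatement of the clamp, simplified from the hunk above
with HIGH_POWER/LOW_POWER standing in for rps->power.mode:

enum rps_power_mode { LOW_POWER, BETWEEN, HIGH_POWER };

/* Returns the last_adj to carry into the next evaluation interval. */
static int clamp_last_adj(int adj, enum rps_power_mode mode)
{
	/*
	 * Dropping the accumulated momentum at the extremes means the
	 * frequency walks back one step per interval instead of
	 * accelerating away from the zone we want to stay in.
	 */
	if ((adj < 0 && mode == HIGH_POWER) ||
	    (adj > 0 && mode == LOW_POWER))
		return 0;

	return adj;
}

So a HIGH_POWER client sees at most a single-step deboost per interval,
while the idle timeout and waitboost still jump levels immediately.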


* Re: [PATCH 13/25] drm/i915: Reduce the RPS shock
  2019-02-19 21:00   ` Lyude Paul
@ 2019-02-20 12:05     ` Chris Wilson
  0 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2019-02-20 12:05 UTC (permalink / raw)
  To: Lyude Paul, intel-gfx; +Cc: Michel Thierry, Eero Tamminen

Quoting Lyude Paul (2019-02-19 21:00:08)
> Should this maybe be CC'd for stable for v4.20? If so, I've already got a
> working port of this patch that I can send to you (I've been running it on my
> laptop for a while now, and it seems to work fine)

I wouldn't say no (I am still wondering if we can do better than hitting
waitboost and a slow backoff that just happens to be giving high
frequencies until we dip too low and waitboost again, but that's future
work). So if we can get this in, you can send your patch to GregKH for
4.20.
-Chris

* Re: [PATCH 04/25] drm/i915: Avoid reset lock in writing fence registers
  2019-02-19 12:21 ` [PATCH 04/25] drm/i915: Avoid reset lock in writing fence registers Chris Wilson
@ 2019-02-20 14:55   ` Mika Kuoppala
  0 siblings, 0 replies; 43+ messages in thread
From: Mika Kuoppala @ 2019-02-20 14:55 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> The idea of taking the reset lock around writing the fence register was
> to serialise the mmio write we also perform during the reset where those
> registers get clobbered. However, the lock is overkill as write tearing
> between reset and fence_update() is harmless; the final value of the
> fence register is the same. A race between revoke_fences() and
> fence_update() is also harmless at this point as on the fault path where
> this is necessary, we acquire the reset lock to coordinate ourselves in
> the upper layer.
>
> The danger of acquiring the reset lock again in fence_update() is that
> we may recurse from the shrinker along the i915_gem_fault() path.
>
> <4> [125.739646] ============================================
> <4> [125.739652] WARNING: possible recursive locking detected
> <4> [125.739659] 5.0.0-rc6-ga6e4cbf00557-drmtip_223+ #1 Tainted: G     U
> <4> [125.739666] --------------------------------------------
> <4> [125.739672] gem_mmap_gtt/1017 is trying to acquire lock:
> <4> [125.739679] 00000000a730190a (&dev_priv->gpu_error.reset_backoff_srcu){+.+.}, at: i915_reset_trylock+0x0/0x310 [i915]
> <4> [125.739848]
> but task is already holding lock:
> <4> [125.739854] 00000000a730190a (&dev_priv->gpu_error.reset_backoff_srcu){+.+.}, at: i915_reset_trylock+0x192/0x310 [i915]
> <4> [125.739918]
> other info that might help us debug this:
> <4> [125.739925]  Possible unsafe locking scenario:
>
> <4> [125.739930]        CPU0
> <4> [125.739934]        ----
> <4> [125.739937]   lock(&dev_priv->gpu_error.reset_backoff_srcu);
> <4> [125.739944]   lock(&dev_priv->gpu_error.reset_backoff_srcu);
> <4> [125.739950]
>  *** DEADLOCK ***
>
> <4> [125.739958]  May be due to missing lock nesting notation
>
> <4> [125.739966] 5 locks held by gem_mmap_gtt/1017:
> <4> [125.739972]  #0: 00000000471f682c (&mm->mmap_sem){++++}, at: __do_page_fault+0x133/0x500
> <4> [125.739987]  #1: 0000000026542685 (&dev->struct_mutex){+.+.}, at: i915_gem_fault+0x1f6/0x860 [i915]
> <4> [125.740061]  #2: 00000000a730190a (&dev_priv->gpu_error.reset_backoff_srcu){+.+.}, at: i915_reset_trylock+0x192/0x310 [i915]
> <4> [125.740126]  #3: 00000000c828eb4f (fs_reclaim){+.+.}, at: fs_reclaim_acquire.part.25+0x0/0x30
> <4> [125.740140]  #4: 000000002d360d65 (shrinker_rwsem){++++}, at: shrink_slab+0x1cb/0x2c0
> <4> [125.740151]
> stack backtrace:
> <4> [125.740159] CPU: 1 PID: 1017 Comm: gem_mmap_gtt Tainted: G     U            5.0.0-rc6-ga6e4cbf00557-drmtip_223+ #1
> <4> [125.740170] Hardware name: Dell Inc.                 OptiPlex 745                 /0GW726, BIOS 2.3.1  05/21/2007
> <4> [125.740180] Call Trace:
> <4> [125.740189]  dump_stack+0x67/0x9b
> <4> [125.740199]  __lock_acquire+0xc75/0x1b00
> <4> [125.740209]  ? arch_tlb_finish_mmu+0x2a/0xa0
> <4> [125.740216]  ? tlb_finish_mmu+0x1a/0x30
> <4> [125.740222]  ? zap_page_range_single+0xe2/0x130
> <4> [125.740230]  ? lock_acquire+0xa6/0x1c0
> <4> [125.740237]  lock_acquire+0xa6/0x1c0
> <4> [125.740296]  ? i915_clear_error_registers+0x280/0x280 [i915]
> <4> [125.740357]  i915_reset_trylock+0x44/0x310 [i915]
> <4> [125.740417]  ? i915_clear_error_registers+0x280/0x280 [i915]
> <4> [125.740426]  ? lockdep_hardirqs_on+0xe0/0x1b0
> <4> [125.740434]  ? _raw_spin_unlock_irqrestore+0x39/0x60
> <4> [125.740499]  fence_update+0x218/0x470 [i915]
> <4> [125.740571]  i915_vma_unbind+0xa6/0x550 [i915]
> <4> [125.740640]  i915_gem_object_unbind+0xfa/0x190 [i915]
> <4> [125.740711]  i915_gem_shrink+0x2dc/0x590 [i915]
> <4> [125.740722]  ? ___preempt_schedule+0x16/0x18
> <4> [125.740792]  ? i915_gem_shrinker_scan+0xc9/0x130 [i915]
> <4> [125.740861]  i915_gem_shrinker_scan+0xc9/0x130 [i915]
> <4> [125.740870]  do_shrink_slab+0x143/0x3f0
> <4> [125.740878]  shrink_slab+0x228/0x2c0
> <4> [125.740886]  shrink_node+0x167/0x450
> <4> [125.740894]  do_try_to_free_pages+0xc4/0x340
> <4> [125.740902]  try_to_free_pages+0xdc/0x2e0
> <4> [125.740911]  __alloc_pages_nodemask+0x662/0x1110
> <4> [125.740921]  ? reacquire_held_locks+0xb5/0x1b0
> <4> [125.740928]  ? reacquire_held_locks+0xb5/0x1b0
> <4> [125.740986]  ? i915_reset_trylock+0x192/0x310 [i915]
> <4> [125.741045]  ? i915_memcpy_init_early+0x30/0x30 [i915]
> <4> [125.741054]  pte_alloc_one+0x12/0x70
> <4> [125.741060]  __pte_alloc+0x11/0xf0
> <4> [125.741067]  apply_to_page_range+0x37e/0x440
> <4> [125.741127]  remap_io_mapping+0x6c/0x100 [i915]
> <4> [125.741196]  i915_gem_fault+0x5a9/0x860 [i915]
> <4> [125.741204]  ? ptlock_alloc+0x15/0x30
> <4> [125.741212]  __do_fault+0x2c/0xb0
> <4> [125.741218]  __handle_mm_fault+0x8ee/0xfa0
> <4> [125.741227]  handle_mm_fault+0x196/0x3a0
> <4> [125.741235]  __do_page_fault+0x246/0x500
> <4> [125.741243]  ? page_fault+0x8/0x30
> <4> [125.741250]  page_fault+0x1e/0x30
> <4> [125.741256] RIP: 0033:0x55d0cc456e12
> <4> [125.741264] Code: b0 df ff ff 89 c2 8b 85 70 df ff ff 01 c2 8b 85 70 df ff ff 48 98 48 8d 0c 85 00 00 00 00 48 8b 85 e0 df ff ff 48 01 c8 f7 d2 <89> 10 83 85 70 df ff ff 01 81 bd 70 df ff ff ff 03 00 00 7e be 48
> <4> [125.741280] RSP: 002b:00007ffc1bab7ab0 EFLAGS: 00010206
> <4> [125.741287] RAX: 00007fc787cb6000 RBX: 0000000000000000 RCX: 0000000000000000
> <4> [125.741295] RDX: 00000000ffffffff RSI: 0000000000005401 RDI: 0000000000000002
> <4> [125.741303] RBP: 00007ffc1bab9b70 R08: 00007ffc1bab7920 R09: 000000000000001b
> <4> [125.741310] R10: 7165722074736554 R11: 0000000000000246 R12: 000055d0cc454a80
> <4> [125.741318] R13: 00007ffc1bab9f60 R14: 0000000000000000 R15: 0000000000000000
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/i915_gem_fence_reg.c | 12 ++----------
>  1 file changed, 2 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> index 1ec1417cf8b4..65624b8e4d15 100644
> --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> @@ -270,24 +270,16 @@ static int fence_update(struct drm_i915_fence_reg *fence,
>  		return 0;
>  	}
>  
> -	ret = i915_reset_trylock(fence->i915);
> -	if (ret < 0)
> -		goto out_rpm;
> -
> +	WRITE_ONCE(fence->vma, vma);
>  	fence_write(fence, vma);
> -	fence->vma = vma;
>  
>  	if (vma) {
>  		vma->fence = fence;
>  		list_move_tail(&fence->link, &fence->i915->mm.fence_list);
>  	}
>  
> -	i915_reset_unlock(fence->i915, ret);
> -	ret = 0;
> -
> -out_rpm:
>  	intel_runtime_pm_put(fence->i915, wakeref);
> -	return ret;
> +	return 0;
>  }
>  
>  /**
> -- 
> 2.20.1

* Re: [PATCH 13/25] drm/i915: Reduce the RPS shock
  2019-02-19 12:22 ` [PATCH 13/25] drm/i915: Reduce the RPS shock Chris Wilson
  2019-02-19 21:00   ` Lyude Paul
@ 2019-02-20 15:14   ` Mika Kuoppala
  1 sibling, 0 replies; 43+ messages in thread
From: Mika Kuoppala @ 2019-02-20 15:14 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Michel Thierry, Eero Tamminen

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Limit deboosting and boosting to keep ourselves at the extremes
> when in the respective power modes (i.e. slowly decrease frequencies
> while in the HIGH_POWER zone and slowly increase frequencies while
> in the LOW_POWER zone). On idle, we will hit the timeout and drop
> to the next level quickly, and conversely if busy we expect to
> hit a waitboost and rapidly switch into max power.
>
> This should improve the UX experience by keeping the GPU clocks higher
> than they ostensibly should be (based on simple busyness) by switching
> into the INTERACTIVE mode (due to waiting for pageflips) and increasing
> clocks via waitboosting. This will incur some additional power, our
> saving grace should be rc6 and powergating to keep the extra current
> draw in check.
>
> Food for future thought would be deadline scheduling? If we know certain
> contexts (high priority compositors) absolutely must hit the next vblank
> then we can raise the frequencies ahead of time. Part of this is covered
> by per-context frequencies, where userspace is given control over the
> frequency range they want the GPU to execute at (for largely the same
> problem as this, where the workload is very latency sensitive but at the
> EI level appears mostly idle). Indeed, the per-context series does
> extend the modeset boosting to include a frequency range tweak which
> seems applicable to solving this jittery UX behaviour.
>
> Reported-by: Lyude Paul <lyude@redhat.com>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109408
> References: 0d55babc8392 ("drm/i915: Drop stray clearing of rps->last_adj")
> References: 60548c554be2 ("drm/i915: Interactive RPS mode")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Lyude Paul <lyude@redhat.com>
> Cc: Eero Tamminen <eero.t.tamminen@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
>
> Quoting Lyude Paul:
>> Before reverting 0d55babc8392754352f1058866dd4182ae587d11: [4.20]
>>
>> 35 measurements [of gnome-shell animations]
>> Average: 33.65657142857143 FPS
>> FPS observed: 20.8 - 46.87 FPS
>> Percentage under 60 FPS: 100.0%
>> Percentage under 55 FPS: 100.0%
>> Percentage under 50 FPS: 100.0%
>> Percentage under 45 FPS: 97.14285714285714%
>> Percentage under 40 FPS: 97.14285714285714%
>> Percentage under 35 FPS: 45.714285714285715%
>> Percentage under 30 FPS: 11.428571428571429%
>> Percentage under 25 FPS: 2.857142857142857%
>>
>> After reverting: [4.19 behaviour]
>>
>> 30 measurements
>> Average: 49.833666666666666 FPS
>> FPS observed: 33.85 - 60.0 FPS
>> Percentage under 60 FPS: 86.66666666666667%
>> Percentage under 55 FPS: 70.0%
>> Percentage under 50 FPS: 53.333333333333336%
>> Percentage under 45 FPS: 20.0%
>> Percentage under 40 FPS: 6.666666666666667%
>> Percentage under 35 FPS: 6.666666666666667%
>> Percentage under 30 FPS: 0%
>> Percentage under 25 FPS: 0%
>>
>> Patched:
>> 42 measurements
>> Average: 46.05428571428571 FPS
>> FPS observed: 1.82 - 59.98 FPS
>> Percentage under 60 FPS: 88.09523809523809%
>> Percentage under 55 FPS: 61.904761904761905%
>> Percentage under 50 FPS: 45.23809523809524%
>> Percentage under 45 FPS: 35.714285714285715%
>> Percentage under 40 FPS: 33.33333333333333%
>> Percentage under 35 FPS: 19.047619047619047%
>> Percentage under 30 FPS: 7.142857142857142%
>> Percentage under 25 FPS: 4.761904761904762%
>
> Tested-by: Lyude Paul <lyude@redhat.com>

It does what it says on the tin,
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/i915_irq.c | 12 ++++++++++++
>  1 file changed, 12 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 92bb32ed27fb..7c7e84e86c6a 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1288,6 +1288,18 @@ static void gen6_pm_rps_work(struct work_struct *work)
>  
>  	rps->last_adj = adj;
>  
> +	/*
> +	 * Limit deboosting and boosting to keep ourselves at the extremes
> +	 * when in the respective power modes (i.e. slowly decrease frequencies
> +	 * while in the HIGH_POWER zone and slowly increase frequencies while
> +	 * in the LOW_POWER zone). On idle, we will hit the timeout and drop
> +	 * to the next level quickly, and conversely if busy we expect to
> +	 * hit a waitboost and rapidly switch into max power.
> +	 */
> +	if ((adj < 0 && rps->power.mode == HIGH_POWER) ||
> +	    (adj > 0 && rps->power.mode == LOW_POWER))
> +		rps->last_adj = 0;
> +
>  	/* sysfs frequency interfaces may have snuck in while servicing the
>  	 * interrupt
>  	 */
> -- 
> 2.20.1
>

* Re: [PATCH 08/25] drm/i915/pmu: Always sample an active ringbuffer
  2019-02-19 12:21 ` [PATCH 08/25] drm/i915/pmu: Always sample an active ringbuffer Chris Wilson
@ 2019-02-22 12:10   ` Mika Kuoppala
  2019-02-22 12:17     ` Chris Wilson
  0 siblings, 1 reply; 43+ messages in thread
From: Mika Kuoppala @ 2019-02-22 12:10 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> As we no longer have a precise indication of requests queued to an
> engine, make no presumptions and just sample the ring registers to see
> if the engine is busy.
>
> v2: Report busy while the ring is idling on a semaphore/event.
> v3: Give the struct a name!
> v4: Always 0 outside the powerwell; trusting the powerwell is
> accurate enough for our sampling pmu.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_pmu.c         | 60 ++++++++++---------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
>  2 files changed, 24 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 13d70b90dd0f..21adad72bd86 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -148,14 +148,6 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
>  	spin_unlock_irq(&i915->pmu.lock);
>  }
>  
> -static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
> -{
> -	if (!fw)
> -		intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
> -
> -	return true;
> -}
> -
>  static void
>  add_sample(struct i915_pmu_sample *sample, u32 val)
>  {
> @@ -168,49 +160,43 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
>  	struct intel_engine_cs *engine;
>  	enum intel_engine_id id;
>  	intel_wakeref_t wakeref;
> -	bool fw = false;
>  
>  	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
>  		return;
>  
> -	if (!dev_priv->gt.awake)
> -		return;
> -
> -	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
> +	wakeref = 0;
> +	if (READ_ONCE(dev_priv->gt.awake))

Is this gt.awake check just to make the sampling more lightweight?

> +		wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
>  	if (!wakeref)
>  		return;
>  
>  	for_each_engine(engine, dev_priv, id) {
> -		u32 current_seqno = intel_engine_get_seqno(engine);
> -		u32 last_seqno = intel_engine_last_submit(engine);
> +		struct intel_engine_pmu *pmu = &engine->pmu;
> +		bool busy;
>  		u32 val;
>  
> -		val = !i915_seqno_passed(current_seqno, last_seqno);
> -
> -		if (val)
> -			add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
> -				   period_ns);
> -
> -		if (val && (engine->pmu.enable &
> -		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
> -			fw = grab_forcewake(dev_priv, fw);
> -
> -			val = I915_READ_FW(RING_CTL(engine->mmio_base));
> -		} else {
> -			val = 0;
> -		}
> +		val = I915_READ_FW(RING_CTL(engine->mmio_base));
> +		if (val == 0) /* powerwell off => engine idle */
> +			continue;
>  
>  		if (val & RING_WAIT)
> -			add_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
> -				   period_ns);
> -
> +			add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
>  		if (val & RING_WAIT_SEMAPHORE)
> -			add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
> -				   period_ns);
> -	}
> +			add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
>  
> -	if (fw)
> -		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> +		/*
> +		 * MI_MODE reports IDLE if the ring is waiting, but we regard
> +		 * this as being busy instead, as the engine is busy with the
> +		 * user request.
> +		 */
> +		busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
> +		if (!busy) {
> +			val = I915_READ_FW(RING_MI_MODE(engine->mmio_base));
> +			busy = !(val & MODE_IDLE);

The comment makes sense if you do
busy = val & MODE_IDLE;

-Mika

> +		}
> +		if (busy)
> +			add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
> +	}
>  
>  	intel_runtime_pm_put(dev_priv, wakeref);
>  }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 710ffb221775..5d45ad4ecca9 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -392,7 +392,7 @@ struct intel_engine_cs {
>  		bool irq_armed;
>  	} breadcrumbs;
>  
> -	struct {
> +	struct intel_engine_pmu {
>  		/**
>  		 * @enable: Bitmask of enable sample events on this engine.
>  		 *
> -- 
> 2.20.1
>
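
Pulled out of the loop, the per-engine classification the new sampler
makes is, schematically (using the register bits from the patch; sketch
only):

static bool engine_busy(u32 ring_ctl, u32 mi_mode)
{
	if (ring_ctl == 0)
		return false;	/* powerwell off => engine idle */

	if (ring_ctl & (RING_WAIT | RING_WAIT_SEMAPHORE))
		return true;	/* stalled on an event/semaphore, but
				 * busy on the user's behalf */

	return !(mi_mode & MODE_IDLE);	/* MODE_IDLE set => rings drained */
}

Checking the WAIT bits before MI_MODE is what implements the comment
about regarding a waiting ring as busy.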

* Re: [PATCH 08/25] drm/i915/pmu: Always sample an active ringbuffer
  2019-02-22 12:10   ` Mika Kuoppala
@ 2019-02-22 12:17     ` Chris Wilson
  2019-02-22 12:31       ` Mika Kuoppala
  0 siblings, 1 reply; 43+ messages in thread
From: Chris Wilson @ 2019-02-22 12:17 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2019-02-22 12:10:33)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > As we no longer have a precise indication of requests queued to an
> > engine, make no presumptions and just sample the ring registers to see
> > if the engine is busy.
> >
> > v2: Report busy while the ring is idling on a semaphore/event.
> > v3: Give the struct a name!
> > v4: Always 0 outside the powerwell; trusting the powerwell is
> > accurate enough for our sampling pmu.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_pmu.c         | 60 ++++++++++---------------
> >  drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
> >  2 files changed, 24 insertions(+), 38 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> > index 13d70b90dd0f..21adad72bd86 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.c
> > +++ b/drivers/gpu/drm/i915/i915_pmu.c
> > @@ -148,14 +148,6 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
> >       spin_unlock_irq(&i915->pmu.lock);
> >  }
> >  
> > -static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
> > -{
> > -     if (!fw)
> > -             intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
> > -
> > -     return true;
> > -}
> > -
> >  static void
> >  add_sample(struct i915_pmu_sample *sample, u32 val)
> >  {
> > @@ -168,49 +160,43 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
> >       struct intel_engine_cs *engine;
> >       enum intel_engine_id id;
> >       intel_wakeref_t wakeref;
> > -     bool fw = false;
> >  
> >       if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
> >               return;
> >  
> > -     if (!dev_priv->gt.awake)
> > -             return;
> > -
> > -     wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
> > +     wakeref = 0;
> > +     if (READ_ONCE(dev_priv->gt.awake))
> 
> Is this gt.awake check just to make the sampling more lightweight?

Yes, we know that if !awake, then we are idle and done with sampling.
It also ties in with a later patch that changes how we handle rpm
suspend periods (which is why I wrote it in this idiom).
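
Concretely, the idiom reduces to this minimal sketch (sample_get_wakeref()
is a hypothetical helper for illustration, not part of the patch):

static intel_wakeref_t sample_get_wakeref(struct drm_i915_private *i915)
{
	intel_wakeref_t wakeref = 0;

	/*
	 * Lock-free peek: if the GT is parked, the common case costs a
	 * single read and we skip the sample entirely. READ_ONCE() is
	 * racy by design; a stale value only defers work by one sample.
	 */
	if (READ_ONCE(i915->gt.awake))
		wakeref = intel_runtime_pm_get_if_in_use(i915);

	return wakeref; /* 0 => device idle/asleep, nothing to sample */
}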

> > +             wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
> >       if (!wakeref)
> >               return;
> >  
> >       for_each_engine(engine, dev_priv, id) {
> > -             u32 current_seqno = intel_engine_get_seqno(engine);
> > -             u32 last_seqno = intel_engine_last_submit(engine);
> > +             struct intel_engine_pmu *pmu = &engine->pmu;
> > +             bool busy;
> >               u32 val;
> >  
> > -             val = !i915_seqno_passed(current_seqno, last_seqno);
> > -
> > -             if (val)
> > -                     add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
> > -                                period_ns);
> > -
> > -             if (val && (engine->pmu.enable &
> > -                 (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
> > -                     fw = grab_forcewake(dev_priv, fw);
> > -
> > -                     val = I915_READ_FW(RING_CTL(engine->mmio_base));
> > -             } else {
> > -                     val = 0;
> > -             }
> > +             val = I915_READ_FW(RING_CTL(engine->mmio_base));
> > +             if (val == 0) /* powerwell off => engine idle */
> > +                     continue;
> >  
> >               if (val & RING_WAIT)
> > -                     add_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
> > -                                period_ns);
> > -
> > +                     add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
> >               if (val & RING_WAIT_SEMAPHORE)
> > -                     add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
> > -                                period_ns);
> > -     }
> > +                     add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
> >  
> > -     if (fw)
> > -             intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> > +             /*
> > +              * MI_MODE reports IDLE if the ring is waiting, but we regard
> > +              * this as being busy instead, as the engine is busy with the
> > +              * user request.
> > +              */
> > +             busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
> > +             if (!busy) {
> > +                     val = I915_READ_FW(RING_MI_MODE(engine->mmio_base));
> > +                     busy = !(val & MODE_IDLE);
> 
> The comment makes sense if you do
> busy = val & MODE_IDLE;

/*
 * While waiting on a semaphore or event, MI_MODE reports the ring as
 * idle. However, as with the previous seqno-based sampling and with
 * execlists sampling, we account the ring waiting as the engine being
 * busy. Therefore, we record the sample as busy if the ring is either
 * waiting or !idle.
 */
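
In other words, the final per-engine loop body condenses to this sketch
(taken from the patch above, with the helpers as in i915_pmu.c):

	val = I915_READ_FW(RING_CTL(engine->mmio_base));
	if (val == 0) /* powerwell off => engine idle, no forcewake needed */
		continue;

	if (val & RING_WAIT)
		add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
	if (val & RING_WAIT_SEMAPHORE)
		add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);

	/* Busy if waiting on a semaphore/event, or if MI_MODE says !idle. */
	busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
	if (!busy) {
		val = I915_READ_FW(RING_MI_MODE(engine->mmio_base));
		busy = !(val & MODE_IDLE);
	}
	if (busy)
		add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
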
-Chris

* Re: [PATCH 08/25] drm/i915/pmu: Always sample an active ringbuffer
  2019-02-22 12:17     ` Chris Wilson
@ 2019-02-22 12:31       ` Mika Kuoppala
  0 siblings, 0 replies; 43+ messages in thread
From: Mika Kuoppala @ 2019-02-22 12:31 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2019-02-22 12:10:33)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > As we no longer have a precise indication of requests queued to an
>> > engine, make no presumptions and just sample the ring registers to see
>> > if the engine is busy.
>> >
>> > v2: Report busy while the ring is idling on a semaphore/event.
>> > v3: Give the struct a name!
>> > v4: Always 0 outside the powerwell; trusting the powerwell is
>> > accurate enough for our sampling pmu.
>> >
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> > ---
>> >  drivers/gpu/drm/i915/i915_pmu.c         | 60 ++++++++++---------------
>> >  drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
>> >  2 files changed, 24 insertions(+), 38 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
>> > index 13d70b90dd0f..21adad72bd86 100644
>> > --- a/drivers/gpu/drm/i915/i915_pmu.c
>> > +++ b/drivers/gpu/drm/i915/i915_pmu.c
>> > @@ -148,14 +148,6 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
>> >       spin_unlock_irq(&i915->pmu.lock);
>> >  }
>> >  
>> > -static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
>> > -{
>> > -     if (!fw)
>> > -             intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
>> > -
>> > -     return true;
>> > -}
>> > -
>> >  static void
>> >  add_sample(struct i915_pmu_sample *sample, u32 val)
>> >  {
>> > @@ -168,49 +160,43 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
>> >       struct intel_engine_cs *engine;
>> >       enum intel_engine_id id;
>> >       intel_wakeref_t wakeref;
>> > -     bool fw = false;
>> >  
>> >       if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
>> >               return;
>> >  
>> > -     if (!dev_priv->gt.awake)
>> > -             return;
>> > -
>> > -     wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
>> > +     wakeref = 0;
>> > +     if (READ_ONCE(dev_priv->gt.awake))
>> 
>> Is this gt.awake check just to make the sampling more lightweight?
>
> Yes, we know that if !awake, then we are idle and done with sampling.
> It also ties in with a later patch that changes how we handle rpm
> suspend periods (which is why I wrote it in this idiom).
>
>> > +             wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
>> >       if (!wakeref)
>> >               return;
>> >  
>> >       for_each_engine(engine, dev_priv, id) {
>> > -             u32 current_seqno = intel_engine_get_seqno(engine);
>> > -             u32 last_seqno = intel_engine_last_submit(engine);
>> > +             struct intel_engine_pmu *pmu = &engine->pmu;
>> > +             bool busy;
>> >               u32 val;
>> >  
>> > -             val = !i915_seqno_passed(current_seqno, last_seqno);
>> > -
>> > -             if (val)
>> > -                     add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
>> > -                                period_ns);
>> > -
>> > -             if (val && (engine->pmu.enable &
>> > -                 (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
>> > -                     fw = grab_forcewake(dev_priv, fw);
>> > -
>> > -                     val = I915_READ_FW(RING_CTL(engine->mmio_base));
>> > -             } else {
>> > -                     val = 0;
>> > -             }
>> > +             val = I915_READ_FW(RING_CTL(engine->mmio_base));
>> > +             if (val == 0) /* powerwell off => engine idle */
>> > +                     continue;
>> >  
>> >               if (val & RING_WAIT)
>> > -                     add_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
>> > -                                period_ns);
>> > -
>> > +                     add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
>> >               if (val & RING_WAIT_SEMAPHORE)
>> > -                     add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
>> > -                                period_ns);
>> > -     }
>> > +                     add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
>> >  
>> > -     if (fw)
>> > -             intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>> > +             /*
>> > +              * MI_MODE reports IDLE if the ring is waiting, but we regard
>> > +              * this as being busy instead, as the engine is busy with the
>> > +              * user request.
>> > +              */
>> > +             busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
>> > +             if (!busy) {
>> > +                     val = I915_READ_FW(RING_MI_MODE(engine->mmio_base));
>> > +                     busy = !(val & MODE_IDLE);
>> 
>> The comment makes sense if you do
>> busy = val & MODE_IDLE;
>
> /*
>  * While waiting on a semaphore or event, MI_MODE reports the ring as
>  * idle. However, as with the previous seqno-based sampling and with
>  * execlists sampling, we account the ring waiting as the engine being
>  * busy. Therefore, we record the sample as busy if the ring is either
>  * waiting or !idle.
>  */

Although the original makes more sense now too, I like this one better!

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> 

Thread overview: 43+ messages
2019-02-19 12:21 [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Chris Wilson
2019-02-19 12:21 ` [PATCH 02/25] drm/i915: Use time based guilty context banning Chris Wilson
2019-02-19 12:21 ` [PATCH 03/25] drm/i915: Prevent user context creation while wedged Chris Wilson
2019-02-19 13:07   ` Mika Kuoppala
2019-02-19 13:11     ` Chris Wilson
2019-02-19 13:31       ` Mika Kuoppala
2019-02-19 13:20     ` Chris Wilson
2019-02-19 12:21 ` [PATCH 04/25] drm/i915: Avoid reset lock in writing fence registers Chris Wilson
2019-02-20 14:55   ` Mika Kuoppala
2019-02-19 12:21 ` [PATCH 05/25] drm/i915: Reorder struct_mutex-vs-reset_lock in i915_gem_fault() Chris Wilson
2019-02-19 12:21 ` [PATCH 06/25] drm/i915: Trim i915_do_reset() to minimum delays Chris Wilson
2019-02-19 12:21 ` [PATCH 07/25] drm/i915: Trim delays for wedging Chris Wilson
2019-02-19 12:48   ` Mika Kuoppala
2019-02-19 12:21 ` [PATCH 08/25] drm/i915/pmu: Always sample an active ringbuffer Chris Wilson
2019-02-22 12:10   ` Mika Kuoppala
2019-02-22 12:17     ` Chris Wilson
2019-02-22 12:31       ` Mika Kuoppala
2019-02-19 12:21 ` [PATCH 09/25] drm/i915: Replace global_seqno with a hangcheck heartbeat seqno Chris Wilson
2019-02-19 12:22 ` [PATCH 10/25] drm/i915: Remove access to global seqno in the HWSP Chris Wilson
2019-02-19 12:22 ` [PATCH 11/25] drm/i915: Remove i915_request.global_seqno Chris Wilson
2019-02-19 12:22 ` [PATCH 12/25] drm/i915/selftests: Exercise resetting during non-user payloads Chris Wilson
2019-02-19 12:22 ` [PATCH 13/25] drm/i915: Reduce the RPS shock Chris Wilson
2019-02-19 21:00   ` Lyude Paul
2019-02-20 12:05     ` Chris Wilson
2019-02-20 15:14   ` Mika Kuoppala
2019-02-19 12:22 ` [PATCH 14/25] drm/i915/pmu: Use GT parked for estimating RC6 while asleep Chris Wilson
2019-02-19 12:22 ` [PATCH 15/25] drm/i915: Skip scanning for signalers if we are already inflight Chris Wilson
2019-02-19 12:22 ` [PATCH 16/25] drm/i915/execlists: Suppress mere WAIT preemption Chris Wilson
2019-02-19 12:22 ` [PATCH 17/25] drm/i915/execlists: Suppress redundant preemption Chris Wilson
2019-02-19 12:22 ` [PATCH 18/25] drm/i915: Make request allocation caches global Chris Wilson
2019-02-19 12:22 ` [PATCH 19/25] drm/i915: Introduce i915_timeline.mutex Chris Wilson
2019-02-19 12:22 ` [PATCH 20/25] drm/i915: Keep timeline HWSP allocated until idle across the system Chris Wilson
2019-02-19 12:22 ` [PATCH 21/25] drm/i915: Compute the global scheduler caps Chris Wilson
2019-02-19 12:22 ` [PATCH 22/25] drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+ Chris Wilson
2019-02-19 12:22 ` [PATCH 23/25] drm/i915: Prioritise non-busywait semaphore workloads Chris Wilson
2019-02-19 12:22 ` [PATCH 24/25] drm/i915/execlists: Skip direct submission if only lite-restore Chris Wilson
2019-02-19 12:22 ` [PATCH 25/25] drm/i915: Use __ffs() in for_each_priolist for more compact code Chris Wilson
2019-02-19 12:56 ` [PATCH 01/25] drm/i915: Move verify_wm_state() to heap Ville Syrjälä
2019-02-19 13:05 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/25] " Patchwork
2019-02-19 13:15 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-02-19 13:31 ` ✓ Fi.CI.BAT: success " Patchwork
2019-02-19 17:14 ` ✗ Fi.CI.IGT: failure " Patchwork
2019-02-19 17:16   ` Chris Wilson
