All of lore.kernel.org
 help / color / mirror / Atom feed
From: John.C.Harrison@Intel.com
To: Intel-GFX@Lists.FreeDesktop.Org
Subject: [RFC 22/39] drm/i915: Add scheduler hook to GPU reset
Date: Fri, 17 Jul 2015 15:33:31 +0100	[thread overview]
Message-ID: <1437143628-6329-23-git-send-email-John.C.Harrison@Intel.com> (raw)
In-Reply-To: <1437143628-6329-1-git-send-email-John.C.Harrison@Intel.com>

From: John Harrison <John.C.Harrison@Intel.com>

When the watchdog resets the GPU, the scheduler needs to know so that it can
clean up its view of the world. All in-flight nodes must be marked as dead so
that the scheduler does not wait forever for them to complete. Also, all queued
nodes must be marked as dead so that the scheduler does not deadlock the reset
code by saying that the ring cannot be idled and it must come back again later.

Change-Id: I184eb59c5c1a1385f9c17db66c7cc46f8904eebd
For: VIZ-1587
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c       |  2 ++
 drivers/gpu/drm/i915/i915_scheduler.c | 63 ++++++++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_scheduler.h |  8 ++++-
 3 files changed, 68 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6142e68..6d72caa 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3187,6 +3187,8 @@ void i915_gem_reset(struct drm_device *dev)
 	struct intel_engine_cs *ring;
 	int i;
 
+	i915_scheduler_kill_all(dev);
+
 	/*
 	 * Before we free the objects from the requests, we need to inspect
 	 * them for finding the guilty party. As the requests only borrow
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 73c9ba6..3155f42 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -341,6 +341,56 @@ static void i915_scheduler_node_kill(struct i915_scheduler_queue_entry *node)
 	node->status = i915_sqs_dead;
 }
 
+/* Abandon a queued node completely. For example because the driver is being
+ * reset and it is not valid to preserve absolutely any state at all across the
+ * reinitialisation sequence. */
+static void i915_scheduler_node_kill_queued(struct i915_scheduler_queue_entry *node)
+{
+	BUG_ON(!node);
+	BUG_ON(!I915_SQS_IS_QUEUED(node));
+
+	node->status = i915_sqs_dead;
+}
+
+/* The system is toast. Terminate all nodes with extreme prejudice. */
+void i915_scheduler_kill_all(struct drm_device *dev)
+{
+	struct i915_scheduler_queue_entry   *node;
+	struct drm_i915_private             *dev_priv = dev->dev_private;
+	struct i915_scheduler               *scheduler = dev_priv->scheduler;
+	unsigned long   flags;
+	int             r;
+
+	spin_lock_irqsave(&scheduler->lock, flags);
+
+	for (r = 0; r < I915_NUM_RINGS; r++) {
+		list_for_each_entry(node, &scheduler->node_queue[r], link) {
+			switch (node->status) {
+			case I915_SQS_CASE_COMPLETE:
+			break;
+
+			case I915_SQS_CASE_FLYING:
+				i915_scheduler_node_kill(node);
+			break;
+
+			case I915_SQS_CASE_QUEUED:
+				i915_scheduler_node_kill_queued(node);
+			break;
+
+			default:
+				/* Wot no state?! */
+				BUG();
+			}
+		}
+	}
+
+	memset(scheduler->last_irq_seqno, 0x00, sizeof(scheduler->last_irq_seqno));
+
+	spin_unlock_irqrestore(&scheduler->lock, flags);
+
+	queue_work(dev_priv->wq, &dev_priv->mm.scheduler_work);
+}
+
 /*
  * The batch tagged with the indicated seqence number has completed.
  * Search the queue for it, update its status and those of any batches
@@ -912,7 +962,7 @@ static int i915_scheduler_submit(struct intel_engine_cs *ring, bool was_locked)
 		scheduler->flags[ring->id] &= ~i915_sf_submitting;
 
 		if (ret) {
-			bool requeue = true;
+			int requeue = 1;
 
 			/* Oh dear! Either the node is broken or the ring is
 			 * busy. So need to kill the node or requeue it and try
@@ -922,7 +972,7 @@ static int i915_scheduler_submit(struct intel_engine_cs *ring, bool was_locked)
 			case ENODEV:
 			case ENOENT:
 				/* Fatal errors. Kill the node. */
-				requeue = false;
+				requeue = -1;
 			break;
 
 			case EAGAIN:
@@ -941,13 +991,18 @@ static int i915_scheduler_submit(struct intel_engine_cs *ring, bool was_locked)
 			break;
 			}
 
-			if (requeue) {
+			/* Check that the watchdog/reset code has not nuked
+			 * the node while we weren't looking: */
+			if (node->status == i915_sqs_dead)
+				requeue = 0;
+
+			if (requeue == 1) {
 				i915_scheduler_node_requeue(node);
 				/* No point spinning if the ring is currently
 				 * unavailable so just give up and come back
 				 * later. */
 				break;
-			} else
+			} else if (requeue == -1)
 				i915_scheduler_node_kill(node);
 		}
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 5e094d5..b440e62 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -36,7 +36,7 @@ enum i915_scheduler_queue_status {
 	i915_sqs_flying,
 	/* Finished processing on the hardware: */
 	i915_sqs_complete,
-	/* Killed by catastrophic submission failure: */
+	/* Killed by watchdog or catastrophic submission failure: */
 	i915_sqs_dead,
 	/* Limit value for use with arrays/loops */
 	i915_sqs_MAX
@@ -47,6 +47,11 @@ enum i915_scheduler_queue_status {
 #define I915_SQS_IS_COMPLETE(node)	(((node)->status == i915_sqs_complete) || \
 					 ((node)->status == i915_sqs_dead))
 
+#define I915_SQS_CASE_QUEUED		i915_sqs_queued
+#define I915_SQS_CASE_FLYING		i915_sqs_flying
+#define I915_SQS_CASE_COMPLETE		i915_sqs_complete:		\
+					case i915_sqs_dead
+
 struct i915_scheduler_obj_entry {
 	struct drm_i915_gem_object          *obj;
 };
@@ -91,6 +96,7 @@ int         i915_scheduler_closefile(struct drm_device *dev,
 void        i915_gem_scheduler_clean_node(struct i915_scheduler_queue_entry *node);
 int         i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe);
 int         i915_scheduler_handle_irq(struct intel_engine_cs *ring);
+void        i915_scheduler_kill_all(struct drm_device *dev);
 void        i915_gem_scheduler_work_handler(struct work_struct *work);
 int         i915_scheduler_flush(struct intel_engine_cs *ring, bool is_locked);
 int         i915_scheduler_flush_request(struct drm_i915_gem_request *req,
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2015-07-17 14:34 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-07-17 14:33 [RFC 00/39] GPU scheduler for i915 driver John.C.Harrison
2015-07-17 14:33 ` [RFC 01/39] drm/i915: Add total count to context status debugfs output John.C.Harrison
2015-07-17 14:33 ` [RFC 02/39] drm/i915: Updating assorted register and status page definitions John.C.Harrison
2015-07-17 14:33 ` [RFC 03/39] drm/i915: Explicit power enable during deferred context initialisation John.C.Harrison
2015-07-21  7:54   ` Daniel Vetter
2015-07-17 14:33 ` [RFC 04/39] drm/i915: Prelude to splitting i915_gem_do_execbuffer in two John.C.Harrison
2015-07-21  8:06   ` Daniel Vetter
2015-07-17 14:33 ` [RFC 05/39] drm/i915: Split i915_dem_do_execbuffer() in half John.C.Harrison
2015-07-21  8:00   ` Daniel Vetter
2015-07-17 14:33 ` [RFC 06/39] drm/i915: Re-instate request->uniq because it is extremely useful John.C.Harrison
2015-07-17 14:33 ` [RFC 07/39] drm/i915: Start of GPU scheduler John.C.Harrison
2015-07-21  9:40   ` Daniel Vetter
2015-07-17 14:33 ` [RFC 08/39] drm/i915: Prepare retire_requests to handle out-of-order seqnos John.C.Harrison
2015-07-17 14:33 ` [RFC 09/39] drm/i915: Added scheduler hook into i915_gem_complete_requests_ring() John.C.Harrison
2015-07-17 14:33 ` [RFC 10/39] drm/i915: Disable hardware semaphores when GPU scheduler is enabled John.C.Harrison
2015-07-17 14:33 ` [RFC 11/39] drm/i915: Force MMIO flips when scheduler enabled John.C.Harrison
2015-07-17 14:33 ` [RFC 12/39] drm/i915: Added scheduler hook when closing DRM file handles John.C.Harrison
2015-07-17 14:33 ` [RFC 13/39] drm/i915: Added deferred work handler for scheduler John.C.Harrison
2015-07-17 14:33 ` [RFC 14/39] drm/i915: Redirect execbuffer_final() via scheduler John.C.Harrison
2015-07-17 14:33 ` [RFC 15/39] drm/i915: Keep the reserved space mechanism happy John.C.Harrison
2015-07-17 14:33 ` [RFC 16/39] drm/i915: Added tracking/locking of batch buffer objects John.C.Harrison
2015-07-17 14:33 ` [RFC 17/39] drm/i915: Hook scheduler node clean up into retire requests John.C.Harrison
2015-07-17 14:33 ` [RFC 18/39] drm/i915: Added scheduler interrupt handler hook John.C.Harrison
2015-07-17 14:33 ` [RFC 19/39] drm/i915: Added scheduler support to __wait_request() calls John.C.Harrison
2015-07-21  9:27   ` Daniel Vetter
2015-07-17 14:33 ` [RFC 20/39] drm/i915: Added scheduler support to page fault handler John.C.Harrison
2015-07-17 14:33 ` [RFC 21/39] drm/i915: Added scheduler flush calls to ring throttle and idle functions John.C.Harrison
2015-07-17 14:33 ` John.C.Harrison [this message]
2015-07-17 14:33 ` [RFC 23/39] drm/i915: Added a module parameter for allowing scheduler overrides John.C.Harrison
2015-07-17 14:33 ` [RFC 24/39] drm/i915: Support for 'unflushed' ring idle John.C.Harrison
2015-07-21  8:50   ` Daniel Vetter
2015-07-17 14:33 ` [RFC 25/39] drm/i915: Defer seqno allocation until actual hardware submission time John.C.Harrison
2015-07-17 14:33 ` [RFC 26/39] drm/i915: Added immediate submission override to scheduler John.C.Harrison
2015-07-17 14:33 ` [RFC 27/39] drm/i915: Add sync wait support " John.C.Harrison
2015-07-21  9:59   ` Daniel Vetter
2015-07-17 14:33 ` [RFC 28/39] drm/i915: Connecting execbuff fences " John.C.Harrison
2015-07-17 14:33 ` [RFC 29/39] drm/i915: Added trace points " John.C.Harrison
2015-07-17 14:33 ` [RFC 30/39] drm/i915: Added scheduler queue throttling by DRM file handle John.C.Harrison
2015-07-17 14:33 ` [RFC 31/39] drm/i915: Added debugfs interface to scheduler tuning parameters John.C.Harrison
2015-07-17 14:33 ` [RFC 32/39] drm/i915: Added debug state dump facilities to scheduler John.C.Harrison
2015-07-17 14:33 ` [RFC 33/39] drm/i915: Add early exit to execbuff_final() if insufficient ring space John.C.Harrison
2015-07-17 14:33 ` [RFC 34/39] drm/i915: Added scheduler statistic reporting to debugfs John.C.Harrison
2015-07-17 14:33 ` [RFC 35/39] drm/i915: Added seqno values to scheduler status dump John.C.Harrison
2015-07-17 14:33 ` [RFC 36/39] drm/i915: Add scheduler support functions for TDR John.C.Harrison
2015-07-17 14:33 ` [RFC 37/39] drm/i915: GPU priority bumping to prevent starvation John.C.Harrison
2015-07-17 14:33 ` [RFC 38/39] drm/i915: Enable GPU scheduler by default John.C.Harrison
2015-07-17 14:33 ` [RFC 39/39] drm/i915: Allow scheduler to manage inter-ring object synchronisation John.C.Harrison
2015-07-21 13:33 ` [RFC 00/39] GPU scheduler for i915 driver Daniel Vetter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1437143628-6329-23-git-send-email-John.C.Harrison@Intel.com \
    --to=john.c.harrison@intel.com \
    --cc=Intel-GFX@Lists.FreeDesktop.Org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.