* [Intel-gfx] [PATCH 1/3] drm/i915: Use common priotree lists for virtual engine
@ 2020-01-15  8:33 Chris Wilson
  2020-01-15  8:33 ` [Intel-gfx] [PATCH 2/3] drm/i915/gt: Allow temporary suspension of inflight requests Chris Wilson
                   ` (8 more replies)
  0 siblings, 9 replies; 21+ messages in thread
From: Chris Wilson @ 2020-01-15  8:33 UTC (permalink / raw)
  To: intel-gfx

Since commit 422d7df4f090 ("drm/i915: Replace engine->timeline with a
plain list"), we have used the default embedded priotree slot for the
virtual engine request queue, which means we can also use the same
solitary slot with the scheduler. However, the priolist is expected to
be guarded by the engine->active.lock, and that does not hold for the
virtual engine.

v2: Update i915_sched_node.link explanation for current usage where it
is a link on both the queue and the runlists.
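
As a minimal userspace sketch of the idea (illustrative names and
types, not the i915 structures): a flag shadows which list the
request's sched link currently sits on, so membership can be tested in
O(1) rather than inferred from the engine type and active state:

  #include <stdbool.h>
  #include <stdio.h>

  enum { FLAG_PQUEUE = 1u << 0, FLAG_ACTIVE = 1u << 1 };

  struct request {
          unsigned int flags;
  };

  /* queue_request(): the link goes onto the priority queue */
  static void queue_request(struct request *rq)
  {
          rq->flags |= FLAG_PQUEUE;
  }

  /* submit(): the link moves from the queue onto the HW runlist */
  static void submit(struct request *rq)
  {
          rq->flags &= ~FLAG_PQUEUE;
          rq->flags |= FLAG_ACTIVE;
  }

  static bool in_priority_queue(const struct request *rq)
  {
          return rq->flags & FLAG_PQUEUE;
  }

  int main(void)
  {
          struct request rq = { 0 };

          queue_request(&rq);
          printf("in pqueue: %d\n", in_priority_queue(&rq)); /* 1 */
          submit(&rq);
          printf("in pqueue: %d\n", in_priority_queue(&rq)); /* 0 */
          return 0;
  }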

References: 422d7df4f090 ("drm/i915: Replace engine->timeline with a plain list")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c   | 13 ++++++++-----
 drivers/gpu/drm/i915/i915_request.c   |  4 +++-
 drivers/gpu/drm/i915/i915_request.h   | 17 +++++++++++++++++
 drivers/gpu/drm/i915/i915_scheduler.c | 22 ++++++++++------------
 4 files changed, 38 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 9e430590fb3a..f0cbd240a8c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -985,6 +985,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 
 			list_move(&rq->sched.link, pl);
+			set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+
 			active = rq;
 		} else {
 			struct intel_engine_cs *owner = rq->context->engine;
@@ -2430,11 +2432,12 @@ static void execlists_preempt(struct timer_list *timer)
 }
 
 static void queue_request(struct intel_engine_cs *engine,
-			  struct i915_sched_node *node,
-			  int prio)
+			  struct i915_request *rq)
 {
-	GEM_BUG_ON(!list_empty(&node->link));
-	list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio));
+	GEM_BUG_ON(!list_empty(&rq->sched.link));
+	list_add_tail(&rq->sched.link,
+		      i915_sched_lookup_priolist(engine, rq_prio(rq)));
+	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
 }
 
 static void __submit_queue_imm(struct intel_engine_cs *engine)
@@ -2470,7 +2473,7 @@ static void execlists_submit_request(struct i915_request *request)
 	/* Will be called from irq-context when using foreign fences. */
 	spin_lock_irqsave(&engine->active.lock, flags);
 
-	queue_request(engine, &request->sched, rq_prio(request));
+	queue_request(engine, request);
 
 	GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 	GEM_BUG_ON(list_empty(&request->sched.link));
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index be185886e4fc..9ed0d3bc7249 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -408,8 +408,10 @@ bool __i915_request_submit(struct i915_request *request)
 xfer:	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
-	if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags))
+	if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) {
 		list_move_tail(&request->sched.link, &engine->active.requests);
+		clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
+	}
 
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
 	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 031433691a06..a9f0d3c8d8b7 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -70,6 +70,18 @@ enum {
 	 */
 	I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS,
 
+	/*
+	 * I915_FENCE_FLAG_PQUEUE - this request is ready for execution
+	 *
+	 * Using the scheduler, when a request is ready for execution it is put
+	 * into the priority queue, and removed from the queue when transferred
+	 * to the HW runlists. We want to track its membership within that
+	 * queue so that we can easily check before rescheduling.
+	 *
+	 * See i915_request_in_priority_queue()
+	 */
+	I915_FENCE_FLAG_PQUEUE,
+
 	/*
 	 * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list
 	 *
@@ -361,6 +373,11 @@ static inline bool i915_request_is_active(const struct i915_request *rq)
 	return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
 }
 
+static inline bool i915_request_in_priority_queue(const struct i915_request *rq)
+{
+	return test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+}
+
 /**
  * Returns true if seq1 is later than seq2.
  */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index bf87c70bfdd9..db3da81b7f05 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -326,20 +326,18 @@ static void __i915_schedule(struct i915_sched_node *node,
 
 		node->attr.priority = prio;
 
-		if (list_empty(&node->link)) {
-			/*
-			 * If the request is not in the priolist queue because
-			 * it is not yet runnable, then it doesn't contribute
-			 * to our preemption decisions. On the other hand,
-			 * if the request is on the HW, it too is not in the
-			 * queue; but in that case we may still need to reorder
-			 * the inflight requests.
-			 */
+		/*
+		 * Once the request is ready, it will be placed into the
+		 * priority lists and then onto the HW runlist. Before the
+		 * request is ready, it does not contribute to our preemption
+		 * decisions and we can safely ignore it, as it, and any
+		 * preemption it requires, will be dealt with upon submission.
+		 * See engine->submit_request()
+		 */
+		if (list_empty(&node->link))
 			continue;
-		}
 
-		if (!intel_engine_is_virtual(engine) &&
-		    !i915_request_is_active(node_to_request(node))) {
+		if (i915_request_in_priority_queue(node_to_request(node))) {
 			if (!cache.priolist)
 				cache.priolist =
 					i915_sched_lookup_priolist(engine,
-- 
2.25.0


* [Intel-gfx] [PATCH 2/3] drm/i915/gt: Allow temporary suspension of inflight requests
  2020-01-15  8:33 [Intel-gfx] [PATCH 1/3] drm/i915: Use common priotree lists for virtual engine Chris Wilson
@ 2020-01-15  8:33 ` Chris Wilson
  2020-01-15 10:58   ` Tvrtko Ursulin
  2020-01-15 11:10   ` [Intel-gfx] [PATCH v3] " Chris Wilson
  2020-01-15  8:33 ` [Intel-gfx] [PATCH 3/3] drm/i915/execlists: Offline error capture Chris Wilson
                   ` (7 subsequent siblings)
  8 siblings, 2 replies; 21+ messages in thread
From: Chris Wilson @ 2020-01-15  8:33 UTC (permalink / raw)
  To: intel-gfx

In order to support out-of-line error capture, we need to remove the
active request from HW and put it to one side while a worker compresses
and stores all the details associated with that request. (As that
compression may take an arbitrary user-controlled amount of time, we
want to let the engine continue running on other workloads while the
hanging request is dumped.) Not only do we need to remove the active
request, but we also have to remove its context and all requests that
were dependent on it (whether in flight, queued, or awaiting future
submission).

Finally, once the capture is complete, we need to be able to resubmit the
request and its dependents and allow them to execute.

v2: Replace stack recursion with a simple list.
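
As a self-contained sketch of that v2 shape (simplified types, not the
driver's i915_dependency graph): ready waiters are pushed onto a local
work list and popped in turn, so arbitrarily deep dependency chains
cost no stack:

  #include <assert.h>
  #include <stdbool.h>
  #include <stddef.h>

  struct req {
          bool hold;
          struct req *waiters[4]; /* NULL-terminated same-engine deps */
          struct req *next;       /* link for the local work list */
  };

  /* Propagate "hold" over the dependency graph without recursion. */
  static void hold_subtree(struct req *rq)
  {
          struct req *list = NULL;

          rq->hold = true;
          do {
                  struct req **w;

                  for (w = rq->waiters; *w; w++) {
                          if ((*w)->hold)
                                  continue; /* seen via another path */
                          (*w)->hold = true;
                          (*w)->next = list; /* push onto work list */
                          list = *w;
                  }

                  rq = list; /* pop the next request, if any */
                  if (rq)
                          list = rq->next;
          } while (rq);
  }

  int main(void)
  {
          struct req c = { .waiters = { NULL } };
          struct req b = { .waiters = { &c, NULL } };
          struct req a = { .waiters = { &b, &c, NULL } }; /* diamond */

          hold_subtree(&a);
          assert(a.hold && b.hold && c.hold);
          return 0;
  }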

References: https://gitlab.freedesktop.org/drm/intel/issues/738
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c    |   1 +
 drivers/gpu/drm/i915/gt/intel_engine_types.h |   1 +
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 159 ++++++++++++++++++-
 drivers/gpu/drm/i915/gt/selftest_lrc.c       | 103 ++++++++++++
 drivers/gpu/drm/i915/i915_request.h          |  22 +++
 5 files changed, 282 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index f451ef376548..c296aaf381e7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -671,6 +671,7 @@ void
 intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
 {
 	INIT_LIST_HEAD(&engine->active.requests);
+	INIT_LIST_HEAD(&engine->active.hold);
 
 	spin_lock_init(&engine->active.lock);
 	lockdep_set_subclass(&engine->active.lock, subclass);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 00287515e7af..77e68c7643de 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -295,6 +295,7 @@ struct intel_engine_cs {
 	struct {
 		spinlock_t lock;
 		struct list_head requests;
+		struct list_head hold; /* ready requests, but on hold */
 	} active;
 
 	struct llist_head barrier_tasks;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index f0cbd240a8c2..43c19dc9c0c7 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2353,6 +2353,145 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 	}
 }
 
+static void __execlists_hold(struct i915_request *rq)
+{
+	LIST_HEAD(list);
+
+	do {
+		struct i915_dependency *p;
+
+		if (i915_request_is_active(rq))
+			__i915_request_unsubmit(rq);
+
+		RQ_TRACE(rq, "on hold\n");
+		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+		list_move_tail(&rq->sched.link, &rq->engine->active.hold);
+		i915_request_set_hold(rq);
+
+		list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+			struct i915_request *w =
+				container_of(p->waiter, typeof(*w), sched);
+
+			/* Leave semaphores spinning on the other engines */
+			if (w->engine != rq->engine)
+				continue;
+
+			if (list_empty(&w->sched.link))
+				continue; /* Not yet submitted */
+
+			if (i915_request_completed(w))
+				continue;
+
+			if (i915_request_has_hold(rq))
+				continue;
+
+			list_move_tail(&w->sched.link, &list);
+		}
+
+		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
+	} while(rq);
+}
+
+__maybe_unused
+static void execlists_hold(struct intel_engine_cs *engine,
+			   struct i915_request *rq)
+{
+	spin_lock_irq(&engine->active.lock);
+
+	/*
+	 * Transfer this request onto the hold queue to prevent it
+	 * being resubmitted to HW (and potentially completed) before we have
+	 * released it. Since we may have already submitted following
+	 * requests, we need to remove those as well.
+	 */
+	GEM_BUG_ON(i915_request_completed(rq));
+	GEM_BUG_ON(i915_request_has_hold(rq));
+	GEM_BUG_ON(rq->engine != engine);
+	__execlists_hold(rq);
+
+	spin_unlock_irq(&engine->active.lock);
+}
+
+static bool hold_request(const struct i915_request *rq)
+{
+	struct i915_dependency *p;
+
+	/*
+	 * If one of our ancestors is still on hold, we must also still be on
+	 * hold, otherwise we will bypass it and execute before it.
+	 */
+	list_for_each_entry(p, &rq->sched.signalers_list, signal_link) {
+		const struct i915_request *s =
+			container_of(p->signaler, typeof(*s), sched);
+
+		if (s->engine != rq->engine)
+			continue;
+
+		return i915_request_has_hold(s);
+	}
+
+	return false;
+}
+
+static void __execlists_unhold(struct i915_request *rq)
+{
+	LIST_HEAD(list);
+
+	do {
+		struct i915_dependency *p;
+
+		GEM_BUG_ON(!i915_request_has_hold(rq));
+		GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
+
+		i915_request_clear_hold(rq);
+		list_move_tail(&rq->sched.link,
+			       i915_sched_lookup_priolist(rq->engine,
+							  rq_prio(rq)));
+		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+		RQ_TRACE(rq, "hold release\n");
+
+		/* Also release any children on this engine that are ready */
+		list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+			struct i915_request *w =
+				container_of(p->waiter, typeof(*w), sched);
+
+			if (w->engine != rq->engine)
+				continue;
+
+			if (!i915_request_has_hold(rq))
+				continue;
+
+			/* Check that no other parents are on hold */
+			if (hold_request(rq))
+				continue;
+
+			list_move_tail(&w->sched.link, &list);
+		}
+
+		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
+	} while(rq);
+}
+
+__maybe_unused
+static void execlists_unhold(struct intel_engine_cs *engine,
+			     struct i915_request *rq)
+{
+	spin_lock_irq(&engine->active.lock);
+
+	/*
+	 * Move this request back to the priority queue, and all of its
+	 * children and grandchildren that were suspended along with it.
+	 */
+	__execlists_unhold(rq);
+
+	if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
+		engine->execlists.queue_priority_hint = rq_prio(rq);
+		tasklet_hi_schedule(&engine->execlists.tasklet);
+	}
+
+	spin_unlock_irq(&engine->active.lock);
+}
+
 static noinline void preempt_reset(struct intel_engine_cs *engine)
 {
 	const unsigned int bit = I915_RESET_ENGINE + engine->id;
@@ -2465,6 +2604,13 @@ static void submit_queue(struct intel_engine_cs *engine,
 	__submit_queue_imm(engine);
 }
 
+static bool on_hold(const struct intel_engine_cs *engine,
+		    const struct i915_request *rq)
+{
+	GEM_BUG_ON(i915_request_has_hold(rq));
+	return !list_empty(&engine->active.hold) && hold_request(rq);
+}
+
 static void execlists_submit_request(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
@@ -2473,12 +2619,17 @@ static void execlists_submit_request(struct i915_request *request)
 	/* Will be called from irq-context when using foreign fences. */
 	spin_lock_irqsave(&engine->active.lock, flags);
 
-	queue_request(engine, request);
+	if (unlikely(on_hold(engine, request))) { /* ancestor is suspended */
+		list_add_tail(&request->sched.link, &engine->active.hold);
+		i915_request_set_hold(request);
+	} else {
+		queue_request(engine, request);
 
-	GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
-	GEM_BUG_ON(list_empty(&request->sched.link));
+		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
+		GEM_BUG_ON(list_empty(&request->sched.link));
 
-	submit_queue(engine, request);
+		submit_queue(engine, request);
+	}
 
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 15cda024e3e4..78501d79c0ea 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -285,6 +285,108 @@ static int live_unlite_preempt(void *arg)
 	return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
 }
 
+static int live_hold_reset(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	struct igt_spinner spin;
+	int err = 0;
+
+	/*
+	 * In order to support offline error capture for fast preempt reset,
+	 * we need to decouple the guilty request and ensure that it and its
+	 * descendants are not executed while the capture is in progress.
+	 */
+
+	if (!intel_has_reset_engine(gt))
+		return 0;
+
+	if (igt_spinner_init(&spin, gt))
+		return -ENOMEM;
+
+	for_each_engine(engine, gt, id) {
+		struct intel_context *ce;
+		unsigned long heartbeat;
+		struct i915_request *rq;
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce)) {
+			err = PTR_ERR(ce);
+			break;
+		}
+
+		engine_heartbeat_disable(engine, &heartbeat);
+
+		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto out;
+		}
+		i915_request_add(rq);
+
+		if (!igt_wait_for_spinner(&spin, rq)) {
+			intel_gt_set_wedged(gt);
+			err = -ETIME;
+			goto out;
+		}
+
+		/* We have our request executing, now remove it and reset */
+
+		if (test_and_set_bit(I915_RESET_ENGINE + id,
+				     &gt->reset.flags)) {
+			spin_unlock_irq(&engine->active.lock);
+			intel_gt_set_wedged(gt);
+			err = -EBUSY;
+			goto out;
+		}
+		tasklet_disable(&engine->execlists.tasklet);
+
+		engine->execlists.tasklet.func(engine->execlists.tasklet.data);
+		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
+
+		execlists_hold(engine, rq);
+		GEM_BUG_ON(!i915_request_has_hold(rq));
+
+		intel_engine_reset(engine, NULL);
+		GEM_BUG_ON(rq->fence.error != -EIO);
+
+		tasklet_enable(&engine->execlists.tasklet);
+		clear_and_wake_up_bit(I915_RESET_ENGINE + id,
+				      &gt->reset.flags);
+
+		/* Check that we do not resubmit the held request */
+		i915_request_get(rq);
+		if (!i915_request_wait(rq, 0, HZ / 5)) {
+			pr_err("%s: on hold request completed!\n",
+			       engine->name);
+			i915_request_put(rq);
+			err = -EIO;
+			goto out;
+		}
+		GEM_BUG_ON(!i915_request_has_hold(rq));
+
+		/* But is resubmitted on release */
+		execlists_unhold(engine, rq);
+		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+			pr_err("%s: held request did not complete!\n",
+			       engine->name);
+			intel_gt_set_wedged(gt);
+			err = -ETIME;
+		}
+		i915_request_put(rq);
+
+out:
+		engine_heartbeat_enable(engine, heartbeat);
+		intel_context_put(ce);
+		if (err)
+			break;
+	}
+
+	igt_spinner_fini(&spin);
+	return err;
+}
+
 static int
 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
 {
@@ -3315,6 +3417,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_sanitycheck),
 		SUBTEST(live_unlite_switch),
 		SUBTEST(live_unlite_preempt),
+		SUBTEST(live_hold_reset),
 		SUBTEST(live_timeslice_preempt),
 		SUBTEST(live_timeslice_queue),
 		SUBTEST(live_busywait_preempt),
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index a9f0d3c8d8b7..47fa5419c74f 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -90,6 +90,13 @@ enum {
 	 */
 	I915_FENCE_FLAG_SIGNAL,
 
+	/*
+	 * I915_FENCE_FLAG_HOLD - this request is currently on hold
+	 *
+	 * This request has been suspended, pending an ongoing investigation.
+	 */
+	I915_FENCE_FLAG_HOLD,
+
 	/*
 	 * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted
 	 *
@@ -500,6 +507,21 @@ static inline bool i915_request_has_sentinel(const struct i915_request *rq)
 	return unlikely(test_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags));
 }
 
+static inline bool i915_request_has_hold(const struct i915_request *rq)
+{
+	return unlikely(test_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags));
+}
+
+static inline void i915_request_set_hold(struct i915_request *rq)
+{
+	set_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
+}
+
+static inline void i915_request_clear_hold(struct i915_request *rq)
+{
+	clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
+}
+
 static inline struct intel_timeline *
 i915_request_timeline(struct i915_request *rq)
 {
-- 
2.25.0


* [Intel-gfx] [PATCH 3/3] drm/i915/execlists: Offline error capture
  2020-01-15  8:33 [Intel-gfx] [PATCH 1/3] drm/i915: Use common priotree lists for virtual engine Chris Wilson
  2020-01-15  8:33 ` [Intel-gfx] [PATCH 2/3] drm/i915/gt: Allow temporary suspension of inflight requests Chris Wilson
@ 2020-01-15  8:33 ` Chris Wilson
  2020-01-16 17:22   ` Tvrtko Ursulin
  2020-01-15  9:02 ` [Intel-gfx] [PATCH v2] drm/i915: Keep track of request among the scheduling lists Chris Wilson
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 21+ messages in thread
From: Chris Wilson @ 2020-01-15  8:33 UTC (permalink / raw)
  To: intel-gfx

Currently, we skip error capture upon forced preemption. We apply forced
preemption when there is a higher-priority request that should be
running but is being blocked, and we skip inline error capture so that
the preemption request is not further delayed by a user-controlled
capture, which would extend the denial of service.

However, preemption reset is also used for heartbeats and regular GPU
hangs. By skipping the error capture, we remove the ability to debug GPU
hangs.

In order to capture the error without delaying the preemption request
further, we can do an out-of-line capture by removing the guilty request
from the execution queue and scheduling a worker to dump that request.
When removing a request, we need to remove the entire context and all
descendants from the execution queue, so that they do not jump past it.
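
A rough userspace model of the two phases (threads standing in for the
softirq and workqueue contexts; all names here are illustrative):

  #include <pthread.h>
  #include <stdio.h>
  #include <unistd.h>

  struct capture {
          const char *guilty; /* stands in for the held request */
  };

  /* Slow phase, process context: compress, publish, then unhold. */
  static void *capture_work(void *arg)
  {
          struct capture *cap = arg;

          sleep(1); /* arbitrary, user-sized compression time */
          printf("error state stored for %s; unholding it\n", cap->guilty);
          return NULL;
  }

  int main(void)
  {
          struct capture cap = { .guilty = "rq" };
          pthread_t worker;

          /* Fast phase, "atomic": snapshot the registers, pull the
           * guilty request (and its dependents) off the queues, then
           * hand the slow dump to a worker. */
          printf("holding %s, scheduling capture\n", cap.guilty);
          pthread_create(&worker, NULL, capture_work, &cap);

          /* The engine is reset and keeps running other requests. */
          printf("engine reset; other workloads continue\n");

          pthread_join(&worker, NULL);
          return 0;
  }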

Closes: https://gitlab.freedesktop.org/drm/intel/issues/738
Fixes: 3a7a92aba8fb ("drm/i915/execlists: Force preemption")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 120 +++++++++++++++++++++++++++-
 1 file changed, 118 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 43c19dc9c0c7..a84477df32bd 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2392,7 +2392,6 @@ static void __execlists_hold(struct i915_request *rq)
 	} while(rq);
 }
 
-__maybe_unused
 static void execlists_hold(struct intel_engine_cs *engine,
 			   struct i915_request *rq)
 {
@@ -2472,7 +2471,6 @@ static void __execlists_unhold(struct i915_request *rq)
 	} while(rq);
 }
 
-__maybe_unused
 static void execlists_unhold(struct intel_engine_cs *engine,
 			     struct i915_request *rq)
 {
@@ -2492,6 +2490,121 @@ static void execlists_unhold(struct intel_engine_cs *engine,
 	spin_unlock_irq(&engine->active.lock);
 }
 
+struct execlists_capture {
+	struct work_struct work;
+	struct i915_request *rq;
+	struct i915_gpu_coredump *error;
+};
+
+static void execlists_capture_work(struct work_struct *work)
+{
+	struct execlists_capture *cap = container_of(work, typeof(*cap), work);
+	const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
+	struct intel_engine_cs *engine = cap->rq->engine;
+	struct intel_gt_coredump *gt = cap->error->gt;
+	struct intel_engine_capture_vma *vma;
+
+	/* Compress all the objects attached to the request, slow! */
+	vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
+	if (vma) {
+		struct i915_vma_compress *compress =
+			i915_vma_capture_prepare(gt);
+
+		intel_engine_coredump_add_vma(gt->engine, vma, compress);
+		i915_vma_capture_finish(gt, compress);
+	}
+
+	gt->simulated = gt->engine->simulated;
+	cap->error->simulated = gt->simulated;
+
+	/* Publish the error state, and announce it to the world */
+	i915_error_state_store(cap->error);
+	i915_gpu_coredump_put(cap->error);
+
+	/* Return this request and all that depend upon it for signaling */
+	execlists_unhold(engine, cap->rq);
+
+	kfree(cap);
+}
+
+static struct i915_gpu_coredump *capture_regs(struct intel_engine_cs *engine)
+{
+	const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
+	struct i915_gpu_coredump *e;
+
+	e = i915_gpu_coredump_alloc(engine->i915, gfp);
+	if (!e)
+		return NULL;
+
+	e->gt = intel_gt_coredump_alloc(engine->gt, gfp);
+	if (!e->gt)
+		goto err;
+
+	e->gt->engine = intel_engine_coredump_alloc(engine, gfp);
+	if (!e->gt->engine)
+		goto err_gt;
+
+	return e;
+
+err_gt:
+	kfree(e->gt);
+err:
+	kfree(e);
+	return NULL;
+}
+
+static void execlists_capture(struct intel_engine_cs *engine)
+{
+	struct execlists_capture *cap;
+
+	if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
+		return;
+
+	cap = kmalloc(sizeof(*cap), GFP_ATOMIC);
+	if (!cap)
+		return;
+
+	cap->rq = execlists_active(&engine->execlists);
+	GEM_BUG_ON(!cap->rq);
+
+	cap->rq = active_request(cap->rq->context->timeline, cap->rq);
+
+	/*
+	 * We need to _quickly_ capture the engine state before we reset.
+	 * We are inside an atomic section (softirq) here and we are delaying
+	 * the forced preemption event.
+	 */
+	cap->error = capture_regs(engine);
+	if (!cap->error)
+		goto err_free;
+
+	if (i915_request_completed(cap->rq)) /* oops, not so guilty! */
+		goto err_store;
+
+	/*
+	 * Remove the request from the execlists queue, and take ownership
+	 * of the request. We pass it to our worker who will _slowly_ compress
+	 * all the pages the _user_ requested for debugging their batch, after
+	 * which we return it to the queue for signaling.
+	 *
+	 * By removing them from the execlists queue, we also prevent the
+	 * requests from being processed by __unwind_incomplete_requests()
+	 * during the intel_engine_reset(), and so they will *not* be replayed
+	 * afterwards.
+	 */
+	execlists_hold(engine, cap->rq);
+
+	INIT_WORK(&cap->work, execlists_capture_work);
+	schedule_work(&cap->work);
+	return;
+
+err_store:
+	i915_error_state_store(cap->error);
+	i915_gpu_coredump_put(cap->error);
+err_free:
+	kfree(cap);
+}
+
 static noinline void preempt_reset(struct intel_engine_cs *engine)
 {
 	const unsigned int bit = I915_RESET_ENGINE + engine->id;
@@ -2509,6 +2622,9 @@ static noinline void preempt_reset(struct intel_engine_cs *engine)
 	ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
 		     READ_ONCE(engine->props.preempt_timeout_ms),
 		     jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
+
+	ring_set_paused(engine, 1); /* Freeze the request in place */
+	execlists_capture(engine);
 	intel_engine_reset(engine, "preemption time out");
 
 	tasklet_enable(&engine->execlists.tasklet);
-- 
2.25.0


* [Intel-gfx] [PATCH v2] drm/i915: Keep track of request among the scheduling lists
  2020-01-15  8:33 [Intel-gfx] [PATCH 1/3] drm/i915: Use common priotree lists for virtual engine Chris Wilson
  2020-01-15  8:33 ` [Intel-gfx] [PATCH 2/3] drm/i915/gt: Allow temporary suspension of inflight requests Chris Wilson
  2020-01-15  8:33 ` [Intel-gfx] [PATCH 3/3] drm/i915/execlists: Offline error capture Chris Wilson
@ 2020-01-15  9:02 ` Chris Wilson
  2020-01-16 17:23   ` Tvrtko Ursulin
  2020-01-15  9:44 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev2) Patchwork
                   ` (5 subsequent siblings)
  8 siblings, 1 reply; 21+ messages in thread
From: Chris Wilson @ 2020-01-15  9:02 UTC (permalink / raw)
  To: intel-gfx

If we keep track of when the i915_request.sched.link is on the HW
runlist, or in the priority queue, we can simplify our interactions with
the request (such as during rescheduling). This also simplifies the next
patch, where we introduce a new in-between list for requests that are
ready but neither on the run list nor in the queue.

v2: Update i915_sched_node.link explanation for current usage where it
is a link on both the queue and the runlists.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c   | 13 ++++++++-----
 drivers/gpu/drm/i915/i915_request.c   |  4 +++-
 drivers/gpu/drm/i915/i915_request.h   | 17 +++++++++++++++++
 drivers/gpu/drm/i915/i915_scheduler.c | 22 ++++++++++------------
 4 files changed, 38 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 9e430590fb3a..f0cbd240a8c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -985,6 +985,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 
 			list_move(&rq->sched.link, pl);
+			set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+
 			active = rq;
 		} else {
 			struct intel_engine_cs *owner = rq->context->engine;
@@ -2430,11 +2432,12 @@ static void execlists_preempt(struct timer_list *timer)
 }
 
 static void queue_request(struct intel_engine_cs *engine,
-			  struct i915_sched_node *node,
-			  int prio)
+			  struct i915_request *rq)
 {
-	GEM_BUG_ON(!list_empty(&node->link));
-	list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio));
+	GEM_BUG_ON(!list_empty(&rq->sched.link));
+	list_add_tail(&rq->sched.link,
+		      i915_sched_lookup_priolist(engine, rq_prio(rq)));
+	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
 }
 
 static void __submit_queue_imm(struct intel_engine_cs *engine)
@@ -2470,7 +2473,7 @@ static void execlists_submit_request(struct i915_request *request)
 	/* Will be called from irq-context when using foreign fences. */
 	spin_lock_irqsave(&engine->active.lock, flags);
 
-	queue_request(engine, &request->sched, rq_prio(request));
+	queue_request(engine, request);
 
 	GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 	GEM_BUG_ON(list_empty(&request->sched.link));
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index be185886e4fc..9ed0d3bc7249 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -408,8 +408,10 @@ bool __i915_request_submit(struct i915_request *request)
 xfer:	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
-	if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags))
+	if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) {
 		list_move_tail(&request->sched.link, &engine->active.requests);
+		clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
+	}
 
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
 	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 031433691a06..a9f0d3c8d8b7 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -70,6 +70,18 @@ enum {
 	 */
 	I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS,
 
+	/*
+	 * I915_FENCE_FLAG_PQUEUE - this request is ready for execution
+	 *
+	 * Using the scheduler, when a request is ready for execution it is put
+	 * into the priority queue, and removed from the queue when transferred
+	 * to the HW runlists. We want to track its membership within that
+	 * queue so that we can easily check before rescheduling.
+	 *
+	 * See i915_request_in_priority_queue()
+	 */
+	I915_FENCE_FLAG_PQUEUE,
+
 	/*
 	 * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list
 	 *
@@ -361,6 +373,11 @@ static inline bool i915_request_is_active(const struct i915_request *rq)
 	return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
 }
 
+static inline bool i915_request_in_priority_queue(const struct i915_request *rq)
+{
+	return test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+}
+
 /**
  * Returns true if seq1 is later than seq2.
  */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index bf87c70bfdd9..db3da81b7f05 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -326,20 +326,18 @@ static void __i915_schedule(struct i915_sched_node *node,
 
 		node->attr.priority = prio;
 
-		if (list_empty(&node->link)) {
-			/*
-			 * If the request is not in the priolist queue because
-			 * it is not yet runnable, then it doesn't contribute
-			 * to our preemption decisions. On the other hand,
-			 * if the request is on the HW, it too is not in the
-			 * queue; but in that case we may still need to reorder
-			 * the inflight requests.
-			 */
+		/*
+		 * Once the request is ready, it will be placed into the
+		 * priority lists and then onto the HW runlist. Before the
+		 * request is ready, it does not contribute to our preemption
+		 * decisions and we can safely ignore it, as it, and any
+		 * preemption it requires, will be dealt with upon submission.
+		 * See engine->submit_request()
+		 */
+		if (list_empty(&node->link))
 			continue;
-		}
 
-		if (!intel_engine_is_virtual(engine) &&
-		    !i915_request_is_active(node_to_request(node))) {
+		if (i915_request_in_priority_queue(node_to_request(node))) {
 			if (!cache.priolist)
 				cache.priolist =
 					i915_sched_lookup_priolist(engine,
-- 
2.25.0


* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev2)
  2020-01-15  8:33 [Intel-gfx] [PATCH 1/3] drm/i915: Use common priotree lists for virtual engine Chris Wilson
                   ` (2 preceding siblings ...)
  2020-01-15  9:02 ` [Intel-gfx] [PATCH v2] drm/i915: Keep track of request among the scheduling lists Chris Wilson
@ 2020-01-15  9:44 ` Patchwork
  2020-01-15 10:06 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 21+ messages in thread
From: Patchwork @ 2020-01-15  9:44 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev2)
URL   : https://patchwork.freedesktop.org/series/72048/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
201e1a96d586 drm/i915: Keep track of request among the scheduling lists
e4a508266496 drm/i915/gt: Allow temporary suspension of inflight requests
-:92: ERROR:SPACING: space required before the open parenthesis '('
#92: FILE: drivers/gpu/drm/i915/gt/intel_lrc.c:2392:
+	} while(rq);

-:172: ERROR:SPACING: space required before the open parenthesis '('
#172: FILE: drivers/gpu/drm/i915/gt/intel_lrc.c:2472:
+	} while(rq);

total: 2 errors, 0 warnings, 0 checks, 342 lines checked
4aab7f2eb367 drm/i915/execlists: Offline error capture
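
Both complaints ask only for a space after the keyword, i.e.
"} while (rq);" rather than "} while(rq);".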


* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev2)
  2020-01-15  8:33 [Intel-gfx] [PATCH 1/3] drm/i915: Use common priotree lists for virtual engine Chris Wilson
                   ` (3 preceding siblings ...)
  2020-01-15  9:44 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev2) Patchwork
@ 2020-01-15 10:06 ` Patchwork
  2020-01-15 10:06 ` [Intel-gfx] ✗ Fi.CI.BUILD: warning " Patchwork
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 21+ messages in thread
From: Patchwork @ 2020-01-15 10:06 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev2)
URL   : https://patchwork.freedesktop.org/series/72048/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_7748 -> Patchwork_16106
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/index.html

Known issues
------------

  Here are the changes found in Patchwork_16106 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_ctx_switch@rcs0:
    - fi-icl-guc:         [PASS][1] -> [INCOMPLETE][2] ([i915#140])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-icl-guc/igt@gem_ctx_switch@rcs0.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-icl-guc/igt@gem_ctx_switch@rcs0.html

  * igt@i915_module_load@reload-with-fault-injection:
    - fi-kbl-x1275:       [PASS][3] -> [INCOMPLETE][4] ([i915#879])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-kbl-x1275/igt@i915_module_load@reload-with-fault-injection.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-kbl-x1275/igt@i915_module_load@reload-with-fault-injection.html

  * igt@i915_pm_rpm@module-reload:
    - fi-skl-6770hq:      [PASS][5] -> [FAIL][6] ([i915#178])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html

  * igt@i915_selftest@live_execlists:
    - fi-icl-y:           [PASS][7] -> [DMESG-FAIL][8] ([fdo#108569])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-icl-y/igt@i915_selftest@live_execlists.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-icl-y/igt@i915_selftest@live_execlists.html

  * igt@i915_selftest@live_hangcheck:
    - fi-kbl-7500u:       [PASS][9] -> [DMESG-FAIL][10] ([i915#889]) +7 similar issues
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-kbl-7500u/igt@i915_selftest@live_hangcheck.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-kbl-7500u/igt@i915_selftest@live_hangcheck.html

  * igt@i915_selftest@live_vma:
    - fi-kbl-7500u:       [PASS][11] -> [DMESG-WARN][12] ([i915#889]) +23 similar issues
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-kbl-7500u/igt@i915_selftest@live_vma.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-kbl-7500u/igt@i915_selftest@live_vma.html

  
#### Possible fixes ####

  * igt@gem_exec_gttfill@basic:
    - fi-bsw-n3050:       [TIMEOUT][13] ([fdo#112271]) -> [PASS][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-bsw-n3050/igt@gem_exec_gttfill@basic.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-bsw-n3050/igt@gem_exec_gttfill@basic.html

  * igt@i915_module_load@reload-with-fault-injection:
    - fi-skl-6700k2:      [INCOMPLETE][15] ([i915#671]) -> [PASS][16]
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-skl-6700k2/igt@i915_module_load@reload-with-fault-injection.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-skl-6700k2/igt@i915_module_load@reload-with-fault-injection.html

  * igt@i915_selftest@live_blt:
    - fi-hsw-4770:        [DMESG-FAIL][17] ([i915#563]) -> [PASS][18]
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-hsw-4770/igt@i915_selftest@live_blt.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-hsw-4770/igt@i915_selftest@live_blt.html

  * igt@i915_selftest@live_gem_contexts:
    - fi-hsw-4770r:       [DMESG-FAIL][19] ([i915#722]) -> [PASS][20]
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-hsw-4770r/igt@i915_selftest@live_gem_contexts.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-hsw-4770r/igt@i915_selftest@live_gem_contexts.html

  * igt@kms_chamelium@hdmi-hpd-fast:
    - fi-kbl-7500u:       [FAIL][21] ([fdo#111096] / [i915#323]) -> [PASS][22]
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html

  
#### Warnings ####

  * igt@i915_selftest@live_blt:
    - fi-hsw-4770r:       [DMESG-FAIL][23] ([i915#553] / [i915#725]) -> [DMESG-FAIL][24] ([i915#725])
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-hsw-4770r/igt@i915_selftest@live_blt.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-hsw-4770r/igt@i915_selftest@live_blt.html

  * igt@kms_chamelium@common-hpd-after-suspend:
    - fi-icl-u2:          [FAIL][25] ([i915#217]) -> [DMESG-WARN][26] ([IGT#4] / [i915#263])
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-icl-u2/igt@kms_chamelium@common-hpd-after-suspend.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-icl-u2/igt@kms_chamelium@common-hpd-after-suspend.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [IGT#4]: https://gitlab.freedesktop.org/drm/igt-gpu-tools/issues/4
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#111096]: https://bugs.freedesktop.org/show_bug.cgi?id=111096
  [fdo#112271]: https://bugs.freedesktop.org/show_bug.cgi?id=112271
  [i915#140]: https://gitlab.freedesktop.org/drm/intel/issues/140
  [i915#178]: https://gitlab.freedesktop.org/drm/intel/issues/178
  [i915#217]: https://gitlab.freedesktop.org/drm/intel/issues/217
  [i915#263]: https://gitlab.freedesktop.org/drm/intel/issues/263
  [i915#323]: https://gitlab.freedesktop.org/drm/intel/issues/323
  [i915#553]: https://gitlab.freedesktop.org/drm/intel/issues/553
  [i915#563]: https://gitlab.freedesktop.org/drm/intel/issues/563
  [i915#671]: https://gitlab.freedesktop.org/drm/intel/issues/671
  [i915#722]: https://gitlab.freedesktop.org/drm/intel/issues/722
  [i915#725]: https://gitlab.freedesktop.org/drm/intel/issues/725
  [i915#879]: https://gitlab.freedesktop.org/drm/intel/issues/879
  [i915#889]: https://gitlab.freedesktop.org/drm/intel/issues/889
  [i915#937]: https://gitlab.freedesktop.org/drm/intel/issues/937


Participating hosts (42 -> 46)
------------------------------

  Additional (8): fi-byt-j1900 fi-ivb-3770 fi-skl-lmem fi-blb-e6850 fi-byt-n2820 fi-bsw-nick fi-skl-6600u fi-snb-2600 
  Missing    (4): fi-ctg-p8600 fi-ilk-m540 fi-byt-squawks fi-bsw-cyan 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_7748 -> Patchwork_16106

  CI-20190529: 20190529
  CI_DRM_7748: 1793de9a4215356790b87608fcfc9e99eeb6954d @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5365: e9ec0ed63b25c86861ffac3c8601cc4d1b910b65 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_16106: 4aab7f2eb367114a402d222d92c77f495553ff42 @ git://anongit.freedesktop.org/gfx-ci/linux


== Kernel 32bit build ==

Warning: Kernel 32bit buildtest failed:
https://intel-gfx-ci.01.org/Patchwork_16106/build_32bit.log

  CALL    scripts/checksyscalls.sh
  CALL    scripts/atomic/check-atomics.sh
  CHK     include/generated/compile.h
Kernel: arch/x86/boot/bzImage is ready  (#1)
  Building modules, stage 2.
  MODPOST 122 modules
ERROR: "__udivdi3" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined!
scripts/Makefile.modpost:93: recipe for target '__modpost' failed
make[1]: *** [__modpost] Error 1
Makefile:1282: recipe for target 'modules' failed
make: *** [modules] Error 2


== Linux commits ==

4aab7f2eb367 drm/i915/execlists: Offline error capture
e4a508266496 drm/i915/gt: Allow temporary suspension of inflight requests
201e1a96d586 drm/i915: Keep track of request among the scheduling lists

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/index.html

* [Intel-gfx] ✗ Fi.CI.BUILD: warning for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev2)
  2020-01-15  8:33 [Intel-gfx] [PATCH 1/3] drm/i915: Use common priotree lists for virtual engine Chris Wilson
                   ` (4 preceding siblings ...)
  2020-01-15 10:06 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
@ 2020-01-15 10:06 ` Patchwork
  2020-01-15 14:37 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev3) Patchwork
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 21+ messages in thread
From: Patchwork @ 2020-01-15 10:06 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev2)
URL   : https://patchwork.freedesktop.org/series/72048/
State : warning

== Summary ==

  CALL    scripts/checksyscalls.sh
  CALL    scripts/atomic/check-atomics.sh
  CHK     include/generated/compile.h
Kernel: arch/x86/boot/bzImage is ready  (#1)
  Building modules, stage 2.
  MODPOST 122 modules
ERROR: "__udivdi3" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined!
scripts/Makefile.modpost:93: recipe for target '__modpost' failed
make[1]: *** [__modpost] Error 1
Makefile:1282: recipe for target 'modules' failed
make: *** [modules] Error 2

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/build_32bit.log

* Re: [Intel-gfx] [PATCH 2/3] drm/i915/gt: Allow temporary suspension of inflight requests
  2020-01-15  8:33 ` [Intel-gfx] [PATCH 2/3] drm/i915/gt: Allow temporary suspension of inflight requests Chris Wilson
@ 2020-01-15 10:58   ` Tvrtko Ursulin
  2020-01-15 11:01     ` Chris Wilson
  2020-01-15 11:10   ` [Intel-gfx] [PATCH v3] " Chris Wilson
  1 sibling, 1 reply; 21+ messages in thread
From: Tvrtko Ursulin @ 2020-01-15 10:58 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 15/01/2020 08:33, Chris Wilson wrote:
> In order to support out-of-line error capture, we need to remove the
> active request from HW and put it to one side while a worker compresses
> and stores all the details associated with that request. (As that
> compression may take an arbitrary user-controlled amount of time, we
> want to let the engine continue running on other workloads while the
> hanging request is dumped.) Not only do we need to remove the active
> request, but we also have to remove its context and all requests that
> were dependent on it (whether in flight, queued, or awaiting future
> submission).
> 
> Finally, once the capture is complete, we need to be able to resubmit the
> request and its dependents and allow them to execute.
> 
> v2: Replace stack recursion with a simple list.
> 
> References: https://gitlab.freedesktop.org/drm/intel/issues/738
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c    |   1 +
>   drivers/gpu/drm/i915/gt/intel_engine_types.h |   1 +
>   drivers/gpu/drm/i915/gt/intel_lrc.c          | 159 ++++++++++++++++++-
>   drivers/gpu/drm/i915/gt/selftest_lrc.c       | 103 ++++++++++++
>   drivers/gpu/drm/i915/i915_request.h          |  22 +++
>   5 files changed, 282 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index f451ef376548..c296aaf381e7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -671,6 +671,7 @@ void
>   intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
>   {
>   	INIT_LIST_HEAD(&engine->active.requests);
> +	INIT_LIST_HEAD(&engine->active.hold);
>   
>   	spin_lock_init(&engine->active.lock);
>   	lockdep_set_subclass(&engine->active.lock, subclass);
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 00287515e7af..77e68c7643de 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -295,6 +295,7 @@ struct intel_engine_cs {
>   	struct {
>   		spinlock_t lock;
>   		struct list_head requests;
> +		struct list_head hold; /* ready requests, but on hold */
>   	} active;
>   
>   	struct llist_head barrier_tasks;
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index f0cbd240a8c2..43c19dc9c0c7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2353,6 +2353,145 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
>   	}
>   }
>   
> +static void __execlists_hold(struct i915_request *rq)
> +{
> +	LIST_HEAD(list);
> +
> +	do {
> +		struct i915_dependency *p;
> +
> +		if (i915_request_is_active(rq))
> +			__i915_request_unsubmit(rq);
> +
> +		RQ_TRACE(rq, "on hold\n");
> +		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
> +		list_move_tail(&rq->sched.link, &rq->engine->active.hold);
> +		i915_request_set_hold(rq);
> +
> +		list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
> +			struct i915_request *w =
> +				container_of(p->waiter, typeof(*w), sched);
> +
> +			/* Leave semaphores spinning on the other engines */
> +			if (w->engine != rq->engine)
> +				continue;
> +
> +			if (list_empty(&w->sched.link))
> +				continue; /* Not yet submitted */
> +
> +			if (i915_request_completed(w))
> +				continue;
> +
> +			if (i915_request_has_hold(rq))
> +				continue;
> +
> +			list_move_tail(&w->sched.link, &list);
> +		}
> +
> +		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
> +	} while(rq);
> +}
> +
> +__maybe_unused
> +static void execlists_hold(struct intel_engine_cs *engine,
> +			   struct i915_request *rq)
> +{
> +	spin_lock_irq(&engine->active.lock);
> +
> +	/*
> +	 * Transfer this request onto the hold queue to prevent it
> +	 * being resubmitted to HW (and potentially completed) before we have
> +	 * released it. Since we may have already submitted following
> +	 * requests, we need to remove those as well.
> +	 */
> +	GEM_BUG_ON(i915_request_completed(rq));
> +	GEM_BUG_ON(i915_request_has_hold(rq));
> +	GEM_BUG_ON(rq->engine != engine);
> +	__execlists_hold(rq);
> +
> +	spin_unlock_irq(&engine->active.lock);
> +}
> +
> +static bool hold_request(const struct i915_request *rq)
> +{
> +	struct i915_dependency *p;
> +
> +	/*
> +	 * If one of our ancestors is still on hold, we must also still be on
> +	 * hold, otherwise we will bypass it and execute before it.
> +	 */
> +	list_for_each_entry(p, &rq->sched.signalers_list, signal_link) {
> +		const struct i915_request *s =
> +			container_of(p->signaler, typeof(*s), sched);
> +
> +		if (s->engine != rq->engine)
> +			continue;
> +
> +		return i915_request_has_hold(s);

Shouldn't it be:

	if (i915_request_has_hold(s))
		return true;

?

> +	}
> +
> +	return false;
> +}
> +
> +static void __execlists_unhold(struct i915_request *rq)
> +{
> +	LIST_HEAD(list);
> +
> +	do {
> +		struct i915_dependency *p;
> +
> +		GEM_BUG_ON(!i915_request_has_hold(rq));
> +		GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
> +
> +		i915_request_clear_hold(rq);
> +		list_move_tail(&rq->sched.link,
> +			       i915_sched_lookup_priolist(rq->engine,
> +							  rq_prio(rq)));
> +		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
> +		RQ_TRACE(rq, "hold release\n");
> +
> +		/* Also release any children on this engine that are ready */
> +		list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
> +			struct i915_request *w =
> +				container_of(p->waiter, typeof(*w), sched);
> +
> +			if (w->engine != rq->engine)
> +				continue;
> +
> +			if (!i915_request_has_hold(rq))

is_held?

> +				continue;
> +
> +			/* Check that no other parents are on hold */
> +			if (hold_request(rq))
> +				continue;

This would be two simultaneous hangs in the interlinked hierarchy? But
since the engine must be the same, that can't be possible.

Regards,

Tvrtko

> +
> +			list_move_tail(&w->sched.link, &list);
> +		}
> +
> +		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
> +	} while(rq);
> +}
> +
> +__maybe_unused
> +static void execlists_unhold(struct intel_engine_cs *engine,
> +			     struct i915_request *rq)
> +{
> +	spin_lock_irq(&engine->active.lock);
> +
> +	/*
> +	 * Move this request back to the priority queue, and all of its
> +	 * children and grandchildren that were suspended along with it.
> +	 */
> +	__execlists_unhold(rq);
> +
> +	if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
> +		engine->execlists.queue_priority_hint = rq_prio(rq);
> +		tasklet_hi_schedule(&engine->execlists.tasklet);
> +	}
> +
> +	spin_unlock_irq(&engine->active.lock);
> +}
> +
>   static noinline void preempt_reset(struct intel_engine_cs *engine)
>   {
>   	const unsigned int bit = I915_RESET_ENGINE + engine->id;
> @@ -2465,6 +2604,13 @@ static void submit_queue(struct intel_engine_cs *engine,
>   	__submit_queue_imm(engine);
>   }
>   
> +static bool on_hold(const struct intel_engine_cs *engine,
> +		    const struct i915_request *rq)
> +{
> +	GEM_BUG_ON(i915_request_has_hold(rq));
> +	return !list_empty(&engine->active.hold) && hold_request(rq);
> +}
> +
>   static void execlists_submit_request(struct i915_request *request)
>   {
>   	struct intel_engine_cs *engine = request->engine;
> @@ -2473,12 +2619,17 @@ static void execlists_submit_request(struct i915_request *request)
>   	/* Will be called from irq-context when using foreign fences. */
>   	spin_lock_irqsave(&engine->active.lock, flags);
>   
> -	queue_request(engine, request);
> +	if (unlikely(on_hold(engine, request))) { /* ancestor is suspended */
> +		list_add_tail(&request->sched.link, &engine->active.hold);
> +		i915_request_set_hold(request);
> +	} else {
> +		queue_request(engine, request);
>   
> -	GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
> -	GEM_BUG_ON(list_empty(&request->sched.link));
> +		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
> +		GEM_BUG_ON(list_empty(&request->sched.link));
>   
> -	submit_queue(engine, request);
> +		submit_queue(engine, request);
> +	}
>   
>   	spin_unlock_irqrestore(&engine->active.lock, flags);
>   }
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 15cda024e3e4..78501d79c0ea 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -285,6 +285,108 @@ static int live_unlite_preempt(void *arg)
>   	return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
>   }
>   
> +static int live_hold_reset(void *arg)
> +{
> +	struct intel_gt *gt = arg;
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	struct igt_spinner spin;
> +	int err = 0;
> +
> +	/*
> +	 * In order to support offline error capture for fast preempt reset,
> +	 * we need to decouple the guilty request and ensure that it and its
> +	 * descendants are not executed while the capture is in progress.
> +	 */
> +
> +	if (!intel_has_reset_engine(gt))
> +		return 0;
> +
> +	if (igt_spinner_init(&spin, gt))
> +		return -ENOMEM;
> +
> +	for_each_engine(engine, gt, id) {
> +		struct intel_context *ce;
> +		unsigned long heartbeat;
> +		struct i915_request *rq;
> +
> +		ce = intel_context_create(engine);
> +		if (IS_ERR(ce)) {
> +			err = PTR_ERR(ce);
> +			break;
> +		}
> +
> +		engine_heartbeat_disable(engine, &heartbeat);
> +
> +		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			goto out;
> +		}
> +		i915_request_add(rq);
> +
> +		if (!igt_wait_for_spinner(&spin, rq)) {
> +			intel_gt_set_wedged(gt);
> +			err = -ETIME;
> +			goto out;
> +		}
> +
> +		/* We have our request executing, now remove it and reset */
> +
> +		if (test_and_set_bit(I915_RESET_ENGINE + id,
> +				     &gt->reset.flags)) {
> +			spin_unlock_irq(&engine->active.lock);
> +			intel_gt_set_wedged(gt);
> +			err = -EBUSY;
> +			goto out;
> +		}
> +		tasklet_disable(&engine->execlists.tasklet);
> +
> +		engine->execlists.tasklet.func(engine->execlists.tasklet.data);
> +		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
> +
> +		execlists_hold(engine, rq);
> +		GEM_BUG_ON(!i915_request_has_hold(rq));
> +
> +		intel_engine_reset(engine, NULL);
> +		GEM_BUG_ON(rq->fence.error != -EIO);
> +
> +		tasklet_enable(&engine->execlists.tasklet);
> +		clear_and_wake_up_bit(I915_RESET_ENGINE + id,
> +				      &gt->reset.flags);
> +
> +		/* Check that we do not resubmit the held request */
> +		i915_request_get(rq);
> +		if (!i915_request_wait(rq, 0, HZ / 5)) {
> +			pr_err("%s: on hold request completed!\n",
> +			       engine->name);
> +			i915_request_put(rq);
> +			err = -EIO;
> +			goto out;
> +		}
> +		GEM_BUG_ON(!i915_request_has_hold(rq));
> +
> +		/* But is resubmitted on release */
> +		execlists_unhold(engine, rq);
> +		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
> +			pr_err("%s: held request did not complete!\n",
> +			       engine->name);
> +			intel_gt_set_wedged(gt);
> +			err = -ETIME;
> +		}
> +		i915_request_put(rq);
> +
> +out:
> +		engine_heartbeat_enable(engine, heartbeat);
> +		intel_context_put(ce);
> +		if (err)
> +			break;
> +	}
> +
> +	igt_spinner_fini(&spin);
> +	return err;
> +}
> +
>   static int
>   emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
>   {
> @@ -3315,6 +3417,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   		SUBTEST(live_sanitycheck),
>   		SUBTEST(live_unlite_switch),
>   		SUBTEST(live_unlite_preempt),
> +		SUBTEST(live_hold_reset),
>   		SUBTEST(live_timeslice_preempt),
>   		SUBTEST(live_timeslice_queue),
>   		SUBTEST(live_busywait_preempt),
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index a9f0d3c8d8b7..47fa5419c74f 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -90,6 +90,13 @@ enum {
>   	 */
>   	I915_FENCE_FLAG_SIGNAL,
>   
> +	/*
> +	 * I915_FENCE_FLAG_HOLD - this request is currently on hold
> +	 *
> +	 * This request has been suspended, pending an ongoing investigation.
> +	 */
> +	I915_FENCE_FLAG_HOLD,
> +
>   	/*
>   	 * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted
>   	 *
> @@ -500,6 +507,21 @@ static inline bool i915_request_has_sentinel(const struct i915_request *rq)
>   	return unlikely(test_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags));
>   }
>   
> +static inline bool i915_request_has_hold(const struct i915_request *rq)
> +{
> +	return unlikely(test_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags));
> +}
> +
> +static inline void i915_request_set_hold(struct i915_request *rq)
> +{
> +	set_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
> +}
> +
> +static inline void i915_request_clear_hold(struct i915_request *rq)
> +{
> +	clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
> +}
> +
>   static inline struct intel_timeline *
>   i915_request_timeline(struct i915_request *rq)
>   {
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Intel-gfx] [PATCH 2/3] drm/i915/gt: Allow temporary suspension of inflight requests
  2020-01-15 10:58   ` Tvrtko Ursulin
@ 2020-01-15 11:01     ` Chris Wilson
  0 siblings, 0 replies; 21+ messages in thread
From: Chris Wilson @ 2020-01-15 11:01 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2020-01-15 10:58:39)
> 
> On 15/01/2020 08:33, Chris Wilson wrote:
> > +static bool hold_request(const struct i915_request *rq)
> > +{
> > +     struct i915_dependency *p;
> > +
> > +     /*
> > +      * If one of our ancestors is still on hold, we must also still be on
> > +      * hold, otherwise we will bypass it and execute before it.
> > +      */
> > +     list_for_each_entry(p, &rq->sched.signalers_list, signal_link) {
> > +             const struct i915_request *s =
> > +                     container_of(p->signaler, typeof(*s), sched);
> > +
> > +             if (s->engine != rq->engine)
> > +                     continue;
> > +
> > +             return i915_request_has_hold(s);
> 
> Shouldn't it be:
> 
>         if (i915_request_has_hold(s))
>                 return true;
> 
> ?

Yes, it should be.
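i.e.

	if (i915_request_has_hold(s))
		return true;

so that we keep scanning the remaining signalers instead of returning the
verdict of the first one that happens to share the engine.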
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [Intel-gfx] [PATCH v3] drm/i915/gt: Allow temporary suspension of inflight requests
  2020-01-15  8:33 ` [Intel-gfx] [PATCH 2/3] drm/i915/gt: Allow temporary suspension of inflight requests Chris Wilson
  2020-01-15 10:58   ` Tvrtko Ursulin
@ 2020-01-15 11:10   ` Chris Wilson
  2020-01-15 11:37     ` Tvrtko Ursulin
  2020-01-16 17:12     ` Tvrtko Ursulin
  1 sibling, 2 replies; 21+ messages in thread
From: Chris Wilson @ 2020-01-15 11:10 UTC (permalink / raw)
  To: intel-gfx

In order to support out-of-line error capture, we need to remove the
active request from HW and put it to one side while a worker compresses
and stores all the details associated with that request. (As that
compression may take an arbitrary user-controlled amount of time, we
want to let the engine continue running on other workloads while the
hanging request is dumped.) Not only do we need to remove the active
request, but we also have to remove its context and all requests that
were dependent on it (whether in flight, queued or yet to be submitted).

Finally once the capture is complete, we need to be able to resubmit the
request and its dependents and allow them to execute.

v2: Replace stack recursion with a simple list.
v3: Check all the parents, not just the first, when searching for a
stuck ancestor!

References: https://gitlab.freedesktop.org/drm/intel/issues/738
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c    |   1 +
 drivers/gpu/drm/i915/gt/intel_engine_types.h |   1 +
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 160 ++++++++++++++++++-
 drivers/gpu/drm/i915/gt/selftest_lrc.c       | 103 ++++++++++++
 drivers/gpu/drm/i915/i915_request.h          |  22 +++
 5 files changed, 283 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index f451ef376548..c296aaf381e7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -671,6 +671,7 @@ void
 intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
 {
 	INIT_LIST_HEAD(&engine->active.requests);
+	INIT_LIST_HEAD(&engine->active.hold);
 
 	spin_lock_init(&engine->active.lock);
 	lockdep_set_subclass(&engine->active.lock, subclass);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 00287515e7af..77e68c7643de 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -295,6 +295,7 @@ struct intel_engine_cs {
 	struct {
 		spinlock_t lock;
 		struct list_head requests;
+		struct list_head hold; /* ready requests, but on hold */
 	} active;
 
 	struct llist_head barrier_tasks;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index f0cbd240a8c2..05a05ceeac6a 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2353,6 +2353,146 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 	}
 }
 
+static void __execlists_hold(struct i915_request *rq)
+{
+	LIST_HEAD(list);
+
+	do {
+		struct i915_dependency *p;
+
+		if (i915_request_is_active(rq))
+			__i915_request_unsubmit(rq);
+
+		RQ_TRACE(rq, "on hold\n");
+		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+		list_move_tail(&rq->sched.link, &rq->engine->active.hold);
+		i915_request_set_hold(rq);
+
+		list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+			struct i915_request *w =
+				container_of(p->waiter, typeof(*w), sched);
+
+			/* Leave semaphores spinning on the other engines */
+			if (w->engine != rq->engine)
+				continue;
+
+			if (list_empty(&w->sched.link))
+				continue; /* Not yet submitted */
+
+			if (i915_request_completed(w))
+				continue;
+
+			if (i915_request_has_hold(w))
+				continue;
+
+			list_move_tail(&w->sched.link, &list);
+		}
+
+		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
+	} while (rq);
+}
+
+__maybe_unused
+static void execlists_hold(struct intel_engine_cs *engine,
+			   struct i915_request *rq)
+{
+	spin_lock_irq(&engine->active.lock);
+
+	/*
+	 * Transfer this request onto the hold queue to prevent it
+	 * being resubmitted to HW (and potentially completed) before we have
+	 * released it. Since we may have already submitted following
+	 * requests, we need to remove those as well.
+	 */
+	GEM_BUG_ON(i915_request_completed(rq));
+	GEM_BUG_ON(i915_request_has_hold(rq));
+	GEM_BUG_ON(rq->engine != engine);
+	__execlists_hold(rq);
+
+	spin_unlock_irq(&engine->active.lock);
+}
+
+static bool hold_request(const struct i915_request *rq)
+{
+	struct i915_dependency *p;
+
+	/*
+	 * If one of our ancestors is on hold, we must also be on hold,
+	 * otherwise we will bypass it and execute before it.
+	 */
+	list_for_each_entry(p, &rq->sched.signalers_list, signal_link) {
+		const struct i915_request *s =
+			container_of(p->signaler, typeof(*s), sched);
+
+		if (s->engine != rq->engine)
+			continue;
+
+		if (i915_request_has_hold(s))
+			return true;
+	}
+
+	return false;
+}
+
+static void __execlists_unhold(struct i915_request *rq)
+{
+	LIST_HEAD(list);
+
+	do {
+		struct i915_dependency *p;
+
+		GEM_BUG_ON(!i915_request_has_hold(rq));
+		GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
+
+		i915_request_clear_hold(rq);
+		list_move_tail(&rq->sched.link,
+			       i915_sched_lookup_priolist(rq->engine,
+							  rq_prio(rq)));
+		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+		RQ_TRACE(rq, "hold release\n");
+
+		/* Also release any children on this engine that are ready */
+		list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+			struct i915_request *w =
+				container_of(p->waiter, typeof(*w), sched);
+
+			if (w->engine != rq->engine)
+				continue;
+
+			if (!i915_request_has_hold(w))
+				continue;
+
+			/* Check that no other parents are on hold */
+			if (hold_request(w))
+				continue;
+
+			list_move_tail(&w->sched.link, &list);
+		}
+
+		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
+	} while (rq);
+}
+
+__maybe_unused
+static void execlists_unhold(struct intel_engine_cs *engine,
+			     struct i915_request *rq)
+{
+	spin_lock_irq(&engine->active.lock);
+
+	/*
+	 * Move this request back to the priority queue, and all of its
+	 * children and grandchildren that were suspended along with it.
+	 */
+	__execlists_unhold(rq);
+
+	if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
+		engine->execlists.queue_priority_hint = rq_prio(rq);
+		tasklet_hi_schedule(&engine->execlists.tasklet);
+	}
+
+	spin_unlock_irq(&engine->active.lock);
+}
+
 static noinline void preempt_reset(struct intel_engine_cs *engine)
 {
 	const unsigned int bit = I915_RESET_ENGINE + engine->id;
@@ -2465,6 +2605,13 @@ static void submit_queue(struct intel_engine_cs *engine,
 	__submit_queue_imm(engine);
 }
 
+static bool on_hold(const struct intel_engine_cs *engine,
+		    const struct i915_request *rq)
+{
+	GEM_BUG_ON(i915_request_has_hold(rq));
+	return !list_empty(&engine->active.hold) && hold_request(rq);
+}
+
 static void execlists_submit_request(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
@@ -2473,12 +2620,17 @@ static void execlists_submit_request(struct i915_request *request)
 	/* Will be called from irq-context when using foreign fences. */
 	spin_lock_irqsave(&engine->active.lock, flags);
 
-	queue_request(engine, request);
+	if (unlikely(on_hold(engine, request))) { /* ancestor is suspended */
+		list_add_tail(&request->sched.link, &engine->active.hold);
+		i915_request_set_hold(request);
+	} else {
+		queue_request(engine, request);
 
-	GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
-	GEM_BUG_ON(list_empty(&request->sched.link));
+		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
+		GEM_BUG_ON(list_empty(&request->sched.link));
 
-	submit_queue(engine, request);
+		submit_queue(engine, request);
+	}
 
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 15cda024e3e4..78501d79c0ea 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -285,6 +285,108 @@ static int live_unlite_preempt(void *arg)
 	return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
 }
 
+static int live_hold_reset(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	struct igt_spinner spin;
+	int err = 0;
+
+	/*
+	 * In order to support offline error capture for fast preempt reset,
+	 * we need to decouple the guilty request and ensure that it and its
+	 * descendants are not executed while the capture is in progress.
+	 */
+
+	if (!intel_has_reset_engine(gt))
+		return 0;
+
+	if (igt_spinner_init(&spin, gt))
+		return -ENOMEM;
+
+	for_each_engine(engine, gt, id) {
+		struct intel_context *ce;
+		unsigned long heartbeat;
+		struct i915_request *rq;
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce)) {
+			err = PTR_ERR(ce);
+			break;
+		}
+
+		engine_heartbeat_disable(engine, &heartbeat);
+
+		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto out;
+		}
+		i915_request_add(rq);
+
+		if (!igt_wait_for_spinner(&spin, rq)) {
+			intel_gt_set_wedged(gt);
+			err = -ETIME;
+			goto out;
+		}
+
+		/* We have our request executing, now remove it and reset */
+
+		if (test_and_set_bit(I915_RESET_ENGINE + id,
+				     &gt->reset.flags)) {
+			intel_gt_set_wedged(gt);
+			err = -EBUSY;
+			goto out;
+		}
+		tasklet_disable(&engine->execlists.tasklet);
+
+		engine->execlists.tasklet.func(engine->execlists.tasklet.data);
+		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
+
+		execlists_hold(engine, rq);
+		GEM_BUG_ON(!i915_request_has_hold(rq));
+
+		intel_engine_reset(engine, NULL);
+		GEM_BUG_ON(rq->fence.error != -EIO);
+
+		tasklet_enable(&engine->execlists.tasklet);
+		clear_and_wake_up_bit(I915_RESET_ENGINE + id,
+				      &gt->reset.flags);
+
+		/* Check that we do not resubmit the held request */
+		i915_request_get(rq);
+		if (i915_request_wait(rq, 0, HZ / 5) >= 0) {
+			pr_err("%s: on hold request completed!\n",
+			       engine->name);
+			i915_request_put(rq);
+			err = -EIO;
+			goto out;
+		}
+		GEM_BUG_ON(!i915_request_has_hold(rq));
+
+		/* But is resubmitted on release */
+		execlists_unhold(engine, rq);
+		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+			pr_err("%s: held request did not complete!\n",
+			       engine->name);
+			intel_gt_set_wedged(gt);
+			err = -ETIME;
+		}
+		i915_request_put(rq);
+
+out:
+		engine_heartbeat_enable(engine, heartbeat);
+		intel_context_put(ce);
+		if (err)
+			break;
+	}
+
+	igt_spinner_fini(&spin);
+	return err;
+}
+
 static int
 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
 {
@@ -3315,6 +3417,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_sanitycheck),
 		SUBTEST(live_unlite_switch),
 		SUBTEST(live_unlite_preempt),
+		SUBTEST(live_hold_reset),
 		SUBTEST(live_timeslice_preempt),
 		SUBTEST(live_timeslice_queue),
 		SUBTEST(live_busywait_preempt),
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index a9f0d3c8d8b7..47fa5419c74f 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -90,6 +90,13 @@ enum {
 	 */
 	I915_FENCE_FLAG_SIGNAL,
 
+	/*
+	 * I915_FENCE_FLAG_HOLD - this request is currently on hold
+	 *
+	 * This request has been suspended, pending an ongoing investigation.
+	 */
+	I915_FENCE_FLAG_HOLD,
+
 	/*
 	 * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted
 	 *
@@ -500,6 +507,21 @@ static inline bool i915_request_has_sentinel(const struct i915_request *rq)
 	return unlikely(test_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags));
 }
 
+static inline bool i915_request_has_hold(const struct i915_request *rq)
+{
+	return unlikely(test_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags));
+}
+
+static inline void i915_request_set_hold(struct i915_request *rq)
+{
+	set_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
+}
+
+static inline void i915_request_clear_hold(struct i915_request *rq)
+{
+	clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
+}
+
 static inline struct intel_timeline *
 i915_request_timeline(struct i915_request *rq)
 {
-- 
2.25.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [Intel-gfx] [PATCH v3] drm/i915/gt: Allow temporary suspension of inflight requests
  2020-01-15 11:10   ` [Intel-gfx] [PATCH v3] " Chris Wilson
@ 2020-01-15 11:37     ` Tvrtko Ursulin
  2020-01-15 11:46       ` Chris Wilson
  2020-01-16 17:12     ` Tvrtko Ursulin
  1 sibling, 1 reply; 21+ messages in thread
From: Tvrtko Ursulin @ 2020-01-15 11:37 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 15/01/2020 11:10, Chris Wilson wrote:
> In order to support out-of-line error capture, we need to remove the
> active request from HW and put it to one side while a worker compresses
> and stores all the details associated with that request. (As that
> compression may take an arbitrary user-controlled amount of time, we
> want to let the engine continue running on other workloads while the
> hanging request is dumped.) Not only do we need to remove the active
> request, but we also have to remove its context and all requests that
> were dependent on it (whether in flight, queued or yet to be submitted).
> 
> Finally once the capture is complete, we need to be able to resubmit the
> request and its dependents and allow them to execute.
> 
> v2: Replace stack recursion with a simple list.
> v3: Check all the parents, not just the first, when searching for a
> stuck ancestor!
> 
> References: https://gitlab.freedesktop.org/drm/intel/issues/738
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c    |   1 +
>   drivers/gpu/drm/i915/gt/intel_engine_types.h |   1 +
>   drivers/gpu/drm/i915/gt/intel_lrc.c          | 160 ++++++++++++++++++-
>   drivers/gpu/drm/i915/gt/selftest_lrc.c       | 103 ++++++++++++
>   drivers/gpu/drm/i915/i915_request.h          |  22 +++
>   5 files changed, 283 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index f451ef376548..c296aaf381e7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -671,6 +671,7 @@ void
>   intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
>   {
>   	INIT_LIST_HEAD(&engine->active.requests);
> +	INIT_LIST_HEAD(&engine->active.hold);
>   
>   	spin_lock_init(&engine->active.lock);
>   	lockdep_set_subclass(&engine->active.lock, subclass);
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 00287515e7af..77e68c7643de 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -295,6 +295,7 @@ struct intel_engine_cs {
>   	struct {
>   		spinlock_t lock;
>   		struct list_head requests;
> +		struct list_head hold; /* ready requests, but on hold */
>   	} active;
>   
>   	struct llist_head barrier_tasks;
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index f0cbd240a8c2..05a05ceeac6a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2353,6 +2353,146 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
>   	}
>   }
>   
> +static void __execlists_hold(struct i915_request *rq)
> +{
> +	LIST_HEAD(list);
> +
> +	do {
> +		struct i915_dependency *p;
> +
> +		if (i915_request_is_active(rq))
> +			__i915_request_unsubmit(rq);
> +
> +		RQ_TRACE(rq, "on hold\n");
> +		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
> +		list_move_tail(&rq->sched.link, &rq->engine->active.hold);
> +		i915_request_set_hold(rq);
> +
> +		list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
> +			struct i915_request *w =
> +				container_of(p->waiter, typeof(*w), sched);
> +
> +			/* Leave semaphores spinning on the other engines */
> +			if (w->engine != rq->engine)
> +				continue;
> +
> +			if (list_empty(&w->sched.link))
> +				continue; /* Not yet submitted */
> +
> +			if (i915_request_completed(w))
> +				continue;
> +
> +			if (i915_request_has_hold(rq))
> +				continue;
> +
> +			list_move_tail(&w->sched.link, &list);
> +		}
> +
> +		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
> +	} while (rq);
> +}
> +
> +__maybe_unused
> +static void execlists_hold(struct intel_engine_cs *engine,
> +			   struct i915_request *rq)
> +{
> +	spin_lock_irq(&engine->active.lock);
> +
> +	/*
> +	 * Transfer this request onto the hold queue to prevent it
> +	 * being resumbitted to HW (and potentially completed) before we have
> +	 * released it. Since we may have already submitted following
> +	 * requests, we need to remove those as well.
> +	 */
> +	GEM_BUG_ON(i915_request_completed(rq));
> +	GEM_BUG_ON(i915_request_has_hold(rq));
> +	GEM_BUG_ON(rq->engine != engine);
> +	__execlists_hold(rq);
> +
> +	spin_unlock_irq(&engine->active.lock);
> +}
> +
> +static bool hold_request(const struct i915_request *rq)
> +{
> +	struct i915_dependency *p;
> +
> +	/*
> +	 * If one of our ancestors is on hold, we must also be on hold,
> +	 * otherwise we will bypass it and execute before it.
> +	 */
> +	list_for_each_entry(p, &rq->sched.signalers_list, signal_link) {
> +		const struct i915_request *s =
> +			container_of(p->signaler, typeof(*s), sched);
> +
> +		if (s->engine != rq->engine)
> +			continue;
> +
> +		if (i915_request_has_hold(s))
> +			return true;
> +	}
> +
> +	return false;
> +}
> +
> +static void __execlists_unhold(struct i915_request *rq)
> +{
> +	LIST_HEAD(list);
> +
> +	do {
> +		struct i915_dependency *p;
> +
> +		GEM_BUG_ON(!i915_request_has_hold(rq));
> +		GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
> +
> +		i915_request_clear_hold(rq);
> +		list_move_tail(&rq->sched.link,
> +			       i915_sched_lookup_priolist(rq->engine,
> +							  rq_prio(rq)));
> +		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
> +		RQ_TRACE(rq, "hold release\n");
> +
> +		/* Also release any children on this engine that are ready */
> +		list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
> +			struct i915_request *w =
> +				container_of(p->waiter, typeof(*w), sched);
> +
> +			if (w->engine != rq->engine)
> +				continue;
> +
> +			if (!i915_request_has_hold(rq))
> +				continue;
> +
> +			/* Check that no other parents are on hold */
> +			if (hold_request(rq))
> +				continue;

I had a question on this check. How can there be other parents on hold on 
the same engine if there can only be one engine reset at a time?

Oh, and I was also thinking: would i915_request_has_hold be better called 
i915_request_is_held? Or is_on_hold?

Regards,

Tvrtko

> +
> +			list_move_tail(&w->sched.link, &list);
> +		}
> +
> +		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
> +	} while (rq);
> +}
> +
> +__maybe_unused
> +static void execlists_unhold(struct intel_engine_cs *engine,
> +			     struct i915_request *rq)
> +{
> +	spin_lock_irq(&engine->active.lock);
> +
> +	/*
> +	 * Move this request back to the priority queue, and all of its
> +	 * children and grandchildren that were suspended along with it.
> +	 */
> +	__execlists_unhold(rq);
> +
> +	if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
> +		engine->execlists.queue_priority_hint = rq_prio(rq);
> +		tasklet_hi_schedule(&engine->execlists.tasklet);
> +	}
> +
> +	spin_unlock_irq(&engine->active.lock);
> +}
> +
>   static noinline void preempt_reset(struct intel_engine_cs *engine)
>   {
>   	const unsigned int bit = I915_RESET_ENGINE + engine->id;
> @@ -2465,6 +2605,13 @@ static void submit_queue(struct intel_engine_cs *engine,
>   	__submit_queue_imm(engine);
>   }
>   
> +static bool on_hold(const struct intel_engine_cs *engine,
> +		    const struct i915_request *rq)
> +{
> +	GEM_BUG_ON(i915_request_has_hold(rq));
> +	return !list_empty(&engine->active.hold) && hold_request(rq);
> +}
> +
>   static void execlists_submit_request(struct i915_request *request)
>   {
>   	struct intel_engine_cs *engine = request->engine;
> @@ -2473,12 +2620,17 @@ static void execlists_submit_request(struct i915_request *request)
>   	/* Will be called from irq-context when using foreign fences. */
>   	spin_lock_irqsave(&engine->active.lock, flags);
>   
> -	queue_request(engine, request);
> +	if (unlikely(on_hold(engine, request))) { /* ancestor is suspended */
> +		list_add_tail(&request->sched.link, &engine->active.hold);
> +		i915_request_set_hold(request);
> +	} else {
> +		queue_request(engine, request);
>   
> -	GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
> -	GEM_BUG_ON(list_empty(&request->sched.link));
> +		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
> +		GEM_BUG_ON(list_empty(&request->sched.link));
>   
> -	submit_queue(engine, request);
> +		submit_queue(engine, request);
> +	}
>   
>   	spin_unlock_irqrestore(&engine->active.lock, flags);
>   }
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 15cda024e3e4..78501d79c0ea 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -285,6 +285,108 @@ static int live_unlite_preempt(void *arg)
>   	return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
>   }
>   
> +static int live_hold_reset(void *arg)
> +{
> +	struct intel_gt *gt = arg;
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	struct igt_spinner spin;
> +	int err = 0;
> +
> +	/*
> +	 * In order to support offline error capture for fast preempt reset,
> +	 * we need to decouple the guilty request and ensure that it and its
> +	 * descendents are not executed while the capture is in progress.
> +	 */
> +
> +	if (!intel_has_reset_engine(gt))
> +		return 0;
> +
> +	if (igt_spinner_init(&spin, gt))
> +		return -ENOMEM;
> +
> +	for_each_engine(engine, gt, id) {
> +		struct intel_context *ce;
> +		unsigned long heartbeat;
> +		struct i915_request *rq;
> +
> +		ce = intel_context_create(engine);
> +		if (IS_ERR(ce)) {
> +			err = PTR_ERR(ce);
> +			break;
> +		}
> +
> +		engine_heartbeat_disable(engine, &heartbeat);
> +
> +		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			goto out;
> +		}
> +		i915_request_add(rq);
> +
> +		if (!igt_wait_for_spinner(&spin, rq)) {
> +			intel_gt_set_wedged(gt);
> +			err = -ETIME;
> +			goto out;
> +		}
> +
> +		/* We have our request executing, now remove it and reset */
> +
> +		if (test_and_set_bit(I915_RESET_ENGINE + id,
> +				     &gt->reset.flags)) {
> +			spin_unlock_irq(&engine->active.lock);
> +			intel_gt_set_wedged(gt);
> +			err = -EBUSY;
> +			goto out;
> +		}
> +		tasklet_disable(&engine->execlists.tasklet);
> +
> +		engine->execlists.tasklet.func(engine->execlists.tasklet.data);
> +		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
> +
> +		execlists_hold(engine, rq);
> +		GEM_BUG_ON(!i915_request_has_hold(rq));
> +
> +		intel_engine_reset(engine, NULL);
> +		GEM_BUG_ON(rq->fence.error != -EIO);
> +
> +		tasklet_enable(&engine->execlists.tasklet);
> +		clear_and_wake_up_bit(I915_RESET_ENGINE + id,
> +				      &gt->reset.flags);
> +
> +		/* Check that we do not resubmit the held request */
> +		i915_request_get(rq);
> +		if (!i915_request_wait(rq, 0, HZ / 5)) {
> +			pr_err("%s: on hold request completed!\n",
> +			       engine->name);
> +			i915_request_put(rq);
> +			err = -EIO;
> +			goto out;
> +		}
> +		GEM_BUG_ON(!i915_request_has_hold(rq));
> +
> +		/* But is resubmitted on release */
> +		execlists_unhold(engine, rq);
> +		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
> +			pr_err("%s: held request did not complete!\n",
> +			       engine->name);
> +			intel_gt_set_wedged(gt);
> +			err = -ETIME;
> +		}
> +		i915_request_put(rq);
> +
> +out:
> +		engine_heartbeat_enable(engine, heartbeat);
> +		intel_context_put(ce);
> +		if (err)
> +			break;
> +	}
> +
> +	igt_spinner_fini(&spin);
> +	return err;
> +}
> +
>   static int
>   emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
>   {
> @@ -3315,6 +3417,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   		SUBTEST(live_sanitycheck),
>   		SUBTEST(live_unlite_switch),
>   		SUBTEST(live_unlite_preempt),
> +		SUBTEST(live_hold_reset),
>   		SUBTEST(live_timeslice_preempt),
>   		SUBTEST(live_timeslice_queue),
>   		SUBTEST(live_busywait_preempt),
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index a9f0d3c8d8b7..47fa5419c74f 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -90,6 +90,13 @@ enum {
>   	 */
>   	I915_FENCE_FLAG_SIGNAL,
>   
> +	/*
> +	 * I915_FENCE_FLAG_HOLD - this request is currently on hold
> +	 *
> +	 * This request has been suspended, pending an ongoing investigation.
> +	 */
> +	I915_FENCE_FLAG_HOLD,
> +
>   	/*
>   	 * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted
>   	 *
> @@ -500,6 +507,21 @@ static inline bool i915_request_has_sentinel(const struct i915_request *rq)
>   	return unlikely(test_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags));
>   }
>   
> +static inline bool i915_request_has_hold(const struct i915_request *rq)
> +{
> +	return unlikely(test_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags));
> +}
> +
> +static inline void i915_request_set_hold(struct i915_request *rq)
> +{
> +	set_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
> +}
> +
> +static inline void i915_request_clear_hold(struct i915_request *rq)
> +{
> +	clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
> +}
> +
>   static inline struct intel_timeline *
>   i915_request_timeline(struct i915_request *rq)
>   {
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Intel-gfx] [PATCH v3] drm/i915/gt: Allow temporary suspension of inflight requests
  2020-01-15 11:37     ` Tvrtko Ursulin
@ 2020-01-15 11:46       ` Chris Wilson
  0 siblings, 0 replies; 21+ messages in thread
From: Chris Wilson @ 2020-01-15 11:46 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2020-01-15 11:37:23)
> 
> On 15/01/2020 11:10, Chris Wilson wrote:
> > In order to support out-of-line error capture, we need to remove the
> > active request from HW and put it to one side while a worker compresses
> > and stores all the details associated with that request. (As that
> > compression may take an arbitrary user-controlled amount of time, we
> > want to let the engine continue running on other workloads while the
> > hanging request is dumped.) Not only do we need to remove the active
> > request, but we also have to remove its context and all requests that
> > were dependent on it (whether in flight, queued or yet to be submitted).
> > 
> > Finally once the capture is complete, we need to be able to resubmit the
> > request and its dependents and allow them to execute.
> > 
> > v2: Replace stack recursion with a simple list.
> > v3: Check all the parents, not just the first, when searching for a
> > stuck ancestor!
> > 
> > References: https://gitlab.freedesktop.org/drm/intel/issues/738
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/gt/intel_engine_cs.c    |   1 +
> >   drivers/gpu/drm/i915/gt/intel_engine_types.h |   1 +
> >   drivers/gpu/drm/i915/gt/intel_lrc.c          | 160 ++++++++++++++++++-
> >   drivers/gpu/drm/i915/gt/selftest_lrc.c       | 103 ++++++++++++
> >   drivers/gpu/drm/i915/i915_request.h          |  22 +++
> >   5 files changed, 283 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > index f451ef376548..c296aaf381e7 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > @@ -671,6 +671,7 @@ void
> >   intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
> >   {
> >       INIT_LIST_HEAD(&engine->active.requests);
> > +     INIT_LIST_HEAD(&engine->active.hold);
> >   
> >       spin_lock_init(&engine->active.lock);
> >       lockdep_set_subclass(&engine->active.lock, subclass);
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > index 00287515e7af..77e68c7643de 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > @@ -295,6 +295,7 @@ struct intel_engine_cs {
> >       struct {
> >               spinlock_t lock;
> >               struct list_head requests;
> > +             struct list_head hold; /* ready requests, but on hold */
> >       } active;
> >   
> >       struct llist_head barrier_tasks;
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index f0cbd240a8c2..05a05ceeac6a 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -2353,6 +2353,146 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
> >       }
> >   }
> >   
> > +static void __execlists_hold(struct i915_request *rq)
> > +{
> > +     LIST_HEAD(list);
> > +
> > +     do {
> > +             struct i915_dependency *p;
> > +
> > +             if (i915_request_is_active(rq))
> > +                     __i915_request_unsubmit(rq);
> > +
> > +             RQ_TRACE(rq, "on hold\n");
> > +             clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
> > +             list_move_tail(&rq->sched.link, &rq->engine->active.hold);
> > +             i915_request_set_hold(rq);
> > +
> > +             list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
> > +                     struct i915_request *w =
> > +                             container_of(p->waiter, typeof(*w), sched);
> > +
> > +                     /* Leave semaphores spinning on the other engines */
> > +                     if (w->engine != rq->engine)
> > +                             continue;
> > +
> > +                     if (list_empty(&w->sched.link))
> > +                             continue; /* Not yet submitted */
> > +
> > +                     if (i915_request_completed(w))
> > +                             continue;
> > +
> > +                     if (i915_request_has_hold(w))
> > +                             continue;
> > +
> > +                     list_move_tail(&w->sched.link, &list);
> > +             }
> > +
> > +             rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
> > +     } while (rq);
> > +}
> > +
> > +__maybe_unused
> > +static void execlists_hold(struct intel_engine_cs *engine,
> > +                        struct i915_request *rq)
> > +{
> > +     spin_lock_irq(&engine->active.lock);
> > +
> > +     /*
> > +      * Transfer this request onto the hold queue to prevent it
> > +      * being resubmitted to HW (and potentially completed) before we have
> > +      * released it. Since we may have already submitted following
> > +      * requests, we need to remove those as well.
> > +      */
> > +     GEM_BUG_ON(i915_request_completed(rq));
> > +     GEM_BUG_ON(i915_request_has_hold(rq));
> > +     GEM_BUG_ON(rq->engine != engine);
> > +     __execlists_hold(rq);
> > +
> > +     spin_unlock_irq(&engine->active.lock);
> > +}
> > +
> > +static bool hold_request(const struct i915_request *rq)
> > +{
> > +     struct i915_dependency *p;
> > +
> > +     /*
> > +      * If one of our ancestors is on hold, we must also be on hold,
> > +      * otherwise we will bypass it and execute before it.
> > +      */
> > +     list_for_each_entry(p, &rq->sched.signalers_list, signal_link) {
> > +             const struct i915_request *s =
> > +                     container_of(p->signaler, typeof(*s), sched);
> > +
> > +             if (s->engine != rq->engine)
> > +                     continue;
> > +
> > +             if (i915_request_has_hold(s))
> > +                     return true;
> > +     }
> > +
> > +     return false;
> > +}
> > +
> > +static void __execlists_unhold(struct i915_request *rq)
> > +{
> > +     LIST_HEAD(list);
> > +
> > +     do {
> > +             struct i915_dependency *p;
> > +
> > +             GEM_BUG_ON(!i915_request_has_hold(rq));
> > +             GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
> > +
> > +             i915_request_clear_hold(rq);
> > +             list_move_tail(&rq->sched.link,
> > +                            i915_sched_lookup_priolist(rq->engine,
> > +                                                       rq_prio(rq)));
> > +             set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
> > +             RQ_TRACE(rq, "hold release\n");
> > +
> > +             /* Also release any children on this engine that are ready */
> > +             list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
> > +                     struct i915_request *w =
> > +                             container_of(p->waiter, typeof(*w), sched);
> > +
> > +                     if (w->engine != rq->engine)
> > +                             continue;
> > +
> > +                     if (!i915_request_has_hold(w))
> > +                             continue;
> > +
> > +                     /* Check that no other parents are on hold */
> > +                     if (hold_request(w))
> > +                             continue;
> 
> I had a question on this check. How can there be other parents on hold on 
> the same engine if there can only be one engine reset at a time?

We hold onto the request for capture past the reset. So there could be
multiple capture workers in flight, if we have a flurry of clients
each triggering a GPU hang.
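
To sketch the scenario (hypothetical requests, names invented for
illustration):

	rq_A hangs -> execlists_hold(engine, rq_A), capture worker 1 starts
	rq_B hangs -> execlists_hold(engine, rq_B), capture worker 2 starts
	rq_W depends on both rq_A and rq_B, so it was pulled onto
	engine->active.hold along with them

	execlists_unhold(engine, rq_A):
	  hold_request(rq_W) still sees rq_B on hold, so rq_W has to stay
	  on engine->active.hold until rq_B is released as well.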

> Oh, and I was also thinking: would i915_request_has_hold be better called 
> i915_request_is_held? Or is_on_hold?

i915_request_on_hold() has been popping into my head as I read it. It fits
with on_priority_queue(), and I might do an i915_request_is_ready()
{ return !list_empty(); }. (I am formulating a plan to
s/active.requests/active.run/ and
s/i915_request_is_active/i915_request_on_runlist/.)

Then is_active() could simply be return !list_empty().
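
As a rough sketch of what those helpers could look like (tentative names
taken from the above, nothing written down yet):

	/* tentative rename of i915_request_has_hold() */
	static inline bool i915_request_on_hold(const struct i915_request *rq)
	{
		return unlikely(test_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags));
	}

	/* "ready" == submitted to the scheduler, on the priolists or hold list */
	static inline bool i915_request_is_ready(const struct i915_request *rq)
	{
		return !list_empty(&rq->sched.link);
	}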
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev3)
  2020-01-15  8:33 [Intel-gfx] [PATCH 1/3] drm/i915: Use common priotree lists for virtual engine Chris Wilson
                   ` (5 preceding siblings ...)
  2020-01-15 10:06 ` [Intel-gfx] ✗ Fi.CI.BUILD: warning " Patchwork
@ 2020-01-15 14:37 ` Patchwork
  2020-01-15 14:37 ` [Intel-gfx] ✗ Fi.CI.BUILD: warning " Patchwork
  2020-01-17 20:47 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  8 siblings, 0 replies; 21+ messages in thread
From: Patchwork @ 2020-01-15 14:37 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev3)
URL   : https://patchwork.freedesktop.org/series/72048/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_7748 -> Patchwork_16108
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/index.html

Known issues
------------

  Here are the changes found in Patchwork_16108 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_module_load@reload-with-fault-injection:
    - fi-bxt-dsi:         [PASS][1] -> [DMESG-WARN][2] ([i915#889])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-bxt-dsi/igt@i915_module_load@reload-with-fault-injection.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/fi-bxt-dsi/igt@i915_module_load@reload-with-fault-injection.html

  * igt@i915_pm_rpm@module-reload:
    - fi-kbl-guc:         [PASS][3] -> [SKIP][4] ([fdo#109271])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-kbl-guc/igt@i915_pm_rpm@module-reload.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/fi-kbl-guc/igt@i915_pm_rpm@module-reload.html
    - fi-skl-6770hq:      [PASS][5] -> [DMESG-FAIL][6] ([i915#178] / [i915#889])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html

  * igt@i915_selftest@live_active:
    - fi-skl-6770hq:      [PASS][7] -> [DMESG-WARN][8] ([i915#889]) +22 similar issues
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-skl-6770hq/igt@i915_selftest@live_active.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/fi-skl-6770hq/igt@i915_selftest@live_active.html

  * igt@i915_selftest@live_execlists:
    - fi-icl-y:           [PASS][9] -> [DMESG-FAIL][10] ([fdo#108569])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-icl-y/igt@i915_selftest@live_execlists.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/fi-icl-y/igt@i915_selftest@live_execlists.html

  * igt@i915_selftest@live_uncore:
    - fi-skl-6770hq:      [PASS][11] -> [DMESG-FAIL][12] ([i915#889]) +7 similar issues
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-skl-6770hq/igt@i915_selftest@live_uncore.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/fi-skl-6770hq/igt@i915_selftest@live_uncore.html

  
#### Possible fixes ####

  * igt@gem_exec_gttfill@basic:
    - fi-bsw-n3050:       [TIMEOUT][13] ([fdo#112271]) -> [PASS][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-bsw-n3050/igt@gem_exec_gttfill@basic.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/fi-bsw-n3050/igt@gem_exec_gttfill@basic.html

  * igt@i915_module_load@reload-with-fault-injection:
    - fi-skl-6700k2:      [INCOMPLETE][15] ([i915#671]) -> [PASS][16]
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-skl-6700k2/igt@i915_module_load@reload-with-fault-injection.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/fi-skl-6700k2/igt@i915_module_load@reload-with-fault-injection.html

  
#### Warnings ####

  * igt@i915_selftest@live_blt:
    - fi-hsw-4770:        [DMESG-FAIL][17] ([i915#563]) -> [DMESG-FAIL][18] ([i915#770])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-hsw-4770/igt@i915_selftest@live_blt.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/fi-hsw-4770/igt@i915_selftest@live_blt.html

  * igt@kms_chamelium@common-hpd-after-suspend:
    - fi-icl-u2:          [FAIL][19] ([i915#217]) -> [DMESG-WARN][20] ([IGT#4] / [i915#263])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-icl-u2/igt@kms_chamelium@common-hpd-after-suspend.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/fi-icl-u2/igt@kms_chamelium@common-hpd-after-suspend.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [IGT#4]: https://gitlab.freedesktop.org/drm/igt-gpu-tools/issues/4
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#112271]: https://bugs.freedesktop.org/show_bug.cgi?id=112271
  [i915#178]: https://gitlab.freedesktop.org/drm/intel/issues/178
  [i915#217]: https://gitlab.freedesktop.org/drm/intel/issues/217
  [i915#263]: https://gitlab.freedesktop.org/drm/intel/issues/263
  [i915#563]: https://gitlab.freedesktop.org/drm/intel/issues/563
  [i915#671]: https://gitlab.freedesktop.org/drm/intel/issues/671
  [i915#770]: https://gitlab.freedesktop.org/drm/intel/issues/770
  [i915#889]: https://gitlab.freedesktop.org/drm/intel/issues/889
  [i915#937]: https://gitlab.freedesktop.org/drm/intel/issues/937


Participating hosts (42 -> 44)
------------------------------

  Additional (8): fi-byt-j1900 fi-ivb-3770 fi-skl-lmem fi-blb-e6850 fi-byt-n2820 fi-bsw-nick fi-skl-6600u fi-snb-2600 
  Missing    (6): fi-hsw-4770r fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-gdg-551 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_7748 -> Patchwork_16108

  CI-20190529: 20190529
  CI_DRM_7748: 1793de9a4215356790b87608fcfc9e99eeb6954d @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5365: e9ec0ed63b25c86861ffac3c8601cc4d1b910b65 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_16108: d72dea73722073cce4d092c81365a8609bf3f2a0 @ git://anongit.freedesktop.org/gfx-ci/linux


== Kernel 32bit build ==

Warning: Kernel 32bit buildtest failed:
https://intel-gfx-ci.01.org/Patchwork_16108/build_32bit.log

  CALL    scripts/checksyscalls.sh
  CALL    scripts/atomic/check-atomics.sh
  CHK     include/generated/compile.h
Kernel: arch/x86/boot/bzImage is ready  (#1)
  Building modules, stage 2.
  MODPOST 122 modules
ERROR: "__udivdi3" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined!
scripts/Makefile.modpost:93: recipe for target '__modpost' failed
make[1]: *** [__modpost] Error 1
Makefile:1282: recipe for target 'modules' failed
make: *** [modules] Error 2


== Linux commits ==

d72dea737220 drm/i915/execlists: Offline error capture
fdb604f0360e drm/i915/gt: Allow temporary suspension of inflight requests
ed160ebd901e drm/i915: Keep track of request among the scheduling lists

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [Intel-gfx] ✗ Fi.CI.BUILD: warning for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev3)
  2020-01-15  8:33 [Intel-gfx] [PATCH 1/3] drm/i915: Use common priotree lists for virtual engine Chris Wilson
                   ` (6 preceding siblings ...)
  2020-01-15 14:37 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev3) Patchwork
@ 2020-01-15 14:37 ` Patchwork
  2020-01-17 20:47 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  8 siblings, 0 replies; 21+ messages in thread
From: Patchwork @ 2020-01-15 14:37 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev3)
URL   : https://patchwork.freedesktop.org/series/72048/
State : warning

== Summary ==

CALL    scripts/checksyscalls.sh
  CALL    scripts/atomic/check-atomics.sh
  CHK     include/generated/compile.h
Kernel: arch/x86/boot/bzImage is ready  (#1)
  Building modules, stage 2.
  MODPOST 122 modules
ERROR: "__udivdi3" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined!
scripts/Makefile.modpost:93: recipe for target '__modpost' failed
make[1]: *** [__modpost] Error 1
Makefile:1282: recipe for target 'modules' failed
make: *** [modules] Error 2

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/build_32bit.log
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Intel-gfx] [PATCH v3] drm/i915/gt: Allow temporary suspension of inflight requests
  2020-01-15 11:10   ` [Intel-gfx] [PATCH v3] " Chris Wilson
  2020-01-15 11:37     ` Tvrtko Ursulin
@ 2020-01-16 17:12     ` Tvrtko Ursulin
  1 sibling, 0 replies; 21+ messages in thread
From: Tvrtko Ursulin @ 2020-01-16 17:12 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 15/01/2020 11:10, Chris Wilson wrote:
> In order to support out-of-line error capture, we need to remove the
> active request from HW and put it to one side while a worker compresses
> and stores all the details associated with that request. (As that
> compression may take an arbitrary user-controlled amount of time, we
> want to let the engine continue running on other workloads while the
> hanging request is dumped.) Not only do we need to remove the active
> request, but we also have to remove its context and all requests that
> were dependent on it (whether in flight, queued or yet to be submitted).
> 
> Finally once the capture is complete, we need to be able to resubmit the
> request and its dependents and allow them to execute.
> 
> v2: Replace stack recursion with a simple list.
> v3: Check all the parents, not just the first, when searching for a
> stuck ancestor!
> 
> References: https://gitlab.freedesktop.org/drm/intel/issues/738
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

> ---
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c    |   1 +
>   drivers/gpu/drm/i915/gt/intel_engine_types.h |   1 +
>   drivers/gpu/drm/i915/gt/intel_lrc.c          | 160 ++++++++++++++++++-
>   drivers/gpu/drm/i915/gt/selftest_lrc.c       | 103 ++++++++++++
>   drivers/gpu/drm/i915/i915_request.h          |  22 +++
>   5 files changed, 283 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index f451ef376548..c296aaf381e7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -671,6 +671,7 @@ void
>   intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
>   {
>   	INIT_LIST_HEAD(&engine->active.requests);
> +	INIT_LIST_HEAD(&engine->active.hold);
>   
>   	spin_lock_init(&engine->active.lock);
>   	lockdep_set_subclass(&engine->active.lock, subclass);
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 00287515e7af..77e68c7643de 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -295,6 +295,7 @@ struct intel_engine_cs {
>   	struct {
>   		spinlock_t lock;
>   		struct list_head requests;
> +		struct list_head hold; /* ready requests, but on hold */
>   	} active;
>   
>   	struct llist_head barrier_tasks;
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index f0cbd240a8c2..05a05ceeac6a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2353,6 +2353,146 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
>   	}
>   }
>   
> +static void __execlists_hold(struct i915_request *rq)
> +{
> +	LIST_HEAD(list);
> +
> +	do {
> +		struct i915_dependency *p;
> +
> +		if (i915_request_is_active(rq))
> +			__i915_request_unsubmit(rq);
> +
> +		RQ_TRACE(rq, "on hold\n");
> +		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
> +		list_move_tail(&rq->sched.link, &rq->engine->active.hold);
> +		i915_request_set_hold(rq);
> +
> +		list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
> +			struct i915_request *w =
> +				container_of(p->waiter, typeof(*w), sched);
> +
> +			/* Leave semaphores spinning on the other engines */
> +			if (w->engine != rq->engine)
> +				continue;
> +
> +			if (list_empty(&w->sched.link))
> +				continue; /* Not yet submitted */
> +
> +			if (i915_request_completed(w))
> +				continue;
> +
> +			if (i915_request_has_hold(rq))
> +				continue;
> +
> +			list_move_tail(&w->sched.link, &list);
> +		}
> +
> +		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
> +	} while (rq);
> +}
> +
> +__maybe_unused
> +static void execlists_hold(struct intel_engine_cs *engine,
> +			   struct i915_request *rq)
> +{
> +	spin_lock_irq(&engine->active.lock);
> +
> +	/*
> +	 * Transfer this request onto the hold queue to prevent it
> +	 * being resumbitted to HW (and potentially completed) before we have
> +	 * released it. Since we may have already submitted following
> +	 * requests, we need to remove those as well.
> +	 */
> +	GEM_BUG_ON(i915_request_completed(rq));
> +	GEM_BUG_ON(i915_request_has_hold(rq));
> +	GEM_BUG_ON(rq->engine != engine);
> +	__execlists_hold(rq);
> +
> +	spin_unlock_irq(&engine->active.lock);
> +}
> +
> +static bool hold_request(const struct i915_request *rq)
> +{
> +	struct i915_dependency *p;
> +
> +	/*
> +	 * If one of our ancestors is on hold, we must also be on hold,
> +	 * otherwise we will bypass it and execute before it.
> +	 */
> +	list_for_each_entry(p, &rq->sched.signalers_list, signal_link) {
> +		const struct i915_request *s =
> +			container_of(p->signaler, typeof(*s), sched);
> +
> +		if (s->engine != rq->engine)
> +			continue;
> +
> +		if (i915_request_has_hold(s))
> +			return true;
> +	}
> +
> +	return false;
> +}
> +
> +static void __execlists_unhold(struct i915_request *rq)
> +{
> +	LIST_HEAD(list);
> +
> +	do {
> +		struct i915_dependency *p;
> +
> +		GEM_BUG_ON(!i915_request_has_hold(rq));
> +		GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
> +
> +		i915_request_clear_hold(rq);
> +		list_move_tail(&rq->sched.link,
> +			       i915_sched_lookup_priolist(rq->engine,
> +							  rq_prio(rq)));
> +		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
> +		RQ_TRACE(rq, "hold release\n");
> +
> +		/* Also release any children on this engine that are ready */
> +		list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
> +			struct i915_request *w =
> +				container_of(p->waiter, typeof(*w), sched);
> +
> +			if (w->engine != rq->engine)
> +				continue;
> +
> +			if (!i915_request_has_hold(rq))
> +				continue;
> +
> +			/* Check that no other parents are on hold */
> +			if (hold_request(rq))
> +				continue;
> +
> +			list_move_tail(&w->sched.link, &list);
> +		}
> +
> +		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
> +	} while (rq);
> +}
> +
> +__maybe_unused
> +static void execlists_unhold(struct intel_engine_cs *engine,
> +			     struct i915_request *rq)
> +{
> +	spin_lock_irq(&engine->active.lock);
> +
> +	/*
> +	 * Move this request back to the priority queue, and all of its
> +	 * children and grandchildren that were suspended along with it.
> +	 */
> +	__execlists_unhold(rq);
> +
> +	if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
> +		engine->execlists.queue_priority_hint = rq_prio(rq);
> +		tasklet_hi_schedule(&engine->execlists.tasklet);
> +	}
> +
> +	spin_unlock_irq(&engine->active.lock);
> +}
> +
>   static noinline void preempt_reset(struct intel_engine_cs *engine)
>   {
>   	const unsigned int bit = I915_RESET_ENGINE + engine->id;
> @@ -2465,6 +2605,13 @@ static void submit_queue(struct intel_engine_cs *engine,
>   	__submit_queue_imm(engine);
>   }
>   
> +static bool on_hold(const struct intel_engine_cs *engine,
> +		    const struct i915_request *rq)
> +{
> +	GEM_BUG_ON(i915_request_has_hold(rq));
> +	return !list_empty(&engine->active.hold) && hold_request(rq);
> +}
> +
>   static void execlists_submit_request(struct i915_request *request)
>   {
>   	struct intel_engine_cs *engine = request->engine;
> @@ -2473,12 +2620,17 @@ static void execlists_submit_request(struct i915_request *request)
>   	/* Will be called from irq-context when using foreign fences. */
>   	spin_lock_irqsave(&engine->active.lock, flags);
>   
> -	queue_request(engine, request);
> +	if (unlikely(on_hold(engine, request))) { /* ancestor is suspended */
> +		list_add_tail(&request->sched.link, &engine->active.hold);
> +		i915_request_set_hold(request);
> +	} else {
> +		queue_request(engine, request);
>   
> -	GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
> -	GEM_BUG_ON(list_empty(&request->sched.link));
> +		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
> +		GEM_BUG_ON(list_empty(&request->sched.link));
>   
> -	submit_queue(engine, request);
> +		submit_queue(engine, request);
> +	}
>   
>   	spin_unlock_irqrestore(&engine->active.lock, flags);
>   }
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 15cda024e3e4..78501d79c0ea 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -285,6 +285,108 @@ static int live_unlite_preempt(void *arg)
>   	return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
>   }
>   
> +static int live_hold_reset(void *arg)
> +{
> +	struct intel_gt *gt = arg;
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	struct igt_spinner spin;
> +	int err = 0;
> +
> +	/*
> +	 * In order to support offline error capture for fast preempt reset,
> +	 * we need to decouple the guilty request and ensure that it and its
> +	 * descendants are not executed while the capture is in progress.
> +	 */
> +
> +	if (!intel_has_reset_engine(gt))
> +		return 0;
> +
> +	if (igt_spinner_init(&spin, gt))
> +		return -ENOMEM;
> +
> +	for_each_engine(engine, gt, id) {
> +		struct intel_context *ce;
> +		unsigned long heartbeat;
> +		struct i915_request *rq;
> +
> +		ce = intel_context_create(engine);
> +		if (IS_ERR(ce)) {
> +			err = PTR_ERR(ce);
> +			break;
> +		}
> +
> +		engine_heartbeat_disable(engine, &heartbeat);
> +
> +		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			goto out;
> +		}
> +		i915_request_add(rq);
> +
> +		if (!igt_wait_for_spinner(&spin, rq)) {
> +			intel_gt_set_wedged(gt);
> +			err = -ETIME;
> +			goto out;
> +		}
> +
> +		/* We have our request executing, now remove it and reset */
> +
> +		if (test_and_set_bit(I915_RESET_ENGINE + id,
> +				     &gt->reset.flags)) {
> +			intel_gt_set_wedged(gt);
> +			err = -EBUSY;
> +			goto out;
> +		}
> +		tasklet_disable(&engine->execlists.tasklet);
> +
> +		engine->execlists.tasklet.func(engine->execlists.tasklet.data);
> +		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
> +
> +		execlists_hold(engine, rq);
> +		GEM_BUG_ON(!i915_request_has_hold(rq));
> +
> +		intel_engine_reset(engine, NULL);
> +		GEM_BUG_ON(rq->fence.error != -EIO);
> +
> +		tasklet_enable(&engine->execlists.tasklet);
> +		clear_and_wake_up_bit(I915_RESET_ENGINE + id,
> +				      &gt->reset.flags);
> +
> +		/* Check that we do not resubmit the held request */
> +		i915_request_get(rq);
> +		if (i915_request_wait(rq, 0, HZ / 5) >= 0) {
> +			pr_err("%s: on hold request completed!\n",
> +			       engine->name);
> +			i915_request_put(rq);
> +			err = -EIO;
> +			goto out;
> +		}
> +		GEM_BUG_ON(!i915_request_has_hold(rq));
> +
> +		/* But is resubmitted on release */
> +		execlists_unhold(engine, rq);
> +		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
> +			pr_err("%s: held request did not complete!\n",
> +			       engine->name);
> +			intel_gt_set_wedged(gt);
> +			err = -ETIME;
> +		}
> +		i915_request_put(rq);
> +
> +out:
> +		engine_heartbeat_enable(engine, heartbeat);
> +		intel_context_put(ce);
> +		if (err)
> +			break;
> +	}
> +
> +	igt_spinner_fini(&spin);
> +	return err;
> +}
> +
>   static int
>   emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
>   {
> @@ -3315,6 +3417,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   		SUBTEST(live_sanitycheck),
>   		SUBTEST(live_unlite_switch),
>   		SUBTEST(live_unlite_preempt),
> +		SUBTEST(live_hold_reset),
>   		SUBTEST(live_timeslice_preempt),
>   		SUBTEST(live_timeslice_queue),
>   		SUBTEST(live_busywait_preempt),
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index a9f0d3c8d8b7..47fa5419c74f 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -90,6 +90,13 @@ enum {
>   	 */
>   	I915_FENCE_FLAG_SIGNAL,
>   
> +	/*
> +	 * I915_FENCE_FLAG_HOLD - this request is currently on hold
> +	 *
> +	 * This request has been suspended, pending an ongoing investigation.
> +	 */
> +	I915_FENCE_FLAG_HOLD,
> +
>   	/*
>   	 * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted
>   	 *
> @@ -500,6 +507,21 @@ static inline bool i915_request_has_sentinel(const struct i915_request *rq)
>   	return unlikely(test_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags));
>   }
>   
> +static inline bool i915_request_has_hold(const struct i915_request *rq)
> +{
> +	return unlikely(test_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags));
> +}
> +
> +static inline void i915_request_set_hold(struct i915_request *rq)
> +{
> +	set_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
> +}
> +
> +static inline void i915_request_clear_hold(struct i915_request *rq)
> +{
> +	clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
> +}
> +
>   static inline struct intel_timeline *
>   i915_request_timeline(struct i915_request *rq)
>   {
> 

* Re: [Intel-gfx] [PATCH 3/3] drm/i915/execlists: Offline error capture
  2020-01-15  8:33 ` [Intel-gfx] [PATCH 3/3] drm/i915/execlists: Offline error capture Chris Wilson
@ 2020-01-16 17:22   ` Tvrtko Ursulin
  2020-01-16 17:48     ` Chris Wilson
  0 siblings, 1 reply; 21+ messages in thread
From: Tvrtko Ursulin @ 2020-01-16 17:22 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 15/01/2020 08:33, Chris Wilson wrote:
> Currently, we skip error capture upon forced preemption. We apply forced
> preemption when there is a higher priority request that should be
> running but is being blocked, and we skip inline error capture so that
> the preemption request is not further delayed by a user controlled
> capture -- extending the denial of service.
> 
> However, preemption reset is also used for heartbeats and regular GPU
> hangs. By skipping the error capture, we remove the ability to debug GPU
> hangs.
> 
> In order to capture the error without delaying the preemption request
> further, we can do an out-of-line capture by removing the guilty request
> from the execution queue and scheduling a work to dump that request.
> When removing a request, we need to remove the entire context and all
> descendants from the execution queue, so that they do not jump past it.
> 
> Closes: https://gitlab.freedesktop.org/drm/intel/issues/738
> Fixes: 3a7a92aba8fb ("drm/i915/execlists: Force preemption")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_lrc.c | 120 +++++++++++++++++++++++++++-
>   1 file changed, 118 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 43c19dc9c0c7..a84477df32bd 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2392,7 +2392,6 @@ static void __execlists_hold(struct i915_request *rq)
>   	} while(rq);
>   }
>   
> -__maybe_unused
>   static void execlists_hold(struct intel_engine_cs *engine,
>   			   struct i915_request *rq)
>   {
> @@ -2472,7 +2471,6 @@ static void __execlists_unhold(struct i915_request *rq)
>   	} while(rq);
>   }
>   
> -__maybe_unused
>   static void execlists_unhold(struct intel_engine_cs *engine,
>   			     struct i915_request *rq)
>   {
> @@ -2492,6 +2490,121 @@ static void execlists_unhold(struct intel_engine_cs *engine,
>   	spin_unlock_irq(&engine->active.lock);
>   }
>   
> +struct execlists_capture {
> +	struct work_struct work;
> +	struct i915_request *rq;
> +	struct i915_gpu_coredump *error;
> +};
> +
> +static void execlists_capture_work(struct work_struct *work)
> +{
> +	struct execlists_capture *cap = container_of(work, typeof(*cap), work);
> +	const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
> +	struct intel_engine_cs *engine = cap->rq->engine;
> +	struct intel_gt_coredump *gt = cap->error->gt;
> +	struct intel_engine_capture_vma *vma;
> +
> +	/* Compress all the objects attached to the request, slow! */
> +	vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
> +	if (vma) {
> +		struct i915_vma_compress *compress =
> +			i915_vma_capture_prepare(gt);
> +
> +		intel_engine_coredump_add_vma(gt->engine, vma, compress);
> +		i915_vma_capture_finish(gt, compress);
> +	}
> +
> +	gt->simulated = gt->engine->simulated;
> +	cap->error->simulated = gt->simulated;
> +
> +	/* Publish the error state, and announce it to the world */
> +	i915_error_state_store(cap->error);
> +	i915_gpu_coredump_put(cap->error);
> +
> +	/* Return this request and all that depend upon it for signaling */
> +	execlists_unhold(engine, cap->rq);
> +
> +	kfree(cap);
> +}
> +
> +static struct i915_gpu_coredump *capture_regs(struct intel_engine_cs *engine)
> +{
> +	const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
> +	struct i915_gpu_coredump *e;
> +
> +	e = i915_gpu_coredump_alloc(engine->i915, gfp);
> +	if (!e)
> +		return NULL;
> +
> +	e->gt = intel_gt_coredump_alloc(engine->gt, gfp);
> +	if (!e->gt)
> +		goto err;
> +
> +	e->gt->engine = intel_engine_coredump_alloc(engine, gfp);
> +	if (!e->gt->engine)
> +		goto err_gt;
> +
> +	return e;
> +
> +err_gt:
> +	kfree(e->gt);
> +err:
> +	kfree(e);
> +	return NULL;
> +}
> +
> +static void execlists_capture(struct intel_engine_cs *engine)
> +{
> +	struct execlists_capture *cap;
> +
> +	if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
> +		return;
> +
> +	cap = kmalloc(sizeof(*cap), GFP_ATOMIC);
> +	if (!cap)
> +		return;
> +
> +	cap->rq = execlists_active(&engine->execlists);
> +	GEM_BUG_ON(!cap->rq);
> +
> +	cap->rq = active_request(cap->rq->context->timeline, cap->rq);

Old code, but why is active_request taking the timeline as a separate 
param when it always seems to be rq->context->timeline?

> +
> +	/*
> +	 * We need to _quickly_ capture the engine state before we reset.
> +	 * We are inside an atomic section (softirq) here and we are delaying
> +	 * the forced preemption event.
> +	 */
> +	cap->error = capture_regs(engine);
> +	if (!cap->error)
> +		goto err_free;
> +
> +	if (i915_request_completed(cap->rq)) /* oops, not so guilty! */
> +		goto err_store;

Should this be a BUG_ON? It doesn't look like active_request() can
return a completed request. Hm, I guess we can make a wrong decision to
reset the engine.

But in any case, if the request has completed in the meantime, why go to
i915_error_state_store which will log a hang in dmesg?

> +
> +	/*
> +	 * Remove the request from the execlists queue, and take ownership
> +	 * of the request. We pass it to our worker who will _slowly_ compress
> +	 * all the pages the _user_ requested for debugging their batch, after
> +	 * which we return it to the queue for signaling.
> +	 *
> +      * By removing them from the execlists queue, we also prevent the
> +      * requests from being processed by __unwind_incomplete_requests()
> +	 * during the intel_engine_reset(), and so they will *not* be replayed
> +	 * afterwards.
> +	 */
> +	execlists_hold(engine, cap->rq);
> +
> +	INIT_WORK(&cap->work, execlists_capture_work);
> +	schedule_work(&cap->work);
> +	return;
> +
> +err_store:
> +	i915_error_state_store(cap->error);
> +	i915_gpu_coredump_put(cap->error);
> +err_free:
> +	kfree(cap);
> +}
> +
>   static noinline void preempt_reset(struct intel_engine_cs *engine)
>   {
>   	const unsigned int bit = I915_RESET_ENGINE + engine->id;
> @@ -2509,6 +2622,9 @@ static noinline void preempt_reset(struct intel_engine_cs *engine)
>   	ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
>   		     READ_ONCE(engine->props.preempt_timeout_ms),
>   		     jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
> +
> +	ring_set_paused(engine, 1); /* Freeze the request in place */

Who unsets this flag?

> +	execlists_capture(engine);
>   	intel_engine_reset(engine, "preemption time out");
>   
>   	tasklet_enable(&engine->execlists.tasklet);
> 

Regards,

Tvrtko

* Re: [Intel-gfx] [PATCH v2] drm/i915: Keep track of request among the scheduling lists
  2020-01-15  9:02 ` [Intel-gfx] [PATCH v2] drm/i915: Keep track of request among the scheduling lists Chris Wilson
@ 2020-01-16 17:23   ` Tvrtko Ursulin
  0 siblings, 0 replies; 21+ messages in thread
From: Tvrtko Ursulin @ 2020-01-16 17:23 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 15/01/2020 09:02, Chris Wilson wrote:
> If we keep track of when the i915_request.sched.link is on the HW
> runlist, or in the priority queue we can simplify our interactions with
> the request (such as during rescheduling). This also simplifies the next
> patch where we introduce a new in-between list, for requests that are
> ready but neither on the run list or in the queue.
> 
> v2: Update i915_sched_node.link explanation for current usage where it
> is a link on both the queue and on the runlists.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_lrc.c   | 13 ++++++++-----
>   drivers/gpu/drm/i915/i915_request.c   |  4 +++-
>   drivers/gpu/drm/i915/i915_request.h   | 17 +++++++++++++++++
>   drivers/gpu/drm/i915/i915_scheduler.c | 22 ++++++++++------------
>   4 files changed, 38 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 9e430590fb3a..f0cbd240a8c2 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -985,6 +985,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
>   
>   			list_move(&rq->sched.link, pl);
> +			set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
> +
>   			active = rq;
>   		} else {
>   			struct intel_engine_cs *owner = rq->context->engine;
> @@ -2430,11 +2432,12 @@ static void execlists_preempt(struct timer_list *timer)
>   }
>   
>   static void queue_request(struct intel_engine_cs *engine,
> -			  struct i915_sched_node *node,
> -			  int prio)
> +			  struct i915_request *rq)
>   {
> -	GEM_BUG_ON(!list_empty(&node->link));
> -	list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio));
> +	GEM_BUG_ON(!list_empty(&rq->sched.link));
> +	list_add_tail(&rq->sched.link,
> +		      i915_sched_lookup_priolist(engine, rq_prio(rq)));
> +	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
>   }
>   
>   static void __submit_queue_imm(struct intel_engine_cs *engine)
> @@ -2470,7 +2473,7 @@ static void execlists_submit_request(struct i915_request *request)
>   	/* Will be called from irq-context when using foreign fences. */
>   	spin_lock_irqsave(&engine->active.lock, flags);
>   
> -	queue_request(engine, &request->sched, rq_prio(request));
> +	queue_request(engine, request);
>   
>   	GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
>   	GEM_BUG_ON(list_empty(&request->sched.link));
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index be185886e4fc..9ed0d3bc7249 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -408,8 +408,10 @@ bool __i915_request_submit(struct i915_request *request)
>   xfer:	/* We may be recursing from the signal callback of another i915 fence */
>   	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
>   
> -	if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags))
> +	if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) {
>   		list_move_tail(&request->sched.link, &engine->active.requests);
> +		clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
> +	}
>   
>   	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
>   	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 031433691a06..a9f0d3c8d8b7 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -70,6 +70,18 @@ enum {
>   	 */
>   	I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS,
>   
> +	/*
> +	 * I915_FENCE_FLAG_PQUEUE - this request is ready for execution
> +	 *
> +	 * Using the scheduler, when a request is ready for execution it is put
> +	 * into the priority queue, and removed from the queue when transferred
> +	 * to the HW runlists. We want to track its membership within that
> +	 * queue so that we can easily check before rescheduling.
> +	 *
> +	 * See i915_request_in_priority_queue()
> +	 */
> +	I915_FENCE_FLAG_PQUEUE,
> +
>   	/*
>   	 * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list
>   	 *
> @@ -361,6 +373,11 @@ static inline bool i915_request_is_active(const struct i915_request *rq)
>   	return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
>   }
>   
> +static inline bool i915_request_in_priority_queue(const struct i915_request *rq)
> +{
> +	return test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
> +}
> +
>   /**
>    * Returns true if seq1 is later than seq2.
>    */
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index bf87c70bfdd9..db3da81b7f05 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -326,20 +326,18 @@ static void __i915_schedule(struct i915_sched_node *node,
>   
>   		node->attr.priority = prio;
>   
> -		if (list_empty(&node->link)) {
> -			/*
> -			 * If the request is not in the priolist queue because
> -			 * it is not yet runnable, then it doesn't contribute
> -			 * to our preemption decisions. On the other hand,
> -			 * if the request is on the HW, it too is not in the
> -			 * queue; but in that case we may still need to reorder
> -			 * the inflight requests.
> -			 */
> +		/*
> +		 * Once the request is ready, it will be placed into the
> +		 * priority lists and then onto the HW runlist. Before the
> +		 * request is ready, it does not contribute to our preemption
> +		 * decisions and we can safely ignore it, as it, and any
> +		 * preemption required, will be dealt with upon submission.
> +		 * See engine->submit_request()
> +		 */
> +		if (list_empty(&node->link))
>   			continue;
> -		}
>   
> -		if (!intel_engine_is_virtual(engine) &&
> -		    !i915_request_is_active(node_to_request(node))) {
> +		if (i915_request_in_priority_queue(node_to_request(node))) {
>   			if (!cache.priolist)
>   				cache.priolist =
>   					i915_sched_lookup_priolist(engine,
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

* Re: [Intel-gfx] [PATCH 3/3] drm/i915/execlists: Offline error capture
  2020-01-16 17:22   ` Tvrtko Ursulin
@ 2020-01-16 17:48     ` Chris Wilson
  2020-01-16 18:14       ` Tvrtko Ursulin
  0 siblings, 1 reply; 21+ messages in thread
From: Chris Wilson @ 2020-01-16 17:48 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2020-01-16 17:22:10)
> 
> On 15/01/2020 08:33, Chris Wilson wrote:
> > [...]
> > +     cap->rq = execlists_active(&engine->execlists);
> > +     GEM_BUG_ON(!cap->rq);
> > +
> > +     cap->rq = active_request(cap->rq->context->timeline, cap->rq);
> 
> Old code, but why is active_request taking the timeline as a separate 
> param when it always seems to be rq->context->timeline?

It grew out of walking along the engine without a request. Old habits.
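
For reference, all it does now is walk back along tl->requests to find
the oldest incomplete request (the one the engine is actually stuck
on), so the tl could indeed be derived from the rq itself these days.
A rough sketch, modulo the exact locking:

static struct i915_request *
active_request(const struct intel_timeline * const tl, struct i915_request *rq)
{
	struct i915_request *active = rq;

	/* Walk back to the oldest request on this timeline that has
	 * not yet completed; that is the request actually stuck on HW.
	 */
	list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
		if (i915_request_completed(rq))
			break;

		active = rq;
	}

	return active;
}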

> > +     /*
> > +      * We need to _quickly_ capture the engine state before we reset.
> > +      * We are inside an atomic section (softirq) here and we are delaying
> > +      * the forced preemption event.
> > +      */
> > +     cap->error = capture_regs(engine);
> > +     if (!cap->error)
> > +             goto err_free;
> > +
> > +     if (i915_request_completed(cap->rq)) /* oops, not so guilty! */
> > +             goto err_store;
> 
> Should this be a BUG_ON? It doesn't look like active_request() can
> return a completed request. Hm, I guess we can make a wrong decision to
> reset the engine.

Aye. Until we actually invoke the reset, the engine is still active and
so may have advanced. We call ring_set_paused() so it doesn't get too
far ahead, but that still lets the breadcrumb tick over, so it is still
possible for the active_request() to complete (but no more). 
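
(The fini-breadcrumb writes the seqno and the user interrupt first, and
only then enables arbitration and spins on the HWS_PREEMPT dword; a
rough sketch of the busywait we emit:

static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
{
	/* Poll the HWS_PREEMPT dword; while it is non-zero the ring is
	 * frozen here and cannot advance into the next request.
	 */
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0;
	*cs++ = intel_hws_preempt_address(request->engine);
	*cs++ = 0;

	return cs;
}

so the stuck request may still be signaled, but the engine cannot
proceed any further.)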
 
> But in any case, if the request has completed in the meantime, why go to
> i915_error_state_store which will log a hang in dmesg?

Because we are about to call intel_engine_reset(), we want some debug
clue as to why we got into a situation where we invoked the forced
preemption. I thought it might be useful to see the engine state, and to
drop the "oops, please file a bug report" because of the reset.

> > [...]
> >   static noinline void preempt_reset(struct intel_engine_cs *engine)
> >   {
> >       const unsigned int bit = I915_RESET_ENGINE + engine->id;
> > @@ -2509,6 +2622,9 @@ static noinline void preempt_reset(struct intel_engine_cs *engine)
> >       ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
> >                    READ_ONCE(engine->props.preempt_timeout_ms),
> >                    jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
> > +
> > +     ring_set_paused(engine, 1); /* Freeze the request in place */
> 
> Who unsets this flag?

Reset -> reset_csb_pointers -> ring_set_paused(0).
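
(For reference, the "pause" is just that dword in the hwsp which the
busywait above polls; roughly:

static void ring_set_paused(const struct intel_engine_cs *engine, int state)
{
	/*
	 * We inspect HWS_PREEMPT with a semaphore inside
	 * engine->emit_fini_breadcrumb. If the dword is true,
	 * the ring is paused as the semaphore will busywait
	 * until the dword is false.
	 */
	engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
	if (state)
		wmb();
}

so writing 0 during reset_csb_pointers() releases anything still
spinning on the semaphore.)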
-Chris

* Re: [Intel-gfx] [PATCH 3/3] drm/i915/execlists: Offline error capture
  2020-01-16 17:48     ` Chris Wilson
@ 2020-01-16 18:14       ` Tvrtko Ursulin
  2020-01-16 18:32         ` Chris Wilson
  0 siblings, 1 reply; 21+ messages in thread
From: Tvrtko Ursulin @ 2020-01-16 18:14 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 16/01/2020 17:48, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-01-16 17:22:10)
>>
>> On 15/01/2020 08:33, Chris Wilson wrote:
>>> [...]
>>> +     cap->rq = execlists_active(&engine->execlists);
>>> +     GEM_BUG_ON(!cap->rq);
>>> +
>>> +     cap->rq = active_request(cap->rq->context->timeline, cap->rq);
>>
>> Old code, but why is active_request taking the timeline as a separate
>> param when it always seems to be rq->context->timeline?
> 
> It grew out of walking along the engine without a request. Old habits.
> 
>>> +     /*
>>> +      * We need to _quickly_ capture the engine state before we reset.
>>> +      * We are inside an atomic section (softirq) here and we are delaying
>>> +      * the forced preemption event.
>>> +      */
>>> +     cap->error = capture_regs(engine);
>>> +     if (!cap->error)
>>> +             goto err_free;
>>> +
>>> +     if (i915_request_completed(cap->rq)) /* oops, not so guilty! */
>>> +             goto err_store;
>>
>> Should this be a BUG_ON? It doesn't look like active_request() can
>> return a completed request. Hm, I guess we can make a wrong decision to
>> reset the engine.
> 
> Aye. Until we actually invoke the reset, the engine is still active and
> so may have advanced. We call ring_set_paused() so it doesn't get too
> far ahead, but that still lets the breadcrumb tick over, so it is still
> possible for the active_request() to complete (but no more).

...

>> But in any case, if the request has completed in the meantime, why go to
>> i915_error_state_store which will log a hang in dmesg?
> 
> Because we are about to call intel_engine_reset(), we want some debug
> clue as to why we got into a situation where we invoked the forced
> preemption. I thought it might be useful to see the engine state, and to
> drop the "oops, please file a bug report" because of the reset.

... so we could still decide to bail out if the request completed in the
meantime and give up on the whole reset business. Why not, then? I
guess it is of little practical difference, a micro-second here or there
before a potential false positive.

>>> [...]
>>> @@ -2509,6 +2622,9 @@ static noinline void preempt_reset(struct intel_engine_cs *engine)
>>>        ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
>>>                     READ_ONCE(engine->props.preempt_timeout_ms),
>>>                     jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
>>> +
>>> +     ring_set_paused(engine, 1); /* Freeze the request in place */
>>
>> Who unsets this flag?
> 
> Reset -> reset_csb_pointers -> ring_set_paused(0).

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

* Re: [Intel-gfx] [PATCH 3/3] drm/i915/execlists: Offline error capture
  2020-01-16 18:14       ` Tvrtko Ursulin
@ 2020-01-16 18:32         ` Chris Wilson
  0 siblings, 0 replies; 21+ messages in thread
From: Chris Wilson @ 2020-01-16 18:32 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2020-01-16 18:14:24)
> 
> On 16/01/2020 17:48, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2020-01-16 17:22:10)
> >>
> >> On 15/01/2020 08:33, Chris Wilson wrote:
> >>> +     /*
> >>> +      * We need to _quickly_ capture the engine state before we reset.
> >>> +      * We are inside an atomic section (softirq) here and we are delaying
> >>> +      * the forced preemption event.
> >>> +      */
> >>> +     cap->error = capture_regs(engine);
> >>> +     if (!cap->error)
> >>> +             goto err_free;
> >>> +
> >>> +     if (i915_request_completed(cap->rq)) /* oops, not so guilty! */
> >>> +             goto err_store;
> >>
> >> Should this be a BUG_ON? It doesn't look like active_request() can
> >> return a completed request. Hm, I guess we can make a wrong decision to
> >> reset the engine.
> > 
> > Aye. Until we actually invoke the reset, the engine is still active and
> > so may have advanced. We call ring_set_paused() so it doesn't get too
> > far ahead, but that still lets the breadcrumb tick over, so it is still
> > possible for the active_request() to complete (but no more).
> 
> ...
> 
> >> But in any case, if the request has completed in the meantime, why go to
> >> i915_error_state_store which will log a hang in dmesg?
> > 
> > Because we are about to call intel_engine_reset(), we want some debug
> > clue as to why we got into a situation where we invoked the forced
> > preemption. I thought it might be useful to see the engine state, and to
> > drop the "oops, please file a bug report" because of the reset.
> 
> ... so we could still decide to bail out if the request completed in the
> meantime and give up on the whole reset business. Why not, then? I
> guess it is of little practical difference, a micro-second here or there
> before a potential false positive.

(When I first added the check here, it was following a hacky
__intel_gt_reset() to ensure the engine had stopped, so I needed to
always do a real reset to clean up the mess.)

Hmm. I was about to say "but the preemption window expired and we need
to reset". However, if we have completed this request and having done
since our earlier inspection, it must also hit an arbitration point
where the preemption will take place.

So yes, we can bail out here quietly if we find ourselves with a
completed request at the last moment.

For simplicity, I'm just going to ignore the troublemaker and put it on
the hold list.

         * Note that because we have not yet reset the engine at this point,
         * it is possible that the request we have identified as guilty
         * did in fact complete, and we will then hit an arbitration
         * point allowing the preemption to succeed. The likelihood of that
         * is very low (as the capturing of the engine registers should be
         * fast enough to run inside an irq-off atomic section!), so we will
         * simply hold that request accountable for being non-preemptible
         * long enough to force the reset.

We will then skip the completed request when it comes time to dequeue.
Business as usual in the land of preempt-to-busy.
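
i.e. something along these lines on top (untested sketch):

@@ execlists_capture @@
 	cap->error = capture_regs(engine);
 	if (!cap->error)
 		goto err_free;
 
-	if (i915_request_completed(cap->rq)) /* oops, not so guilty! */
-		goto err_store;
-
 	/*
 	 * Remove the request from the execlists queue, and take ownership
@@ execlists_capture @@
 	INIT_WORK(&cap->work, execlists_capture_work);
 	schedule_work(&cap->work);
 	return;
 
-err_store:
-	i915_error_state_store(cap->error);
-	i915_gpu_coredump_put(cap->error);
 err_free:
 	kfree(cap);
 }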
-Chris

* [Intel-gfx] ✗ Fi.CI.IGT: failure for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev3)
  2020-01-15  8:33 [Intel-gfx] [PATCH 1/3] drm/i915: Use common priotree lists for virtual engine Chris Wilson
                   ` (7 preceding siblings ...)
  2020-01-15 14:37 ` [Intel-gfx] ✗ Fi.CI.BUILD: warning " Patchwork
@ 2020-01-17 20:47 ` Patchwork
  8 siblings, 0 replies; 21+ messages in thread
From: Patchwork @ 2020-01-17 20:47 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev3)
URL   : https://patchwork.freedesktop.org/series/72048/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_7748_full -> Patchwork_16108_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_16108_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_16108_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_16108_full:

### IGT changes ###

#### Possible regressions ####

  * igt@gem_exec_async@concurrent-writes-bsd1:
    - shard-tglb:         [PASS][1] -> [FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb1/igt@gem_exec_async@concurrent-writes-bsd1.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb4/igt@gem_exec_async@concurrent-writes-bsd1.html

  * igt@gem_exec_async@concurrent-writes-bsd2:
    - shard-tglb:         NOTRUN -> [FAIL][3]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb8/igt@gem_exec_async@concurrent-writes-bsd2.html

  * igt@runner@aborted:
    - shard-kbl:          NOTRUN -> ([FAIL][4], [FAIL][5], [FAIL][6]) ([i915#841])
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl3/igt@runner@aborted.html
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl7/igt@runner@aborted.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl7/igt@runner@aborted.html

  
#### Warnings ####

  * igt@runner@aborted:
    - shard-apl:          [FAIL][7] ([i915#667]) -> ([FAIL][8], [FAIL][9])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-apl6/igt@runner@aborted.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-apl8/igt@runner@aborted.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-apl7/igt@runner@aborted.html

  
Known issues
------------

  Here are the changes found in Patchwork_16108_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_ctx_persistence@processes:
    - shard-glk:          [PASS][10] -> [FAIL][11] ([i915#570])
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-glk1/igt@gem_ctx_persistence@processes.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-glk5/igt@gem_ctx_persistence@processes.html

  * igt@gem_ctx_persistence@vcs0-mixed-process:
    - shard-glk:          [PASS][12] -> [FAIL][13] ([i915#679])
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-glk1/igt@gem_ctx_persistence@vcs0-mixed-process.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-glk8/igt@gem_ctx_persistence@vcs0-mixed-process.html

  * igt@gem_ctx_persistence@vcs1-persistence:
    - shard-iclb:         [PASS][14] -> [SKIP][15] ([fdo#109276] / [fdo#112080]) +1 similar issue
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb4/igt@gem_ctx_persistence@vcs1-persistence.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb6/igt@gem_ctx_persistence@vcs1-persistence.html

  * igt@gem_eio@in-flight-suspend:
    - shard-skl:          [PASS][16] -> [INCOMPLETE][17] ([i915#69])
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-skl9/igt@gem_eio@in-flight-suspend.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-skl5/igt@gem_eio@in-flight-suspend.html

  * igt@gem_eio@kms:
    - shard-snb:          [PASS][18] -> [INCOMPLETE][19] ([i915#82])
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-snb2/igt@gem_eio@kms.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-snb5/igt@gem_eio@kms.html

  * igt@gem_exec_balancer@hang:
    - shard-kbl:          [PASS][20] -> [INCOMPLETE][21] ([fdo#103665])
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-kbl7/igt@gem_exec_balancer@hang.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl3/igt@gem_exec_balancer@hang.html

  * igt@gem_exec_create@basic:
    - shard-tglb:         [PASS][22] -> [INCOMPLETE][23] ([fdo#111736] / [i915#472])
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb7/igt@gem_exec_create@basic.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb3/igt@gem_exec_create@basic.html

  * igt@gem_exec_parallel@vcs1-fds:
    - shard-iclb:         [PASS][24] -> [SKIP][25] ([fdo#112080]) +9 similar issues
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb1/igt@gem_exec_parallel@vcs1-fds.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb6/igt@gem_exec_parallel@vcs1-fds.html

  * igt@gem_exec_schedule@independent-bsd2:
    - shard-iclb:         [PASS][26] -> [SKIP][27] ([fdo#109276]) +24 similar issues
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb2/igt@gem_exec_schedule@independent-bsd2.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb8/igt@gem_exec_schedule@independent-bsd2.html

  * igt@gem_exec_schedule@pi-distinct-iova-bsd:
    - shard-iclb:         [PASS][28] -> [SKIP][29] ([i915#677])
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb5/igt@gem_exec_schedule@pi-distinct-iova-bsd.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb2/igt@gem_exec_schedule@pi-distinct-iova-bsd.html

  * igt@gem_exec_schedule@preempt-other-chain-bsd:
    - shard-iclb:         [PASS][30] -> [SKIP][31] ([fdo#112146]) +7 similar issues
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb6/igt@gem_exec_schedule@preempt-other-chain-bsd.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb2/igt@gem_exec_schedule@preempt-other-chain-bsd.html

  * igt@gem_exec_schedule@preempt-queue-render:
    - shard-tglb:         [PASS][32] -> [INCOMPLETE][33] ([fdo#111606] / [fdo#111677] / [i915#472]) +2 similar issues
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb8/igt@gem_exec_schedule@preempt-queue-render.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb6/igt@gem_exec_schedule@preempt-queue-render.html

  * igt@gem_exec_schedule@preempt-queue-vebox:
    - shard-tglb:         [PASS][34] -> [INCOMPLETE][35] ([fdo#111677] / [i915#472])
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb5/igt@gem_exec_schedule@preempt-queue-vebox.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb3/igt@gem_exec_schedule@preempt-queue-vebox.html

  * igt@gem_exec_schedule@smoketest-bsd1:
    - shard-tglb:         [PASS][36] -> [INCOMPLETE][37] ([i915#463] / [i915#472])
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb8/igt@gem_exec_schedule@smoketest-bsd1.html
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb6/igt@gem_exec_schedule@smoketest-bsd1.html

  * igt@gem_persistent_relocs@forked-faulting-reloc-thrashing:
    - shard-kbl:          [PASS][38] -> [INCOMPLETE][39] ([fdo#103665] / [i915#530])
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-kbl4/igt@gem_persistent_relocs@forked-faulting-reloc-thrashing.html
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl1/igt@gem_persistent_relocs@forked-faulting-reloc-thrashing.html

  * igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrash-inactive:
    - shard-tglb:         [PASS][40] -> [TIMEOUT][41] ([fdo#112126] / [fdo#112271])
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb1/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrash-inactive.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb5/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrash-inactive.html

  * igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrashing:
    - shard-apl:          [PASS][42] -> [TIMEOUT][43] ([fdo#112271] / [i915#530])
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-apl8/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrashing.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-apl7/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrashing.html
    - shard-glk:          [PASS][44] -> [TIMEOUT][45] ([fdo#112271] / [i915#530])
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-glk4/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrashing.html
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-glk4/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrashing.html

  * igt@gem_persistent_relocs@forked-interruptible-thrashing:
    - shard-skl:          [PASS][46] -> [TIMEOUT][47] ([fdo#112271] / [i915#530]) +1 similar issue
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-skl2/igt@gem_persistent_relocs@forked-interruptible-thrashing.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-skl10/igt@gem_persistent_relocs@forked-interruptible-thrashing.html
    - shard-glk:          [PASS][48] -> [TIMEOUT][49] ([fdo#112271])
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-glk4/igt@gem_persistent_relocs@forked-interruptible-thrashing.html
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-glk4/igt@gem_persistent_relocs@forked-interruptible-thrashing.html
    - shard-iclb:         [PASS][50] -> [FAIL][51] ([i915#520])
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb4/igt@gem_persistent_relocs@forked-interruptible-thrashing.html
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb6/igt@gem_persistent_relocs@forked-interruptible-thrashing.html
    - shard-apl:          [PASS][52] -> [INCOMPLETE][53] ([CI#80] / [fdo#103927] / [i915#530])
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-apl6/igt@gem_persistent_relocs@forked-interruptible-thrashing.html
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-apl1/igt@gem_persistent_relocs@forked-interruptible-thrashing.html
    - shard-kbl:          [PASS][54] -> [TIMEOUT][55] ([fdo#112271] / [i915#530])
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-kbl7/igt@gem_persistent_relocs@forked-interruptible-thrashing.html
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl4/igt@gem_persistent_relocs@forked-interruptible-thrashing.html

  * igt@gem_persistent_relocs@forked-thrashing:
    - shard-hsw:          [PASS][56] -> [INCOMPLETE][57] ([i915#530] / [i915#61]) +1 similar issue
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-hsw5/igt@gem_persistent_relocs@forked-thrashing.html
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-hsw7/igt@gem_persistent_relocs@forked-thrashing.html

  * igt@i915_selftest@live_execlists:
    - shard-kbl:          [PASS][58] -> [DMESG-FAIL][59] ([i915#841])
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-kbl2/igt@i915_selftest@live_execlists.html
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl7/igt@i915_selftest@live_execlists.html

  * igt@kms_color@pipe-a-ctm-0-5:
    - shard-skl:          [PASS][60] -> [DMESG-WARN][61] ([i915#109])
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-skl5/igt@kms_color@pipe-a-ctm-0-5.html
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-skl5/igt@kms_color@pipe-a-ctm-0-5.html

  * igt@kms_cursor_crc@pipe-b-cursor-128x128-sliding:
    - shard-skl:          [PASS][62] -> [FAIL][63] ([i915#54])
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-skl10/igt@kms_cursor_crc@pipe-b-cursor-128x128-sliding.html
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-skl10/igt@kms_cursor_crc@pipe-b-cursor-128x128-sliding.html

  * igt@kms_flip@flip-vs-expired-vblank:
    - shard-skl:          [PASS][64] -> [FAIL][65] ([i915#79])
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-skl7/igt@kms_flip@flip-vs-expired-vblank.html
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-skl9/igt@kms_flip@flip-vs-expired-vblank.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-offscren-pri-shrfb-draw-blt:
    - shard-tglb:         [PASS][66] -> [FAIL][67] ([i915#49])
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb3/igt@kms_frontbuffer_tracking@fbcpsr-1p-offscren-pri-shrfb-draw-blt.html
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb3/igt@kms_frontbuffer_tracking@fbcpsr-1p-offscren-pri-shrfb-draw-blt.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    - shard-kbl:          [PASS][68] -> [DMESG-WARN][69] ([i915#180]) +3 similar issues
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-kbl3/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl1/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html

  * igt@kms_plane@plane-position-covered-pipe-c-planes:
    - shard-skl:          [PASS][70] -> [FAIL][71] ([i915#247])
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-skl2/igt@kms_plane@plane-position-covered-pipe-c-planes.html
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-skl4/igt@kms_plane@plane-position-covered-pipe-c-planes.html

  * igt@kms_plane_alpha_blend@pipe-c-coverage-7efc:
    - shard-skl:          [PASS][72] -> [FAIL][73] ([fdo#108145] / [i915#265]) +1 similar issue
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-skl2/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-skl4/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html

  * igt@kms_psr@psr2_cursor_mmap_cpu:
    - shard-iclb:         [PASS][74] -> [SKIP][75] ([fdo#109441]) +3 similar issues
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb2/igt@kms_psr@psr2_cursor_mmap_cpu.html
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb8/igt@kms_psr@psr2_cursor_mmap_cpu.html

  * igt@kms_setmode@basic:
    - shard-apl:          [PASS][76] -> [FAIL][77] ([i915#31])
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-apl7/igt@kms_setmode@basic.html
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-apl3/igt@kms_setmode@basic.html

  * igt@kms_vblank@pipe-c-ts-continuation-suspend:
    - shard-apl:          [PASS][78] -> [DMESG-WARN][79] ([i915#180]) +1 similar issue
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-apl6/igt@kms_vblank@pipe-c-ts-continuation-suspend.html
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-apl1/igt@kms_vblank@pipe-c-ts-continuation-suspend.html

  * igt@prime_mmap_coherency@ioctl-errors:
    - shard-hsw:          [PASS][80] -> [INCOMPLETE][81] ([i915#61])
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-hsw1/igt@prime_mmap_coherency@ioctl-errors.html
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-hsw5/igt@prime_mmap_coherency@ioctl-errors.html

  
#### Possible fixes ####

  * igt@drm_import_export@prime:
    - shard-hsw:          [INCOMPLETE][82] ([CI#80] / [i915#61]) -> [PASS][83]
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-hsw2/igt@drm_import_export@prime.html
   [83]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-hsw5/igt@drm_import_export@prime.html

  * igt@gem_busy@close-race:
    - shard-tglb:         [INCOMPLETE][84] ([i915#472] / [i915#977]) -> [PASS][85]
   [84]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb6/igt@gem_busy@close-race.html
   [85]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb5/igt@gem_busy@close-race.html

  * igt@gem_ctx_persistence@bcs0-mixed-process:
    - shard-apl:          [FAIL][86] ([i915#679]) -> [PASS][87] +1 similar issue
   [86]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-apl4/igt@gem_ctx_persistence@bcs0-mixed-process.html
   [87]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-apl2/igt@gem_ctx_persistence@bcs0-mixed-process.html

  * igt@gem_ctx_persistence@vcs1-queued:
    - shard-iclb:         [SKIP][88] ([fdo#109276] / [fdo#112080]) -> [PASS][89] +4 similar issues
   [88]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb5/igt@gem_ctx_persistence@vcs1-queued.html
   [89]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb2/igt@gem_ctx_persistence@vcs1-queued.html

  * igt@gem_eio@in-flight-contexts-1us:
    - shard-snb:          [FAIL][90] ([i915#490]) -> [PASS][91]
   [90]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-snb6/igt@gem_eio@in-flight-contexts-1us.html
   [91]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-snb7/igt@gem_eio@in-flight-contexts-1us.html

  * igt@gem_eio@in-flight-external:
    - shard-tglb:         [INCOMPLETE][92] ([i915#472] / [i915#534]) -> [PASS][93]
   [92]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb6/igt@gem_eio@in-flight-external.html
   [93]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb3/igt@gem_eio@in-flight-external.html

  * igt@gem_exec_parallel@basic:
    - shard-tglb:         [INCOMPLETE][94] ([i915#472] / [i915#476]) -> [PASS][95]
   [94]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb3/igt@gem_exec_parallel@basic.html
   [95]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb4/igt@gem_exec_parallel@basic.html

  * igt@gem_exec_schedule@pi-common-bsd:
    - shard-iclb:         [SKIP][96] ([i915#677]) -> [PASS][97] +2 similar issues
   [96]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb1/igt@gem_exec_schedule@pi-common-bsd.html
   [97]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb6/igt@gem_exec_schedule@pi-common-bsd.html

  * igt@gem_exec_schedule@preempt-queue-contexts-render:
    - shard-tglb:         [INCOMPLETE][98] ([fdo#111606] / [fdo#111677] / [i915#472]) -> [PASS][99]
   [98]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb6/igt@gem_exec_schedule@preempt-queue-contexts-render.html
   [99]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb5/igt@gem_exec_schedule@preempt-queue-contexts-render.html

  * igt@gem_exec_schedule@preemptive-hang-bsd:
    - shard-iclb:         [SKIP][100] ([fdo#112146]) -> [PASS][101] +7 similar issues
   [100]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb2/igt@gem_exec_schedule@preemptive-hang-bsd.html
   [101]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb8/igt@gem_exec_schedule@preemptive-hang-bsd.html

  * igt@gem_exec_suspend@basic-s0:
    - shard-tglb:         [INCOMPLETE][102] ([i915#456] / [i915#472]) -> [PASS][103]
   [102]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb3/igt@gem_exec_suspend@basic-s0.html
   [103]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb4/igt@gem_exec_suspend@basic-s0.html

  * igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrash-inactive:
    - shard-apl:          [TIMEOUT][104] ([fdo#112271]) -> [PASS][105]
   [104]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-apl6/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrash-inactive.html
   [105]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-apl6/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrash-inactive.html

  * igt@gem_persistent_relocs@forked-interruptible-thrash-inactive:
    - shard-kbl:          [TIMEOUT][106] ([fdo#112271] / [i915#530]) -> [PASS][107] +1 similar issue
   [106]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-kbl1/igt@gem_persistent_relocs@forked-interruptible-thrash-inactive.html
   [107]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl4/igt@gem_persistent_relocs@forked-interruptible-thrash-inactive.html

  * igt@gem_persistent_relocs@forked-thrashing:
    - shard-kbl:          [INCOMPLETE][108] ([fdo#103665] / [i915#530]) -> [PASS][109]
   [108]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-kbl3/igt@gem_persistent_relocs@forked-thrashing.html
   [109]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl7/igt@gem_persistent_relocs@forked-thrashing.html

  * igt@gem_pipe_control_store_loop@reused-buffer:
    - shard-tglb:         [INCOMPLETE][110] ([i915#472] / [i915#707] / [i915#796]) -> [PASS][111]
   [110]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb6/igt@gem_pipe_control_store_loop@reused-buffer.html
   [111]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb7/igt@gem_pipe_control_store_loop@reused-buffer.html

  * igt@gem_ppgtt@flink-and-close-vma-leak:
    - shard-kbl:          [FAIL][112] ([i915#644]) -> [PASS][113]
   [112]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-kbl3/igt@gem_ppgtt@flink-and-close-vma-leak.html
   [113]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl7/igt@gem_ppgtt@flink-and-close-vma-leak.html

  * igt@gem_softpin@noreloc-s3:
    - shard-apl:          [DMESG-WARN][114] ([i915#180]) -> [PASS][115] +1 similar issue
   [114]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-apl4/igt@gem_softpin@noreloc-s3.html
   [115]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-apl2/igt@gem_softpin@noreloc-s3.html

  * igt@gem_sync@basic-store-each:
    - shard-tglb:         [INCOMPLETE][116] ([i915#472]) -> [PASS][117] +1 similar issue
   [116]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb6/igt@gem_sync@basic-store-each.html
   [117]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb1/igt@gem_sync@basic-store-each.html

  * igt@i915_pm_rps@reset:
    - shard-iclb:         [FAIL][118] ([i915#413]) -> [PASS][119]
   [118]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb4/igt@i915_pm_rps@reset.html
   [119]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb1/igt@i915_pm_rps@reset.html

  * igt@kms_color@pipe-b-ctm-0-75:
    - shard-skl:          [DMESG-WARN][120] ([i915#109]) -> [PASS][121]
   [120]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-skl3/igt@kms_color@pipe-b-ctm-0-75.html
   [121]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-skl10/igt@kms_color@pipe-b-ctm-0-75.html

  * igt@kms_draw_crc@draw-method-rgb565-render-untiled:
    - shard-apl:          [DMESG-WARN][122] -> [PASS][123]
   [122]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-apl1/igt@kms_draw_crc@draw-method-rgb565-render-untiled.html
   [123]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-apl7/igt@kms_draw_crc@draw-method-rgb565-render-untiled.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible:
    - shard-skl:          [FAIL][124] ([i915#79]) -> [PASS][125]
   [124]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-skl4/igt@kms_flip@flip-vs-expired-vblank-interruptible.html
   [125]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-skl1/igt@kms_flip@flip-vs-expired-vblank-interruptible.html

  * igt@kms_flip@flip-vs-suspend-interruptible:
    - shard-glk:          [INCOMPLETE][126] ([i915#58] / [k.org#198133]) -> [PASS][127]
   [126]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-glk8/igt@kms_flip@flip-vs-suspend-interruptible.html
   [127]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-glk1/igt@kms_flip@flip-vs-suspend-interruptible.html

  * igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min:
    - shard-skl:          [FAIL][128] ([fdo#108145]) -> [PASS][129]
   [128]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-skl5/igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min.html
   [129]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-skl5/igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min.html

  * igt@kms_psr2_su@frontbuffer:
    - shard-iclb:         [SKIP][130] ([fdo#109642] / [fdo#111068]) -> [PASS][131]
   [130]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb5/igt@kms_psr2_su@frontbuffer.html
   [131]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb2/igt@kms_psr2_su@frontbuffer.html

  * igt@kms_psr@psr2_sprite_plane_move:
    - shard-iclb:         [SKIP][132] ([fdo#109441]) -> [PASS][133] +3 similar issues
   [132]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb5/igt@kms_psr@psr2_sprite_plane_move.html
   [133]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb2/igt@kms_psr@psr2_sprite_plane_move.html

  * igt@kms_vblank@pipe-a-ts-continuation-suspend:
    - shard-kbl:          [DMESG-WARN][134] ([i915#180]) -> [PASS][135] +6 similar issues
   [134]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-kbl4/igt@kms_vblank@pipe-a-ts-continuation-suspend.html
   [135]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl3/igt@kms_vblank@pipe-a-ts-continuation-suspend.html

  * igt@perf_pmu@busy-no-semaphores-vcs1:
    - shard-iclb:         [SKIP][136] ([fdo#112080]) -> [PASS][137] +13 similar issues
   [136]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb6/igt@perf_pmu@busy-no-semaphores-vcs1.html
   [137]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb1/igt@perf_pmu@busy-no-semaphores-vcs1.html

  * igt@prime_vgem@fence-wait-bsd2:
    - shard-iclb:         [SKIP][138] ([fdo#109276]) -> [PASS][139] +21 similar issues
   [138]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb6/igt@prime_vgem@fence-wait-bsd2.html
   [139]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb1/igt@prime_vgem@fence-wait-bsd2.html

  
#### Warnings ####

  * igt@kms_dp_dsc@basic-dsc-enable-edp:
    - shard-iclb:         [SKIP][140] ([fdo#109349]) -> [DMESG-WARN][141] ([fdo#107724])
   [140]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb5/igt@kms_dp_dsc@basic-dsc-enable-edp.html
   [141]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb2/igt@kms_dp_dsc@basic-dsc-enable-edp.html

  
  [CI#80]: https://gitlab.freedesktop.org/gfx-ci/i915-infra/issues/80
  [fdo#103665]: https://bugs.freedesktop.org/show_bug.cgi?id=103665
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109276]: https://bugs.freedesktop.org/show_bug.cgi?id=109276
  [fdo#109349]: https://bugs.freedesktop.org/show_bug.cgi?id=109349
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#109642]: https://bugs.freedesktop.org/show_bug.cgi?id=109642
  [fdo#111068]: https://bugs.freedesktop.org/show_bug.cgi?id=111068
  [fdo#111606]: https://bugs.freedesktop.org/show_bug.cgi?id=111606
  [fdo#111677]: https://bugs.freedesktop.org/show_bug.cgi?id=111677
  [fdo#111736]: https://bugs.freedesktop.org/show_bug.cgi?id=111736
  [fdo#112080]: https://bugs.freedesktop.org/show_bug.cgi?id=112080
  [fdo#112126]: https://bugs.freedesktop.org/show_bug.cgi?id=112126
  [fdo#112146]: https://bugs.freedesktop.org/show_bug.cgi?id=112146
  [fdo#112271]: https://bugs.freedesktop.org/show_bug.cgi?id=112271
  [i915#109]: https://gitlab.freedesktop.org/drm/intel/issues/109
  [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
  [i915#247]: https://gitlab.freedesktop.org/drm/intel/issues/247
  [i915#265]: https://gitlab.freedesktop.org/drm/intel/issues/265
  [i915#31]: https://gitlab.freedesktop.org/drm/intel/issues/31
  [i915#413]: https://gitlab.freedesktop.org/drm/intel/issues/413
  [i915#456]: https://gitlab.freedesktop.org/drm/intel/issues/456
  [i915#463]: https://gitlab.freedesktop.org/drm/intel/issues/463
  [i915#472]: https://gitlab.freedesktop.org/drm/intel/issues/472
  [i915#476]: https://gitlab.freedesktop.org/drm/intel/issues/476
  [i915#49]: https://gitlab.freedesktop.org/drm/intel/issues/49
  [i915#490]: https://gitlab.freedesktop.org/drm/intel/issues/490
  [i915#520]: https://gitlab.freedesktop.org/drm/intel/issues/520
  [i915#530]: https://gitlab.freedesktop.org/drm/intel/issues/530
  [i915#534]: https://gitlab.freedesktop.org/drm/intel/issues/534
  [i915#54]: https://gitlab.freedesktop.org/drm/intel/issues/54
  [i915#570]: https://gitlab.freedesktop.org/drm/intel/issues/570
  [i915#58]: https://gitlab.freedesktop.org/drm/intel/issues/58
  [i915#61]: https://gitlab.freedesktop.org/drm/intel/issues/61
  [i915#644]: https://gitlab.freedesktop.org/drm/intel/issues/644
  [i915#667]: https://gitlab.freedesktop.org/drm/intel/issues/667
  [i915#677]: https://gitlab.freedesktop.org/drm/intel/issues/677
  [i915#679]: https://gitlab.freedesktop.org/drm/intel/issues/679
  [i915#69]: https://gitlab.freedesktop.org/drm/intel/issues/69
  [i915#707]: https://gitlab.freedesktop.org/drm/intel/issues/707
  [i915#79]: https://gitlab.freedesktop.org/drm/intel/issues/79
  [i915#796]: https://gitlab.freedesktop.org/drm/intel/issues/796
  [i915#82]: https://gitlab.freedesktop.org/drm/intel/issues/82
  [i915#841]: https://gitlab.freedesktop.org/drm/intel/issues/841
  [i915#977]: https://gitlab.freedesktop.org/drm/intel/issues/977
  [k.org#198133]: https://bugzilla.kernel.org/show_bug.cgi?id=198133


Participating hosts (10 -> 10)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Thread overview: 21+ messages
2020-01-15  8:33 [Intel-gfx] [PATCH 1/3] drm/i915: Use common priotree lists for virtual engine Chris Wilson
2020-01-15  8:33 ` [Intel-gfx] [PATCH 2/3] drm/i915/gt: Allow temporary suspension of inflight requests Chris Wilson
2020-01-15 10:58   ` Tvrtko Ursulin
2020-01-15 11:01     ` Chris Wilson
2020-01-15 11:10   ` [Intel-gfx] [PATCH v3] " Chris Wilson
2020-01-15 11:37     ` Tvrtko Ursulin
2020-01-15 11:46       ` Chris Wilson
2020-01-16 17:12     ` Tvrtko Ursulin
2020-01-15  8:33 ` [Intel-gfx] [PATCH 3/3] drm/i915/execlists: Offline error capture Chris Wilson
2020-01-16 17:22   ` Tvrtko Ursulin
2020-01-16 17:48     ` Chris Wilson
2020-01-16 18:14       ` Tvrtko Ursulin
2020-01-16 18:32         ` Chris Wilson
2020-01-15  9:02 ` [Intel-gfx] [PATCH v2] drm/i915: Keep track of request among the scheduling lists Chris Wilson
2020-01-16 17:23   ` Tvrtko Ursulin
2020-01-15  9:44 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev2) Patchwork
2020-01-15 10:06 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-01-15 10:06 ` [Intel-gfx] ✗ Fi.CI.BUILD: warning " Patchwork
2020-01-15 14:37 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev3) Patchwork
2020-01-15 14:37 ` [Intel-gfx] ✗ Fi.CI.BUILD: warning " Patchwork
2020-01-17 20:47 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
