* [PATCH] drm/i915: Cancel all ready but queued requests when wedging
From: Chris Wilson @ 2017-09-15 16:49 UTC
  To: intel-gfx

When wedging the hw, we want to mark all in-flight requests as -EIO.
This is made slightly more complex by execlists, which store the ready but
not yet submitted-to-hw requests on a private queue (an rbtree
priolist). Call into execlists to cancel not only the ELSP tracking for
the submitted requests, but also the queue of unsubmitted requests.

Testcase: igt/gem_eio/in-flight-contexts
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c         | 33 ++++++----------------
 drivers/gpu/drm/i915/intel_lrc.c        | 49 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.h |  8 ++++++
 3 files changed, 66 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a5d1a4a9a011..625351792089 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3072,41 +3072,26 @@ static void engine_set_wedged(struct intel_engine_cs *engine)
 	 */
 	engine->submit_request = nop_submit_request;
 
-	/* Mark all executing requests as skipped */
 	spin_lock_irqsave(&engine->timeline->lock, flags);
-	list_for_each_entry(request, &engine->timeline->requests, link)
+
+	/* Mark all executing requests as skipped */
+	list_for_each_entry(request, &engine->timeline->requests, link) {
+		GEM_BUG_ON(!request->global_seqno);
 		if (!i915_gem_request_completed(request))
 			dma_fence_set_error(&request->fence, -EIO);
-	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+	}
 
 	/*
 	 * Clear the execlists queue up before freeing the requests, as those
 	 * are the ones that keep the context and ringbuffer backing objects
 	 * pinned in place.
 	 */
+	if (engine->cancel_all_requests)
+		engine->cancel_all_requests(engine);
 
-	if (i915.enable_execlists) {
-		struct execlist_port *port = engine->execlist_port;
-		unsigned long flags;
-		unsigned int n;
+	/* Remaining pending _unready_ requests will be nop'ed when submitted */
 
-		spin_lock_irqsave(&engine->timeline->lock, flags);
-
-		for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
-			i915_gem_request_put(port_request(&port[n]));
-		memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
-		engine->execlist_queue = RB_ROOT;
-		engine->execlist_first = NULL;
-
-		spin_unlock_irqrestore(&engine->timeline->lock, flags);
-
-		/* The port is checked prior to scheduling a tasklet, but
-		 * just in case we have suspended the tasklet to do the
-		 * wedging make sure that when it wakes, it decides there
-		 * is no work to do by clearing the irq_posted bit.
-		 */
-		clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-	}
+	spin_unlock_irqrestore(&engine->timeline->lock, flags);
 
 	/* Mark all pending requests as complete so that any concurrent
 	 * (lockless) lookup doesn't try and wait upon the request as we
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3151a13b5bf9..5f57d9c4a522 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -506,6 +506,54 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		execlists_submit_ports(engine);
 }
 
+static void execlists_cancel_all_requests(struct intel_engine_cs *engine)
+{
+	struct execlist_port *port = engine->execlist_port;
+	struct rb_node *rb;
+	unsigned long n;
+
+	lockdep_assert_held(&engine->timeline->lock);
+
+	/* Cancel the requests on the HW and clear the ELSP tracker. */
+	for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
+		i915_gem_request_put(port_request(&port[n]));
+	memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
+
+	/* Flush the queued _ready_ requests to the executing lists. */
+	rb = engine->execlist_first;
+	while (rb) {
+		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+		struct drm_i915_gem_request *rq, *rn;
+
+		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+			INIT_LIST_HEAD(&rq->priotree.link);
+			rq->priotree.priority = INT_MAX;
+
+			dma_fence_set_error(&rq->fence, -EIO);
+			__i915_gem_request_submit(rq);
+		}
+
+		rb = rb_next(rb);
+		rb_erase(&p->node, &engine->execlist_queue);
+		INIT_LIST_HEAD(&p->requests);
+		if (p->priority != I915_PRIORITY_NORMAL)
+			kmem_cache_free(engine->i915->priorities, p);
+	}
+
+	engine->execlist_queue = RB_ROOT;
+	engine->execlist_first = NULL;
+
+	GEM_BUG_ON(port_isset(&port[0]));
+
+	/*
+	 * The port is checked prior to scheduling a tasklet, but
+	 * just in case we have suspended the tasklet to do the
+	 * wedging make sure that when it wakes, it decides there
+	 * is no work to do by clearing the irq_posted bit.
+	 */
+	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+}
+
 static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
 {
 	const struct execlist_port *port = engine->execlist_port;
@@ -1687,6 +1735,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
 static void execlists_set_default_submission(struct intel_engine_cs *engine)
 {
 	engine->submit_request = execlists_submit_request;
+	engine->cancel_all_requests = execlists_cancel_all_requests;
 	engine->schedule = execlists_schedule;
 	engine->irq_tasklet.func = intel_lrc_irq_handler;
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index abf171c3cb9c..04fc50c993bf 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -306,6 +306,14 @@ struct intel_engine_cs {
 	void		(*schedule)(struct drm_i915_gem_request *request,
 				    int priority);
 
+	/*
+	 * Cancel all requests on the hardware, or queued for execution.
+	 *
+	 * This is called under the engine->timeline->lock when marking
+	 * the device as wedged.
+	 */
+	void		(*cancel_all_requests)(struct intel_engine_cs *engine);
+
 	/* Some chipsets are not quite as coherent as advertised and need
 	 * an expensive kick to force a true read of the up-to-date seqno.
 	 * However, the up-to-date seqno is not always required and the last
-- 
2.14.1

* Re: [PATCH] drm/i915: Cancel all ready but queued requests when wedging
From: Chris Wilson @ 2017-09-15 16:57 UTC
  To: intel-gfx

Quoting Chris Wilson (2017-09-15 17:49:16)
> When wedging the hw, we want to mark all in-flight requests as -EIO.
> This is made slightly more complex by execlists, which store the ready but
> not yet submitted-to-hw requests on a private queue (an rbtree
> priolist). Call into execlists to cancel not only the ELSP tracking for
> the submitted requests, but also the queue of unsubmitted requests.
> 

Reported-by: Michał Winiarski <michal.winiarski@intel.com>
> Testcase: igt/gem_eio/in-flight-contexts
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
-Chris

* Re: [PATCH] drm/i915: Cancel all ready but queued requests when wedging
From: Chris Wilson @ 2017-09-15 16:59 UTC
  To: intel-gfx

Quoting Chris Wilson (2017-09-15 17:49:16)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index abf171c3cb9c..04fc50c993bf 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -306,6 +306,14 @@ struct intel_engine_cs {
>         void            (*schedule)(struct drm_i915_gem_request *request,
>                                     int priority);
>  
> +       /*
> +        * Cancel all requests on the hardware, or queued for execution.
> +        *
> +        * This is called under the engine->timeline->lock when marking
> +        * the device as wedged.
> +        */
> +       void            (*cancel_all_requests)(struct intel_engine_cs *engine);

cancel_all_requests is a bit too broad, could just shorten it to
cancel_requests with the doc explaining that we only cancel the requests
that have been submitted to the engine (not the not-yet-ready requests
still floating in the aether).
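
Something along those lines, say (a sketch only; the comment wording is
illustrative, not meant as final):

	/*
	 * Cancel all requests on the hardware, or queued for execution.
	 * This should only cancel the ready requests that have been
	 * submitted to the engine (via engine->submit_request), not the
	 * not-yet-ready requests still waiting on their dependencies.
	 * This is called when marking the device as wedged.
	 */
	void		(*cancel_requests)(struct intel_engine_cs *engine);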
-Chris

* ✓ Fi.CI.BAT: success for drm/i915: Cancel all ready but queued requests when wedging
From: Patchwork @ 2017-09-15 17:07 UTC
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Cancel all ready but queued requests when wedging
URL   : https://patchwork.freedesktop.org/series/30434/
State : success

== Summary ==

Series 30434v1 drm/i915: Cancel all ready but queued requests when wedging
https://patchwork.freedesktop.org/api/1.0/series/30434/revisions/1/mbox/

Test pm_rpm:
        Subgroup basic-rte:
                pass       -> DMESG-WARN (fi-cfl-s) fdo#102294

fdo#102294 https://bugs.freedesktop.org/show_bug.cgi?id=102294

fi-bdw-5557u     total:289  pass:268  dwarn:0   dfail:0   fail:0   skip:21  time:449s
fi-bdw-gvtdvm    total:289  pass:265  dwarn:0   dfail:0   fail:0   skip:24  time:457s
fi-blb-e6850     total:289  pass:224  dwarn:1   dfail:0   fail:0   skip:64  time:383s
fi-bsw-n3050     total:289  pass:243  dwarn:0   dfail:0   fail:0   skip:46  time:524s
fi-bwr-2160      total:289  pass:184  dwarn:0   dfail:0   fail:0   skip:105 time:269s
fi-bxt-j4205     total:289  pass:260  dwarn:0   dfail:0   fail:0   skip:29  time:511s
fi-byt-j1900     total:289  pass:254  dwarn:1   dfail:0   fail:0   skip:34  time:501s
fi-byt-n2820     total:289  pass:250  dwarn:1   dfail:0   fail:0   skip:38  time:500s
fi-cfl-s         total:289  pass:222  dwarn:35  dfail:0   fail:0   skip:32  time:552s
fi-elk-e7500     total:289  pass:230  dwarn:0   dfail:0   fail:0   skip:59  time:425s
fi-glk-2a        total:289  pass:260  dwarn:0   dfail:0   fail:0   skip:29  time:594s
fi-hsw-4770      total:289  pass:263  dwarn:0   dfail:0   fail:0   skip:26  time:425s
fi-hsw-4770r     total:289  pass:263  dwarn:0   dfail:0   fail:0   skip:26  time:410s
fi-ilk-650       total:289  pass:229  dwarn:0   dfail:0   fail:0   skip:60  time:438s
fi-ivb-3520m     total:289  pass:261  dwarn:0   dfail:0   fail:0   skip:28  time:489s
fi-ivb-3770      total:289  pass:261  dwarn:0   dfail:0   fail:0   skip:28  time:464s
fi-kbl-7500u     total:289  pass:264  dwarn:1   dfail:0   fail:0   skip:24  time:489s
fi-kbl-7560u     total:289  pass:270  dwarn:0   dfail:0   fail:0   skip:19  time:573s
fi-kbl-r         total:289  pass:262  dwarn:0   dfail:0   fail:0   skip:27  time:590s
fi-pnv-d510      total:289  pass:223  dwarn:1   dfail:0   fail:0   skip:65  time:552s
fi-skl-6260u     total:289  pass:269  dwarn:0   dfail:0   fail:0   skip:20  time:456s
fi-skl-6700k     total:289  pass:265  dwarn:0   dfail:0   fail:0   skip:24  time:521s
fi-skl-6770hq    total:289  pass:269  dwarn:0   dfail:0   fail:0   skip:20  time:496s
fi-skl-gvtdvm    total:289  pass:266  dwarn:0   dfail:0   fail:0   skip:23  time:460s
fi-skl-x1585l    total:289  pass:268  dwarn:0   dfail:0   fail:0   skip:21  time:474s
fi-snb-2520m     total:289  pass:251  dwarn:0   dfail:0   fail:0   skip:38  time:573s
fi-snb-2600      total:289  pass:248  dwarn:0   dfail:0   fail:2   skip:39  time:425s

9adc9e93d6243c82bcefd175c2d11770802de194 drm-tip: 2017y-09m-15d-11h-44m-46s UTC integration manifest
427f54436a91 drm/i915: Cancel all ready but queued requests when wedging

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_5714/

* Re: [PATCH] drm/i915: Cancel all ready but queued requests when wedging
From: Michał Winiarski @ 2017-09-15 17:16 UTC
  To: Chris Wilson; +Cc: intel-gfx

On Fri, Sep 15, 2017 at 05:59:30PM +0100, Chris Wilson wrote:
> Quoting Chris Wilson (2017-09-15 17:49:16)
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> > index abf171c3cb9c..04fc50c993bf 100644
> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> > @@ -306,6 +306,14 @@ struct intel_engine_cs {
> >         void            (*schedule)(struct drm_i915_gem_request *request,
> >                                     int priority);
> >  
> > +       /*
> > +        * Cancel all requests on the hardware, or queued for execution.
> > +        *
> > +        * This is called under the engine->timeline->lock when marking
> > +        * the device as wedged.
> > +        */
> > +       void            (*cancel_all_requests)(struct intel_engine_cs *engine);
> 
> cancel_all_requests is a bit too broad, could just shorten it to
> cancel_requests with the doc explaining that we only cancel the requests
> that have been submitted to the engine (not the not-yet-ready requests
> still floating in the aether).
> -Chris

I agree.

Note that we're still doing part of the work directly in submit_notify:

	/* Mark all executing requests as skipped */
	list_for_each_entry(request, &engine->timeline->requests, link) {
		GEM_BUG_ON(!request->global_seqno);
		if (!i915_gem_request_completed(request))
			dma_fence_set_error(&request->fence, -EIO);
	}

Perhaps it would be cleaner if we could extract that (cancel_requests_inflight?)
and use it as a cancel_requests for legacy ringbuf? (then just call into that in
cancel_requests for execlists)
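
Roughly (a sketch only, using the placeholder name above; the loop is
lifted verbatim from engine_set_wedged):

	static void cancel_requests_inflight(struct intel_engine_cs *engine)
	{
		struct drm_i915_gem_request *request;

		lockdep_assert_held(&engine->timeline->lock);

		/* Mark all executing requests as skipped */
		list_for_each_entry(request, &engine->timeline->requests, link) {
			GEM_BUG_ON(!request->global_seqno);
			if (!i915_gem_request_completed(request))
				dma_fence_set_error(&request->fence, -EIO);
		}
	}
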
Thoughts?

-Michał

* Re: [PATCH] drm/i915: Cancel all ready but queued requests when wedging
From: Chris Wilson @ 2017-09-15 17:19 UTC
  To: Michał Winiarski; +Cc: intel-gfx

Quoting Michał Winiarski (2017-09-15 18:16:16)
> On Fri, Sep 15, 2017 at 05:59:30PM +0100, Chris Wilson wrote:
> > Quoting Chris Wilson (2017-09-15 17:49:16)
> > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> > > index abf171c3cb9c..04fc50c993bf 100644
> > > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> > > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> > > @@ -306,6 +306,14 @@ struct intel_engine_cs {
> > >         void            (*schedule)(struct drm_i915_gem_request *request,
> > >                                     int priority);
> > >  
> > > +       /*
> > > +        * Cancel all requests on the hardware, or queued for execution.
> > > +        *
> > > +        * This is called under the engine->timeline->lock when marking
> > > +        * the device as wedged.
> > > +        */
> > > +       void            (*cancel_all_requests)(struct intel_engine_cs *engine);
> > 
> > cancel_all_requests is a bit too broad, could just shorten it to
> > cancel_requests with the doc explaining that we only cancel the requests
> > that have been submitted to the engine (not the not-yet-ready requests
> > still floating in the aether).
> > -Chris
> 
> I agree.
> 
> Note that we're still doing part of the work directly in submit_notify:
> 
>         /* Mark all executing requests as skipped */
>         list_for_each_entry(request, &engine->timeline->requests, link) {
>                 GEM_BUG_ON(!request->global_seqno);
>                 if (!i915_gem_request_completed(request))
>                         dma_fence_set_error(&request->fence, -EIO);
>         }
> 
> Perhaps it would be cleaner if we could extract that (cancel_requests_inflight?)
> and use it as a cancel_requests for legacy ringbuf? (then just call into that in
> cancel_requests for execlists)
> Thoughts?

But there is also the third phase of catching all the requests not on
the timeline (or expected to be on the timeline). I guess we just move
the entire engine_set_wedged to the backends and, if we cared enough
about the duplication, pull the list_for_each_entry out into a helper.
-Chris

* [PATCH v2] drm/i915: Cancel all ready but queued requests when wedging
From: Chris Wilson @ 2017-09-15 17:31 UTC
  To: intel-gfx

When wedging the hw, we want to mark all in-flight requests as -EIO.
This is made slightly more complex by execlists, which store the ready but
not yet submitted-to-hw requests on a private queue (an rbtree
priolist). Call into execlists to cancel not only the ELSP tracking for
the submitted requests, but also the queue of unsubmitted requests.

v2: Move the majority of engine_set_wedged to the backends (both legacy
ringbuffer and execlists handling their own lists).

Reported-by: Michał Winiarski <michal.winiarski@intel.com>
Testcase: igt/gem_eio/in-flight-contexts
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c         | 38 +--------------------
 drivers/gpu/drm/i915/intel_lrc.c        | 60 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.c | 20 +++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.h |  8 +++++
 4 files changed, 89 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a5d1a4a9a011..ce37a54b29d4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3061,9 +3061,6 @@ static void nop_submit_request(struct drm_i915_gem_request *request)
 
 static void engine_set_wedged(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_request *request;
-	unsigned long flags;
-
 	/* We need to be sure that no thread is running the old callback as
 	 * we install the nop handler (otherwise we would submit a request
 	 * to hardware that will never complete). In order to prevent this
@@ -3073,40 +3070,7 @@ static void engine_set_wedged(struct intel_engine_cs *engine)
 	engine->submit_request = nop_submit_request;
 
 	/* Mark all executing requests as skipped */
-	spin_lock_irqsave(&engine->timeline->lock, flags);
-	list_for_each_entry(request, &engine->timeline->requests, link)
-		if (!i915_gem_request_completed(request))
-			dma_fence_set_error(&request->fence, -EIO);
-	spin_unlock_irqrestore(&engine->timeline->lock, flags);
-
-	/*
-	 * Clear the execlists queue up before freeing the requests, as those
-	 * are the ones that keep the context and ringbuffer backing objects
-	 * pinned in place.
-	 */
-
-	if (i915.enable_execlists) {
-		struct execlist_port *port = engine->execlist_port;
-		unsigned long flags;
-		unsigned int n;
-
-		spin_lock_irqsave(&engine->timeline->lock, flags);
-
-		for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
-			i915_gem_request_put(port_request(&port[n]));
-		memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
-		engine->execlist_queue = RB_ROOT;
-		engine->execlist_first = NULL;
-
-		spin_unlock_irqrestore(&engine->timeline->lock, flags);
-
-		/* The port is checked prior to scheduling a tasklet, but
-		 * just in case we have suspended the tasklet to do the
-		 * wedging make sure that when it wakes, it decides there
-		 * is no work to do by clearing the irq_posted bit.
-		 */
-		clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-	}
+	engine->cancel_requests(engine);
 
 	/* Mark all pending requests as complete so that any concurrent
 	 * (lockless) lookup doesn't try and wait upon the request as we
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3151a13b5bf9..2f9ebd32025c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -506,6 +506,65 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		execlists_submit_ports(engine);
 }
 
+static void execlists_cancel_requests(struct intel_engine_cs *engine)
+{
+	struct execlist_port *port = engine->execlist_port;
+	struct drm_i915_gem_request *rq, *rn;
+	struct rb_node *rb;
+	unsigned long flags;
+	unsigned long n;
+
+	spin_lock_irqsave(&engine->timeline->lock, flags);
+
+	/* Cancel the requests on the HW and clear the ELSP tracker. */
+	for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
+		i915_gem_request_put(port_request(&port[n]));
+	memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
+
+	/* Mark all executing requests as skipped. */
+	list_for_each_entry(rq, &engine->timeline->requests, link) {
+		GEM_BUG_ON(!rq->global_seqno);
+		if (!i915_gem_request_completed(rq))
+			dma_fence_set_error(&rq->fence, -EIO);
+	}
+
+	/* Flush the queued requests to the timeline list (for retiring). */
+	rb = engine->execlist_first;
+	while (rb) {
+		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+
+		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+			INIT_LIST_HEAD(&rq->priotree.link);
+			rq->priotree.priority = INT_MAX;
+
+			dma_fence_set_error(&rq->fence, -EIO);
+			__i915_gem_request_submit(rq);
+		}
+
+		rb = rb_next(rb);
+		rb_erase(&p->node, &engine->execlist_queue);
+		INIT_LIST_HEAD(&p->requests);
+		if (p->priority != I915_PRIORITY_NORMAL)
+			kmem_cache_free(engine->i915->priorities, p);
+	}
+
+	/* Remaining _unready_ requests will be nop'ed when submitted */
+
+	engine->execlist_queue = RB_ROOT;
+	engine->execlist_first = NULL;
+	GEM_BUG_ON(port_isset(&port[0]));
+
+	/*
+	 * The port is checked prior to scheduling a tasklet, but
+	 * just in case we have suspended the tasklet to do the
+	 * wedging make sure that when it wakes, it decides there
+	 * is no work to do by clearing the irq_posted bit.
+	 */
+	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+
+	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+}
+
 static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
 {
 	const struct execlist_port *port = engine->execlist_port;
@@ -1687,6 +1746,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
 static void execlists_set_default_submission(struct intel_engine_cs *engine)
 {
 	engine->submit_request = execlists_submit_request;
+	engine->cancel_requests = execlists_cancel_requests;
 	engine->schedule = execlists_schedule;
 	engine->irq_tasklet.func = intel_lrc_irq_handler;
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 7e13bfa7e1cd..fe0e2674700f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -779,6 +779,24 @@ static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs)
 	return cs;
 }
 
+static void cancel_requests(struct intel_engine_cs *engine)
+{
+	struct drm_i915_gem_request *request;
+	unsigned long flags;
+
+	spin_lock_irqsave(&engine->timeline->lock, flags);
+
+	/* Mark all submitted requests as skipped. */
+	list_for_each_entry(request, &engine->timeline->requests, link) {
+		GEM_BUG_ON(!request->global_seqno);
+		if (!i915_gem_request_completed(request))
+			dma_fence_set_error(&request->fence, -EIO);
+	}
+	/* Remaining _unready_ requests will be nop'ed when submitted */
+
+	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+}
+
 static void i9xx_submit_request(struct drm_i915_gem_request *request)
 {
 	struct drm_i915_private *dev_priv = request->i915;
@@ -1999,11 +2017,13 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
 static void i9xx_set_default_submission(struct intel_engine_cs *engine)
 {
 	engine->submit_request = i9xx_submit_request;
+	engine->cancel_requests = cancel_requests;
 }
 
 static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
 {
 	engine->submit_request = gen6_bsd_submit_request;
+	engine->cancel_requests = cancel_requests;
 }
 
 static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index abf171c3cb9c..138116a3b537 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -306,6 +306,14 @@ struct intel_engine_cs {
 	void		(*schedule)(struct drm_i915_gem_request *request,
 				    int priority);
 
+	/*
+	 * Cancel all requests on the hardware, or queued for execution.
+	 * This should only cancel the ready requests that have been
+	 * submitted to the engine (via the engine->submit_request callback).
+	 * This is called when marking the device as wedged.
+	 */
+	void		(*cancel_requests)(struct intel_engine_cs *engine);
+
 	/* Some chipsets are not quite as coherent as advertised and need
 	 * an expensive kick to force a true read of the up-to-date seqno.
 	 * However, the up-to-date seqno is not always required and the last
-- 
2.14.1


* ✓ Fi.CI.BAT: success for drm/i915: Cancel all ready but queued requests when wedging (rev2)
From: Patchwork @ 2017-09-15 17:52 UTC
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Cancel all ready but queued requests when wedging (rev2)
URL   : https://patchwork.freedesktop.org/series/30434/
State : success

== Summary ==

Series 30434v2 drm/i915: Cancel all ready but queued requests when wedging
https://patchwork.freedesktop.org/api/1.0/series/30434/revisions/2/mbox/

Test kms_cursor_legacy:
        Subgroup basic-busy-flip-before-cursor-legacy:
                fail       -> PASS       (fi-snb-2600) fdo#100215
Test kms_flip:
        Subgroup basic-flip-vs-modeset:
                skip       -> PASS       (fi-skl-x1585l) fdo#101781

fdo#100215 https://bugs.freedesktop.org/show_bug.cgi?id=100215
fdo#101781 https://bugs.freedesktop.org/show_bug.cgi?id=101781

fi-bdw-5557u     total:289  pass:268  dwarn:0   dfail:0   fail:0   skip:21  time:442s
fi-bdw-gvtdvm    total:289  pass:265  dwarn:0   dfail:0   fail:0   skip:24  time:452s
fi-blb-e6850     total:289  pass:224  dwarn:1   dfail:0   fail:0   skip:64  time:378s
fi-bsw-n3050     total:289  pass:243  dwarn:0   dfail:0   fail:0   skip:46  time:537s
fi-bwr-2160      total:289  pass:184  dwarn:0   dfail:0   fail:0   skip:105 time:269s
fi-bxt-j4205     total:289  pass:260  dwarn:0   dfail:0   fail:0   skip:29  time:506s
fi-byt-j1900     total:289  pass:254  dwarn:1   dfail:0   fail:0   skip:34  time:498s
fi-byt-n2820     total:289  pass:250  dwarn:1   dfail:0   fail:0   skip:38  time:502s
fi-cfl-s         total:289  pass:223  dwarn:34  dfail:0   fail:0   skip:32  time:553s
fi-elk-e7500     total:289  pass:230  dwarn:0   dfail:0   fail:0   skip:59  time:425s
fi-glk-2a        total:289  pass:260  dwarn:0   dfail:0   fail:0   skip:29  time:594s
fi-hsw-4770      total:289  pass:263  dwarn:0   dfail:0   fail:0   skip:26  time:429s
fi-hsw-4770r     total:289  pass:263  dwarn:0   dfail:0   fail:0   skip:26  time:406s
fi-ilk-650       total:289  pass:229  dwarn:0   dfail:0   fail:0   skip:60  time:439s
fi-ivb-3520m     total:289  pass:261  dwarn:0   dfail:0   fail:0   skip:28  time:484s
fi-ivb-3770      total:289  pass:261  dwarn:0   dfail:0   fail:0   skip:28  time:469s
fi-kbl-7500u     total:289  pass:264  dwarn:1   dfail:0   fail:0   skip:24  time:490s
fi-kbl-7560u     total:289  pass:270  dwarn:0   dfail:0   fail:0   skip:19  time:576s
fi-kbl-r         total:289  pass:262  dwarn:0   dfail:0   fail:0   skip:27  time:584s
fi-pnv-d510      total:289  pass:223  dwarn:1   dfail:0   fail:0   skip:65  time:555s
fi-skl-6260u     total:289  pass:269  dwarn:0   dfail:0   fail:0   skip:20  time:457s
fi-skl-6700k     total:289  pass:265  dwarn:0   dfail:0   fail:0   skip:24  time:517s
fi-skl-6770hq    total:289  pass:269  dwarn:0   dfail:0   fail:0   skip:20  time:501s
fi-skl-gvtdvm    total:289  pass:266  dwarn:0   dfail:0   fail:0   skip:23  time:456s
fi-skl-x1585l    total:289  pass:269  dwarn:0   dfail:0   fail:0   skip:20  time:492s
fi-snb-2520m     total:289  pass:251  dwarn:0   dfail:0   fail:0   skip:38  time:568s
fi-snb-2600      total:289  pass:249  dwarn:0   dfail:0   fail:1   skip:39  time:428s

9adc9e93d6243c82bcefd175c2d11770802de194 drm-tip: 2017y-09m-15d-11h-44m-46s UTC integration manifest
f7a3dce2b390 drm/i915: Cancel all ready but queued requests when wedging

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_5715/

* Re: [PATCH v2] drm/i915: Cancel all ready but queued requests when wedging
From: Michał Winiarski @ 2017-09-15 18:13 UTC
  To: Chris Wilson; +Cc: intel-gfx

On Fri, Sep 15, 2017 at 06:31:00PM +0100, Chris Wilson wrote:
> When wedging the hw, we want to mark all in-flight requests as -EIO.
> This is made slightly more complex by execlists, which store the ready but
> not yet submitted-to-hw requests on a private queue (an rbtree
> priolist). Call into execlists to cancel not only the ELSP tracking for
> the submitted requests, but also the queue of unsubmitted requests.
> 
> v2: Move the majority of engine_set_wedged to the backends (both legacy
> ringbuffer and execlists handling their own lists).
> 
> Reported-by: Michał Winiarski <michal.winiarski@intel.com>
> Testcase: igt/gem_eio/in-flight-contexts
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>

-Michał

> ---
>  drivers/gpu/drm/i915/i915_gem.c         | 38 +--------------------
>  drivers/gpu/drm/i915/intel_lrc.c        | 60 +++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 20 +++++++++++
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  8 +++++
>  4 files changed, 89 insertions(+), 37 deletions(-)

* ✗ Fi.CI.IGT: warning for drm/i915: Cancel all ready but queued requests when wedging (rev2)
From: Patchwork @ 2017-09-15 18:53 UTC
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Cancel all ready but queued requests when wedging (rev2)
URL   : https://patchwork.freedesktop.org/series/30434/
State : warning

== Summary ==

Test perf:
        Subgroup polling:
                pass       -> FAIL       (shard-hsw) fdo#102252
Test kms_plane:
        Subgroup plane-panning-bottom-right-suspend-pipe-A-planes:
                pass       -> SKIP       (shard-hsw)
Test kms_busy:
        Subgroup extended-modeset-hang-newfb-render-B:
                pass       -> SKIP       (shard-hsw)
Test kms_plane_multiple:
        Subgroup atomic-pipe-C-tiling-x:
                pass       -> SKIP       (shard-hsw)

fdo#102252 https://bugs.freedesktop.org/show_bug.cgi?id=102252

shard-hsw        total:2313 pass:1241 dwarn:0   dfail:0   fail:14  skip:1058 time:9340s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_5715/shards.html