All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/3] drm/i915: Postpone fake breadcrumb interrupt until real interrupts cease
@ 2017-02-17 10:18 Chris Wilson
  2017-02-17 10:18 ` [PATCH v2 2/3] drm/i915: Break i915_spin_request() if we see an interrupt Chris Wilson
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: Chris Wilson @ 2017-02-17 10:18 UTC (permalink / raw)
  To: intel-gfx

When the timer expires for checking on interrupt processing, check to
see if any interrupts arrived within the last time period. If real
interrupts are still being delivered, we can be reassured that we
haven't missed the final interrupt as the waiter will still be woken.
Only once all activity ceases, do we have to worry about the waiter
never being woken and so need to install a timer to kick the waiter for
a slow arrival of a seqno.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c          |  1 +
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 22 +++++++++-------------
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  3 ++-
 3 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 91be31617e78..57fa1bf78a85 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1033,6 +1033,7 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
 
 static void notify_ring(struct intel_engine_cs *engine)
 {
+	atomic_inc(&engine->irq_count);
 	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
 	if (intel_engine_wakeup(engine))
 		trace_i915_gem_request_notify(engine);
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 74cb7b91b5db..4395b177493e 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -26,6 +26,11 @@
 
 #include "i915_drv.h"
 
+static unsigned long wait_timeout(void)
+{
+	return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES);
+}
+
 static void intel_breadcrumbs_hangcheck(unsigned long data)
 {
 	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
@@ -34,8 +39,9 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
 	if (!b->irq_enabled)
 		return;
 
-	if (time_before(jiffies, b->timeout)) {
-		mod_timer(&b->hangcheck, b->timeout);
+	if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) {
+		b->hangcheck_interrupts = atomic_read(&engine->irq_count);
+		mod_timer(&b->hangcheck, wait_timeout());
 		return;
 	}
 
@@ -55,11 +61,6 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
 	i915_queue_hangcheck(engine->i915);
 }
 
-static unsigned long wait_timeout(void)
-{
-	return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES);
-}
-
 static void intel_breadcrumbs_fake_irq(unsigned long data)
 {
 	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
@@ -140,8 +141,7 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
 		i915_queue_hangcheck(i915);
 	} else {
 		/* Ensure we never sleep indefinitely */
-		GEM_BUG_ON(!time_after(b->timeout, jiffies));
-		mod_timer(&b->hangcheck, b->timeout);
+		mod_timer(&b->hangcheck, wait_timeout());
 	}
 }
 
@@ -258,7 +258,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
 		GEM_BUG_ON(!next && !first);
 		if (next && next != &wait->node) {
 			GEM_BUG_ON(first);
-			b->timeout = wait_timeout();
 			b->first_wait = to_wait(next);
 			rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk);
 			/* As there is a delay between reading the current
@@ -286,7 +285,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
 
 	if (first) {
 		GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
-		b->timeout = wait_timeout();
 		b->first_wait = wait;
 		rcu_assign_pointer(b->irq_seqno_bh, wait->tsk);
 		/* After assigning ourselves as the new bottom-half, we must
@@ -396,7 +394,6 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
 			 * the interrupt, or if we have to handle an
 			 * exception rather than a seqno completion.
 			 */
-			b->timeout = wait_timeout();
 			b->first_wait = to_wait(next);
 			rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk);
 			if (b->first_wait->seqno != wait->seqno)
@@ -627,7 +624,6 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
 
 	__intel_breadcrumbs_disable_irq(b);
 	if (intel_engine_has_waiter(engine)) {
-		b->timeout = wait_timeout();
 		__intel_breadcrumbs_enable_irq(b);
 		if (test_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted))
 			wake_up_process(b->first_wait->tsk);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 16714096810d..ef929d2866ee 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -213,6 +213,7 @@ struct intel_engine_cs {
 
 	struct intel_render_state *render_state;
 
+	atomic_t irq_count;
 	unsigned long irq_posted;
 #define ENGINE_IRQ_BREADCRUMB 0
 #define ENGINE_IRQ_EXECLIST 1
@@ -245,7 +246,7 @@ struct intel_engine_cs {
 		struct timer_list fake_irq; /* used after a missed interrupt */
 		struct timer_list hangcheck; /* detect missed interrupts */
 
-		unsigned long timeout;
+		unsigned int hangcheck_interrupts;
 
 		bool irq_enabled : 1;
 		bool rpm_wakelock : 1;
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2 2/3] drm/i915: Break i915_spin_request() if we see an interrupt
  2017-02-17 10:18 [PATCH v2 1/3] drm/i915: Postpone fake breadcrumb interrupt until real interrupts cease Chris Wilson
@ 2017-02-17 10:18 ` Chris Wilson
  2017-02-17 10:18 ` [PATCH v2 3/3] drm/i915: Defer declaration of missed-interrupt until the waiter is asleep Chris Wilson
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 15+ messages in thread
From: Chris Wilson @ 2017-02-17 10:18 UTC (permalink / raw)
  To: intel-gfx

If an interrupt has been posted, and we were spinning on the active
seqno waiting for it to advance but it did not, then we can expect that
it will not see its advance in the immediate future and should call into
the irq-seqno barrier. We can stop spinning at this point, and leave the
difficulty of handling the coherency to the caller.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 2f6cfa47dc61..a5fac40d2a4f 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -963,7 +963,8 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
 bool __i915_spin_request(const struct drm_i915_gem_request *req,
 			 int state, unsigned long timeout_us)
 {
-	unsigned int cpu;
+	struct intel_engine_cs *engine = req->engine;
+	unsigned int irq, cpu;
 
 	/* When waiting for high frequency requests, e.g. during synchronous
 	 * rendering split between the CPU and GPU, the finite amount of time
@@ -975,11 +976,20 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
 	 * takes to sleep on a request, on the order of a microsecond.
 	 */
 
+	irq = atomic_read(&engine->irq_count);
 	timeout_us += local_clock_us(&cpu);
 	do {
 		if (__i915_gem_request_completed(req))
 			return true;
 
+		/* Seqno are meant to be ordered *before* the interrupt. If
+		 * we see an interrupt without a corresponding seqno advance,
+		 * assume we won't see one in the near future but require
+		 * the engine->seqno_barrier() to fixup coherency.
+		 */
+		if (atomic_read(&engine->irq_count) != irq)
+			break;
+
 		if (signal_pending_state(state, current))
 			break;
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2 3/3] drm/i915: Defer declaration of missed-interrupt until the waiter is asleep
  2017-02-17 10:18 [PATCH v2 1/3] drm/i915: Postpone fake breadcrumb interrupt until real interrupts cease Chris Wilson
  2017-02-17 10:18 ` [PATCH v2 2/3] drm/i915: Break i915_spin_request() if we see an interrupt Chris Wilson
@ 2017-02-17 10:18 ` Chris Wilson
  2017-02-17 10:48   ` Tvrtko Ursulin
  2017-02-17 11:35   ` [PATCH v2 3/3] drm/i915: Defer declaration of missed-interrupt until the waiter is asleep Mika Kuoppala
  2017-02-17 11:22 ` ✓ Fi.CI.BAT: success for series starting with [v2,1/3] drm/i915: Postpone fake breadcrumb interrupt until real interrupts cease Patchwork
  2017-02-17 12:22 ` ✗ Fi.CI.BAT: warning for series starting with [v2,1/3] drm/i915: Postpone fake breadcrumb interrupt until real interrupts cease (rev2) Patchwork
  3 siblings, 2 replies; 15+ messages in thread
From: Chris Wilson @ 2017-02-17 10:18 UTC (permalink / raw)
  To: intel-gfx

If the waiter was currently running, assume it hasn't had a chance
to process the pending interupt (e.g, low priority task on a loaded
system) and wait until it sleeps before declaring a missed interrupt.

References: https://bugs.freedesktop.org/show_bug.cgi?id=99816
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 4395b177493e..2ad29fb77b2d 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -45,6 +45,15 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
 		return;
 	}
 
+	/* If the waiter was currently running, assume it hasn't had a chance
+	 * to process the pending interupt (e.g, low priority task on a loaded
+	 * system) and wait until it sleeps before declaring a missed interrupt.
+	 */
+	if (!intel_engine_wakeup(engine)) {
+		mod_timer(&b->hangcheck, wait_timeout());
+		return;
+	}
+
 	DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name);
 	set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
 	mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 3/3] drm/i915: Defer declaration of missed-interrupt until the waiter is asleep
  2017-02-17 10:18 ` [PATCH v2 3/3] drm/i915: Defer declaration of missed-interrupt until the waiter is asleep Chris Wilson
@ 2017-02-17 10:48   ` Tvrtko Ursulin
  2017-02-17 10:58     ` Chris Wilson
  2017-02-17 11:35   ` [PATCH v2 3/3] drm/i915: Defer declaration of missed-interrupt until the waiter is asleep Mika Kuoppala
  1 sibling, 1 reply; 15+ messages in thread
From: Tvrtko Ursulin @ 2017-02-17 10:48 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/02/2017 10:18, Chris Wilson wrote:
> If the waiter was currently running, assume it hasn't had a chance
> to process the pending interupt (e.g, low priority task on a loaded
> system) and wait until it sleeps before declaring a missed interrupt.
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=99816
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/intel_breadcrumbs.c | 9 +++++++++
>  1 file changed, 9 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 4395b177493e..2ad29fb77b2d 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -45,6 +45,15 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
>  		return;
>  	}
>
> +	/* If the waiter was currently running, assume it hasn't had a chance
> +	 * to process the pending interupt (e.g, low priority task on a loaded
> +	 * system) and wait until it sleeps before declaring a missed interrupt.
> +	 */
> +	if (!intel_engine_wakeup(engine)) {
> +		mod_timer(&b->hangcheck, wait_timeout());
> +		return;
> +	}
> +
>  	DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name);
>  	set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
>  	mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
>

Change here is that we would never declare a GPU hang is userspace would 
just wait indefinitely, or in other words with this patch we would rely 
on userspace timing out on their waits in order to declare a hang.

Hm, in fact even with the current code, if the userspace keeps exiting 
and re-entering the wait we would be re-arming the hangcheck timer and 
so also never notice a GPU hang.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 3/3] drm/i915: Defer declaration of missed-interrupt until the waiter is asleep
  2017-02-17 10:48   ` Tvrtko Ursulin
@ 2017-02-17 10:58     ` Chris Wilson
  2017-02-17 11:33       ` Tvrtko Ursulin
  0 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2017-02-17 10:58 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Fri, Feb 17, 2017 at 10:48:50AM +0000, Tvrtko Ursulin wrote:
> 
> On 17/02/2017 10:18, Chris Wilson wrote:
> >If the waiter was currently running, assume it hasn't had a chance
> >to process the pending interupt (e.g, low priority task on a loaded
> >system) and wait until it sleeps before declaring a missed interrupt.
> >
> >References: https://bugs.freedesktop.org/show_bug.cgi?id=99816
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> >---
> > drivers/gpu/drm/i915/intel_breadcrumbs.c | 9 +++++++++
> > 1 file changed, 9 insertions(+)
> >
> >diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> >index 4395b177493e..2ad29fb77b2d 100644
> >--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> >+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> >@@ -45,6 +45,15 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
> > 		return;
> > 	}
> >
> >+	/* If the waiter was currently running, assume it hasn't had a chance
> >+	 * to process the pending interupt (e.g, low priority task on a loaded
> >+	 * system) and wait until it sleeps before declaring a missed interrupt.
> >+	 */
> >+	if (!intel_engine_wakeup(engine)) {
> >+		mod_timer(&b->hangcheck, wait_timeout());
> >+		return;
> >+	}
> >+
> > 	DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name);
> > 	set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
> > 	mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
> >
> 
> Change here is that we would never declare a GPU hang is userspace
> would just wait indefinitely, or in other words with this patch we
> would rely on userspace timing out on their waits in order to
> declare a hang.

Surely you mean the other way around? The only way we get to now declare a
missed-interrupt and then queue a hangcheck here is if userspace sleeps.

> Hm, in fact even with the current code, if the userspace keeps
> exiting and re-entering the wait we would be re-arming the hangcheck
> timer and so also never notice a GPU hang.

Correct. It is not the only way we arm the GPU hangcheck.
gem_busy/hang, gem_wait/busy-hang check that we do detect hangs even if
userspace never sleeps.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [v2,1/3] drm/i915: Postpone fake breadcrumb interrupt until real interrupts cease
  2017-02-17 10:18 [PATCH v2 1/3] drm/i915: Postpone fake breadcrumb interrupt until real interrupts cease Chris Wilson
  2017-02-17 10:18 ` [PATCH v2 2/3] drm/i915: Break i915_spin_request() if we see an interrupt Chris Wilson
  2017-02-17 10:18 ` [PATCH v2 3/3] drm/i915: Defer declaration of missed-interrupt until the waiter is asleep Chris Wilson
@ 2017-02-17 11:22 ` Patchwork
  2017-02-17 12:22 ` ✗ Fi.CI.BAT: warning for series starting with [v2,1/3] drm/i915: Postpone fake breadcrumb interrupt until real interrupts cease (rev2) Patchwork
  3 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2017-02-17 11:22 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v2,1/3] drm/i915: Postpone fake breadcrumb interrupt until real interrupts cease
URL   : https://patchwork.freedesktop.org/series/19827/
State : success

== Summary ==

Series 19827v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/19827/revisions/1/mbox/

fi-bdw-5557u     total:252  pass:241  dwarn:0   dfail:0   fail:0   skip:11 
fi-bsw-n3050     total:252  pass:213  dwarn:0   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:252  pass:233  dwarn:0   dfail:0   fail:0   skip:19 
fi-bxt-t5700     total:83   pass:70   dwarn:0   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:252  pass:225  dwarn:0   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:252  pass:221  dwarn:0   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:252  pass:236  dwarn:0   dfail:0   fail:0   skip:16 
fi-hsw-4770r     total:252  pass:236  dwarn:0   dfail:0   fail:0   skip:16 
fi-ilk-650       total:252  pass:202  dwarn:0   dfail:0   fail:0   skip:50 
fi-ivb-3520m     total:252  pass:234  dwarn:0   dfail:0   fail:0   skip:18 
fi-ivb-3770      total:252  pass:234  dwarn:0   dfail:0   fail:0   skip:18 
fi-kbl-7500u     total:252  pass:234  dwarn:0   dfail:0   fail:0   skip:18 
fi-skl-6260u     total:252  pass:242  dwarn:0   dfail:0   fail:0   skip:10 
fi-skl-6700hq    total:252  pass:235  dwarn:0   dfail:0   fail:0   skip:17 
fi-skl-6700k     total:252  pass:230  dwarn:4   dfail:0   fail:0   skip:18 
fi-skl-6770hq    total:252  pass:242  dwarn:0   dfail:0   fail:0   skip:10 
fi-snb-2520m     total:252  pass:224  dwarn:0   dfail:0   fail:0   skip:28 
fi-snb-2600      total:252  pass:223  dwarn:0   dfail:0   fail:0   skip:29 

c2033e7aa383d062000e024c5fac5f46560327cd drm-tip: 2017y-02m-16d-20h-42m-04s UTC integration manifest
b4e595c drm/i915: Defer declaration of missed-interrupt until the waiter is asleep
51ecfa8 drm/i915: Break i915_spin_request() if we see an interrupt
feef42d drm/i915: Postpone fake breadcrumb interrupt until real interrupts cease

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3875/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 3/3] drm/i915: Defer declaration of missed-interrupt until the waiter is asleep
  2017-02-17 10:58     ` Chris Wilson
@ 2017-02-17 11:33       ` Tvrtko Ursulin
  2017-02-17 11:55         ` [PATCH] drm/i915: Only start with the fake-irq timer if interrupts are dead Chris Wilson
  0 siblings, 1 reply; 15+ messages in thread
From: Tvrtko Ursulin @ 2017-02-17 11:33 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/02/2017 10:58, Chris Wilson wrote:
> On Fri, Feb 17, 2017 at 10:48:50AM +0000, Tvrtko Ursulin wrote:
>>
>> On 17/02/2017 10:18, Chris Wilson wrote:
>>> If the waiter was currently running, assume it hasn't had a chance
>>> to process the pending interupt (e.g, low priority task on a loaded
>>> system) and wait until it sleeps before declaring a missed interrupt.
>>>
>>> References: https://bugs.freedesktop.org/show_bug.cgi?id=99816
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>>> ---
>>> drivers/gpu/drm/i915/intel_breadcrumbs.c | 9 +++++++++
>>> 1 file changed, 9 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
>>> index 4395b177493e..2ad29fb77b2d 100644
>>> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
>>> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
>>> @@ -45,6 +45,15 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
>>> 		return;
>>> 	}
>>>
>>> +	/* If the waiter was currently running, assume it hasn't had a chance
>>> +	 * to process the pending interupt (e.g, low priority task on a loaded
>>> +	 * system) and wait until it sleeps before declaring a missed interrupt.
>>> +	 */
>>> +	if (!intel_engine_wakeup(engine)) {
>>> +		mod_timer(&b->hangcheck, wait_timeout());
>>> +		return;
>>> +	}
>>> +
>>> 	DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name);
>>> 	set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
>>> 	mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
>>>
>>
>> Change here is that we would never declare a GPU hang is userspace
>> would just wait indefinitely, or in other words with this patch we
>> would rely on userspace timing out on their waits in order to
>> declare a hang.
>
> Surely you mean the other way around? The only way we get to now declare a
> missed-interrupt and then queue a hangcheck here is if userspace sleeps.
>
>> Hm, in fact even with the current code, if the userspace keeps
>> exiting and re-entering the wait we would be re-arming the hangcheck
>> timer and so also never notice a GPU hang.
>
> Correct. It is not the only way we arm the GPU hangcheck.
> gem_busy/hang, gem_wait/busy-hang check that we do detect hangs even if
> userspace never sleeps.

Looks good after some more digging through the code and a brief IRC 
discussion. We only fall back to rapid wakeups (fake_irq) if there are 
waiters now, which is inline with the rest of the code.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 3/3] drm/i915: Defer declaration of missed-interrupt until the waiter is asleep
  2017-02-17 10:18 ` [PATCH v2 3/3] drm/i915: Defer declaration of missed-interrupt until the waiter is asleep Chris Wilson
  2017-02-17 10:48   ` Tvrtko Ursulin
@ 2017-02-17 11:35   ` Mika Kuoppala
  2017-02-17 11:43     ` Chris Wilson
  1 sibling, 1 reply; 15+ messages in thread
From: Mika Kuoppala @ 2017-02-17 11:35 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> If the waiter was currently running, assume it hasn't had a chance
> to process the pending interupt (e.g, low priority task on a loaded
> system) and wait until it sleeps before declaring a missed interrupt.
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=99816
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/intel_breadcrumbs.c | 9 +++++++++
>  1 file changed, 9 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 4395b177493e..2ad29fb77b2d 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -45,6 +45,15 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
>  		return;
>  	}
>  
> +	/* If the waiter was currently running, assume it hasn't had a chance
> +	 * to process the pending interupt (e.g, low priority task on a loaded
> +	 * system) and wait until it sleeps before declaring a missed interrupt.
> +	 */
> +	if (!intel_engine_wakeup(engine)) {

But this will happen if there is waiter running, or that there is
no waiter at all.

Don't we need to exclude the latter?

-Mika


> +		mod_timer(&b->hangcheck, wait_timeout());
> +		return;
> +	}
> +
>  	DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name);
>  	set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
>  	mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
> -- 
> 2.11.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 3/3] drm/i915: Defer declaration of missed-interrupt until the waiter is asleep
  2017-02-17 11:35   ` [PATCH v2 3/3] drm/i915: Defer declaration of missed-interrupt until the waiter is asleep Mika Kuoppala
@ 2017-02-17 11:43     ` Chris Wilson
  2017-02-17 11:51       ` Chris Wilson
  2017-02-17 12:00       ` Mika Kuoppala
  0 siblings, 2 replies; 15+ messages in thread
From: Chris Wilson @ 2017-02-17 11:43 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

On Fri, Feb 17, 2017 at 01:35:15PM +0200, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > If the waiter was currently running, assume it hasn't had a chance
> > to process the pending interupt (e.g, low priority task on a loaded
> > system) and wait until it sleeps before declaring a missed interrupt.
> >
> > References: https://bugs.freedesktop.org/show_bug.cgi?id=99816
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/intel_breadcrumbs.c | 9 +++++++++
> >  1 file changed, 9 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> > index 4395b177493e..2ad29fb77b2d 100644
> > --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> > +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> > @@ -45,6 +45,15 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
> >  		return;
> >  	}
> >  
> > +	/* If the waiter was currently running, assume it hasn't had a chance
> > +	 * to process the pending interupt (e.g, low priority task on a loaded
> > +	 * system) and wait until it sleeps before declaring a missed interrupt.
> > +	 */
> > +	if (!intel_engine_wakeup(engine)) {
> 
> But this will happen if there is waiter running, or that there is
> no waiter at all.
> 
> Don't we need to exclude the latter?

We already exclude the latter by cancelling this timer above.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 3/3] drm/i915: Defer declaration of missed-interrupt until the waiter is asleep
  2017-02-17 11:43     ` Chris Wilson
@ 2017-02-17 11:51       ` Chris Wilson
  2017-02-17 12:00       ` Mika Kuoppala
  1 sibling, 0 replies; 15+ messages in thread
From: Chris Wilson @ 2017-02-17 11:51 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx, Tvrtko Ursulin

On Fri, Feb 17, 2017 at 11:43:15AM +0000, Chris Wilson wrote:
> On Fri, Feb 17, 2017 at 01:35:15PM +0200, Mika Kuoppala wrote:
> > Chris Wilson <chris@chris-wilson.co.uk> writes:
> > 
> > > If the waiter was currently running, assume it hasn't had a chance
> > > to process the pending interupt (e.g, low priority task on a loaded
> > > system) and wait until it sleeps before declaring a missed interrupt.
> > >
> > > References: https://bugs.freedesktop.org/show_bug.cgi?id=99816
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/intel_breadcrumbs.c | 9 +++++++++
> > >  1 file changed, 9 insertions(+)
> > >
> > > diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> > > index 4395b177493e..2ad29fb77b2d 100644
> > > --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> > > +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> > > @@ -45,6 +45,15 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
> > >  		return;
> > >  	}
> > >  
> > > +	/* If the waiter was currently running, assume it hasn't had a chance
> > > +	 * to process the pending interupt (e.g, low priority task on a loaded
> > > +	 * system) and wait until it sleeps before declaring a missed interrupt.
> > > +	 */
> > > +	if (!intel_engine_wakeup(engine)) {
> > 
> > But this will happen if there is waiter running, or that there is
> > no waiter at all.
> > 
> > Don't we need to exclude the latter?
> 
> We already exclude the latter by cancelling this timer above.

Ah, it's not as clear in the current tree as it will be! Here is the
b->irq_enabled check that is equivalent to checking for a waiter.
When there are no more waiters, we set irq_enabled to false and so
cancel the timer.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH] drm/i915: Only start with the fake-irq timer if interrupts are dead
  2017-02-17 11:33       ` Tvrtko Ursulin
@ 2017-02-17 11:55         ` Chris Wilson
  2017-02-17 14:59           ` Tvrtko Ursulin
  0 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2017-02-17 11:55 UTC (permalink / raw)
  To: intel-gfx

As a backup to waiting on a user-interrupt from the GPU, we use a heavy
and frequent timer to wake up the waiting process should we detect an
inconsistency whilst waiting. After seeing a "missed interrupt", the
next time we wait, we restart the heavy timer. This patch is more
reluctant to restart the timer and will only do so if we have not see any
interrupts since when we started the fake irq timer. If we are seeing
interrupts, then the waiters are being woken normally and we had an
incoherency that caused to miss last time - that is unlikely to reoccur
and so taking the risk of stalling again seems pragmatic.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 83a8b67d6427..1719f9da13b8 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -107,6 +107,23 @@ static void irq_disable(struct intel_engine_cs *engine)
 	spin_unlock(&engine->i915->irq_lock);
 }
 
+static bool use_fake_irq(const struct intel_breadcrumbs *b)
+{
+	const struct intel_engine_cs *engine =
+		container_of(b, struct intel_engine_cs, breadcrumbs);
+
+	if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings))
+		return false;
+
+	/* Only start with the heavy weight fake irq timer if we have not
+	 * seen any interrupts since enabling it the first time. If the
+	 * interrupts are still arriving, it means we made a mistake in our
+	 * engine->seqno_barrier(), a timing error that should be transient
+	 * and unlikely to reoccur.
+	 */
+	return atomic_read(&engine->irq_count) == b->hangcheck_interrupts;
+}
+
 static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
 {
 	struct intel_engine_cs *engine =
@@ -145,8 +162,7 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
 		b->irq_enabled = true;
 	}
 
-	if (!b->irq_enabled ||
-	    test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) {
+	if (!b->irq_enabled || use_fake_irq(b)) {
 		mod_timer(&b->fake_irq, jiffies + 1);
 		i915_queue_hangcheck(i915);
 	} else {
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 3/3] drm/i915: Defer declaration of missed-interrupt until the waiter is asleep
  2017-02-17 11:43     ` Chris Wilson
  2017-02-17 11:51       ` Chris Wilson
@ 2017-02-17 12:00       ` Mika Kuoppala
  1 sibling, 0 replies; 15+ messages in thread
From: Mika Kuoppala @ 2017-02-17 12:00 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> On Fri, Feb 17, 2017 at 01:35:15PM +0200, Mika Kuoppala wrote:
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > If the waiter was currently running, assume it hasn't had a chance
>> > to process the pending interupt (e.g, low priority task on a loaded
>> > system) and wait until it sleeps before declaring a missed interrupt.
>> >
>> > References: https://bugs.freedesktop.org/show_bug.cgi?id=99816
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>> > ---
>> >  drivers/gpu/drm/i915/intel_breadcrumbs.c | 9 +++++++++
>> >  1 file changed, 9 insertions(+)
>> >
>> > diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
>> > index 4395b177493e..2ad29fb77b2d 100644
>> > --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
>> > +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
>> > @@ -45,6 +45,15 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
>> >  		return;
>> >  	}
>> >  
>> > +	/* If the waiter was currently running, assume it hasn't had a chance
>> > +	 * to process the pending interupt (e.g, low priority task on a loaded
>> > +	 * system) and wait until it sleeps before declaring a missed interrupt.
>> > +	 */
>> > +	if (!intel_engine_wakeup(engine)) {
>> 
>> But this will happen if there is waiter running, or that there is
>> no waiter at all.
>> 
>> Don't we need to exclude the latter?
>
> We already exclude the latter by cancelling this timer above.

Got it now,
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> -Chris
>
> -- 
> Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* ✗ Fi.CI.BAT: warning for series starting with [v2,1/3] drm/i915: Postpone fake breadcrumb interrupt until real interrupts cease (rev2)
  2017-02-17 10:18 [PATCH v2 1/3] drm/i915: Postpone fake breadcrumb interrupt until real interrupts cease Chris Wilson
                   ` (2 preceding siblings ...)
  2017-02-17 11:22 ` ✓ Fi.CI.BAT: success for series starting with [v2,1/3] drm/i915: Postpone fake breadcrumb interrupt until real interrupts cease Patchwork
@ 2017-02-17 12:22 ` Patchwork
  3 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2017-02-17 12:22 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v2,1/3] drm/i915: Postpone fake breadcrumb interrupt until real interrupts cease (rev2)
URL   : https://patchwork.freedesktop.org/series/19827/
State : warning

== Summary ==

Series 19827v2 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/19827/revisions/2/mbox/

Test drv_module_reload:
        Subgroup basic-reload-final:
                pass       -> DMESG-WARN (fi-skl-6770hq)

fi-bdw-5557u     total:252  pass:241  dwarn:0   dfail:0   fail:0   skip:11 
fi-bsw-n3050     total:252  pass:213  dwarn:0   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:252  pass:233  dwarn:0   dfail:0   fail:0   skip:19 
fi-bxt-t5700     total:83   pass:70   dwarn:0   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:252  pass:225  dwarn:0   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:252  pass:221  dwarn:0   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:252  pass:236  dwarn:0   dfail:0   fail:0   skip:16 
fi-hsw-4770r     total:252  pass:236  dwarn:0   dfail:0   fail:0   skip:16 
fi-ilk-650       total:252  pass:202  dwarn:0   dfail:0   fail:0   skip:50 
fi-ivb-3520m     total:252  pass:234  dwarn:0   dfail:0   fail:0   skip:18 
fi-ivb-3770      total:252  pass:234  dwarn:0   dfail:0   fail:0   skip:18 
fi-kbl-7500u     total:252  pass:234  dwarn:0   dfail:0   fail:0   skip:18 
fi-skl-6260u     total:252  pass:242  dwarn:0   dfail:0   fail:0   skip:10 
fi-skl-6700hq    total:252  pass:235  dwarn:0   dfail:0   fail:0   skip:17 
fi-skl-6700k     total:252  pass:230  dwarn:4   dfail:0   fail:0   skip:18 
fi-skl-6770hq    total:252  pass:241  dwarn:1   dfail:0   fail:0   skip:10 
fi-snb-2520m     total:252  pass:224  dwarn:0   dfail:0   fail:0   skip:28 
fi-snb-2600      total:252  pass:223  dwarn:0   dfail:0   fail:0   skip:29 

c2033e7aa383d062000e024c5fac5f46560327cd drm-tip: 2017y-02m-16d-20h-42m-04s UTC integration manifest
4e3dd8b drm/i915: Only start with the fake-irq timer if interrupts are dead
c2118d9 drm/i915: Break i915_spin_request() if we see an interrupt
db2c9bd drm/i915: Postpone fake breadcrumb interrupt until real interrupts cease

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3876/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/i915: Only start with the fake-irq timer if interrupts are dead
  2017-02-17 11:55         ` [PATCH] drm/i915: Only start with the fake-irq timer if interrupts are dead Chris Wilson
@ 2017-02-17 14:59           ` Tvrtko Ursulin
  2017-02-17 22:16             ` Chris Wilson
  0 siblings, 1 reply; 15+ messages in thread
From: Tvrtko Ursulin @ 2017-02-17 14:59 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/02/2017 11:55, Chris Wilson wrote:
> As a backup to waiting on a user-interrupt from the GPU, we use a heavy
> and frequent timer to wake up the waiting process should we detect an
> inconsistency whilst waiting. After seeing a "missed interrupt", the
> next time we wait, we restart the heavy timer. This patch is more
> reluctant to restart the timer and will only do so if we have not see any
> interrupts since when we started the fake irq timer. If we are seeing
> interrupts, then the waiters are being woken normally and we had an
> incoherency that caused to miss last time - that is unlikely to reoccur
> and so taking the risk of stalling again seems pragmatic.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/intel_breadcrumbs.c | 20 ++++++++++++++++++--
>  1 file changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 83a8b67d6427..1719f9da13b8 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -107,6 +107,23 @@ static void irq_disable(struct intel_engine_cs *engine)
>  	spin_unlock(&engine->i915->irq_lock);
>  }
>
> +static bool use_fake_irq(const struct intel_breadcrumbs *b)
> +{
> +	const struct intel_engine_cs *engine =
> +		container_of(b, struct intel_engine_cs, breadcrumbs);
> +
> +	if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings))
> +		return false;
> +
> +	/* Only start with the heavy weight fake irq timer if we have not
> +	 * seen any interrupts since enabling it the first time. If the
> +	 * interrupts are still arriving, it means we made a mistake in our
> +	 * engine->seqno_barrier(), a timing error that should be transient
> +	 * and unlikely to reoccur.
> +	 */
> +	return atomic_read(&engine->irq_count) == b->hangcheck_interrupts;
> +}
> +
>  static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
>  {
>  	struct intel_engine_cs *engine =
> @@ -145,8 +162,7 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
>  		b->irq_enabled = true;
>  	}
>
> -	if (!b->irq_enabled ||
> -	    test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) {
> +	if (!b->irq_enabled || use_fake_irq(b)) {
>  		mod_timer(&b->fake_irq, jiffies + 1);
>  		i915_queue_hangcheck(i915);
>  	} else {
>

Very soothing now that I have discovered the pre-existing behaviour. :)

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/i915: Only start with the fake-irq timer if interrupts are dead
  2017-02-17 14:59           ` Tvrtko Ursulin
@ 2017-02-17 22:16             ` Chris Wilson
  0 siblings, 0 replies; 15+ messages in thread
From: Chris Wilson @ 2017-02-17 22:16 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Fri, Feb 17, 2017 at 02:59:44PM +0000, Tvrtko Ursulin wrote:
> 
> On 17/02/2017 11:55, Chris Wilson wrote:
> >As a backup to waiting on a user-interrupt from the GPU, we use a heavy
> >and frequent timer to wake up the waiting process should we detect an
> >inconsistency whilst waiting. After seeing a "missed interrupt", the
> >next time we wait, we restart the heavy timer. This patch is more
> >reluctant to restart the timer and will only do so if we have not see any
> >interrupts since when we started the fake irq timer. If we are seeing
> >interrupts, then the waiters are being woken normally and we had an
> >incoherency that caused to miss last time - that is unlikely to reoccur
> >and so taking the risk of stalling again seems pragmatic.
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> Very soothing now that I have discovered the pre-existing behaviour. :)
> 
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Thanks both of you for the review and suggestions, pushed to try and
soothe CI even more.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2017-02-17 22:16 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-17 10:18 [PATCH v2 1/3] drm/i915: Postpone fake breadcrumb interrupt until real interrupts cease Chris Wilson
2017-02-17 10:18 ` [PATCH v2 2/3] drm/i915: Break i915_spin_request() if we see an interrupt Chris Wilson
2017-02-17 10:18 ` [PATCH v2 3/3] drm/i915: Defer declaration of missed-interrupt until the waiter is asleep Chris Wilson
2017-02-17 10:48   ` Tvrtko Ursulin
2017-02-17 10:58     ` Chris Wilson
2017-02-17 11:33       ` Tvrtko Ursulin
2017-02-17 11:55         ` [PATCH] drm/i915: Only start with the fake-irq timer if interrupts are dead Chris Wilson
2017-02-17 14:59           ` Tvrtko Ursulin
2017-02-17 22:16             ` Chris Wilson
2017-02-17 11:35   ` [PATCH v2 3/3] drm/i915: Defer declaration of missed-interrupt until the waiter is asleep Mika Kuoppala
2017-02-17 11:43     ` Chris Wilson
2017-02-17 11:51       ` Chris Wilson
2017-02-17 12:00       ` Mika Kuoppala
2017-02-17 11:22 ` ✓ Fi.CI.BAT: success for series starting with [v2,1/3] drm/i915: Postpone fake breadcrumb interrupt until real interrupts cease Patchwork
2017-02-17 12:22 ` ✗ Fi.CI.BAT: warning for series starting with [v2,1/3] drm/i915: Postpone fake breadcrumb interrupt until real interrupts cease (rev2) Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.