All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Fix __wait_seqno to use true infinite timeouts
@ 2013-08-07  1:03 Chris Wilson
  2013-08-13 13:36 ` Rodrigo Vivi
  0 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2013-08-07  1:03 UTC (permalink / raw)
  To: intel-gfx

Rather than continue to fix up the timeouts to work around the interface
impedence in wait_event_*(), open code the combination of
wait_event[_interruptible][_timeout]. And note the code size reduction,
and dare say readability?, in doing so..

Food for thought.
---
 drivers/gpu/drm/i915/i915_gem.c | 82 ++++++++++++++++++++---------------------
 1 file changed, 39 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 99ba622..8adbce9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1088,23 +1088,18 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
 			bool interruptible, struct timespec *timeout)
 {
 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
-	struct timespec before, now, wait_time={1,0};
-	unsigned long timeout_jiffies;
-	long end;
-	bool wait_forever = true;
+	struct timespec before, now;
+	DEFINE_WAIT(__wait);
+	long timeout_jiffies;
 	int ret;
 
 	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
 		return 0;
 
-	trace_i915_gem_request_wait_begin(ring, seqno);
-
-	if (timeout != NULL) {
-		wait_time = *timeout;
-		wait_forever = false;
-	}
+	if (timeout)
+		timeout_jiffies = timespec_to_jiffies_timeout(timeout);
 
-	timeout_jiffies = timespec_to_jiffies_timeout(&wait_time);
+	trace_i915_gem_request_wait_begin(ring, seqno);
 
 	if (WARN_ON(!ring->irq_get(ring)))
 		return -ENODEV;
@@ -1112,36 +1107,47 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
 	/* Record current time in case interrupted by signal, or wedged * */
 	getrawmonotonic(&before);
 
-#define EXIT_COND \
-	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
-	 i915_reset_in_progress(&dev_priv->gpu_error) || \
-	 reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
-	do {
-		if (interruptible)
-			end = wait_event_interruptible_timeout(ring->irq_queue,
-							       EXIT_COND,
-							       timeout_jiffies);
-		else
-			end = wait_event_timeout(ring->irq_queue, EXIT_COND,
-						 timeout_jiffies);
+	for (;;) {
+		prepare_to_wait(&ring->irq_queue, &__wait,
+				interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
 
 		/* We need to check whether any gpu reset happened in between
 		 * the caller grabbing the seqno and now ... */
-		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
-			end = -EAGAIN;
+		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
+			/* ... but upgrade the -EGAIN to an -EIO if the gpu
+			 * is truely gone. */
+			ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
+			if (ret == 0)
+				ret = -EAGAIN;
+			break;
+		}
 
-		/* ... but upgrade the -EGAIN to an -EIO if the gpu is truely
-		 * gone. */
-		ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
-		if (ret)
-			end = ret;
-	} while (end == 0 && wait_forever);
+		if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) {
+			ret = 0;
+			break;
+		}
+
+		if (interruptible && signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+
+		if (timeout) {
+			if (timeout_jiffies == 0) {
+				ret = -ETIME;
+				break;
+			}
+
+			timeout_jiffies = schedule_timeout(timeout_jiffies);
+		} else
+			schedule();
+	}
+	finish_wait(&ring->irq_queue, &__wait);
 
 	getrawmonotonic(&now);
 
 	ring->irq_put(ring);
 	trace_i915_gem_request_wait_end(ring, seqno);
-#undef EXIT_COND
 
 	if (timeout) {
 		struct timespec sleep_time = timespec_sub(now, before);
@@ -1150,17 +1156,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
 			set_normalized_timespec(timeout, 0, 0);
 	}
 
-	switch (end) {
-	case -EIO:
-	case -EAGAIN: /* Wedged */
-	case -ERESTARTSYS: /* Signal */
-		return (int)end;
-	case 0: /* Timeout */
-		return -ETIME;
-	default: /* Completed */
-		WARN_ON(end < 0); /* We're not aware of other errors */
-		return 0;
-	}
+	return ret;
 }
 
 /**
-- 
1.8.4.rc1

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/i915: Fix __wait_seqno to use true infinite timeouts
  2013-08-07  1:03 [PATCH] drm/i915: Fix __wait_seqno to use true infinite timeouts Chris Wilson
@ 2013-08-13 13:36 ` Rodrigo Vivi
  2013-08-13 13:39   ` Chris Wilson
  0 siblings, 1 reply; 9+ messages in thread
From: Rodrigo Vivi @ 2013-08-13 13:36 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

applied to experimental drm-intel-collector.

drivers/gpu/drm/i915/i915_gem.c: In function ‘__wait_seqno’:
drivers/gpu/drm/i915/i915_gem.c:1033:20: warning: ‘timeout_jiffies’ may be used
+uninitialized in this function



On Tue, Aug 6, 2013 at 10:03 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Rather than continue to fix up the timeouts to work around the interface
> impedence in wait_event_*(), open code the combination of
> wait_event[_interruptible][_timeout]. And note the code size reduction,
> and dare say readability?, in doing so..
>
> Food for thought.
> ---
>  drivers/gpu/drm/i915/i915_gem.c | 82 ++++++++++++++++++++---------------------
>  1 file changed, 39 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 99ba622..8adbce9 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1088,23 +1088,18 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
>                         bool interruptible, struct timespec *timeout)
>  {
>         drm_i915_private_t *dev_priv = ring->dev->dev_private;
> -       struct timespec before, now, wait_time={1,0};
> -       unsigned long timeout_jiffies;
> -       long end;
> -       bool wait_forever = true;
> +       struct timespec before, now;
> +       DEFINE_WAIT(__wait);
> +       long timeout_jiffies;
>         int ret;
>
>         if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
>                 return 0;
>
> -       trace_i915_gem_request_wait_begin(ring, seqno);
> -
> -       if (timeout != NULL) {
> -               wait_time = *timeout;
> -               wait_forever = false;
> -       }
> +       if (timeout)
> +               timeout_jiffies = timespec_to_jiffies_timeout(timeout);
>
> -       timeout_jiffies = timespec_to_jiffies_timeout(&wait_time);
> +       trace_i915_gem_request_wait_begin(ring, seqno);
>
>         if (WARN_ON(!ring->irq_get(ring)))
>                 return -ENODEV;
> @@ -1112,36 +1107,47 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
>         /* Record current time in case interrupted by signal, or wedged * */
>         getrawmonotonic(&before);
>
> -#define EXIT_COND \
> -       (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
> -        i915_reset_in_progress(&dev_priv->gpu_error) || \
> -        reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
> -       do {
> -               if (interruptible)
> -                       end = wait_event_interruptible_timeout(ring->irq_queue,
> -                                                              EXIT_COND,
> -                                                              timeout_jiffies);
> -               else
> -                       end = wait_event_timeout(ring->irq_queue, EXIT_COND,
> -                                                timeout_jiffies);
> +       for (;;) {
> +               prepare_to_wait(&ring->irq_queue, &__wait,
> +                               interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
>
>                 /* We need to check whether any gpu reset happened in between
>                  * the caller grabbing the seqno and now ... */
> -               if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
> -                       end = -EAGAIN;
> +               if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
> +                       /* ... but upgrade the -EGAIN to an -EIO if the gpu
> +                        * is truely gone. */
> +                       ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
> +                       if (ret == 0)
> +                               ret = -EAGAIN;
> +                       break;
> +               }
>
> -               /* ... but upgrade the -EGAIN to an -EIO if the gpu is truely
> -                * gone. */
> -               ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
> -               if (ret)
> -                       end = ret;
> -       } while (end == 0 && wait_forever);
> +               if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) {
> +                       ret = 0;
> +                       break;
> +               }
> +
> +               if (interruptible && signal_pending(current)) {
> +                       ret = -ERESTARTSYS;
> +                       break;
> +               }
> +
> +               if (timeout) {
> +                       if (timeout_jiffies == 0) {
> +                               ret = -ETIME;
> +                               break;
> +                       }
> +
> +                       timeout_jiffies = schedule_timeout(timeout_jiffies);
> +               } else
> +                       schedule();
> +       }
> +       finish_wait(&ring->irq_queue, &__wait);
>
>         getrawmonotonic(&now);
>
>         ring->irq_put(ring);
>         trace_i915_gem_request_wait_end(ring, seqno);
> -#undef EXIT_COND
>
>         if (timeout) {
>                 struct timespec sleep_time = timespec_sub(now, before);
> @@ -1150,17 +1156,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
>                         set_normalized_timespec(timeout, 0, 0);
>         }
>
> -       switch (end) {
> -       case -EIO:
> -       case -EAGAIN: /* Wedged */
> -       case -ERESTARTSYS: /* Signal */
> -               return (int)end;
> -       case 0: /* Timeout */
> -               return -ETIME;
> -       default: /* Completed */
> -               WARN_ON(end < 0); /* We're not aware of other errors */
> -               return 0;
> -       }
> +       return ret;
>  }
>
>  /**
> --
> 1.8.4.rc1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx



-- 
Rodrigo Vivi
Blog: http://blog.vivi.eng.br

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/i915: Fix __wait_seqno to use true infinite timeouts
  2013-08-13 13:36 ` Rodrigo Vivi
@ 2013-08-13 13:39   ` Chris Wilson
  2013-08-13 14:04     ` Daniel Vetter
  0 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2013-08-13 13:39 UTC (permalink / raw)
  To: Rodrigo Vivi; +Cc: intel-gfx

On Tue, Aug 13, 2013 at 10:36:04AM -0300, Rodrigo Vivi wrote:
> applied to experimental drm-intel-collector.
> 
> drivers/gpu/drm/i915/i915_gem.c: In function ‘__wait_seqno’:
> drivers/gpu/drm/i915/i915_gem.c:1033:20: warning: ‘timeout_jiffies’ may be used
> +uninitialized in this function

Dumb gcc.

Note the lack of sign-off. Ben would hate this patch since it may unmask
simulator bugs.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/i915: Fix __wait_seqno to use true infinite timeouts
  2013-08-13 13:39   ` Chris Wilson
@ 2013-08-13 14:04     ` Daniel Vetter
  0 siblings, 0 replies; 9+ messages in thread
From: Daniel Vetter @ 2013-08-13 14:04 UTC (permalink / raw)
  To: Chris Wilson, Rodrigo Vivi, intel-gfx

On Tue, Aug 13, 2013 at 02:39:55PM +0100, Chris Wilson wrote:
> On Tue, Aug 13, 2013 at 10:36:04AM -0300, Rodrigo Vivi wrote:
> > applied to experimental drm-intel-collector.
> > 
> > drivers/gpu/drm/i915/i915_gem.c: In function ‘__wait_seqno’:
> > drivers/gpu/drm/i915/i915_gem.c:1033:20: warning: ‘timeout_jiffies’ may be used
> > +uninitialized in this function
> 
> Dumb gcc.
> 
> Note the lack of sign-off. Ben would hate this patch since it may unmask
> simulator bugs.

Hm, I'd prefer it with sob line since I very much want to again have good
reporting on missed interrupts (instead of silently papering over them).
If the simulator is broken we need to apply special duct-tape for it
(maybe in the form of a timer that regularly checks the seqno), since 1s
delay is simply too slow, even for simulators.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH] drm/i915: Fix __wait_seqno to use true infinite timeouts
@ 2013-08-23 11:12 Chris Wilson
  0 siblings, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2013-08-23 11:12 UTC (permalink / raw)
  To: intel-gfx

When we switched to always using a timeout in conjunction with
wait_seqno, we lost the ability to detect missed interrupts. Since, we
have had issues with interrupts on a number of generations, and they are
required to be delivered in a timely fashion for a smooth UX, it is
important that we do log errors found in the wild and prevent the
display stalling for upwards of 1s every time the seqno interrupt is
missed.

Rather than continue to fix up the timeouts to work around the interface
impedence in wait_event_*(), open code the combination of
wait_event[_interruptible][_timeout], and use the exposed timer to
poll for seqno should we detect a lost interrupt.

v2: In order to satisfy the debug requirement of logging missed
interrupts with the real world requirments of making machines work even
if interrupts are hosed, we revert to polling after detecting a missed
interrupt.

v3: Throw in a debugfs interface to simulate broken hw not reporting
interrupts.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c   |  68 ++++++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.h       |   5 ++
 drivers/gpu/drm/i915/i915_gem.c       | 114 ++++++++++++++++++++--------------
 drivers/gpu/drm/i915/i915_gpu_error.c |   1 +
 drivers/gpu/drm/i915/i915_irq.c       |  11 ++--
 5 files changed, 148 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 8356254..320fb03 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1870,6 +1870,72 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_ring_stop_fops,
 			i915_ring_stop_get, i915_ring_stop_set,
 			"0x%08llx\n");
 
+static int
+i915_ring_missed_irq_get(void *data, u64 *val)
+{
+	struct drm_device *dev = data;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+
+	*val = dev_priv->gpu_error.missed_irq_rings;
+	return 0;
+}
+
+static int
+i915_ring_missed_irq_set(void *data, u64 val)
+{
+	struct drm_device *dev = data;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
+	/* Lock against concurrent debugfs callers */
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+	dev_priv->gpu_error.missed_irq_rings = val;
+	mutex_unlock(&dev->struct_mutex);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(i915_ring_missed_irq_fops,
+			i915_ring_missed_irq_get, i915_ring_missed_irq_set,
+			"0x%08llx\n");
+
+static int
+i915_ring_test_irq_get(void *data, u64 *val)
+{
+	struct drm_device *dev = data;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+
+	*val = dev_priv->gpu_error.test_irq_rings;
+
+	return 0;
+}
+
+static int
+i915_ring_test_irq_set(void *data, u64 val)
+{
+	struct drm_device *dev = data;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
+	DRM_DEBUG_DRIVER("Masking interrupts on rings 0x%08llx\n", val);
+
+	/* Lock against concurrent debugfs callers */
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	dev_priv->gpu_error.test_irq_rings = val;
+	mutex_unlock(&dev->struct_mutex);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops,
+			i915_ring_test_irq_get, i915_ring_test_irq_set,
+			"0x%08llx\n");
+
 #define DROP_UNBOUND 0x1
 #define DROP_BOUND 0x2
 #define DROP_RETIRE 0x4
@@ -2263,6 +2329,8 @@ static struct i915_debugfs_files {
 	{"i915_min_freq", &i915_min_freq_fops},
 	{"i915_cache_sharing", &i915_cache_sharing_fops},
 	{"i915_ring_stop", &i915_ring_stop_fops},
+	{"i915_ring_missed_irq", &i915_ring_missed_irq_fops},
+	{"i915_ring_test_irq", &i915_ring_test_irq_fops},
 	{"i915_gem_drop_caches", &i915_drop_caches_fops},
 	{"i915_error_state", &i915_error_state_fops},
 	{"i915_next_seqno", &i915_next_seqno_fops},
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 321159f..45c7790 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -994,6 +994,8 @@ struct i915_gpu_error {
 
 	unsigned long last_reset;
 
+	unsigned long missed_irq_rings;
+
 	/**
 	 * State variable and reset counter controlling the reset flow
 	 *
@@ -1032,6 +1034,9 @@ struct i915_gpu_error {
 
 	/* For gpu hang simulation. */
 	unsigned int stop_rings;
+
+	/* For missed irq/seqno simulation. */
+	unsigned int test_irq_rings;
 };
 
 enum modeset_restore {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4f1ca12..a8034c0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1067,6 +1067,17 @@ i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
 	return ret;
 }
 
+static void fake_irq(unsigned long data)
+{
+	wake_up_process((struct task_struct *)data);
+}
+
+static bool missed_irq(struct drm_i915_private *dev_priv,
+		       struct intel_ring_buffer *ring)
+{
+	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
+}
+
 /**
  * __wait_seqno - wait until execution of seqno has finished
  * @ring: the ring expected to report seqno
@@ -1090,10 +1101,9 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
 			bool interruptible, struct timespec *timeout)
 {
 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
-	struct timespec before, now, wait_time={1,0};
-	unsigned long timeout_jiffies;
-	long end;
-	bool wait_forever = true;
+	struct timespec before, now;
+	DEFINE_WAIT(wait);
+	long timeout_jiffies;
 	int ret;
 
 	WARN(dev_priv->pc8.irqs_disabled, "IRQs disabled\n");
@@ -1101,51 +1111,71 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
 	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
 		return 0;
 
-	trace_i915_gem_request_wait_begin(ring, seqno);
-
-	if (timeout != NULL) {
-		wait_time = *timeout;
-		wait_forever = false;
-	}
-
-	timeout_jiffies = timespec_to_jiffies_timeout(&wait_time);
+	timeout_jiffies = timeout ? timespec_to_jiffies_timeout(timeout) : 1;
 
-	if (WARN_ON(!ring->irq_get(ring)))
+	if (!(dev_priv->gpu_error.test_irq_rings & intel_ring_flag(ring)) &&
+	    WARN_ON(!ring->irq_get(ring)))
 		return -ENODEV;
 
-	/* Record current time in case interrupted by signal, or wedged * */
+	/* Record current time in case interrupted by signal, or wedged */
+	trace_i915_gem_request_wait_begin(ring, seqno);
 	getrawmonotonic(&before);
+	for (;;) {
+		struct timer_list timer;
+		unsigned long expire;
 
-#define EXIT_COND \
-	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
-	 i915_reset_in_progress(&dev_priv->gpu_error) || \
-	 reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
-	do {
-		if (interruptible)
-			end = wait_event_interruptible_timeout(ring->irq_queue,
-							       EXIT_COND,
-							       timeout_jiffies);
-		else
-			end = wait_event_timeout(ring->irq_queue, EXIT_COND,
-						 timeout_jiffies);
+		prepare_to_wait(&ring->irq_queue, &wait,
+				interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
 
 		/* We need to check whether any gpu reset happened in between
 		 * the caller grabbing the seqno and now ... */
-		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
-			end = -EAGAIN;
+		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
+			/* ... but upgrade the -EGAIN to an -EIO if the gpu
+			 * is truely gone. */
+			ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
+			if (ret == 0)
+				ret = -EAGAIN;
+			break;
+		}
 
-		/* ... but upgrade the -EGAIN to an -EIO if the gpu is truely
-		 * gone. */
-		ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
-		if (ret)
-			end = ret;
-	} while (end == 0 && wait_forever);
+		if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) {
+			ret = 0;
+			break;
+		}
+
+		if (interruptible && signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+
+		if (timeout_jiffies <= 0) {
+			ret = -ETIME;
+			break;
+		}
 
+		timer.function = NULL;
+		if (timeout || missed_irq(dev_priv, ring)) {
+			setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
+			expire = jiffies + (missed_irq(dev_priv, ring) ? 1: timeout_jiffies);
+			mod_timer(&timer, expire);
+		}
+
+		schedule();
+
+		if (timeout)
+			timeout_jiffies = expire - jiffies;
+
+		if (timer.function) {
+			del_singleshot_timer_sync(&timer);
+			destroy_timer_on_stack(&timer);
+		}
+	}
 	getrawmonotonic(&now);
+	trace_i915_gem_request_wait_end(ring, seqno);
 
 	ring->irq_put(ring);
-	trace_i915_gem_request_wait_end(ring, seqno);
-#undef EXIT_COND
+
+	finish_wait(&ring->irq_queue, &wait);
 
 	if (timeout) {
 		struct timespec sleep_time = timespec_sub(now, before);
@@ -1154,17 +1184,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
 			set_normalized_timespec(timeout, 0, 0);
 	}
 
-	switch (end) {
-	case -EIO:
-	case -EAGAIN: /* Wedged */
-	case -ERESTARTSYS: /* Signal */
-		return (int)end;
-	case 0: /* Timeout */
-		return -ETIME;
-	default: /* Completed */
-		WARN_ON(end < 0); /* We're not aware of other errors */
-		return 0;
-	}
+	return ret;
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index fe47274..e8955e7 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -289,6 +289,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake);
 	err_printf(m, "DERRMR: 0x%08x\n", error->derrmr);
 	err_printf(m, "CCID: 0x%08x\n", error->ccid);
+	err_printf(m, "Missed interrupts: 0x%08lx\n", dev_priv->gpu_error.missed_irq_rings);
 
 	for (i = 0; i < dev_priv->num_fence_regs; i++)
 		err_printf(m, "  fence[%d] = %08llx\n", i, error->fence[i]);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b7532cb..d268b6d 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1992,10 +1992,13 @@ static void i915_hangcheck_elapsed(unsigned long data)
 			if (ring_idle(ring, seqno)) {
 				if (waitqueue_active(&ring->irq_queue)) {
 					/* Issue a wake-up to catch stuck h/w. */
-					DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
-						  ring->name);
-					wake_up_all(&ring->irq_queue);
-					ring->hangcheck.score += HUNG;
+					if (!test_and_set_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings)) {
+						DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
+							  ring->name);
+						wake_up_all(&ring->irq_queue);
+					}
+					/* Safeguard against driver failure */
+					ring->hangcheck.score += BUSY;
 				} else
 					busy = false;
 			} else {
-- 
1.8.4.rc3

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/i915: Fix __wait_seqno to use true infinite timeouts
  2013-08-23  8:06   ` Chris Wilson
@ 2013-08-23  8:37     ` Daniel Vetter
  0 siblings, 0 replies; 9+ messages in thread
From: Daniel Vetter @ 2013-08-23  8:37 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, intel-gfx

On Fri, Aug 23, 2013 at 10:06 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>> Yeah, I think this approach should work. A few comments:
>> - I think we need a debugfs file to shut the safety quirk off - when
>> testing on a machine where we actually miss interrupts it might be
>> usful to get the warning output every time.
>
> I did consider a modparam. Exposing it would indeed necessitate some
> protection against concurrent modificatin.
>
>> - I'd go for a new bool dev_priv->unreliable_seqno_signalling or so to
>> avoid any races due to the rmw cycle you now do on dev_priv->quirks.
>
> There isn't a race during writing, as hangcheck should never be run
> concurrently (or at least any concurrent calls filtered out at the start
> of the function). The read side is inherently racey.

It's more that thus far we've used dev_priv->quirks only for stuff
which never changes, now we have a quirk which gets only set at
runtime. It just feels conceptually wrong ;-) And if we add a 2nd such
quirk it'll break a bit, hence why I'd prefer a distinct piece of
state tracking

>> - I'd have opted for a faster timeout of the fake irq, but one that rearms.
>
> Whoops, that was a mistake. The intention was to run at 100Hz, do you
> want even faster? We could switch to a hrtimer and kill two birds with
> one stone (as timer is singleshot only).

I'd simply go with a 1 jiffies rearming timer.

>> Also I'd love to be able to test all this (both the missed irq
>> detection stuff and the fake irq) but I don't have a good idea right
>> now ... So I guess we need to again hope that it won't break too
>> quickly (since it eventually will break again).
>
> Disable the call to ring->get_irq. Perhaps the high word of
> dev_priv->stop_rings?

Yeah, something like stop_ring_irqs or so could work, maybe by
wrapping the wake_up_all calls into a little helper like wake_up_ring
which noops if the respective bit is set in stop_ring_irqs. But I'm
not sure whether this is worth the fuzz. Otoh we've just proven that
for untested code the question isn't if it breaks, but when ...
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/i915: Fix __wait_seqno to use true infinite timeouts
  2013-08-23  7:27 ` Daniel Vetter
@ 2013-08-23  8:06   ` Chris Wilson
  2013-08-23  8:37     ` Daniel Vetter
  0 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2013-08-23  8:06 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Fri, Aug 23, 2013 at 09:27:42AM +0200, Daniel Vetter wrote:
> On Fri, Aug 23, 2013 at 3:05 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > Rather than continue to fix up the timeouts to work around the interface
> > impedence in wait_event_*(), open code the combination of
> > wait_event[_interruptible][_timeout]. And note the code size reduction,
> > and dare say readability?, in doing so..
> >
> > v2: In order to satisfy the debug requirement of logging missed
> > interrupts with the real world requirments of making machines work even
> > if interrupts are hosed, we revert to polling after detecting a missed
> > interrupt.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> 
> Yeah, I think this approach should work. A few comments:
> - I think we need a debugfs file to shut the safety quirk off - when
> testing on a machine where we actually miss interrupts it might be
> usful to get the warning output every time.

I did consider a modparam. Exposing it would indeed necessitate some
protection against concurrent modificatin.

> - I'd go for a new bool dev_priv->unreliable_seqno_signalling or so to
> avoid any races due to the rmw cycle you now do on dev_priv->quirks.

There isn't a race during writing, as hangcheck should never be run
concurrently (or at least any concurrent calls filtered out at the start
of the function). The read side is inherently racey.

> - I'd have opted for a faster timeout of the fake irq, but one that rearms.

Whoops, that was a mistake. The intention was to run at 100Hz, do you
want even faster? We could switch to a hrtimer and kill two birds with
one stone (as timer is singleshot only).
 
> Also I'd love to be able to test all this (both the missed irq
> detection stuff and the fake irq) but I don't have a good idea right
> now ... So I guess we need to again hope that it won't break too
> quickly (since it eventually will break again).

Disable the call to ring->get_irq. Perhaps the high word of
dev_priv->stop_rings?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/i915: Fix __wait_seqno to use true infinite timeouts
  2013-08-23  1:05 Chris Wilson
@ 2013-08-23  7:27 ` Daniel Vetter
  2013-08-23  8:06   ` Chris Wilson
  0 siblings, 1 reply; 9+ messages in thread
From: Daniel Vetter @ 2013-08-23  7:27 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, Aug 23, 2013 at 3:05 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Rather than continue to fix up the timeouts to work around the interface
> impedence in wait_event_*(), open code the combination of
> wait_event[_interruptible][_timeout]. And note the code size reduction,
> and dare say readability?, in doing so..
>
> v2: In order to satisfy the debug requirement of logging missed
> interrupts with the real world requirments of making machines work even
> if interrupts are hosed, we revert to polling after detecting a missed
> interrupt.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>


Yeah, I think this approach should work. A few comments:
- I think we need a debugfs file to shut the safety quirk off - when
testing on a machine where we actually miss interrupts it might be
usful to get the warning output every time.
- I'd go for a new bool dev_priv->unreliable_seqno_signalling or so to
avoid any races due to the rmw cycle you now do on dev_priv->quirks.
- I'd have opted for a faster timeout of the fake irq, but one that rearms.

Also I'd love to be able to test all this (both the missed irq
detection stuff and the fake irq) but I don't have a good idea right
now ... So I guess we need to again hope that it won't break too
quickly (since it eventually will break again).
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH] drm/i915: Fix __wait_seqno to use true infinite timeouts
@ 2013-08-23  1:05 Chris Wilson
  2013-08-23  7:27 ` Daniel Vetter
  0 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2013-08-23  1:05 UTC (permalink / raw)
  To: intel-gfx

Rather than continue to fix up the timeouts to work around the interface
impedence in wait_event_*(), open code the combination of
wait_event[_interruptible][_timeout]. And note the code size reduction,
and dare say readability?, in doing so..

v2: In order to satisfy the debug requirement of logging missed
interrupts with the real world requirments of making machines work even
if interrupts are hosed, we revert to polling after detecting a missed
interrupt.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h |   1 +
 drivers/gpu/drm/i915/i915_gem.c | 103 ++++++++++++++++++++++------------------
 drivers/gpu/drm/i915/i915_irq.c |   8 ++--
 3 files changed, 63 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 321159f..ad298f4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -658,6 +658,7 @@ enum intel_sbi_destination {
 #define QUIRK_LVDS_SSC_DISABLE (1<<1)
 #define QUIRK_INVERT_BRIGHTNESS (1<<2)
 #define QUIRK_NO_PCH_PWM_ENABLE (1<<3)
+#define QUIRK_MISSED_SEQNO_INTERRUPTS (1<<4)
 
 struct intel_fbdev;
 struct intel_fbc_work;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4f1ca12..b265def 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1067,6 +1067,11 @@ i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
 	return ret;
 }
 
+static void fake_irq(unsigned long data)
+{
+	wake_up((wait_queue_head_t *)data);
+}
+
 /**
  * __wait_seqno - wait until execution of seqno has finished
  * @ring: the ring expected to report seqno
@@ -1090,10 +1095,9 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
 			bool interruptible, struct timespec *timeout)
 {
 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
-	struct timespec before, now, wait_time={1,0};
-	unsigned long timeout_jiffies;
-	long end;
-	bool wait_forever = true;
+	struct timespec before, now;
+	DEFINE_WAIT(wait);
+	long timeout_jiffies;
 	int ret;
 
 	WARN(dev_priv->pc8.irqs_disabled, "IRQs disabled\n");
@@ -1101,51 +1105,68 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
 	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
 		return 0;
 
-	trace_i915_gem_request_wait_begin(ring, seqno);
-
-	if (timeout != NULL) {
-		wait_time = *timeout;
-		wait_forever = false;
-	}
-
-	timeout_jiffies = timespec_to_jiffies_timeout(&wait_time);
+	timeout_jiffies = timeout ? timespec_to_jiffies_timeout(timeout) : 0;
 
 	if (WARN_ON(!ring->irq_get(ring)))
 		return -ENODEV;
 
-	/* Record current time in case interrupted by signal, or wedged * */
+	/* Record current time in case interrupted by signal, or wedged */
+	trace_i915_gem_request_wait_begin(ring, seqno);
 	getrawmonotonic(&before);
+	for (;;) {
+		struct timer_list timer;
 
-#define EXIT_COND \
-	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
-	 i915_reset_in_progress(&dev_priv->gpu_error) || \
-	 reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
-	do {
-		if (interruptible)
-			end = wait_event_interruptible_timeout(ring->irq_queue,
-							       EXIT_COND,
-							       timeout_jiffies);
-		else
-			end = wait_event_timeout(ring->irq_queue, EXIT_COND,
-						 timeout_jiffies);
+		prepare_to_wait(&ring->irq_queue, &wait,
+				interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
 
 		/* We need to check whether any gpu reset happened in between
 		 * the caller grabbing the seqno and now ... */
-		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
-			end = -EAGAIN;
+		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
+			/* ... but upgrade the -EGAIN to an -EIO if the gpu
+			 * is truely gone. */
+			ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
+			if (ret == 0)
+				ret = -EAGAIN;
+			break;
+		}
 
-		/* ... but upgrade the -EGAIN to an -EIO if the gpu is truely
-		 * gone. */
-		ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
-		if (ret)
-			end = ret;
-	} while (end == 0 && wait_forever);
+		if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) {
+			ret = 0;
+			break;
+		}
 
+		if (interruptible && signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+
+		timer.function = NULL;
+		if (dev_priv->quirks & QUIRK_MISSED_SEQNO_INTERRUPTS) {
+			setup_timer_on_stack(&timer, fake_irq, (unsigned long)&ring->irq_queue);
+			mod_timer(&timer, round_jiffies_up_relative(HZ/100));
+		}
+
+		if (timeout) {
+			if (timeout_jiffies == 0) {
+				ret = -ETIME;
+				break;
+			}
+
+			timeout_jiffies = schedule_timeout(timeout_jiffies);
+		} else
+			schedule();
+
+		if (timer.function) {
+			del_singleshot_timer_sync(&timer);
+			destroy_timer_on_stack(&timer);
+		}
+	}
 	getrawmonotonic(&now);
+	trace_i915_gem_request_wait_end(ring, seqno);
 
 	ring->irq_put(ring);
-	trace_i915_gem_request_wait_end(ring, seqno);
-#undef EXIT_COND
+
+	finish_wait(&ring->irq_queue, &wait);
 
 	if (timeout) {
 		struct timespec sleep_time = timespec_sub(now, before);
@@ -1154,17 +1175,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
 			set_normalized_timespec(timeout, 0, 0);
 	}
 
-	switch (end) {
-	case -EIO:
-	case -EAGAIN: /* Wedged */
-	case -ERESTARTSYS: /* Signal */
-		return (int)end;
-	case 0: /* Timeout */
-		return -ETIME;
-	default: /* Completed */
-		WARN_ON(end < 0); /* We're not aware of other errors */
-		return 0;
-	}
+	return ret;
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b7532cb..fefb87a 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1992,10 +1992,12 @@ static void i915_hangcheck_elapsed(unsigned long data)
 			if (ring_idle(ring, seqno)) {
 				if (waitqueue_active(&ring->irq_queue)) {
 					/* Issue a wake-up to catch stuck h/w. */
-					DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
-						  ring->name);
+					if ((dev_priv->quirks & QUIRK_MISSED_SEQNO_INTERRUPTS) == 0) {
+						DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
+							  ring->name);
+						dev_priv->quirks |= QUIRK_MISSED_SEQNO_INTERRUPTS;
+					}
 					wake_up_all(&ring->irq_queue);
-					ring->hangcheck.score += HUNG;
 				} else
 					busy = false;
 			} else {
-- 
1.8.4.rc3

^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2013-08-23 11:12 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-08-07  1:03 [PATCH] drm/i915: Fix __wait_seqno to use true infinite timeouts Chris Wilson
2013-08-13 13:36 ` Rodrigo Vivi
2013-08-13 13:39   ` Chris Wilson
2013-08-13 14:04     ` Daniel Vetter
2013-08-23  1:05 Chris Wilson
2013-08-23  7:27 ` Daniel Vetter
2013-08-23  8:06   ` Chris Wilson
2013-08-23  8:37     ` Daniel Vetter
2013-08-23 11:12 Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.