All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 3/5] drm/i915: Harden detection of missed interrupts
Date: Tue, 16 Feb 2016 14:51:48 +0200	[thread overview]
Message-ID: <877fi4rf6j.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <1455623268-10023-4-git-send-email-chris@chris-wilson.co.uk>

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Only declare a missed interrupt if we find that the GPU is idle with
> waiters and a hangcheck interval has passed in which no new user
> interrupts have been raised.
>
> v2: Clear the stuck interrupt marker between successful batches
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c     | 11 +++++++----
>  drivers/gpu/drm/i915/i915_irq.c         | 10 +++++++++-
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  2 ++
>  3 files changed, 18 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index c4df580ed0de..f3ba97ad3e00 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -730,10 +730,10 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
>  static void i915_ring_seqno_info(struct seq_file *m,
>  				 struct intel_engine_cs *ring)
>  {
> -	if (ring->get_seqno) {
> -		seq_printf(m, "Current sequence (%s): %x\n",
> -			   ring->name, ring->get_seqno(ring));
> -	}
> +	seq_printf(m, "Current sequence (%s): %x\n",
> +		   ring->name, ring->get_seqno(ring));
> +	seq_printf(m, "Current user interrupts (%s): %x\n",
> +		   ring->name, READ_ONCE(ring->user_interrupts));
>  }
>  
>  static int i915_gem_seqno_info(struct seq_file *m, void *data)
> @@ -1361,6 +1361,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>  		seq_printf(m, "%s:\n", ring->name);
>  		seq_printf(m, "\tseqno = %x [current %x]\n",
>  			   ring->hangcheck.seqno, seqno[i]);
> +		seq_printf(m, "\tuser interrupts = %x [current %x]\n",
> +			   ring->hangcheck.user_interrupts,
> +			   ring->user_interrupts);
>  		seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
>  			   (long long)ring->hangcheck.acthd,
>  			   (long long)acthd[i]);
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 07bc2cdd6252..c0aeff607130 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1000,6 +1000,7 @@ static void notify_ring(struct intel_engine_cs *ring)
>  		return;
>  
>  	trace_i915_gem_request_notify(ring);
> +	ring->user_interrupts++;
>  
>  	wake_up_all(&ring->irq_queue);
>  }
> @@ -3097,6 +3098,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>  	for_each_ring(ring, dev_priv, i) {
>  		u64 acthd;
>  		u32 seqno;
> +		unsigned user_interrupts;
>  		bool busy = true;
>  
>  		semaphore_clear_deadlocks(dev_priv);
> @@ -3113,6 +3115,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>  
>  		acthd = intel_ring_get_active_head(ring);
>  		seqno = ring->get_seqno(ring);
> +		user_interrupts = READ_ONCE(ring->user_interrupts);
>  
>  		if (ring->hangcheck.seqno == seqno) {
>  			if (ring_idle(ring, seqno)) {
> @@ -3120,7 +3123,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>  
>  				if (waitqueue_active(&ring->irq_queue)) {
>  					/* Issue a wake-up to catch stuck h/w. */
> -					if (!test_and_set_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings)) {
> +					if (ring->hangcheck.user_interrupts == user_interrupts &&
> +					    !test_and_set_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings)) {
>  						if (!(dev_priv->gpu_error.test_irq_rings & intel_ring_flag(ring)))
>  							DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
>  								  ring->name);
> @@ -3183,10 +3187,14 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>  
>  			memset(ring->hangcheck.instdone, 0,
>  			       sizeof(ring->hangcheck.instdone));
> +
> +			/* Reset stuck interrupts between batch advances */
> +			user_interrupts = 0;
>  		}
>  
>  		ring->hangcheck.seqno = seqno;
>  		ring->hangcheck.acthd = acthd;
> +		ring->hangcheck.user_interrupts = user_interrupts;
>  		busy_count += busy;
>  	}
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 4cea04491392..dfb14bfe5bc8 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -90,6 +90,7 @@ struct intel_ring_hangcheck {
>  	u64 acthd;
>  	u64 max_acthd;
>  	u32 seqno;
> +	unsigned user_interrupts;
>  	int score;
>  	enum intel_ring_hangcheck_action action;
>  	int deadlock;
> @@ -306,6 +307,7 @@ struct  intel_engine_cs {
>  	 * inspecting request list.
>  	 */
>  	u32 last_submitted_seqno;
> +	unsigned user_interrupts;
>  
>  	bool gpu_caches_dirty;
>  
> -- 
> 2.7.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2016-02-16 12:53 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-16 11:47 Missed interrupt false positives mitigation Chris Wilson
2016-02-16 11:47 ` [PATCH 1/5] drm/i915: Remove forcewake dance from seqno/irq barrier on legacy gen6+ Chris Wilson
2016-02-16 11:47 ` [PATCH 2/5] drm/i915: Separate out the seqno-barrier from engine->get_seqno Chris Wilson
2016-02-16 12:51   ` Mika Kuoppala
2016-02-16 11:47 ` [PATCH 3/5] drm/i915: Harden detection of missed interrupts Chris Wilson
2016-02-16 12:51   ` Mika Kuoppala [this message]
2016-02-16 11:47 ` [PATCH 4/5] drm/i915: Use simplest form for flushing the single cacheline in the HWS Chris Wilson
2016-02-16 12:58   ` Mika Kuoppala
2016-02-19 11:49     ` Chris Wilson
2016-02-19 13:42   ` Mika Kuoppala
2016-02-16 11:47 ` [PATCH 5/5] drm/i915: Replace manual barrier() with READ_ONCE() in HWS accessor Chris Wilson
2016-02-16 12:14 ` ✗ Fi.CI.BAT: warning for series starting with [1/5] drm/i915: Remove forcewake dance from seqno/irq barrier on legacy gen6+ Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=877fi4rf6j.fsf@gaia.fi.intel.com \
    --to=mika.kuoppala@linux.intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.