All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 1/8] drm/i915: Serialise concurrent calls to i915_gem_set_wedged()
Date: Tue, 15 Jan 2019 13:56:11 +0200	[thread overview]
Message-ID: <878szmyw10.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <20190114210408.4561-2-chris@chris-wilson.co.uk>

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Make i915_gem_set_wedged() and i915_gem_unset_wedged() behave more
> consistently if called concurrently.

More explanation is needed here. Is the purpose to make the callers
take turns by waiting on the mutex, instead of racing on the bit?
Where is the inconsistency, though?

>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c               | 32 ++++++++++++++-----
>  drivers/gpu/drm/i915/i915_gpu_error.h         |  4 ++-
>  .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 +
>  3 files changed, 28 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 0bfed33178e1..910c49befc50 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3173,10 +3173,15 @@ static void nop_submit_request(struct i915_request *request)
>  
>  void i915_gem_set_wedged(struct drm_i915_private *i915)
>  {
> +	struct i915_gpu_error *error = &i915->gpu_error;
>  	struct intel_engine_cs *engine;
>  	enum intel_engine_id id;
>  
> -	GEM_TRACE("start\n");
> +	mutex_lock(&error->wedge_mutex);
> +	if (test_bit(I915_WEDGED, &error->flags)) {
> +		mutex_unlock(&error->wedge_mutex);
> +		return;
> +	}
>  
>  	if (GEM_SHOW_DEBUG()) {
>  		struct drm_printer p = drm_debug_printer(__func__);
> @@ -3185,8 +3190,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
>  			intel_engine_dump(engine, &p, "%s\n", engine->name);
>  	}
>  
> -	if (test_and_set_bit(I915_WEDGED, &i915->gpu_error.flags))
> -		goto out;
> +	GEM_TRACE("start\n");
>  
>  	/*
>  	 * First, stop submission to hw, but do not yet complete requests by
> @@ -3222,23 +3226,31 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
>  		intel_engine_wakeup(engine);
>  	}
>  
> -out:
> +	smp_mb__before_atomic();

I was wondering what state you want to guard against, given that you
now hold the mutex for wedging. But the answer must be: any other
external state. Make everything visible before flipping the bit.

-Mika

> +	set_bit(I915_WEDGED, &error->flags);
> +
>  	GEM_TRACE("end\n");
> +	mutex_unlock(&error->wedge_mutex);
>  
> -	wake_up_all(&i915->gpu_error.reset_queue);
> +	wake_up_all(&error->reset_queue);
>  }
>  
>  bool i915_gem_unset_wedged(struct drm_i915_private *i915)
>  {
> +	struct i915_gpu_error *error = &i915->gpu_error;
>  	struct i915_timeline *tl;
> +	bool ret = false;
>  
>  	lockdep_assert_held(&i915->drm.struct_mutex);
> -	if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
> +
> +	if (!test_bit(I915_WEDGED, &error->flags))
>  		return true;
>  
>  	if (!i915->gt.scratch) /* Never full initialised, recovery impossible */
>  		return false;
>  
> +	mutex_lock(&error->wedge_mutex);
> +
>  	GEM_TRACE("start\n");
>  
>  	/*
> @@ -3272,7 +3284,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
>  		 */
>  		if (dma_fence_default_wait(&rq->fence, true,
>  					   MAX_SCHEDULE_TIMEOUT) < 0)
> -			return false;
> +			goto unlock;
>  	}
>  	i915_retire_requests(i915);
>  	GEM_BUG_ON(i915->gt.active_requests);
> @@ -3295,8 +3307,11 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
>  
>  	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
>  	clear_bit(I915_WEDGED, &i915->gpu_error.flags);
> +	ret = true;
> +unlock:
> +	mutex_unlock(&i915->gpu_error.wedge_mutex);
>  
> -	return true;
> +	return ret;
>  }
>  
>  static void
> @@ -5692,6 +5707,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
>  			  i915_gem_idle_work_handler);
>  	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
>  	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
> +	mutex_init(&dev_priv->gpu_error.wedge_mutex);
>  
>  	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> index 6d9f45468ac1..604291f7762d 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -271,8 +271,8 @@ struct i915_gpu_error {
>  #define I915_RESET_BACKOFF	0
>  #define I915_RESET_HANDOFF	1
>  #define I915_RESET_MODESET	2
> +#define I915_RESET_ENGINE	3
>  #define I915_WEDGED		(BITS_PER_LONG - 1)
> -#define I915_RESET_ENGINE	(I915_WEDGED - I915_NUM_ENGINES)
>  
>  	/** Number of times an engine has been reset */
>  	u32 reset_engine_count[I915_NUM_ENGINES];
> @@ -283,6 +283,8 @@ struct i915_gpu_error {
>  	/** Reason for the current *global* reset */
>  	const char *reason;
>  
> +	struct mutex wedge_mutex; /* serialises wedging/unwedging */
> +
>  	/**
>  	 * Waitqueue to signal when a hang is detected. Used to for waiters
>  	 * to release the struct_mutex for the reset to procede.
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index 082809569681..3cda66292e76 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -188,6 +188,7 @@ struct drm_i915_private *mock_gem_device(void)
>  
>  	init_waitqueue_head(&i915->gpu_error.wait_queue);
>  	init_waitqueue_head(&i915->gpu_error.reset_queue);
> +	mutex_init(&i915->gpu_error.wedge_mutex);
>  
>  	i915->wq = alloc_ordered_workqueue("mock", 0);
>  	if (!i915->wq)
> -- 
> 2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2019-01-15 11:57 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-14 21:04 Mika's reward Chris Wilson
2019-01-14 21:04 ` [PATCH 1/8] drm/i915: Serialise concurrent calls to i915_gem_set_wedged() Chris Wilson
2019-01-15 11:56   ` Mika Kuoppala [this message]
2019-01-15 12:05     ` Chris Wilson
2019-01-16  9:27       ` Chris Wilson
2019-01-16 15:04         ` Mika Kuoppala
2019-01-14 21:04 ` [PATCH 2/8] drm/i915: Differentiate between ggtt->mutex and ppgtt->mutex Chris Wilson
2019-01-14 21:04 ` [PATCH 3/8] drm/i915: Pull all the reset functionality together into i915_reset.c Chris Wilson
2019-01-16 15:06   ` Mika Kuoppala
2019-01-16 15:31     ` Chris Wilson
2019-01-14 21:04 ` [PATCH 4/8] drm/i915: Make all GPU resets atomic Chris Wilson
2019-01-17 14:14   ` Mika Kuoppala
2019-01-14 21:04 ` [PATCH 5/8] drm/i915/guc: Disable global reset Chris Wilson
2019-01-17 14:24   ` Mika Kuoppala
2019-01-17 18:27     ` Daniele Ceraolo Spurio
2019-01-14 21:04 ` [PATCH 6/8] drm/i915: Remove GPU reset dependence on struct_mutex Chris Wilson
2019-01-14 21:04 ` [PATCH 7/8] drm/i915/selftests: Trim struct_mutex duration for set-wedged selftest Chris Wilson
2019-01-14 21:04 ` [PATCH 8/8] drm/i915: Issue engine resets onto idle engines Chris Wilson
2019-01-14 21:26 ` ✗ Fi.CI.BAT: failure for series starting with [1/8] drm/i915: Serialise concurrent calls to i915_gem_set_wedged() Patchwork
2019-01-14 21:59   ` Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=878szmyw10.fsf@gaia.fi.intel.com \
    --to=mika.kuoppala@linux.intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.