Re: [PATCH v5 17/35] drm/i915: Added scheduler support to __wait_request() calls

From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
To: John.C.Harrison@Intel.com, Intel-GFX@Lists.FreeDesktop.Org
Subject: Re: [PATCH v5 17/35] drm/i915: Added scheduler support to __wait_request() calls
Date: Tue, 01 Mar 2016 12:02:10 +0200	[thread overview]
Message-ID: <1456826530.6867.27.camel@linux.intel.com> (raw)
In-Reply-To: <1455805644-6450-18-git-send-email-John.C.Harrison@Intel.com>

On to, 2016-02-18 at 14:27 +0000, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
> 
> The scheduler can cause batch buffers, and hence requests, to be
> submitted to the ring out of order and asynchronously to their
> submission to the driver. Thus at the point of waiting for the
> completion of a given request, it is not even guaranteed that the
> request has actually been sent to the hardware yet. Even it is has
> been sent, it is possible that it could be pre-empted and thus
> 'unsent'.
> 
> This means that it is necessary to be able to submit requests to the
> hardware during the wait call itself. Unfortunately, while some
> callers of __wait_request() release the mutex lock first, others do
> not (and apparently can not). Hence there is the ability to deadlock
> as the wait stalls for submission but the asynchronous submission is
> stalled for the mutex lock.
> 
> This change hooks the scheduler in to the __wait_request() code to
> ensure correct behaviour. That is, flush the target batch buffer
> through to the hardware and do not deadlock waiting for something that
> cannot currently be submitted. Instead, the wait call must return
> EAGAIN at least as far back as necessary to release the mutex lock and
> allow the scheduler's asynchronous processing to get in and handle the
> pre-emption operation and eventually (re-)submit the work.
> 
> v3: Removed the explicit scheduler flush from i915_wait_request().
> This is no longer necessary and was causing unintended changes to the
> scheduler priority level which broke a validation team test.
> 
> v4: Corrected the format of some comments to keep the style checker
> happy.
> 
> v5: Added function description. [Joonas Lahtinen]
> 
> For: VIZ-1587
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         |  3 ++-
>  drivers/gpu/drm/i915/i915_gem.c         | 37 ++++++++++++++++++++++++++-------
>  drivers/gpu/drm/i915/i915_scheduler.c   | 31 +++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_scheduler.h   |  2 ++
>  drivers/gpu/drm/i915/intel_display.c    |  5 +++--
>  drivers/gpu/drm/i915/intel_ringbuffer.c |  2 +-
>  6 files changed, 69 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 4d544f1..5eeeced 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3071,7 +3071,8 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
>  			unsigned reset_counter,
>  			bool interruptible,
>  			s64 *timeout,
> -			struct intel_rps_client *rps);
> +			struct intel_rps_client *rps,
> +			bool is_locked);

Multiple bool parameters are better converted to int flags, it makes
the callsite code more readable if you have enum I915_WAIT_REQ_* flags.

>  int __must_check i915_wait_request(struct drm_i915_gem_request *req);
>  int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
>  int __must_check
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 2dd9b55..17b44b3 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1258,7 +1258,8 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
>  			unsigned reset_counter,
>  			bool interruptible,
>  			s64 *timeout,
> -			struct intel_rps_client *rps)
> +			struct intel_rps_client *rps,
> +			bool is_locked)
>  {
>  	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
>  	struct drm_device *dev = ring->dev;
> @@ -1268,8 +1269,10 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
>  	DEFINE_WAIT(wait);
>  	unsigned long timeout_expire;
>  	s64 before = 0; /* Only to silence a compiler warning. */
> -	int ret;
> +	int ret = 0;
> +	bool    busy;

Strange whitespace.

>  
> +	might_sleep();

This will splat if is_locked is true? So maybe it should be protected
by if (!is_locked)?

>  	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
>  
>  	if (i915_gem_request_completed(req))
> @@ -1324,6 +1327,26 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
>  			break;
>  		}
>  
> +		if (is_locked) {
> +			/*
> +			 * If this request is being processed by the scheduler
> +			 * then it is unsafe to sleep with the mutex lock held
> +			 * as the scheduler may require the lock in order to
> +			 * progress the request.
> +			 */

This is becoming a monstrous function, I think it could be split down
to two functions, __i915_wait_request and
__i915_wait_request_locked/nonblocking, because if you take all the
timeout/sleeping related stuff away, not much is left.

> +			if (i915_scheduler_is_request_tracked(req, NULL, &busy)) {
> +				if (busy) {
> +					ret = -EAGAIN;
> +					break;
> +				}
> +			}

If this is kept in single function, would not it be enough to execute
this before the loop?

> +
> +			/*
> +			 * If the request is not tracked by the scheduler
> +			 * then the regular test can be done.
> +			 */
> +		}
> +
>  		if (i915_gem_request_completed(req)) {
>  			ret = 0;
>  			break;
> @@ -1542,7 +1565,7 @@ i915_wait_request(struct drm_i915_gem_request *req)
>  
>  	ret = __i915_wait_request(req,
>  				  atomic_read(&dev_priv->gpu_error.reset_counter),
> -				  interruptible, NULL, NULL);
> +				  interruptible, NULL, NULL, true);
>  	if (ret)
>  		return ret;
>  
> @@ -1655,7 +1678,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
>  	mutex_unlock(&dev->struct_mutex);
>  	for (i = 0; ret == 0 && i < n; i++)
>  		ret = __i915_wait_request(requests[i], reset_counter, true,
> -					  NULL, rps);
> +					  NULL, rps, false);
>  	mutex_lock(&dev->struct_mutex);
>  
>  	for (i = 0; i < n; i++) {
> @@ -3511,7 +3534,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  		if (ret == 0)
>  			ret = __i915_wait_request(req[i], reset_counter, true,
>  						  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
> -						  to_rps_client(file));
> +						  to_rps_client(file), false);
>  		i915_gem_request_unreference(req[i]);
>  	}
>  	return ret;
> @@ -3544,7 +3567,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
>  					  atomic_read(&i915->gpu_error.reset_counter),
>  					  i915->mm.interruptible,
>  					  NULL,
> -					  &i915->rps.semaphores);
> +					  &i915->rps.semaphores, true);
>  		if (ret)
>  			return ret;
>  
> @@ -4523,7 +4546,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
>  	if (target == NULL)
>  		return 0;
>  
> -	ret = __i915_wait_request(target, reset_counter, true, NULL, NULL);
> +	ret = __i915_wait_request(target, reset_counter, true, NULL, NULL, false);
>  	if (ret == 0)
>  		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
>  
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 60a59d3..edab63d 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -918,6 +918,37 @@ void i915_scheduler_work_handler(struct work_struct *work)
>  }
>  
>  /**
> + * i915_scheduler_is_request_tracked - return info to say what the scheduler's
> + * connection to this request is (if any).
> + * @req: request to be queried
> + * @compeleted: if non-null, set to completion status
> + * @busy: if non-null set to busy status
> + *
> + * Looks up the given request in the scheduler's internal queue and reports
> + * on whether the request has completed or is still pending.
> + * Returns true if the request was found or false if it was not.
> + */
> +bool i915_scheduler_is_request_tracked(struct drm_i915_gem_request *req,
> +				       bool *completed, bool *busy)
> +{
> +	struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
> +	struct i915_scheduler *scheduler = dev_priv->scheduler;
> +
> +	if (!scheduler)
> +		return false;
> +
> +	if (req->scheduler_qe == NULL)
> +		return false;
> +

These better be their own functions, making the code more readable.

_is_request_completed()
_is_request_busy()

> +	if (completed)
> +		*completed = I915_SQS_IS_COMPLETE(req->scheduler_qe);
> +	if (busy)
> +		*busy      = I915_SQS_IS_QUEUED(req->scheduler_qe);
> +
> +	return true;
> +}
> +
> +/**
>   * i915_scheduler_closefile - notify the scheduler that a DRM file handle
>   * has been closed.
>   * @dev: DRM device
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index 180d75f..a88adce 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -94,5 +94,7 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe);
>  bool i915_scheduler_notify_request(struct drm_i915_gem_request *req);
>  void i915_scheduler_wakeup(struct drm_device *dev);
>  void i915_scheduler_work_handler(struct work_struct *work);
> +bool i915_scheduler_is_request_tracked(struct drm_i915_gem_request *req,
> +				       bool *completed, bool *busy);
>  
>  #endif  /* _I915_SCHEDULER_H_ */
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 731d20a..5953590 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -11458,7 +11458,8 @@ static void intel_mmio_flip_work_func(struct work_struct *work)
>  		WARN_ON(__i915_wait_request(mmio_flip->req,
>  					    mmio_flip->crtc->reset_counter,
>  					    false, NULL,
> -					    &mmio_flip->i915->rps.mmioflips));
> +					    &mmio_flip->i915->rps.mmioflips,
> +					    false));
>  		i915_gem_request_unreference(mmio_flip->req);
>  	}
>  
> @@ -13523,7 +13524,7 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
>  
>  			ret = __i915_wait_request(intel_plane_state->wait_req,
>  						  reset_counter, true,
> -						  NULL, NULL);
> +						  NULL, NULL, false);
>  
>  			/* Swallow -EIO errors to allow updates during hw lockup. */
>  			if (ret == -EIO)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index ca7b8af..a2093f5 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -2304,7 +2304,7 @@ int intel_ring_idle(struct intel_engine_cs *ring)
>  	return __i915_wait_request(req,
>  				   atomic_read(&to_i915(ring->dev)->gpu_error.reset_counter),
>  				   to_i915(ring->dev)->mm.interruptible,
> -				   NULL, NULL);
> +				   NULL, NULL, true);
>  }
>  
>  int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx