All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Tvrtko Ursulin <tursulin@ursulin.net>, Intel-gfx@lists.freedesktop.org
Subject: Re: [RFC] drm/i915: Simplify waiting for registers
Date: Wed, 12 Apr 2017 11:42:55 +0100	[thread overview]
Message-ID: <6a9e0046-54f0-2aca-b204-2f88168bef26@linux.intel.com> (raw)
In-Reply-To: <20170412103624.13384-1-tvrtko.ursulin@linux.intel.com>


On 12/04/2017 11:36, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> I was thinking if we could get away with simplifying the API
> a bit by getting rid of the _fw variant and only have these
> three functions with a common implementation:
>
>   intel_wait_for_register
>   intel_wait_for_register_atomic
>   __intel_wait_for_register
>
> The fast/busy loop in all cases grabs it's own forcewake and
> is done under the uncore lock. The extra overhead for call
> sites which already have the forcewake, or do not need it is
> there, but not sure that it matters for where wait_for_register
> functions are used.

This is probably quite bad for pcode, since AFAIR those can be quite 
slow. So scratch this idea I think..

Regards,

Tvrtko

> This makes the difference between the normal and atomic API
> just in the fact atomic doesn't sleep while the normal can.
>
> I've also put in the change to move the BUILD_BUG_ON from
> _wait_for_atomic to wait_for_atomic(_us) macros, as Michal
> suggested at one point, which should fix the GCC 4.4 build
> issue.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         |  75 ++++++++++++++-----
>  drivers/gpu/drm/i915/intel_drv.h        |  18 ++++-
>  drivers/gpu/drm/i915/intel_i2c.c        |   4 +-
>  drivers/gpu/drm/i915/intel_pm.c         |  10 ++-
>  drivers/gpu/drm/i915/intel_ringbuffer.c |   9 ++-
>  drivers/gpu/drm/i915/intel_uc.c         |  10 +--
>  drivers/gpu/drm/i915/intel_uncore.c     | 123 +++++++++-----------------------
>  7 files changed, 121 insertions(+), 128 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index ed079c244b5d..ba95a54b9dfa 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3083,27 +3083,68 @@ u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv);
>
>  void assert_forcewakes_inactive(struct drm_i915_private *dev_priv);
>
> -int intel_wait_for_register(struct drm_i915_private *dev_priv,
> -			    i915_reg_t reg,
> -			    u32 mask,
> -			    u32 value,
> -			    unsigned int timeout_ms);
> -int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
> -				 i915_reg_t reg,
> -				 u32 mask,
> -				 u32 value,
> -				 unsigned int fast_timeout_us,
> -				 unsigned int slow_timeout_ms,
> -				 u32 *out_value);
> -static inline
> -int intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
> +int __intel_wait_for_register(struct drm_i915_private *dev_priv,
> +			      i915_reg_t reg,
> +			      u32 mask,
> +			      u32 value,
> +			      unsigned int fast_timeout_us,
> +			      unsigned int slow_timeout_ms,
> +			      u32 *out_value);
> +
> +/**
> + * intel_wait_for_register - wait until register matches expected state
> + * @dev_priv: the i915 device
> + * @reg: the register to read
> + * @mask: mask to apply to register value
> + * @value: expected value
> + * @timeout_ms: timeout in millisecond
> + *
> + * This routine waits until the target register @reg contains the expected
> + * @value after applying the @mask, i.e. it waits until ::
> + *
> + *     (I915_READ(reg) & mask) == value
> + *
> + * Otherwise, the wait will timeout after @timeout_ms milliseconds.
> + *
> + * Returns 0 if the register matches the desired condition, or -ETIMEOUT.
> + */
> +static inline int
> +intel_wait_for_register(struct drm_i915_private *dev_priv,
> +			i915_reg_t reg,
> +			u32 mask,
> +			u32 value,
> +			unsigned int timeout_ms)
> +{
> +	return __intel_wait_for_register(dev_priv, reg, mask, value,
> +					 2, timeout_ms, NULL);
> +}
> +
> +/**
> + * intel_wait_for_register_atomic - wait until register matches expected state
> + * @dev_priv: the i915 device
> + * @reg: the register to read
> + * @mask: mask to apply to register value
> + * @value: expected value
> + * @timeout_us: timeout in microsecond
> + *
> + * This routine waits until the target register @reg contains the expected
> + * @value after applying the @mask, i.e. it waits until ::
> + *
> + *     (I915_READ(reg) & mask) == value
> + *
> + * The wait is done with busy-looping.
> + *
> + * Returns 0 if the register matches the desired condition, or -ETIMEOUT.
> + */
> +static inline int
> +intel_wait_for_register_atomic(struct drm_i915_private *dev_priv,
>  			       i915_reg_t reg,
>  			       u32 mask,
>  			       u32 value,
> -			       unsigned int timeout_ms)
> +			       unsigned int timeout_us)
>  {
> -	return __intel_wait_for_register_fw(dev_priv, reg, mask, value,
> -					    2, timeout_ms, NULL);
> +	return __intel_wait_for_register(dev_priv, reg, mask, value,
> +					 timeout_us, 0, NULL);
>  }
>
>  static inline bool intel_gvt_active(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 43ea74882f9a..d7018d44371c 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -88,7 +88,6 @@
>  	int cpu, ret, timeout = (US) * 1000; \
>  	u64 base; \
>  	_WAIT_FOR_ATOMIC_CHECK(ATOMIC); \
> -	BUILD_BUG_ON((US) > 50000); \
>  	if (!(ATOMIC)) { \
>  		preempt_disable(); \
>  		cpu = smp_processor_id(); \
> @@ -130,8 +129,21 @@
>  	ret__; \
>  })
>
> -#define wait_for_atomic(COND, MS)	_wait_for_atomic((COND), (MS) * 1000, 1)
> -#define wait_for_atomic_us(COND, US)	_wait_for_atomic((COND), (US), 1)
> +#define wait_for_atomic(COND, MS) \
> +({ \
> +	int ret__; \
> +	BUILD_BUG_ON((MS) > 50); \
> +	ret__ = _wait_for_atomic((COND), (MS) * 1000, 1); \
> +	ret__; \
> +})
> +
> +#define wait_for_atomic_us(COND, US) \
> +({ \
> +	int ret__; \
> +	BUILD_BUG_ON((US) > 50000); \
> +	ret__ = _wait_for_atomic((COND), (US), 1); \
> +	ret__; \
> +})
>
>  #define KHz(x) (1000 * (x))
>  #define MHz(x) KHz(1000 * (x))
> diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
> index b6401e8f1bd6..ddc615d3d40d 100644
> --- a/drivers/gpu/drm/i915/intel_i2c.c
> +++ b/drivers/gpu/drm/i915/intel_i2c.c
> @@ -297,9 +297,7 @@ gmbus_wait_idle(struct drm_i915_private *dev_priv)
>  	add_wait_queue(&dev_priv->gmbus_wait_queue, &wait);
>  	I915_WRITE_FW(GMBUS4, irq_enable);
>
> -	ret = intel_wait_for_register_fw(dev_priv,
> -					 GMBUS2, GMBUS_ACTIVE, 0,
> -					 10);
> +	ret = intel_wait_for_register(dev_priv, GMBUS2, GMBUS_ACTIVE, 0, 10);
>
>  	I915_WRITE_FW(GMBUS4, 0);
>  	remove_wait_queue(&dev_priv->gmbus_wait_queue, &wait);
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index cacb65fa2dd5..a93e6427aabf 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -8135,9 +8135,8 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val
>  	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
>  	I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
>
> -	if (__intel_wait_for_register_fw(dev_priv,
> -					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
> -					 500, 0, NULL)) {
> +	if (intel_wait_for_register_atomic(dev_priv, GEN6_PCODE_MAILBOX,
> +					   GEN6_PCODE_READY, 0, 500)) {
>  		DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
>  		return -ETIMEDOUT;
>  	}
> @@ -8180,9 +8179,8 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv,
>  	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
>  	I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
>
> -	if (__intel_wait_for_register_fw(dev_priv,
> -					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
> -					 500, 0, NULL)) {
> +	if (intel_wait_for_register_atomic(dev_priv, GEN6_PCODE_MAILBOX,
> +					   GEN6_PCODE_READY, 0, 500)) {
>  		DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
>  		return -ETIMEDOUT;
>  	}
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 97d5fcca8805..af31d786a517 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1729,11 +1729,10 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
>  	I915_WRITE64_FW(GEN6_BSD_RNCID, 0x0);
>
>  	/* Wait for the ring not to be idle, i.e. for it to wake up. */
> -	if (__intel_wait_for_register_fw(dev_priv,
> -					 GEN6_BSD_SLEEP_PSMI_CONTROL,
> -					 GEN6_BSD_SLEEP_INDICATOR,
> -					 0,
> -					 1000, 0, NULL))
> +	if (intel_wait_for_register_atomic(dev_priv,
> +					   GEN6_BSD_SLEEP_PSMI_CONTROL,
> +					   GEN6_BSD_SLEEP_INDICATOR, 0,
> +					   1000))
>  		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
>
>  	/* Now that the ring is fully powered up, update the tail */
> diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
> index 4364b1a9064e..bae60f5c52d6 100644
> --- a/drivers/gpu/drm/i915/intel_uc.c
> +++ b/drivers/gpu/drm/i915/intel_uc.c
> @@ -389,11 +389,11 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len)
>  	 * No GuC command should ever take longer than 10ms.
>  	 * Fast commands should still complete in 10us.
>  	 */
> -	ret = __intel_wait_for_register_fw(dev_priv,
> -					   SOFT_SCRATCH(0),
> -					   INTEL_GUC_RECV_MASK,
> -					   INTEL_GUC_RECV_MASK,
> -					   10, 10, &status);
> +	ret = __intel_wait_for_register(dev_priv,
> +					SOFT_SCRATCH(0),
> +					INTEL_GUC_RECV_MASK,
> +					INTEL_GUC_RECV_MASK,
> +					10, 10, &status);
>  	if (status != INTEL_GUC_STATUS_SUCCESS) {
>  		/*
>  		 * Either the GuC explicitly returned an error (which
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index fb38c7692fc2..215fbed8808c 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1535,9 +1535,8 @@ static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv,
>  	__raw_i915_write32(dev_priv, GEN6_GDRST, hw_domain_mask);
>
>  	/* Spin waiting for the device to ack the reset requests */
> -	return intel_wait_for_register_fw(dev_priv,
> -					  GEN6_GDRST, hw_domain_mask, 0,
> -					  500);
> +	return intel_wait_for_register(dev_priv, GEN6_GDRST, hw_domain_mask, 0,
> +				       500);
>  }
>
>  /**
> @@ -1585,7 +1584,7 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv,
>  }
>
>  /**
> - * __intel_wait_for_register_fw - wait until register matches expected state
> + * __intel_wait_for_register - wait until register matches expected state
>   * @dev_priv: the i915 device
>   * @reg: the register to read
>   * @mask: mask to apply to register value
> @@ -1597,90 +1596,47 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv,
>   * This routine waits until the target register @reg contains the expected
>   * @value after applying the @mask, i.e. it waits until ::
>   *
> - *     (I915_READ_FW(reg) & mask) == value
> + *     (I915_READ(reg) & mask) == value
>   *
>   * Otherwise, the wait will timeout after @slow_timeout_ms milliseconds.
> - * For atomic context @slow_timeout_ms must be zero and @fast_timeout_us
> - * must be not larger than 20,0000 microseconds.
> - *
> - * Note that this routine assumes the caller holds forcewake asserted, it is
> - * not suitable for very long waits. See intel_wait_for_register() if you
> - * wish to wait without holding forcewake for the duration (i.e. you expect
> - * the wait to be slow).
>   *
>   * Returns 0 if the register matches the desired condition, or -ETIMEOUT.
>   */
> -int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
> -				 i915_reg_t reg,
> -				 u32 mask,
> -				 u32 value,
> -				 unsigned int fast_timeout_us,
> -				 unsigned int slow_timeout_ms,
> -				 u32 *out_value)
> -{
> +int __intel_wait_for_register(struct drm_i915_private *dev_priv,
> +			      i915_reg_t reg,
> +			      u32 mask,
> +			      u32 value,
> +			      unsigned int fast_timeout_us,
> +			      unsigned int slow_timeout_ms,
> +			      u32 *out_value)
> +{
> +	unsigned long flags;
> +	unsigned int fw_domains;
>  	u32 reg_value;
> -#define done (((reg_value = I915_READ_FW(reg)) & mask) == value)
>  	int ret;
>
>  	/* Catch any overuse of this function */
>  	might_sleep_if(slow_timeout_ms);
> -	GEM_BUG_ON(fast_timeout_us > 20000);
> -
> -	ret = -ETIMEDOUT;
> -	if (fast_timeout_us && fast_timeout_us <= 20000)
> -		ret = _wait_for_atomic(done, fast_timeout_us, 0);
> -	if (ret)
> -		ret = wait_for(done, slow_timeout_ms);
> +	GEM_BUG_ON(fast_timeout_us > 1000);
>
> -	if (out_value)
> -		*out_value = reg_value;
> +	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
> +	fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ);
> +	intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
>
> -	return ret;
> +#define done (((reg_value = I915_READ_FW(reg)) & mask) == value)
> +	ret = _wait_for_atomic(done, fast_timeout_us, 1);
>  #undef done
> -}
>
> -/**
> - * intel_wait_for_register - wait until register matches expected state
> - * @dev_priv: the i915 device
> - * @reg: the register to read
> - * @mask: mask to apply to register value
> - * @value: expected value
> - * @timeout_ms: timeout in millisecond
> - *
> - * This routine waits until the target register @reg contains the expected
> - * @value after applying the @mask, i.e. it waits until ::
> - *
> - *     (I915_READ(reg) & mask) == value
> - *
> - * Otherwise, the wait will timeout after @timeout_ms milliseconds.
> - *
> - * Returns 0 if the register matches the desired condition, or -ETIMEOUT.
> - */
> -int intel_wait_for_register(struct drm_i915_private *dev_priv,
> -			    i915_reg_t reg,
> -			    u32 mask,
> -			    u32 value,
> -			    unsigned int timeout_ms)
> -{
> -	unsigned fw =
> -		intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ);
> -	int ret;
> -
> -	might_sleep();
> -
> -	spin_lock_irq(&dev_priv->uncore.lock);
> -	intel_uncore_forcewake_get__locked(dev_priv, fw);
> -
> -	ret = __intel_wait_for_register_fw(dev_priv,
> -					   reg, mask, value,
> -					   2, 0, NULL);
> +	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
> +	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
>
> -	intel_uncore_forcewake_put__locked(dev_priv, fw);
> -	spin_unlock_irq(&dev_priv->uncore.lock);
> +#define done (((reg_value = I915_READ_NOTRACE(reg)) & mask) == value)
> +	if (ret && slow_timeout_ms)
> +		ret = wait_for(done, slow_timeout_ms);
> +#undef done
>
> -	if (ret)
> -		ret = wait_for((I915_READ_NOTRACE(reg) & mask) == value,
> -			       timeout_ms);
> +	if (out_value)
> +		*out_value = reg_value;
>
>  	return ret;
>  }
> @@ -1693,11 +1649,11 @@ static int gen8_request_engine_reset(struct intel_engine_cs *engine)
>  	I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base),
>  		      _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET));
>
> -	ret = intel_wait_for_register_fw(dev_priv,
> -					 RING_RESET_CTL(engine->mmio_base),
> -					 RESET_CTL_READY_TO_RESET,
> -					 RESET_CTL_READY_TO_RESET,
> -					 700);
> +	ret = intel_wait_for_register(dev_priv,
> +				      RING_RESET_CTL(engine->mmio_base),
> +				      RESET_CTL_READY_TO_RESET,
> +				      RESET_CTL_READY_TO_RESET,
> +				      700);
>  	if (ret)
>  		DRM_ERROR("%s: reset request timeout\n", engine->name);
>
> @@ -1780,21 +1736,10 @@ bool intel_has_gpu_reset(struct drm_i915_private *dev_priv)
>
>  int intel_guc_reset(struct drm_i915_private *dev_priv)
>  {
> -	int ret;
> -	unsigned long irqflags;
> -
>  	if (!HAS_GUC(dev_priv))
>  		return -EINVAL;
>
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
> -
> -	ret = gen6_hw_domain_reset(dev_priv, GEN9_GRDOM_GUC);
> -
> -	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -
> -	return ret;
> +	return gen6_hw_domain_reset(dev_priv, GEN9_GRDOM_GUC);
>  }
>
>  bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv)
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2017-04-12 10:42 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-04-12 10:36 [RFC] drm/i915: Simplify waiting for registers Tvrtko Ursulin
2017-04-12 10:42 ` Tvrtko Ursulin [this message]
2017-04-12 10:54   ` Chris Wilson
2017-04-12 10:56 ` ✗ Fi.CI.BAT: warning for " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6a9e0046-54f0-2aca-b204-2f88168bef26@linux.intel.com \
    --to=tvrtko.ursulin@linux.intel.com \
    --cc=Intel-gfx@lists.freedesktop.org \
    --cc=tursulin@ursulin.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.