All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Always update RPS interrupts thresholds along with frequency
@ 2012-04-28  7:56 Chris Wilson
  2012-04-29  2:18 ` Ben Widawsky
  0 siblings, 1 reply; 3+ messages in thread
From: Chris Wilson @ 2012-04-28  7:56 UTC (permalink / raw)
  To: intel-gfx

In order to avoid missed down-interrupts when coming out of RC6, it is
advised that we always reset the down-threshold upon a PM event. This is
due to that the PM unit goes through a little dance when coming out of
RC6, it first brings the GPU up at the lowest frequency then a short
time later it restores the thresholds. During that interval, the
down-interval may expire and the interrupt be suppressed.

Now aware of the dance taking place within the GPU when coming out of
RC6, one wonders what other writes need to be queued in the fifo buffer
in order to be properly sequenced; setting the RP state appears to be
one.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=44006
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_irq.c |   37 +++++--------------------
 drivers/gpu/drm/i915/intel_pm.c |   57 +++++++++++++++++++++++++++------------
 2 files changed, 47 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index c2511fe..1ea39e4 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -353,8 +353,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
 {
 	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
 						    rps_work);
-	u8 new_delay = dev_priv->cur_delay;
 	u32 pm_iir, pm_imr;
+	u8 new_delay;
 
 	spin_lock_irq(&dev_priv->rps_lock);
 	pm_iir = dev_priv->pm_iir;
@@ -363,41 +363,18 @@ static void gen6_pm_rps_work(struct work_struct *work)
 	I915_WRITE(GEN6_PMIMR, 0);
 	spin_unlock_irq(&dev_priv->rps_lock);
 
-	if (!pm_iir)
+	if ((pm_iir & GEN6_PM_DEFERRED_EVENTS) == 0)
 		return;
 
 	mutex_lock(&dev_priv->dev->struct_mutex);
-	if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
-		if (dev_priv->cur_delay != dev_priv->max_delay)
-			new_delay = dev_priv->cur_delay + 1;
-		if (new_delay > dev_priv->max_delay)
-			new_delay = dev_priv->max_delay;
-	} else if (pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT)) {
-		gen6_gt_force_wake_get(dev_priv);
-		if (dev_priv->cur_delay != dev_priv->min_delay)
-			new_delay = dev_priv->cur_delay - 1;
-		if (new_delay < dev_priv->min_delay) {
-			new_delay = dev_priv->min_delay;
-			I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
-				   I915_READ(GEN6_RP_INTERRUPT_LIMITS) |
-				   ((new_delay << 16) & 0x3f0000));
-		} else {
-			/* Make sure we continue to get down interrupts
-			 * until we hit the minimum frequency */
-			I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
-				   I915_READ(GEN6_RP_INTERRUPT_LIMITS) & ~0x3f0000);
-		}
-		gen6_gt_force_wake_put(dev_priv);
-	}
+
+	if (pm_iir & GEN6_PM_RP_UP_THRESHOLD)
+		new_delay = dev_priv->cur_delay + 1;
+	else
+		new_delay = dev_priv->cur_delay - 1;
 
 	gen6_set_rps(dev_priv->dev, new_delay);
-	dev_priv->cur_delay = new_delay;
 
-	/*
-	 * rps_lock not held here because clearing is non-destructive. There is
-	 * an *extremely* unlikely race with gen6_rps_enable() that is prevented
-	 * by holding struct_mutex for the duration of the write.
-	 */
 	mutex_unlock(&dev_priv->dev->struct_mutex);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 3b05ba3..5e6b0c8 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2127,10 +2127,33 @@ void ironlake_disable_drps(struct drm_device *dev)
 void gen6_set_rps(struct drm_device *dev, u8 val)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 swreq;
+	u32 limits;
 
-	swreq = (val & 0x3ff) << 25;
-	I915_WRITE(GEN6_RPNSWREQ, swreq);
+	limits = 0;
+	if (val >= dev_priv->max_delay)
+		val = dev_priv->max_delay;
+	else
+		limits |= dev_priv->max_delay << 24;
+
+	if (val <= dev_priv->min_delay)
+		val = dev_priv->min_delay;
+	else
+		limits |= dev_priv->min_delay << 16;
+
+	if (val == dev_priv->cur_delay)
+		return;
+
+	I915_WRITE(GEN6_RPNSWREQ,
+		   GEN6_FREQUENCY(val) |
+		   GEN6_OFFSET(0) |
+		   GEN6_AGGRESSIVE_TURBO);
+
+	/* Make sure we continue to get interrupts
+	 * until we hit the minimum or maximum frequencies.
+	 */
+	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, limits);
+
+	dev_priv->cur_delay = val;
 }
 
 void gen6_disable_rps(struct drm_device *dev)
@@ -2183,11 +2206,10 @@ int intel_enable_rc6(const struct drm_device *dev)
 
 void gen6_enable_rps(struct drm_i915_private *dev_priv)
 {
-	u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
-	u32 gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
+	u32 rp_state_cap;
+	u32 gt_perf_status;
 	u32 pcu_mbox, rc6_mask = 0;
 	u32 gtfifodbg;
-	int cur_freq, min_freq, max_freq;
 	int rc6_mode;
 	int i;
 
@@ -2208,6 +2230,14 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
 
 	gen6_gt_force_wake_get(dev_priv);
 
+	rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
+	gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
+
+	/* In units of 100MHz */
+	dev_priv->max_delay = rp_state_cap & 0xff;
+	dev_priv->min_delay = (rp_state_cap & 0xff0000) >> 16;
+	dev_priv->cur_delay = 0;
+
 	/* disable the counters and set deterministic thresholds */
 	I915_WRITE(GEN6_RC_CONTROL, 0);
 
@@ -2255,8 +2285,8 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
 
 	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
 	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
-		   18 << 24 |
-		   6 << 16);
+		   dev_priv->max_delay << 24 |
+		   dev_priv->min_delay << 16);
 	I915_WRITE(GEN6_RP_UP_THRESHOLD, 10000);
 	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 1000000);
 	I915_WRITE(GEN6_RP_UP_EI, 100000);
@@ -2282,10 +2312,6 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
 		     500))
 		DRM_ERROR("timeout waiting for pcode mailbox to finish\n");
 
-	min_freq = (rp_state_cap & 0xff0000) >> 16;
-	max_freq = rp_state_cap & 0xff;
-	cur_freq = (gt_perf_status & 0xff00) >> 8;
-
 	/* Check for overclock support */
 	if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
 		     500))
@@ -2296,14 +2322,11 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
 		     500))
 		DRM_ERROR("timeout waiting for pcode mailbox to finish\n");
 	if (pcu_mbox & (1<<31)) { /* OC supported */
-		max_freq = pcu_mbox & 0xff;
+		dev_priv->max_delay = pcu_mbox & 0xff;
 		DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max to %dMHz\n", pcu_mbox * 50);
 	}
 
-	/* In units of 100MHz */
-	dev_priv->max_delay = max_freq;
-	dev_priv->min_delay = min_freq;
-	dev_priv->cur_delay = cur_freq;
+	gen6_set_rps(dev_priv->dev, (gt_perf_status & 0xff00) >> 8);
 
 	/* requires MSI enabled */
 	I915_WRITE(GEN6_PMIER,
-- 
1.7.10

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] drm/i915: Always update RPS interrupts thresholds along with frequency
  2012-04-28  7:56 [PATCH] drm/i915: Always update RPS interrupts thresholds along with frequency Chris Wilson
@ 2012-04-29  2:18 ` Ben Widawsky
  2012-05-22 18:56   ` Daniel Vetter
  0 siblings, 1 reply; 3+ messages in thread
From: Ben Widawsky @ 2012-04-29  2:18 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Sat, 28 Apr 2012 08:56:39 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> In order to avoid missed down-interrupts when coming out of RC6, it is
> advised that we always reset the down-threshold upon a PM event. This
> is due to that the PM unit goes through a little dance when coming
> out of RC6, it first brings the GPU up at the lowest frequency then a
> short time later it restores the thresholds. During that interval, the
> down-interval may expire and the interrupt be suppressed.
> 
> Now aware of the dance taking place within the GPU when coming out of
> RC6, one wonders what other writes need to be queued in the fifo
> buffer in order to be properly sequenced; setting the RP state
> appears to be one.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=44006
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

I tried really hard to review this, but it was too much. The code
definitely is cleaner as a result, so this is:
Acked-by: Ben Widawsky <ben@bwidawsk.net>

> ---
>  drivers/gpu/drm/i915/i915_irq.c |   37 +++++--------------------
>  drivers/gpu/drm/i915/intel_pm.c |   57
> +++++++++++++++++++++++++++------------ 2 files changed, 47
> insertions(+), 47 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c
> b/drivers/gpu/drm/i915/i915_irq.c index c2511fe..1ea39e4 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -353,8 +353,8 @@ static void gen6_pm_rps_work(struct work_struct
> *work) {
>  	drm_i915_private_t *dev_priv = container_of(work,
> drm_i915_private_t, rps_work);
> -	u8 new_delay = dev_priv->cur_delay;
>  	u32 pm_iir, pm_imr;
> +	u8 new_delay;
>  
>  	spin_lock_irq(&dev_priv->rps_lock);
>  	pm_iir = dev_priv->pm_iir;
> @@ -363,41 +363,18 @@ static void gen6_pm_rps_work(struct work_struct
> *work) I915_WRITE(GEN6_PMIMR, 0);
>  	spin_unlock_irq(&dev_priv->rps_lock);
>  
> -	if (!pm_iir)
> +	if ((pm_iir & GEN6_PM_DEFERRED_EVENTS) == 0)
>  		return;
>  
>  	mutex_lock(&dev_priv->dev->struct_mutex);
> -	if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
> -		if (dev_priv->cur_delay != dev_priv->max_delay)
> -			new_delay = dev_priv->cur_delay + 1;
> -		if (new_delay > dev_priv->max_delay)
> -			new_delay = dev_priv->max_delay;
> -	} else if (pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD |
> GEN6_PM_RP_DOWN_TIMEOUT)) {
> -		gen6_gt_force_wake_get(dev_priv);
> -		if (dev_priv->cur_delay != dev_priv->min_delay)
> -			new_delay = dev_priv->cur_delay - 1;
> -		if (new_delay < dev_priv->min_delay) {
> -			new_delay = dev_priv->min_delay;
> -			I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
> -
> I915_READ(GEN6_RP_INTERRUPT_LIMITS) |
> -				   ((new_delay << 16) & 0x3f0000));
> -		} else {
> -			/* Make sure we continue to get down
> interrupts
> -			 * until we hit the minimum frequency */
> -			I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
> -
> I915_READ(GEN6_RP_INTERRUPT_LIMITS) & ~0x3f0000);
> -		}
> -		gen6_gt_force_wake_put(dev_priv);
> -	}
> +
> +	if (pm_iir & GEN6_PM_RP_UP_THRESHOLD)
> +		new_delay = dev_priv->cur_delay + 1;
> +	else
> +		new_delay = dev_priv->cur_delay - 1;
>  
>  	gen6_set_rps(dev_priv->dev, new_delay);
> -	dev_priv->cur_delay = new_delay;
>  
> -	/*
> -	 * rps_lock not held here because clearing is
> non-destructive. There is
> -	 * an *extremely* unlikely race with gen6_rps_enable() that
> is prevented
> -	 * by holding struct_mutex for the duration of the write.
> -	 */
>  	mutex_unlock(&dev_priv->dev->struct_mutex);
>  }
>  
> diff --git a/drivers/gpu/drm/i915/intel_pm.c
> b/drivers/gpu/drm/i915/intel_pm.c index 3b05ba3..5e6b0c8 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -2127,10 +2127,33 @@ void ironlake_disable_drps(struct drm_device
> *dev) void gen6_set_rps(struct drm_device *dev, u8 val)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> -	u32 swreq;
> +	u32 limits;
>  
> -	swreq = (val & 0x3ff) << 25;
> -	I915_WRITE(GEN6_RPNSWREQ, swreq);
> +	limits = 0;
> +	if (val >= dev_priv->max_delay)
> +		val = dev_priv->max_delay;
> +	else
> +		limits |= dev_priv->max_delay << 24;
> +
> +	if (val <= dev_priv->min_delay)
> +		val = dev_priv->min_delay;
> +	else
> +		limits |= dev_priv->min_delay << 16;
> +
> +	if (val == dev_priv->cur_delay)
> +		return;
> +
> +	I915_WRITE(GEN6_RPNSWREQ,
> +		   GEN6_FREQUENCY(val) |
> +		   GEN6_OFFSET(0) |
> +		   GEN6_AGGRESSIVE_TURBO);
> +
> +	/* Make sure we continue to get interrupts
> +	 * until we hit the minimum or maximum frequencies.
> +	 */
> +	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, limits);
> +
> +	dev_priv->cur_delay = val;
>  }
>  
>  void gen6_disable_rps(struct drm_device *dev)
> @@ -2183,11 +2206,10 @@ int intel_enable_rc6(const struct drm_device
> *dev) 
>  void gen6_enable_rps(struct drm_i915_private *dev_priv)
>  {
> -	u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
> -	u32 gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
> +	u32 rp_state_cap;
> +	u32 gt_perf_status;
>  	u32 pcu_mbox, rc6_mask = 0;
>  	u32 gtfifodbg;
> -	int cur_freq, min_freq, max_freq;
>  	int rc6_mode;
>  	int i;
>  
> @@ -2208,6 +2230,14 @@ void gen6_enable_rps(struct drm_i915_private
> *dev_priv) 
>  	gen6_gt_force_wake_get(dev_priv);
>  
> +	rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
> +	gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
> +
> +	/* In units of 100MHz */
> +	dev_priv->max_delay = rp_state_cap & 0xff;
> +	dev_priv->min_delay = (rp_state_cap & 0xff0000) >> 16;
> +	dev_priv->cur_delay = 0;
> +
>  	/* disable the counters and set deterministic thresholds */
>  	I915_WRITE(GEN6_RC_CONTROL, 0);
>  
> @@ -2255,8 +2285,8 @@ void gen6_enable_rps(struct drm_i915_private
> *dev_priv) 
>  	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
>  	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
> -		   18 << 24 |
> -		   6 << 16);
> +		   dev_priv->max_delay << 24 |
> +		   dev_priv->min_delay << 16);
>  	I915_WRITE(GEN6_RP_UP_THRESHOLD, 10000);
>  	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 1000000);
>  	I915_WRITE(GEN6_RP_UP_EI, 100000);
> @@ -2282,10 +2312,6 @@ void gen6_enable_rps(struct drm_i915_private
> *dev_priv) 500))
>  		DRM_ERROR("timeout waiting for pcode mailbox to
> finish\n"); 
> -	min_freq = (rp_state_cap & 0xff0000) >> 16;
> -	max_freq = rp_state_cap & 0xff;
> -	cur_freq = (gt_perf_status & 0xff00) >> 8;
> -
>  	/* Check for overclock support */
>  	if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) &
> GEN6_PCODE_READY) == 0, 500))
> @@ -2296,14 +2322,11 @@ void gen6_enable_rps(struct drm_i915_private
> *dev_priv) 500))
>  		DRM_ERROR("timeout waiting for pcode mailbox to
> finish\n"); if (pcu_mbox & (1<<31)) { /* OC supported */
> -		max_freq = pcu_mbox & 0xff;
> +		dev_priv->max_delay = pcu_mbox & 0xff;
>  		DRM_DEBUG_DRIVER("overclocking supported, adjusting
> frequency max to %dMHz\n", pcu_mbox * 50); }
>  
> -	/* In units of 100MHz */
> -	dev_priv->max_delay = max_freq;
> -	dev_priv->min_delay = min_freq;
> -	dev_priv->cur_delay = cur_freq;
> +	gen6_set_rps(dev_priv->dev, (gt_perf_status & 0xff00) >> 8);
>  
>  	/* requires MSI enabled */
>  	I915_WRITE(GEN6_PMIER,

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] drm/i915: Always update RPS interrupts thresholds along with frequency
  2012-04-29  2:18 ` Ben Widawsky
@ 2012-05-22 18:56   ` Daniel Vetter
  0 siblings, 0 replies; 3+ messages in thread
From: Daniel Vetter @ 2012-05-22 18:56 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx

On Sat, Apr 28, 2012 at 07:18:30PM -0700, Ben Widawsky wrote:
> On Sat, 28 Apr 2012 08:56:39 +0100
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
> 
> > In order to avoid missed down-interrupts when coming out of RC6, it is
> > advised that we always reset the down-threshold upon a PM event. This
> > is due to that the PM unit goes through a little dance when coming
> > out of RC6, it first brings the GPU up at the lowest frequency then a
> > short time later it restores the thresholds. During that interval, the
> > down-interval may expire and the interrupt be suppressed.
> > 
> > Now aware of the dance taking place within the GPU when coming out of
> > RC6, one wonders what other writes need to be queued in the fifo
> > buffer in order to be properly sequenced; setting the RP state
> > appears to be one.
> > 
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=44006
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> I tried really hard to review this, but it was too much. The code
> definitely is cleaner as a result, so this is:
> Acked-by: Ben Widawsky <ben@bwidawsk.net>
Picked up for -fixes with Jesse's irc r-b added, thanks for the patch.
-Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2012-05-22 18:55 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-04-28  7:56 [PATCH] drm/i915: Always update RPS interrupts thresholds along with frequency Chris Wilson
2012-04-29  2:18 ` Ben Widawsky
2012-05-22 18:56   ` Daniel Vetter

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.