All of lore.kernel.org
 help / color / mirror / Atom feed
* RPS tuning for VLV and pageflips
@ 2015-03-18  9:48 Chris Wilson
  2015-03-18  9:48 ` [PATCH v2 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle Chris Wilson
                   ` (6 more replies)
  0 siblings, 7 replies; 14+ messages in thread
From: Chris Wilson @ 2015-03-18  9:48 UTC (permalink / raw)
  To: intel-gfx

A few r-bs and suggestions from Deepak rebased onto -nightly.
-Chris

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v2 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle
  2015-03-18  9:48 RPS tuning for VLV and pageflips Chris Wilson
@ 2015-03-18  9:48 ` Chris Wilson
  2015-03-18 11:12   ` Deepak S
  2015-03-18  9:48 ` [PATCH v2 2/7] drm/i915: Improved w/a for rps on Baytrail Chris Wilson
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2015-03-18  9:48 UTC (permalink / raw)
  To: intel-gfx

When we idle, we set the GPU frequency to the hardware minimum (not user
minimum). We introduce a new variable to distinguish between the
different roles, and to allow easy tuning of the idle frequency without
impacting other aspects of RPS. Setting the minimum frequency should be a
safety blanket as the pcu on the GPU should be power gating itself
anyway. However, in order for us to set the absolute minimum
frequency, we need to relax a few of our assertions that we do not
exceed the user limits.

v2: Add idle_freq
v3: Init idle_freq for vlv and add a bunch of WARNs

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Deepak S <deepak.s@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c |  6 +++++
 drivers/gpu/drm/i915/i915_drv.h     |  1 +
 drivers/gpu/drm/i915/intel_pm.c     | 44 +++++++++++++++++++++++--------------
 3 files changed, 35 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index aaf756047a20..007c7d7d8295 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1200,6 +1200,9 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 
 		seq_printf(m, "Max overclocked frequency: %dMHz\n",
 			   intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));
+
+		seq_printf(m, "Idle freq: %d MHz\n",
+			   intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq));
 	} else if (IS_VALLEYVIEW(dev)) {
 		u32 freq_sts;
 
@@ -1214,6 +1217,9 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 		seq_printf(m, "min GPU freq: %d MHz\n",
 			   intel_gpu_freq(dev_priv, dev_priv->rps.min_freq));
 
+		seq_printf(m, "idle GPU freq: %d MHz\n",
+			   intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq));
+
 		seq_printf(m,
 			   "efficient (RPe) frequency: %d MHz\n",
 			   intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq));
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 81f60b48def2..a06536cfce6d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1025,6 +1025,7 @@ struct intel_gen6_power_mgmt {
 	u8 max_freq_softlimit;	/* Max frequency permitted by the driver */
 	u8 max_freq;		/* Maximum frequency, RP0 if not overclocking */
 	u8 min_freq;		/* AKA RPn. Minimum frequency */
+	u8 idle_freq;		/* Frequency to request when we are idle */
 	u8 efficient_freq;	/* AKA RPe. Pre-determined balanced frequency */
 	u8 rp1_freq;		/* "less than" RP0 power/freqency */
 	u8 rp0_freq;		/* Non-overclocked max frequency. */
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 288c9d24098e..beab305e320d 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3855,9 +3855,9 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
 		break;
 	}
 	/* Max/min bins are special */
-	if (val == dev_priv->rps.min_freq_softlimit)
+	if (val <= dev_priv->rps.min_freq_softlimit)
 		new_power = LOW_POWER;
-	if (val == dev_priv->rps.max_freq_softlimit)
+	if (val >= dev_priv->rps.max_freq_softlimit)
 		new_power = HIGH_POWER;
 	if (new_power == dev_priv->rps.power)
 		return;
@@ -3940,8 +3940,8 @@ static void gen6_set_rps(struct drm_device *dev, u8 val)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
-	WARN_ON(val > dev_priv->rps.max_freq_softlimit);
-	WARN_ON(val < dev_priv->rps.min_freq_softlimit);
+	WARN_ON(val > dev_priv->rps.max_freq);
+	WARN_ON(val < dev_priv->rps.min_freq);
 
 	/* min/max delay may still have been modified so be sure to
 	 * write the limits value.
@@ -3979,8 +3979,8 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
-	WARN_ON(val > dev_priv->rps.max_freq_softlimit);
-	WARN_ON(val < dev_priv->rps.min_freq_softlimit);
+	WARN_ON(val > dev_priv->rps.max_freq);
+	WARN_ON(val < dev_priv->rps.min_freq);
 
 	if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1),
 		      "Odd GPU freq value\n"))
@@ -4007,10 +4007,11 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
+	u32 val = dev_priv->rps.idle_freq;
 
 	/* CHV and latest VLV don't need to force the gfx clock */
 	if (IS_CHERRYVIEW(dev) || dev->pdev->revision >= 0xd) {
-		valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
+		valleyview_set_rps(dev_priv->dev, val);
 		return;
 	}
 
@@ -4018,7 +4019,7 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
 	 * When we are idle.  Drop to min voltage state.
 	 */
 
-	if (dev_priv->rps.cur_freq <= dev_priv->rps.min_freq_softlimit)
+	if (dev_priv->rps.cur_freq <= val)
 		return;
 
 	/* Mask turbo interrupt so that they will not come in between */
@@ -4027,10 +4028,9 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
 
 	vlv_force_gfx_clock(dev_priv, true);
 
-	dev_priv->rps.cur_freq = dev_priv->rps.min_freq_softlimit;
+	dev_priv->rps.cur_freq = val;
 
-	vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ,
-					dev_priv->rps.min_freq_softlimit);
+	vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
 
 	if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS))
 				& GENFREQSTATUS) == 0, 100))
@@ -4038,8 +4038,7 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
 
 	vlv_force_gfx_clock(dev_priv, false);
 
-	I915_WRITE(GEN6_PMINTRMSK,
-		   gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
+	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
 }
 
 void gen6_rps_idle(struct drm_i915_private *dev_priv)
@@ -4051,7 +4050,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
 		if (IS_VALLEYVIEW(dev))
 			vlv_set_rps_idle(dev_priv);
 		else
-			gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
+			gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
 		dev_priv->rps.last_adj = 0;
 	}
 	mutex_unlock(&dev_priv->rps.hw_lock);
@@ -4209,6 +4208,8 @@ static void gen6_init_rps_frequencies(struct drm_device *dev)
 					dev_priv->rps.max_freq);
 	}
 
+	dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
+
 	/* Preserve min/max settings in case of re-init */
 	if (dev_priv->rps.max_freq_softlimit == 0)
 		dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
@@ -4375,7 +4376,7 @@ static void gen8_enable_rps(struct drm_device *dev)
 	/* 6: Ring frequency + overclocking (our driver does this later */
 
 	dev_priv->rps.power = HIGH_POWER; /* force a reset */
-	gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
+	gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
 
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
@@ -4469,7 +4470,7 @@ static void gen6_enable_rps(struct drm_device *dev)
 	}
 
 	dev_priv->rps.power = HIGH_POWER; /* force a reset */
-	gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
+	gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
 
 	rc6vids = 0;
 	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
@@ -4834,6 +4835,8 @@ static void valleyview_init_gt_powersave(struct drm_device *dev)
 			 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
 			 dev_priv->rps.min_freq);
 
+	dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
+
 	/* Preserve min/max settings in case of re-init */
 	if (dev_priv->rps.max_freq_softlimit == 0)
 		dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
@@ -4909,6 +4912,8 @@ static void cherryview_init_gt_powersave(struct drm_device *dev)
 		   dev_priv->rps.min_freq) & 1,
 		  "Odd GPU freq values\n");
 
+	dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
+
 	/* Preserve min/max settings in case of re-init */
 	if (dev_priv->rps.max_freq_softlimit == 0)
 		dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
@@ -5686,6 +5691,13 @@ static void intel_gen6_powersave_work(struct work_struct *work)
 		gen6_enable_rps(dev);
 		__gen6_update_ring_freq(dev);
 	}
+
+	WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq);
+	WARN_ON(dev_priv->rps.idle_freq > dev_priv->rps.max_freq);
+
+	WARN_ON(dev_priv->rps.efficient_freq < dev_priv->rps.min_freq);
+	WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq);
+
 	dev_priv->rps.enabled = true;
 
 	gen6_enable_rps_interrupts(dev);
-- 
2.1.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 2/7] drm/i915: Improved w/a for rps on Baytrail
  2015-03-18  9:48 RPS tuning for VLV and pageflips Chris Wilson
  2015-03-18  9:48 ` [PATCH v2 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle Chris Wilson
@ 2015-03-18  9:48 ` Chris Wilson
  2015-03-18  9:48 ` [PATCH v2 3/7] drm/i915: Use down ei for manual Baytrail RPS calculations Chris Wilson
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2015-03-18  9:48 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, Rodrigo Vivi

Rewrite commit 31685c258e0b0ad6aa486c5ec001382cf8a64212
Author: Deepak S <deepak.s@linux.intel.com>
Date:   Thu Jul 3 17:33:01 2014 -0400

    drm/i915/vlv: WA for Turbo and RC6 to work together.

Other than code clarity, the major improvement is to disable the extra
interrupts generated when idle.  However, the reclocking remains rather
slow under the new manual regime, in particular it fails to downclock as
quickly as desired. The second major improvement is that for certain
workloads, like games, we need to combine render+media activity counters
as the work of displaying the frame is split across the engines and both
need to be taken into account when deciding the global GPU frequency as
memory cycles are shared.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Deepak S <deepak.s@linux.intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Deepak S<deepak.s@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c      | 155 +++++++++++++----------------------
 drivers/gpu/drm/i915/i915_reg.h      |   4 +-
 drivers/gpu/drm/i915/intel_display.c |   2 +
 drivers/gpu/drm/i915/intel_drv.h     |   2 +
 drivers/gpu/drm/i915/intel_pm.c      |  22 ++++-
 5 files changed, 81 insertions(+), 104 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 49ad5fb82ace..8d8d33d068dd 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -997,129 +997,84 @@ static void notify_ring(struct drm_device *dev,
 	wake_up_all(&ring->irq_queue);
 }
 
-static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
-			    struct intel_rps_ei *rps_ei)
+static void vlv_c0_read(struct drm_i915_private *dev_priv,
+			struct intel_rps_ei *ei)
 {
-	u32 cz_ts, cz_freq_khz;
-	u32 render_count, media_count;
-	u32 elapsed_render, elapsed_media, elapsed_time;
-	u32 residency = 0;
-
-	cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
-	cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
-
-	render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
-	media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
-
-	if (rps_ei->cz_clock == 0) {
-		rps_ei->cz_clock = cz_ts;
-		rps_ei->render_c0 = render_count;
-		rps_ei->media_c0 = media_count;
-
-		return dev_priv->rps.cur_freq;
-	}
-
-	elapsed_time = cz_ts - rps_ei->cz_clock;
-	rps_ei->cz_clock = cz_ts;
+	ei->cz_clock = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
+	ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT);
+	ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
+}
 
-	elapsed_render = render_count - rps_ei->render_c0;
-	rps_ei->render_c0 = render_count;
+static bool vlv_c0_above(struct drm_i915_private *dev_priv,
+			 const struct intel_rps_ei *old,
+			 const struct intel_rps_ei *now,
+			 int threshold)
+{
+	u64 time, c0;
 
-	elapsed_media = media_count - rps_ei->media_c0;
-	rps_ei->media_c0 = media_count;
+	if (old->cz_clock == 0)
+		return false;
 
-	/* Convert all the counters into common unit of milli sec */
-	elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
-	elapsed_render /=  cz_freq_khz;
-	elapsed_media /= cz_freq_khz;
+	time = now->cz_clock - old->cz_clock;
+	time *= threshold * dev_priv->mem_freq;
 
-	/*
-	 * Calculate overall C0 residency percentage
-	 * only if elapsed time is non zero
+	/* Workload can be split between render + media, e.g. SwapBuffers
+	 * being blitted in X after being rendered in mesa. To account for
+	 * this we need to combine both engines into our activity counter.
 	 */
-	if (elapsed_time) {
-		residency =
-			((max(elapsed_render, elapsed_media) * 100)
-				/ elapsed_time);
-	}
+	c0 = now->render_c0 - old->render_c0;
+	c0 += now->media_c0 - old->media_c0;
+	c0 *= 100 * VLV_CZ_CLOCK_TO_MILLI_SEC * 4 / 1000;
 
-	return residency;
+	return c0 >= time;
 }
 
-/**
- * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
- * busy-ness calculated from C0 counters of render & media power wells
- * @dev_priv: DRM device private
- *
- */
-static int vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
+void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
 {
-	u32 residency_C0_up = 0, residency_C0_down = 0;
-	int new_delay, adj;
-
-	dev_priv->rps.ei_interrupt_count++;
-
-	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+	vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
+	dev_priv->rps.up_ei = dev_priv->rps.down_ei;
+	dev_priv->rps.ei_interrupt_count = 0;
+}
 
+static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
+{
+	struct intel_rps_ei now;
+	u32 events = 0;
 
-	if (dev_priv->rps.up_ei.cz_clock == 0) {
-		vlv_c0_residency(dev_priv, &dev_priv->rps.up_ei);
-		vlv_c0_residency(dev_priv, &dev_priv->rps.down_ei);
-		return dev_priv->rps.cur_freq;
-	}
+	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
+		return 0;
 
+	vlv_c0_read(dev_priv, &now);
+	if (now.cz_clock == 0)
+		return 0;
 
 	/*
 	 * To down throttle, C0 residency should be less than down threshold
 	 * for continous EI intervals. So calculate down EI counters
 	 * once in VLV_INT_COUNT_FOR_DOWN_EI
 	 */
-	if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
-
+	if (++dev_priv->rps.ei_interrupt_count >= VLV_INT_COUNT_FOR_DOWN_EI) {
+		pm_iir |= GEN6_PM_RP_DOWN_EI_EXPIRED;
 		dev_priv->rps.ei_interrupt_count = 0;
-
-		residency_C0_down = vlv_c0_residency(dev_priv,
-						     &dev_priv->rps.down_ei);
-	} else {
-		residency_C0_up = vlv_c0_residency(dev_priv,
-						   &dev_priv->rps.up_ei);
 	}
 
-	new_delay = dev_priv->rps.cur_freq;
-
-	adj = dev_priv->rps.last_adj;
-	/* C0 residency is greater than UP threshold. Increase Frequency */
-	if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
-		if (adj > 0)
-			adj *= 2;
-		else
-			adj = 1;
-
-		if (dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)
-			new_delay = dev_priv->rps.cur_freq + adj;
-
-		/*
-		 * For better performance, jump directly
-		 * to RPe if we're below it.
-		 */
-		if (new_delay < dev_priv->rps.efficient_freq)
-			new_delay = dev_priv->rps.efficient_freq;
+	if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
+		if (!vlv_c0_above(dev_priv,
+				  &dev_priv->rps.down_ei, &now,
+				  VLV_RP_DOWN_EI_THRESHOLD))
+			events |= GEN6_PM_RP_DOWN_THRESHOLD;
+		dev_priv->rps.down_ei = now;
+	}
 
-	} else if (!dev_priv->rps.ei_interrupt_count &&
-			(residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
-		if (adj < 0)
-			adj *= 2;
-		else
-			adj = -1;
-		/*
-		 * This means, C0 residency is less than down threshold over
-		 * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
-		 */
-		if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit)
-			new_delay = dev_priv->rps.cur_freq + adj;
+	if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
+		if (vlv_c0_above(dev_priv,
+				 &dev_priv->rps.up_ei, &now,
+				 VLV_RP_UP_EI_THRESHOLD))
+			events |= GEN6_PM_RP_UP_THRESHOLD;
+		dev_priv->rps.up_ei = now;
 	}
 
-	return new_delay;
+	return events;
 }
 
 static void gen6_pm_rps_work(struct work_struct *work)
@@ -1149,6 +1104,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
 
 	mutex_lock(&dev_priv->rps.hw_lock);
 
+	pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
+
 	adj = dev_priv->rps.last_adj;
 	if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
 		if (adj > 0)
@@ -1171,8 +1128,6 @@ static void gen6_pm_rps_work(struct work_struct *work)
 		else
 			new_delay = dev_priv->rps.min_freq_softlimit;
 		adj = 0;
-	} else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
-		new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
 	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
 		if (adj < 0)
 			adj *= 2;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index cc8ebabc488d..2d76c566d843 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -6220,8 +6220,8 @@ enum skl_disp_power_wells {
 
 #define GEN6_GT_GFX_RC6p			0x13810C
 #define GEN6_GT_GFX_RC6pp			0x138110
-#define VLV_RENDER_C0_COUNT_REG		0x138118
-#define VLV_MEDIA_C0_COUNT_REG			0x13811C
+#define VLV_RENDER_C0_COUNT			0x138118
+#define VLV_MEDIA_C0_COUNT			0x13811C
 
 #define GEN6_PCODE_MAILBOX			0x138124
 #define   GEN6_PCODE_READY			(1<<31)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 90b460cf2b57..f1c0295f69e5 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9201,6 +9201,8 @@ void intel_mark_busy(struct drm_device *dev)
 
 	intel_runtime_pm_get(dev_priv);
 	i915_update_gfx_val(dev_priv);
+	if (INTEL_INFO(dev)->gen >= 6)
+		gen6_rps_busy(dev_priv);
 	dev_priv->mm.busy = true;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index a1baaa188b0a..8bb18e507f5f 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1242,6 +1242,8 @@ void intel_disable_gt_powersave(struct drm_device *dev);
 void intel_suspend_gt_powersave(struct drm_device *dev);
 void intel_reset_gt_powersave(struct drm_device *dev);
 void gen6_update_ring_freq(struct drm_device *dev);
+void gen6_rps_busy(struct drm_i915_private *dev_priv);
+void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
 void gen6_rps_boost(struct drm_i915_private *dev_priv);
 void ilk_wm_get_hw_state(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index beab305e320d..68c9cc252d36 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4041,6 +4041,18 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
 	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
 }
 
+void gen6_rps_busy(struct drm_i915_private *dev_priv)
+{
+	mutex_lock(&dev_priv->rps.hw_lock);
+	if (dev_priv->rps.enabled) {
+		if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED))
+			gen6_rps_reset_ei(dev_priv);
+		I915_WRITE(GEN6_PMINTRMSK,
+			   gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
+	}
+	mutex_unlock(&dev_priv->rps.hw_lock);
+}
+
 void gen6_rps_idle(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
@@ -4052,15 +4064,21 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
 		else
 			gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
 		dev_priv->rps.last_adj = 0;
+		I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
 	}
 	mutex_unlock(&dev_priv->rps.hw_lock);
 }
 
 void gen6_rps_boost(struct drm_i915_private *dev_priv)
 {
+	u32 val;
+
 	mutex_lock(&dev_priv->rps.hw_lock);
-	if (dev_priv->rps.enabled) {
-		intel_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit);
+	val = dev_priv->rps.max_freq_softlimit;
+	if (dev_priv->rps.enabled &&
+	    dev_priv->mm.busy &&
+	    dev_priv->rps.cur_freq < val) {
+		intel_set_rps(dev_priv->dev, val);
 		dev_priv->rps.last_adj = 0;
 	}
 	mutex_unlock(&dev_priv->rps.hw_lock);
-- 
2.1.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 3/7] drm/i915: Use down ei for manual Baytrail RPS calculations
  2015-03-18  9:48 RPS tuning for VLV and pageflips Chris Wilson
  2015-03-18  9:48 ` [PATCH v2 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle Chris Wilson
  2015-03-18  9:48 ` [PATCH v2 2/7] drm/i915: Improved w/a for rps on Baytrail Chris Wilson
@ 2015-03-18  9:48 ` Chris Wilson
  2015-03-18 13:53   ` Daniel Vetter
  2015-03-18  9:48 ` [PATCH v2 4/7] drm/i915: Agressive downclocking on Baytrail Chris Wilson
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2015-03-18  9:48 UTC (permalink / raw)
  To: intel-gfx

Use both up/down manual ei calculations for symmetry and greater
flexibility for reclocking, instead of faking the down interrupt based
on a fixed integer number of up interrupts.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Deepak S<deepak.s@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h |  2 --
 drivers/gpu/drm/i915/i915_irq.c | 15 ++-------------
 drivers/gpu/drm/i915/i915_reg.h |  1 -
 drivers/gpu/drm/i915/intel_pm.c |  5 ++---
 4 files changed, 4 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a06536cfce6d..b156bc30c9c9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1031,8 +1031,6 @@ struct intel_gen6_power_mgmt {
 	u8 rp0_freq;		/* Non-overclocked max frequency. */
 	u32 cz_freq;
 
-	u32 ei_interrupt_count;
-
 	int last_adj;
 	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 8d8d33d068dd..6d8340d5a111 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1033,7 +1033,6 @@ void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
 {
 	vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
 	dev_priv->rps.up_ei = dev_priv->rps.down_ei;
-	dev_priv->rps.ei_interrupt_count = 0;
 }
 
 static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
@@ -1041,23 +1040,13 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
 	struct intel_rps_ei now;
 	u32 events = 0;
 
-	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
+	if ((pm_iir & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) == 0)
 		return 0;
 
 	vlv_c0_read(dev_priv, &now);
 	if (now.cz_clock == 0)
 		return 0;
 
-	/*
-	 * To down throttle, C0 residency should be less than down threshold
-	 * for continous EI intervals. So calculate down EI counters
-	 * once in VLV_INT_COUNT_FOR_DOWN_EI
-	 */
-	if (++dev_priv->rps.ei_interrupt_count >= VLV_INT_COUNT_FOR_DOWN_EI) {
-		pm_iir |= GEN6_PM_RP_DOWN_EI_EXPIRED;
-		dev_priv->rps.ei_interrupt_count = 0;
-	}
-
 	if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
 		if (!vlv_c0_above(dev_priv,
 				  &dev_priv->rps.down_ei, &now,
@@ -4254,7 +4243,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
 	/* Let's track the enabled rps events */
 	if (IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv))
 		/* WaGsvRC0ResidencyMethod:vlv */
-		dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
+		dev_priv->pm_rps_events = GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED;
 	else
 		dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 2d76c566d843..5b84ee686f99 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -673,7 +673,6 @@ enum skl_disp_power_wells {
 #define VLV_CZ_CLOCK_TO_MILLI_SEC		100000
 #define VLV_RP_UP_EI_THRESHOLD			90
 #define VLV_RP_DOWN_EI_THRESHOLD		70
-#define VLV_INT_COUNT_FOR_DOWN_EI		5
 
 /* vlv2 north clock has */
 #define CCK_FUSE_REG				0x8
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 68c9cc252d36..e18f0fd22cf2 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3922,11 +3922,10 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
 	u32 mask = 0;
 
 	if (val > dev_priv->rps.min_freq_softlimit)
-		mask |= GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
+		mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
 	if (val < dev_priv->rps.max_freq_softlimit)
-		mask |= GEN6_PM_RP_UP_THRESHOLD;
+		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
 
-	mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED);
 	mask &= dev_priv->pm_rps_events;
 
 	return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
-- 
2.1.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 4/7] drm/i915: Agressive downclocking on Baytrail
  2015-03-18  9:48 RPS tuning for VLV and pageflips Chris Wilson
                   ` (2 preceding siblings ...)
  2015-03-18  9:48 ` [PATCH v2 3/7] drm/i915: Use down ei for manual Baytrail RPS calculations Chris Wilson
@ 2015-03-18  9:48 ` Chris Wilson
  2015-03-18 11:15   ` Deepak S
  2015-03-18  9:48 ` [PATCH v2 5/7] drm/i915: Fix computation of last_adjustment for RPS autotuning Chris Wilson
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2015-03-18  9:48 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, Rodrigo Vivi

Reuse the same reclocking strategy for Baytrail as on its bigger brethren,
Sandybridge and Ivybridge. In particular, this makes the device quicker
to reclock (both up and down) though the tendency now is to downclock
more aggressively to compensate for the RPS boosts.

v2: Rebase

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Deepak S <deepak.s@linux.intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.h | 3 +++
 drivers/gpu/drm/i915/i915_irq.c | 4 ++--
 drivers/gpu/drm/i915/i915_reg.h | 2 --
 drivers/gpu/drm/i915/intel_pm.c | 7 ++++++-
 4 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b156bc30c9c9..afb552c1a4f8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1031,6 +1031,9 @@ struct intel_gen6_power_mgmt {
 	u8 rp0_freq;		/* Non-overclocked max frequency. */
 	u32 cz_freq;
 
+	u8 up_threshold; /* Current %busy required to uplock */
+	u8 down_threshold; /* Current %busy required to downclock */
+
 	int last_adj;
 	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 6d8340d5a111..58af8e239971 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1050,7 +1050,7 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
 	if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
 		if (!vlv_c0_above(dev_priv,
 				  &dev_priv->rps.down_ei, &now,
-				  VLV_RP_DOWN_EI_THRESHOLD))
+				  dev_priv->rps.down_threshold))
 			events |= GEN6_PM_RP_DOWN_THRESHOLD;
 		dev_priv->rps.down_ei = now;
 	}
@@ -1058,7 +1058,7 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
 	if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
 		if (vlv_c0_above(dev_priv,
 				 &dev_priv->rps.up_ei, &now,
-				 VLV_RP_UP_EI_THRESHOLD))
+				 dev_priv->rps.up_threshold))
 			events |= GEN6_PM_RP_UP_THRESHOLD;
 		dev_priv->rps.up_ei = now;
 	}
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 5b84ee686f99..c94c06b21052 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -671,8 +671,6 @@ enum skl_disp_power_wells {
 #define   FB_FMAX_VMIN_FREQ_LO_MASK		0xf8000000
 
 #define VLV_CZ_CLOCK_TO_MILLI_SEC		100000
-#define VLV_RP_UP_EI_THRESHOLD			90
-#define VLV_RP_DOWN_EI_THRESHOLD		70
 
 /* vlv2 north clock has */
 #define CCK_FUSE_REG				0x8
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index e18f0fd22cf2..8b16bb3ae09f 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3914,6 +3914,8 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
 		    GEN6_RP_DOWN_IDLE_AVG);
 
 	dev_priv->rps.power = new_power;
+	dev_priv->rps.up_threshold = threshold_up;
+	dev_priv->rps.down_threshold = threshold_down;
 	dev_priv->rps.last_adj = 0;
 }
 
@@ -3985,8 +3987,10 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
 		      "Odd GPU freq value\n"))
 		val &= ~1;
 
-	if (val != dev_priv->rps.cur_freq)
+	if (val != dev_priv->rps.cur_freq) {
 		vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
+		gen6_set_rps_thresholds(dev_priv, val);
+	}
 
 	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
 
@@ -4035,6 +4039,7 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
 				& GENFREQSTATUS) == 0, 100))
 		DRM_ERROR("timed out waiting for Punit\n");
 
+	gen6_set_rps_thresholds(dev_priv, val);
 	vlv_force_gfx_clock(dev_priv, false);
 
 	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
-- 
2.1.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 5/7] drm/i915: Fix computation of last_adjustment for RPS autotuning
  2015-03-18  9:48 RPS tuning for VLV and pageflips Chris Wilson
                   ` (3 preceding siblings ...)
  2015-03-18  9:48 ` [PATCH v2 4/7] drm/i915: Agressive downclocking on Baytrail Chris Wilson
@ 2015-03-18  9:48 ` Chris Wilson
  2015-03-18  9:48 ` [PATCH v2 6/7] drm/i915: Add i915_gem_request_unreference__unlocked Chris Wilson
  2015-03-18  9:48 ` [PATCH v2 7/7] drm/i915: Boost GPU frequency if we detect outstanding pageflips Chris Wilson
  6 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2015-03-18  9:48 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

The issue is that by computing the last_adj value after applying the
clamping, we can end up with a bogus value for feeding into the next RPS
autotuning step.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Deepak S <deepak.s@linux.intel.com>
Reviewed-by: Deepak S <deepak.s@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 58af8e239971..8bcadfe8f00c 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1096,21 +1096,20 @@ static void gen6_pm_rps_work(struct work_struct *work)
 	pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
 
 	adj = dev_priv->rps.last_adj;
+	new_delay = dev_priv->rps.cur_freq;
 	if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
 		if (adj > 0)
 			adj *= 2;
-		else {
-			/* CHV needs even encode values */
-			adj = IS_CHERRYVIEW(dev_priv->dev) ? 2 : 1;
-		}
-		new_delay = dev_priv->rps.cur_freq + adj;
-
+		else /* CHV needs even encode values */
+			adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1;
 		/*
 		 * For better performance, jump directly
 		 * to RPe if we're below it.
 		 */
-		if (new_delay < dev_priv->rps.efficient_freq)
+		if (new_delay < dev_priv->rps.efficient_freq - adj) {
 			new_delay = dev_priv->rps.efficient_freq;
+			adj = 0;
+		}
 	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
 		if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq)
 			new_delay = dev_priv->rps.efficient_freq;
@@ -1120,24 +1119,22 @@ static void gen6_pm_rps_work(struct work_struct *work)
 	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
 		if (adj < 0)
 			adj *= 2;
-		else {
-			/* CHV needs even encode values */
-			adj = IS_CHERRYVIEW(dev_priv->dev) ? -2 : -1;
-		}
-		new_delay = dev_priv->rps.cur_freq + adj;
+		else /* CHV needs even encode values */
+			adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1;
 	} else { /* unknown event */
-		new_delay = dev_priv->rps.cur_freq;
+		adj = 0;
 	}
 
+	dev_priv->rps.last_adj = adj;
+
 	/* sysfs frequency interfaces may have snuck in while servicing the
 	 * interrupt
 	 */
+	new_delay += adj;
 	new_delay = clamp_t(int, new_delay,
 			    dev_priv->rps.min_freq_softlimit,
 			    dev_priv->rps.max_freq_softlimit);
 
-	dev_priv->rps.last_adj = new_delay - dev_priv->rps.cur_freq;
-
 	intel_set_rps(dev_priv->dev, new_delay);
 
 	mutex_unlock(&dev_priv->rps.hw_lock);
-- 
2.1.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 6/7] drm/i915: Add i915_gem_request_unreference__unlocked
  2015-03-18  9:48 RPS tuning for VLV and pageflips Chris Wilson
                   ` (4 preceding siblings ...)
  2015-03-18  9:48 ` [PATCH v2 5/7] drm/i915: Fix computation of last_adjustment for RPS autotuning Chris Wilson
@ 2015-03-18  9:48 ` Chris Wilson
  2015-03-18  9:48 ` [PATCH v2 7/7] drm/i915: Boost GPU frequency if we detect outstanding pageflips Chris Wilson
  6 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2015-03-18  9:48 UTC (permalink / raw)
  To: intel-gfx

We were missing a convenience stub to acquire the right mutex whilst
dropping the request, so add it.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h | 13 +++++++++++++
 drivers/gpu/drm/i915/i915_gem.c |  8 ++------
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index afb552c1a4f8..eaf21605738f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2143,6 +2143,19 @@ i915_gem_request_unreference(struct drm_i915_gem_request *req)
 	kref_put(&req->ref, i915_gem_request_free);
 }
 
+static inline void
+i915_gem_request_unreference__unlocked(struct drm_i915_gem_request *req)
+{
+	if (req && !atomic_add_unless(&req->ref.refcount, -1, 1)) {
+		struct drm_device *dev = req->ring->dev;
+
+		mutex_lock(&dev->struct_mutex);
+		if (likely(atomic_dec_and_test(&req->ref.refcount)))
+			i915_gem_request_free(&req->ref);
+		mutex_unlock(&dev->struct_mutex);
+	}
+}
+
 static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
 					   struct drm_i915_gem_request *src)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e2876bf83da7..3df65fc2aa74 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2957,9 +2957,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	ret = __i915_wait_request(req, reset_counter, true,
 				  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
 				  file->driver_priv);
-	mutex_lock(&dev->struct_mutex);
-	i915_gem_request_unreference(req);
-	mutex_unlock(&dev->struct_mutex);
+	i915_gem_request_unreference__unlocked(req);
 	return ret;
 
 out:
@@ -4146,9 +4144,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 	if (ret == 0)
 		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
 
-	mutex_lock(&dev->struct_mutex);
-	i915_gem_request_unreference(target);
-	mutex_unlock(&dev->struct_mutex);
+	i915_gem_request_unreference__unlocked(target);
 
 	return ret;
 }
-- 
2.1.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 7/7] drm/i915: Boost GPU frequency if we detect outstanding pageflips
  2015-03-18  9:48 RPS tuning for VLV and pageflips Chris Wilson
                   ` (5 preceding siblings ...)
  2015-03-18  9:48 ` [PATCH v2 6/7] drm/i915: Add i915_gem_request_unreference__unlocked Chris Wilson
@ 2015-03-18  9:48 ` Chris Wilson
  2015-03-18 11:16   ` Deepak S
  6 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2015-03-18  9:48 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

If we hit a vblank and see that we have a pageflip queued but not yet
processed, ensure that the GPU is running at maximum in order to clear
the backlog. Pageflips are only queued for the following vblank; if we
miss it, there will be a visible stutter. Boosting the GPU frequency
doesn't prevent us from missing the target vblank, but it should help
the subsequent frames hit theirs.

v2: Reorder vblank vs flip-complete so that we only check for a missed
flip after processing the completion events, and avoid spurious boosts.

v3: Rename missed_vblank
v4: Rebase
v5: Cancel the outstanding work in runtime suspend
v6: Rebase
v7: Rebase required fixing

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Deepak S<deepak.s@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_display.c | 11 ++++++++---
 drivers/gpu/drm/i915/intel_drv.h     |  2 ++
 drivers/gpu/drm/i915/intel_pm.c      | 35 +++++++++++++++++++++++++++++++++++
 3 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index f1c0295f69e5..0efb19a9b9a5 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9852,6 +9852,7 @@ void intel_check_page_flip(struct drm_device *dev, int pipe)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_unpin_work *work;
 
 	WARN_ON(!in_interrupt());
 
@@ -9859,12 +9860,16 @@ void intel_check_page_flip(struct drm_device *dev, int pipe)
 		return;
 
 	spin_lock(&dev->event_lock);
-	if (intel_crtc->unpin_work && __intel_pageflip_stall_check(dev, crtc)) {
+	work = intel_crtc->unpin_work;
+	if (work != NULL && __intel_pageflip_stall_check(dev, crtc)) {
 		WARN_ONCE(1, "Kicking stuck page flip: queued at %d, now %d\n",
-			 intel_crtc->unpin_work->flip_queued_vblank,
-			 drm_vblank_count(dev, pipe));
+			 work->flip_queued_vblank, drm_vblank_count(dev, pipe));
 		page_flip_completed(intel_crtc);
+		work = NULL;
 	}
+	if (work != NULL &&
+	    drm_vblank_count(dev, pipe) - work->flip_queued_vblank > 1)
+		intel_queue_rps_boost_for_request(dev, work->flip_queued_req);
 	spin_unlock(&dev->event_lock);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 8bb18e507f5f..d6e7ac8c2284 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1246,6 +1246,8 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv);
 void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
 void gen6_rps_boost(struct drm_i915_private *dev_priv);
+void intel_queue_rps_boost_for_request(struct drm_device *dev,
+				       struct drm_i915_gem_request *rq);
 void ilk_wm_get_hw_state(struct drm_device *dev);
 void skl_wm_get_hw_state(struct drm_device *dev);
 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 8b16bb3ae09f..e8111be32ed0 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6751,6 +6751,41 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
 		return val / GT_FREQUENCY_MULTIPLIER;
 }
 
+struct request_boost {
+	struct work_struct work;
+	struct drm_i915_gem_request *rq;
+};
+
+static void __intel_rps_boost_work(struct work_struct *work)
+{
+	struct request_boost *boost = container_of(work, struct request_boost, work);
+
+	if (!i915_gem_request_completed(boost->rq, true))
+		gen6_rps_boost(to_i915(boost->rq->ring->dev));
+
+	i915_gem_request_unreference__unlocked(boost->rq);
+	kfree(boost);
+}
+
+void intel_queue_rps_boost_for_request(struct drm_device *dev,
+				       struct drm_i915_gem_request *rq)
+{
+	struct request_boost *boost;
+
+	if (rq == NULL || INTEL_INFO(dev)->gen < 6)
+		return;
+
+	boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
+	if (boost == NULL)
+		return;
+
+	i915_gem_request_reference(rq);
+	boost->rq = rq;
+	 
+	INIT_WORK(&boost->work, __intel_rps_boost_work);
+	queue_work(to_i915(dev)->wq, &boost->work);
+}
+
 void intel_pm_setup(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-- 
2.1.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle
  2015-03-18  9:48 ` [PATCH v2 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle Chris Wilson
@ 2015-03-18 11:12   ` Deepak S
  0 siblings, 0 replies; 14+ messages in thread
From: Deepak S @ 2015-03-18 11:12 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx



On Wednesday 18 March 2015 03:18 PM, Chris Wilson wrote:
> When we idle, we set the GPU frequency to the hardware minimum (not user
> minimum). We introduce a new variable to distinguish between the
> different roles, and to allow easy tuning of the idle frequency without
> impacting over aspects of RPS. Setting the minimum frequency should be a
> safety blanket as the pcu on the GPU should be power gating itself
> anyway. However, in order for us to do set the absolute minimum
> frequency, we need to relax a few of our assertions that we do not
> exceed the user limits.
>
> v2: Add idle_freq
> v3: Init idle_freq for vlv and add a bunch of WARNs
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Deepak S <deepak.s@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c |  6 +++++
>   drivers/gpu/drm/i915/i915_drv.h     |  1 +
>   drivers/gpu/drm/i915/intel_pm.c     | 44 +++++++++++++++++++++++--------------
>   3 files changed, 35 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index aaf756047a20..007c7d7d8295 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1200,6 +1200,9 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
>   
>   		seq_printf(m, "Max overclocked frequency: %dMHz\n",
>   			   intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));
> +
> +		seq_printf(m, "Idle freq: %d MHz\n",
> +			   intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq));
>   	} else if (IS_VALLEYVIEW(dev)) {
>   		u32 freq_sts;
>   
> @@ -1214,6 +1217,9 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
>   		seq_printf(m, "min GPU freq: %d MHz\n",
>   			   intel_gpu_freq(dev_priv, dev_priv->rps.min_freq));
>   
> +		seq_printf(m, "idle GPU freq: %d MHz\n",
> +			   intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq));
> +
>   		seq_printf(m,
>   			   "efficient (RPe) frequency: %d MHz\n",
>   			   intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq));
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 81f60b48def2..a06536cfce6d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1025,6 +1025,7 @@ struct intel_gen6_power_mgmt {
>   	u8 max_freq_softlimit;	/* Max frequency permitted by the driver */
>   	u8 max_freq;		/* Maximum frequency, RP0 if not overclocking */
>   	u8 min_freq;		/* AKA RPn. Minimum frequency */
> +	u8 idle_freq;		/* Frequency to request when we are idle */
>   	u8 efficient_freq;	/* AKA RPe. Pre-determined balanced frequency */
>   	u8 rp1_freq;		/* "less than" RP0 power/freqency */
>   	u8 rp0_freq;		/* Non-overclocked max frequency. */
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 288c9d24098e..beab305e320d 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3855,9 +3855,9 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
>   		break;
>   	}
>   	/* Max/min bins are special */
> -	if (val == dev_priv->rps.min_freq_softlimit)
> +	if (val <= dev_priv->rps.min_freq_softlimit)
>   		new_power = LOW_POWER;
> -	if (val == dev_priv->rps.max_freq_softlimit)
> +	if (val >= dev_priv->rps.max_freq_softlimit)
>   		new_power = HIGH_POWER;
>   	if (new_power == dev_priv->rps.power)
>   		return;
> @@ -3940,8 +3940,8 @@ static void gen6_set_rps(struct drm_device *dev, u8 val)
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   
>   	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
> -	WARN_ON(val > dev_priv->rps.max_freq_softlimit);
> -	WARN_ON(val < dev_priv->rps.min_freq_softlimit);
> +	WARN_ON(val > dev_priv->rps.max_freq);
> +	WARN_ON(val < dev_priv->rps.min_freq);
>   
>   	/* min/max delay may still have been modified so be sure to
>   	 * write the limits value.
> @@ -3979,8 +3979,8 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   
>   	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
> -	WARN_ON(val > dev_priv->rps.max_freq_softlimit);
> -	WARN_ON(val < dev_priv->rps.min_freq_softlimit);
> +	WARN_ON(val > dev_priv->rps.max_freq);
> +	WARN_ON(val < dev_priv->rps.min_freq);
>   
>   	if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1),
>   		      "Odd GPU freq value\n"))
> @@ -4007,10 +4007,11 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
>   static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>   {
>   	struct drm_device *dev = dev_priv->dev;
> +	u32 val = dev_priv->rps.idle_freq;
>   
>   	/* CHV and latest VLV don't need to force the gfx clock */
>   	if (IS_CHERRYVIEW(dev) || dev->pdev->revision >= 0xd) {
> -		valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
> +		valleyview_set_rps(dev_priv->dev, val);
>   		return;
>   	}
>   
> @@ -4018,7 +4019,7 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>   	 * When we are idle.  Drop to min voltage state.
>   	 */
>   
> -	if (dev_priv->rps.cur_freq <= dev_priv->rps.min_freq_softlimit)
> +	if (dev_priv->rps.cur_freq <= val)
>   		return;
>   
>   	/* Mask turbo interrupt so that they will not come in between */
> @@ -4027,10 +4028,9 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>   
>   	vlv_force_gfx_clock(dev_priv, true);
>   
> -	dev_priv->rps.cur_freq = dev_priv->rps.min_freq_softlimit;
> +	dev_priv->rps.cur_freq = val;
>   
> -	vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ,
> -					dev_priv->rps.min_freq_softlimit);
> +	vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
>   
>   	if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS))
>   				& GENFREQSTATUS) == 0, 100))
> @@ -4038,8 +4038,7 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>   
>   	vlv_force_gfx_clock(dev_priv, false);
>   
> -	I915_WRITE(GEN6_PMINTRMSK,
> -		   gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
> +	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
>   }
>   
>   void gen6_rps_idle(struct drm_i915_private *dev_priv)
> @@ -4051,7 +4050,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
>   		if (IS_VALLEYVIEW(dev))
>   			vlv_set_rps_idle(dev_priv);
>   		else
> -			gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
> +			gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
>   		dev_priv->rps.last_adj = 0;
>   	}
>   	mutex_unlock(&dev_priv->rps.hw_lock);
> @@ -4209,6 +4208,8 @@ static void gen6_init_rps_frequencies(struct drm_device *dev)
>   					dev_priv->rps.max_freq);
>   	}
>   
> +	dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
> +
>   	/* Preserve min/max settings in case of re-init */
>   	if (dev_priv->rps.max_freq_softlimit == 0)
>   		dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
> @@ -4375,7 +4376,7 @@ static void gen8_enable_rps(struct drm_device *dev)
>   	/* 6: Ring frequency + overclocking (our driver does this later */
>   
>   	dev_priv->rps.power = HIGH_POWER; /* force a reset */
> -	gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
> +	gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
>   
>   	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>   }
> @@ -4469,7 +4470,7 @@ static void gen6_enable_rps(struct drm_device *dev)
>   	}
>   
>   	dev_priv->rps.power = HIGH_POWER; /* force a reset */
> -	gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
> +	gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
>   
>   	rc6vids = 0;
>   	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
> @@ -4834,6 +4835,8 @@ static void valleyview_init_gt_powersave(struct drm_device *dev)
>   			 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
>   			 dev_priv->rps.min_freq);
>   
> +	dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
> +
>   	/* Preserve min/max settings in case of re-init */
>   	if (dev_priv->rps.max_freq_softlimit == 0)
>   		dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
> @@ -4909,6 +4912,8 @@ static void cherryview_init_gt_powersave(struct drm_device *dev)
>   		   dev_priv->rps.min_freq) & 1,
>   		  "Odd GPU freq values\n");
>   
> +	dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
> +
>   	/* Preserve min/max settings in case of re-init */
>   	if (dev_priv->rps.max_freq_softlimit == 0)
>   		dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
> @@ -5686,6 +5691,13 @@ static void intel_gen6_powersave_work(struct work_struct *work)
>   		gen6_enable_rps(dev);
>   		__gen6_update_ring_freq(dev);
>   	}
> +
> +	WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq);
> +	WARN_ON(dev_priv->rps.idle_freq > dev_priv->rps.max_freq);
> +
> +	WARN_ON(dev_priv->rps.efficient_freq < dev_priv->rps.min_freq);
> +	WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq);
> +
>   	dev_priv->rps.enabled = true;
>   
>   	gen6_enable_rps_interrupts(dev);
>
Looks fine now :)

Reviewed-by: Deepak S<deepak.s@linux.intel.com>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 4/7] drm/i915: Agressive downclocking on Baytrail
  2015-03-18  9:48 ` [PATCH v2 4/7] drm/i915: Agressive downclocking on Baytrail Chris Wilson
@ 2015-03-18 11:15   ` Deepak S
  2015-03-18 11:23     ` Chris Wilson
  0 siblings, 1 reply; 14+ messages in thread
From: Deepak S @ 2015-03-18 11:15 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Daniel Vetter, Rodrigo Vivi



On Wednesday 18 March 2015 03:18 PM, Chris Wilson wrote:
> Reuse the same reclocking strategy for Baytail as on its bigger brethren,
> Sandybridge and Ivybridge. In particular, this makes the device quicker
> to reclock (both up and down) though the tendency now is to downclock
> more aggressively to compensate for the RPS boosts.
>
> v2: Rebase
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Deepak S <deepak.s@linux.intel.com>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> ---
>   drivers/gpu/drm/i915/i915_drv.h | 3 +++
>   drivers/gpu/drm/i915/i915_irq.c | 4 ++--
>   drivers/gpu/drm/i915/i915_reg.h | 2 --
>   drivers/gpu/drm/i915/intel_pm.c | 7 ++++++-
>   4 files changed, 11 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index b156bc30c9c9..afb552c1a4f8 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1031,6 +1031,9 @@ struct intel_gen6_power_mgmt {
>   	u8 rp0_freq;		/* Non-overclocked max frequency. */
>   	u32 cz_freq;
>   
> +	u8 up_threshold; /* Current %busy required to uplock */
> +	u8 down_threshold; /* Current %busy required to downclock */
> +
>   	int last_adj;
>   	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
>   
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 6d8340d5a111..58af8e239971 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1050,7 +1050,7 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
>   	if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
>   		if (!vlv_c0_above(dev_priv,
>   				  &dev_priv->rps.down_ei, &now,
> -				  VLV_RP_DOWN_EI_THRESHOLD))
> +				  dev_priv->rps.down_threshold))
>   			events |= GEN6_PM_RP_DOWN_THRESHOLD;
>   		dev_priv->rps.down_ei = now;
>   	}
> @@ -1058,7 +1058,7 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
>   	if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
>   		if (vlv_c0_above(dev_priv,
>   				 &dev_priv->rps.up_ei, &now,
> -				 VLV_RP_UP_EI_THRESHOLD))
> +				 dev_priv->rps.up_threshold))
>   			events |= GEN6_PM_RP_UP_THRESHOLD;
>   		dev_priv->rps.up_ei = now;
>   	}
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 5b84ee686f99..c94c06b21052 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -671,8 +671,6 @@ enum skl_disp_power_wells {
>   #define   FB_FMAX_VMIN_FREQ_LO_MASK		0xf8000000
>   
>   #define VLV_CZ_CLOCK_TO_MILLI_SEC		100000
> -#define VLV_RP_UP_EI_THRESHOLD			90
> -#define VLV_RP_DOWN_EI_THRESHOLD		70
>   
>   /* vlv2 north clock has */
>   #define CCK_FUSE_REG				0x8
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index e18f0fd22cf2..8b16bb3ae09f 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3914,6 +3914,8 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
>   		    GEN6_RP_DOWN_IDLE_AVG);
>   
>   	dev_priv->rps.power = new_power;
> +	dev_priv->rps.up_threshold = threshold_up;
> +	dev_priv->rps.down_threshold = threshold_down;
>   	dev_priv->rps.last_adj = 0;
>   }
>   
> @@ -3985,8 +3987,10 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
>   		      "Odd GPU freq value\n"))
>   		val &= ~1;
>   
> -	if (val != dev_priv->rps.cur_freq)
> +	if (val != dev_priv->rps.cur_freq) {
>   		vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
> +		gen6_set_rps_thresholds(dev_priv, val);

I think gen6_set_rps_thresholds should be under baytrail specific with platform check?

> +	}
>   
>   	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
>   
> @@ -4035,6 +4039,7 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>   				& GENFREQSTATUS) == 0, 100))
>   		DRM_ERROR("timed out waiting for Punit\n");
>   
> +	gen6_set_rps_thresholds(dev_priv, val);
>   	vlv_force_gfx_clock(dev_priv, false);
>   
>   	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 7/7] drm/i915: Boost GPU frequency if we detect outstanding pageflips
  2015-03-18  9:48 ` [PATCH v2 7/7] drm/i915: Boost GPU frequency if we detect outstanding pageflips Chris Wilson
@ 2015-03-18 11:16   ` Deepak S
  0 siblings, 0 replies; 14+ messages in thread
From: Deepak S @ 2015-03-18 11:16 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Daniel Vetter



On Wednesday 18 March 2015 03:18 PM, Chris Wilson wrote:
> If we hit a vblank and see that have a pageflip queue but not yet
> processed, ensure that the GPU is running at maximum in order to clear
> the backlog. Pageflips are only queued for the following vblank, if we
> miss it, there will be a visible stutter. Boosting the GPU frequency
> doesn't prevent us from missing the target vblank, but it should help
> the subsequent frames hitting theirs.
>
> v2: Reorder vblank vs flip-complete so that we only check for a missed
> flip after processing the completion events, and avoid spurious boosts.
>
> v3: Rename missed_vblank
> v4: Rebase
> v5: Cancel the outstanding work in runtime suspend
> v6: Rebase
> v7: Rebase required fixing
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Cc: Deepak S<deepak.s@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/intel_display.c | 11 ++++++++---
>   drivers/gpu/drm/i915/intel_drv.h     |  2 ++
>   drivers/gpu/drm/i915/intel_pm.c      | 35 +++++++++++++++++++++++++++++++++++
>   3 files changed, 45 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index f1c0295f69e5..0efb19a9b9a5 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -9852,6 +9852,7 @@ void intel_check_page_flip(struct drm_device *dev, int pipe)
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
>   	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> +	struct intel_unpin_work *work;
>   
>   	WARN_ON(!in_interrupt());
>   
> @@ -9859,12 +9860,16 @@ void intel_check_page_flip(struct drm_device *dev, int pipe)
>   		return;
>   
>   	spin_lock(&dev->event_lock);
> -	if (intel_crtc->unpin_work && __intel_pageflip_stall_check(dev, crtc)) {
> +	work = intel_crtc->unpin_work;
> +	if (work != NULL && __intel_pageflip_stall_check(dev, crtc)) {
>   		WARN_ONCE(1, "Kicking stuck page flip: queued at %d, now %d\n",
> -			 intel_crtc->unpin_work->flip_queued_vblank,
> -			 drm_vblank_count(dev, pipe));
> +			 work->flip_queued_vblank, drm_vblank_count(dev, pipe));
>   		page_flip_completed(intel_crtc);
> +		work = NULL;
>   	}
> +	if (work != NULL &&
> +	    drm_vblank_count(dev, pipe) - work->flip_queued_vblank > 1)
> +		intel_queue_rps_boost_for_request(dev, work->flip_queued_req);
>   	spin_unlock(&dev->event_lock);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 8bb18e507f5f..d6e7ac8c2284 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1246,6 +1246,8 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv);
>   void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
>   void gen6_rps_idle(struct drm_i915_private *dev_priv);
>   void gen6_rps_boost(struct drm_i915_private *dev_priv);
> +void intel_queue_rps_boost_for_request(struct drm_device *dev,
> +				       struct drm_i915_gem_request *rq);
>   void ilk_wm_get_hw_state(struct drm_device *dev);
>   void skl_wm_get_hw_state(struct drm_device *dev);
>   void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 8b16bb3ae09f..e8111be32ed0 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -6751,6 +6751,41 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
>   		return val / GT_FREQUENCY_MULTIPLIER;
>   }
>   
> +struct request_boost {
> +	struct work_struct work;
> +	struct drm_i915_gem_request *rq;
> +};
> +
> +static void __intel_rps_boost_work(struct work_struct *work)
> +{
> +	struct request_boost *boost = container_of(work, struct request_boost, work);
> +
> +	if (!i915_gem_request_completed(boost->rq, true))
> +		gen6_rps_boost(to_i915(boost->rq->ring->dev));
> +
> +	i915_gem_request_unreference__unlocked(boost->rq);
> +	kfree(boost);
> +}
> +
> +void intel_queue_rps_boost_for_request(struct drm_device *dev,
> +				       struct drm_i915_gem_request *rq)
> +{
> +	struct request_boost *boost;
> +
> +	if (rq == NULL || INTEL_INFO(dev)->gen < 6)
> +		return;
> +
> +	boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
> +	if (boost == NULL)
> +		return;
> +
> +	i915_gem_request_reference(rq);
> +	boost->rq = rq;
> +	
> +	INIT_WORK(&boost->work, __intel_rps_boost_work);
> +	queue_work(to_i915(dev)->wq, &boost->work);
> +}
> +
>   void intel_pm_setup(struct drm_device *dev)
>   {
>   	struct drm_i915_private *dev_priv = dev->dev_private;

Patch looks fine

Reviewed-by: Deepak S<deepak.s@linux.intel.com>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 4/7] drm/i915: Agressive downclocking on Baytrail
  2015-03-18 11:15   ` Deepak S
@ 2015-03-18 11:23     ` Chris Wilson
  2015-03-18 11:27       ` Deepak S
  0 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2015-03-18 11:23 UTC (permalink / raw)
  To: Deepak S; +Cc: Daniel Vetter, intel-gfx, Rodrigo Vivi

On Wed, Mar 18, 2015 at 04:45:08PM +0530, Deepak S wrote:
> >+	if (val != dev_priv->rps.cur_freq) {
> >  		vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
> >+		gen6_set_rps_thresholds(dev_priv, val);
> 
> I think gen6_set_rps_thresholds should be under baytrail specific with platform check?

The only difference for cherryview is that it doesn't use
GEN6_RP_MEDIA_TURBO. Was that intentional?

The whole idea is that RPS should be autotuning for different workloads,
but those metrics are equivalent across GPU.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 4/7] drm/i915: Agressive downclocking on Baytrail
  2015-03-18 11:23     ` Chris Wilson
@ 2015-03-18 11:27       ` Deepak S
  0 siblings, 0 replies; 14+ messages in thread
From: Deepak S @ 2015-03-18 11:27 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx, Ville Syrjälä,
	Rodrigo Vivi, Daniel Vetter



On Wednesday 18 March 2015 04:53 PM, Chris Wilson wrote:
> On Wed, Mar 18, 2015 at 04:45:08PM +0530, Deepak S wrote:
>>> +	if (val != dev_priv->rps.cur_freq) {
>>>   		vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
>>> +		gen6_set_rps_thresholds(dev_priv, val);
>> I think gen6_set_rps_thresholds should be under baytrail specific with platform check?
> The only difference for cherryview is that it doesn't use
> GEN6_RP_MEDIA_TURBO. Was that intentional?
>
> The whole idea is that RPS should be autotuning for different workloads,
> but those metrics are equivalent across GPU.
> -Chris
>
At least based on the spec, GEN6_RP_MEDIA_TURBO was left out of CHV. I have yet to look at the latest spec.

Also, most of the RP registers for CHV fall under the common well. I hope re-adjusting the rps_threshold will
not cause power issues, since we have to wake up both RENDER/MEDIA to access the registers.

-Deepak

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 3/7] drm/i915: Use down ei for manual Baytrail RPS calculations
  2015-03-18  9:48 ` [PATCH v2 3/7] drm/i915: Use down ei for manual Baytrail RPS calculations Chris Wilson
@ 2015-03-18 13:53   ` Daniel Vetter
  0 siblings, 0 replies; 14+ messages in thread
From: Daniel Vetter @ 2015-03-18 13:53 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Wed, Mar 18, 2015 at 09:48:23AM +0000, Chris Wilson wrote:
> Use both up/down manual ei calculations for symmetry and greater
> flexibility for reclocking, instead of faking the down interrupt based
> on a fixed integer number of up interrupts.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Deepak S<deepak.s@linux.intel.com>

Merged up to this patch, thanks.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_drv.h |  2 --
>  drivers/gpu/drm/i915/i915_irq.c | 15 ++-------------
>  drivers/gpu/drm/i915/i915_reg.h |  1 -
>  drivers/gpu/drm/i915/intel_pm.c |  5 ++---
>  4 files changed, 4 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a06536cfce6d..b156bc30c9c9 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1031,8 +1031,6 @@ struct intel_gen6_power_mgmt {
>  	u8 rp0_freq;		/* Non-overclocked max frequency. */
>  	u32 cz_freq;
>  
> -	u32 ei_interrupt_count;
> -
>  	int last_adj;
>  	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
>  
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 8d8d33d068dd..6d8340d5a111 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1033,7 +1033,6 @@ void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
>  {
>  	vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
>  	dev_priv->rps.up_ei = dev_priv->rps.down_ei;
> -	dev_priv->rps.ei_interrupt_count = 0;
>  }
>  
>  static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
> @@ -1041,23 +1040,13 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
>  	struct intel_rps_ei now;
>  	u32 events = 0;
>  
> -	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
> +	if ((pm_iir & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) == 0)
>  		return 0;
>  
>  	vlv_c0_read(dev_priv, &now);
>  	if (now.cz_clock == 0)
>  		return 0;
>  
> -	/*
> -	 * To down throttle, C0 residency should be less than down threshold
> -	 * for continous EI intervals. So calculate down EI counters
> -	 * once in VLV_INT_COUNT_FOR_DOWN_EI
> -	 */
> -	if (++dev_priv->rps.ei_interrupt_count >= VLV_INT_COUNT_FOR_DOWN_EI) {
> -		pm_iir |= GEN6_PM_RP_DOWN_EI_EXPIRED;
> -		dev_priv->rps.ei_interrupt_count = 0;
> -	}
> -
>  	if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
>  		if (!vlv_c0_above(dev_priv,
>  				  &dev_priv->rps.down_ei, &now,
> @@ -4254,7 +4243,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
>  	/* Let's track the enabled rps events */
>  	if (IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv))
>  		/* WaGsvRC0ResidencyMethod:vlv */
> -		dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
> +		dev_priv->pm_rps_events = GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED;
>  	else
>  		dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
>  
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 2d76c566d843..5b84ee686f99 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -673,7 +673,6 @@ enum skl_disp_power_wells {
>  #define VLV_CZ_CLOCK_TO_MILLI_SEC		100000
>  #define VLV_RP_UP_EI_THRESHOLD			90
>  #define VLV_RP_DOWN_EI_THRESHOLD		70
> -#define VLV_INT_COUNT_FOR_DOWN_EI		5
>  
>  /* vlv2 north clock has */
>  #define CCK_FUSE_REG				0x8
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 68c9cc252d36..e18f0fd22cf2 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3922,11 +3922,10 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
>  	u32 mask = 0;
>  
>  	if (val > dev_priv->rps.min_freq_softlimit)
> -		mask |= GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
> +		mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
>  	if (val < dev_priv->rps.max_freq_softlimit)
> -		mask |= GEN6_PM_RP_UP_THRESHOLD;
> +		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
>  
> -	mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED);
>  	mask &= dev_priv->pm_rps_events;
>  
>  	return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
> -- 
> 2.1.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2015-03-18 13:51 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-18  9:48 RPS tuning for VLV and pageflips Chris Wilson
2015-03-18  9:48 ` [PATCH v2 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle Chris Wilson
2015-03-18 11:12   ` Deepak S
2015-03-18  9:48 ` [PATCH v2 2/7] drm/i915: Improved w/a for rps on Baytrail Chris Wilson
2015-03-18  9:48 ` [PATCH v2 3/7] drm/i915: Use down ei for manual Baytrail RPS calculations Chris Wilson
2015-03-18 13:53   ` Daniel Vetter
2015-03-18  9:48 ` [PATCH v2 4/7] drm/i915: Agressive downclocking on Baytrail Chris Wilson
2015-03-18 11:15   ` Deepak S
2015-03-18 11:23     ` Chris Wilson
2015-03-18 11:27       ` Deepak S
2015-03-18  9:48 ` [PATCH v2 5/7] drm/i915: Fix computation of last_adjustment for RPS autotuning Chris Wilson
2015-03-18  9:48 ` [PATCH v2 6/7] drm/i915: Add i915_gem_request_unreference__unlocked Chris Wilson
2015-03-18  9:48 ` [PATCH v2 7/7] drm/i915: Boost GPU frequency if we detect outstanding pageflips Chris Wilson
2015-03-18 11:16   ` Deepak S

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.