* [PATCH 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle
@ 2015-03-06 15:06 Chris Wilson
2015-03-06 15:06 ` [PATCH 2/7] drm/i915: Fix computation of last_adjustment for RPS autotuning Chris Wilson
` (8 more replies)
0 siblings, 9 replies; 27+ messages in thread
From: Chris Wilson @ 2015-03-06 15:06 UTC (permalink / raw)
To: intel-gfx
When we idle, we set the GPU frequency to the hardware minimum (not user
minimum). We introduce a new variable to distinguish between the
different roles, and to allow easy tuning of the idle frequency without
impacting over aspects of RPS. Setting the minimum frequency should be a
safety blanket as the pcu on the GPU should be power gating itself
anyway. However, in order for us to do set the absolute minimum
frequency, we need to relax a few of our assertions that we do not
exceed the user limits.
v2: Add idle_freq
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_debugfs.c | 6 ++++++
drivers/gpu/drm/i915/i915_drv.h | 1 +
drivers/gpu/drm/i915/intel_pm.c | 35 +++++++++++++++++++----------------
3 files changed, 26 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index c3a5322bc7f4..cc83cc0ff823 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1219,6 +1219,9 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
seq_printf(m, "Max overclocked frequency: %dMHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));
+
+ seq_printf(m, "Idle freq: %d MHz\n",
+ intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq));
} else if (IS_VALLEYVIEW(dev)) {
u32 freq_sts;
@@ -1233,6 +1236,9 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
seq_printf(m, "min GPU freq: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.min_freq));
+ seq_printf(m, "idle GPU freq: %d MHz\n",
+ intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq));
+
seq_printf(m,
"efficient (RPe) frequency: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq));
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 18a6e56c7c94..73292a183492 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1022,6 +1022,7 @@ struct intel_gen6_power_mgmt {
u8 max_freq_softlimit; /* Max frequency permitted by the driver */
u8 max_freq; /* Maximum frequency, RP0 if not overclocking */
u8 min_freq; /* AKA RPn. Minimum frequency */
+ u8 idle_freq; /* Frequency to request when we are idle */
u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */
u8 rp1_freq; /* "less than" RP0 power/freqency */
u8 rp0_freq; /* Non-overclocked max frequency. */
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 6b277f9f7ffa..e53724516852 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3711,9 +3711,9 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
break;
}
/* Max/min bins are special */
- if (val == dev_priv->rps.min_freq_softlimit)
+ if (val <= dev_priv->rps.min_freq_softlimit)
new_power = LOW_POWER;
- if (val == dev_priv->rps.max_freq_softlimit)
+ if (val >= dev_priv->rps.max_freq_softlimit)
new_power = HIGH_POWER;
if (new_power == dev_priv->rps.power)
return;
@@ -3802,8 +3802,8 @@ static void gen6_set_rps(struct drm_device *dev, u8 val)
struct drm_i915_private *dev_priv = dev->dev_private;
WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
- WARN_ON(val > dev_priv->rps.max_freq_softlimit);
- WARN_ON(val < dev_priv->rps.min_freq_softlimit);
+ WARN_ON(val > dev_priv->rps.max_freq);
+ WARN_ON(val < dev_priv->rps.min_freq);
/* min/max delay may still have been modified so be sure to
* write the limits value.
@@ -3836,8 +3836,8 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
struct drm_i915_private *dev_priv = dev->dev_private;
WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
- WARN_ON(val > dev_priv->rps.max_freq_softlimit);
- WARN_ON(val < dev_priv->rps.min_freq_softlimit);
+ WARN_ON(val > dev_priv->rps.max_freq);
+ WARN_ON(val < dev_priv->rps.min_freq);
if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1),
"Odd GPU freq value\n"))
@@ -3864,10 +3864,11 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
{
struct drm_device *dev = dev_priv->dev;
+ u32 val = dev_priv->rps.idle_freq;
/* CHV and latest VLV don't need to force the gfx clock */
if (IS_CHERRYVIEW(dev) || dev->pdev->revision >= 0xd) {
- valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
+ valleyview_set_rps(dev_priv->dev, val);
return;
}
@@ -3875,7 +3876,7 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
* When we are idle. Drop to min voltage state.
*/
- if (dev_priv->rps.cur_freq <= dev_priv->rps.min_freq_softlimit)
+ if (dev_priv->rps.cur_freq <= val)
return;
/* Mask turbo interrupt so that they will not come in between */
@@ -3884,10 +3885,9 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
vlv_force_gfx_clock(dev_priv, true);
- dev_priv->rps.cur_freq = dev_priv->rps.min_freq_softlimit;
+ dev_priv->rps.cur_freq = val;
- vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ,
- dev_priv->rps.min_freq_softlimit);
+ vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS))
& GENFREQSTATUS) == 0, 100))
@@ -3895,8 +3895,7 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
vlv_force_gfx_clock(dev_priv, false);
- I915_WRITE(GEN6_PMINTRMSK,
- gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
+ I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
}
void gen6_rps_idle(struct drm_i915_private *dev_priv)
@@ -3908,7 +3907,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
if (IS_VALLEYVIEW(dev))
vlv_set_rps_idle(dev_priv);
else
- gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
+ gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
dev_priv->rps.last_adj = 0;
}
mutex_unlock(&dev_priv->rps.hw_lock);
@@ -4059,6 +4058,8 @@ static void gen6_init_rps_frequencies(struct drm_device *dev)
dev_priv->rps.max_freq);
}
+ dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
+
/* Preserve min/max settings in case of re-init */
if (dev_priv->rps.max_freq_softlimit == 0)
dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
@@ -4227,7 +4228,7 @@ static void gen8_enable_rps(struct drm_device *dev)
/* 6: Ring frequency + overclocking (our driver does this later */
dev_priv->rps.power = HIGH_POWER; /* force a reset */
- gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
+ gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
}
@@ -4321,7 +4322,7 @@ static void gen6_enable_rps(struct drm_device *dev)
}
dev_priv->rps.power = HIGH_POWER; /* force a reset */
- gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
+ gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
rc6vids = 0;
ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
@@ -4761,6 +4762,8 @@ static void cherryview_init_gt_powersave(struct drm_device *dev)
dev_priv->rps.min_freq) & 1,
"Odd GPU freq values\n");
+ dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
+
/* Preserve min/max settings in case of re-init */
if (dev_priv->rps.max_freq_softlimit == 0)
dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
--
2.1.4
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH 2/7] drm/i915: Fix computation of last_adjustment for RPS autotuning
2015-03-06 15:06 [PATCH 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle Chris Wilson
@ 2015-03-06 15:06 ` Chris Wilson
2015-03-06 17:32 ` Daniel Vetter
2015-03-18 6:56 ` Deepak S
2015-03-06 15:06 ` [PATCH 3/7] drm/i915: Improved w/a for rps on Baytrail Chris Wilson
` (7 subsequent siblings)
8 siblings, 2 replies; 27+ messages in thread
From: Chris Wilson @ 2015-03-06 15:06 UTC (permalink / raw)
To: intel-gfx
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_irq.c | 27 ++++++++++++---------------
1 file changed, 12 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 9baecb79de8c..1296ce37e435 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1150,21 +1150,20 @@ static void gen6_pm_rps_work(struct work_struct *work)
mutex_lock(&dev_priv->rps.hw_lock);
adj = dev_priv->rps.last_adj;
+ new_delay = dev_priv->rps.cur_freq;
if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
if (adj > 0)
adj *= 2;
- else {
- /* CHV needs even encode values */
- adj = IS_CHERRYVIEW(dev_priv->dev) ? 2 : 1;
- }
- new_delay = dev_priv->rps.cur_freq + adj;
-
+ else /* CHV needs even encode values */
+ adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1;
/*
* For better performance, jump directly
* to RPe if we're below it.
*/
- if (new_delay < dev_priv->rps.efficient_freq)
+ if (new_delay < dev_priv->rps.efficient_freq - adj) {
new_delay = dev_priv->rps.efficient_freq;
+ adj = 0;
+ }
} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq)
new_delay = dev_priv->rps.efficient_freq;
@@ -1176,24 +1175,22 @@ static void gen6_pm_rps_work(struct work_struct *work)
} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
if (adj < 0)
adj *= 2;
- else {
- /* CHV needs even encode values */
- adj = IS_CHERRYVIEW(dev_priv->dev) ? -2 : -1;
- }
- new_delay = dev_priv->rps.cur_freq + adj;
+ else /* CHV needs even encode values */
+ adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1;
} else { /* unknown event */
- new_delay = dev_priv->rps.cur_freq;
+ adj = 0;
}
+ dev_priv->rps.last_adj = adj;
+
/* sysfs frequency interfaces may have snuck in while servicing the
* interrupt
*/
+ new_delay += adj;
new_delay = clamp_t(int, new_delay,
dev_priv->rps.min_freq_softlimit,
dev_priv->rps.max_freq_softlimit);
- dev_priv->rps.last_adj = new_delay - dev_priv->rps.cur_freq;
-
intel_set_rps(dev_priv->dev, new_delay);
mutex_unlock(&dev_priv->rps.hw_lock);
--
2.1.4
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH 3/7] drm/i915: Improved w/a for rps on Baytrail
2015-03-06 15:06 [PATCH 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle Chris Wilson
2015-03-06 15:06 ` [PATCH 2/7] drm/i915: Fix computation of last_adjustment for RPS autotuning Chris Wilson
@ 2015-03-06 15:06 ` Chris Wilson
2015-03-18 7:56 ` Deepak S
2015-03-06 15:06 ` [PATCH 4/7] drm/i915: Use down ei for manual Baytrail RPS calculations Chris Wilson
` (6 subsequent siblings)
8 siblings, 1 reply; 27+ messages in thread
From: Chris Wilson @ 2015-03-06 15:06 UTC (permalink / raw)
To: intel-gfx; +Cc: Daniel Vetter, Rodrigo Vivi
Rewrite commit 31685c258e0b0ad6aa486c5ec001382cf8a64212
Author: Deepak S <deepak.s@linux.intel.com>
Date: Thu Jul 3 17:33:01 2014 -0400
drm/i915/vlv: WA for Turbo and RC6 to work together.
Other than code clarity, the major improvement is to disable the extra
interrupts generated when idle. However, the reclocking remains rather
slow under the new manual regime, in particular it fails to downclock as
quickly as desired. The second major improvement is that for certain
workloads, like games, we need to combine render+media activity counters
as the work of displaying the frame is split across the engines and both
need to be taken into account when deciding the global GPU frequency as
memory cycles are shared.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Deepak S <deepak.s@linux.intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Conflicts:
drivers/gpu/drm/i915/intel_pm.c
---
drivers/gpu/drm/i915/i915_irq.c | 155 +++++++++++++----------------------
drivers/gpu/drm/i915/i915_reg.h | 4 +-
drivers/gpu/drm/i915/intel_display.c | 2 +
drivers/gpu/drm/i915/intel_drv.h | 2 +
drivers/gpu/drm/i915/intel_pm.c | 22 ++++-
5 files changed, 81 insertions(+), 104 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 1296ce37e435..4b7b86298b37 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -997,129 +997,84 @@ static void notify_ring(struct drm_device *dev,
wake_up_all(&ring->irq_queue);
}
-static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
- struct intel_rps_ei *rps_ei)
+static void vlv_c0_read(struct drm_i915_private *dev_priv,
+ struct intel_rps_ei *ei)
{
- u32 cz_ts, cz_freq_khz;
- u32 render_count, media_count;
- u32 elapsed_render, elapsed_media, elapsed_time;
- u32 residency = 0;
-
- cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
- cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
-
- render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
- media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
-
- if (rps_ei->cz_clock == 0) {
- rps_ei->cz_clock = cz_ts;
- rps_ei->render_c0 = render_count;
- rps_ei->media_c0 = media_count;
-
- return dev_priv->rps.cur_freq;
- }
-
- elapsed_time = cz_ts - rps_ei->cz_clock;
- rps_ei->cz_clock = cz_ts;
+ ei->cz_clock = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
+ ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT);
+ ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
+}
- elapsed_render = render_count - rps_ei->render_c0;
- rps_ei->render_c0 = render_count;
+static bool vlv_c0_above(struct drm_i915_private *dev_priv,
+ const struct intel_rps_ei *old,
+ const struct intel_rps_ei *now,
+ int threshold)
+{
+ u64 time, c0;
- elapsed_media = media_count - rps_ei->media_c0;
- rps_ei->media_c0 = media_count;
+ if (old->cz_clock == 0)
+ return false;
- /* Convert all the counters into common unit of milli sec */
- elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
- elapsed_render /= cz_freq_khz;
- elapsed_media /= cz_freq_khz;
+ time = now->cz_clock - old->cz_clock;
+ time *= threshold * dev_priv->mem_freq;
- /*
- * Calculate overall C0 residency percentage
- * only if elapsed time is non zero
+ /* Workload can be split between render + media, e.g. SwapBuffers
+ * being blitted in X after being rendered in mesa. To account for
+ * this we need to combine both engines into our activity counter.
*/
- if (elapsed_time) {
- residency =
- ((max(elapsed_render, elapsed_media) * 100)
- / elapsed_time);
- }
+ c0 = now->render_c0 - old->render_c0;
+ c0 += now->media_c0 - old->media_c0;
+ c0 *= 100 * VLV_CZ_CLOCK_TO_MILLI_SEC * 4 / 1000;
- return residency;
+ return c0 >= time;
}
-/**
- * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
- * busy-ness calculated from C0 counters of render & media power wells
- * @dev_priv: DRM device private
- *
- */
-static int vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
+void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
{
- u32 residency_C0_up = 0, residency_C0_down = 0;
- int new_delay, adj;
-
- dev_priv->rps.ei_interrupt_count++;
-
- WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+ vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
+ dev_priv->rps.up_ei = dev_priv->rps.down_ei;
+ dev_priv->rps.ei_interrupt_count = 0;
+}
+static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
+{
+ struct intel_rps_ei now;
+ u32 events = 0;
- if (dev_priv->rps.up_ei.cz_clock == 0) {
- vlv_c0_residency(dev_priv, &dev_priv->rps.up_ei);
- vlv_c0_residency(dev_priv, &dev_priv->rps.down_ei);
- return dev_priv->rps.cur_freq;
- }
+ if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
+ return 0;
+ vlv_c0_read(dev_priv, &now);
+ if (now.cz_clock == 0)
+ return 0;
/*
* To down throttle, C0 residency should be less than down threshold
* for continous EI intervals. So calculate down EI counters
* once in VLV_INT_COUNT_FOR_DOWN_EI
*/
- if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
-
+ if (++dev_priv->rps.ei_interrupt_count >= VLV_INT_COUNT_FOR_DOWN_EI) {
+ pm_iir |= GEN6_PM_RP_DOWN_EI_EXPIRED;
dev_priv->rps.ei_interrupt_count = 0;
-
- residency_C0_down = vlv_c0_residency(dev_priv,
- &dev_priv->rps.down_ei);
- } else {
- residency_C0_up = vlv_c0_residency(dev_priv,
- &dev_priv->rps.up_ei);
}
- new_delay = dev_priv->rps.cur_freq;
-
- adj = dev_priv->rps.last_adj;
- /* C0 residency is greater than UP threshold. Increase Frequency */
- if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
- if (adj > 0)
- adj *= 2;
- else
- adj = 1;
-
- if (dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)
- new_delay = dev_priv->rps.cur_freq + adj;
-
- /*
- * For better performance, jump directly
- * to RPe if we're below it.
- */
- if (new_delay < dev_priv->rps.efficient_freq)
- new_delay = dev_priv->rps.efficient_freq;
+ if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
+ if (!vlv_c0_above(dev_priv,
+ &dev_priv->rps.down_ei, &now,
+ VLV_RP_DOWN_EI_THRESHOLD))
+ events |= GEN6_PM_RP_DOWN_THRESHOLD;
+ dev_priv->rps.down_ei = now;
+ }
- } else if (!dev_priv->rps.ei_interrupt_count &&
- (residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
- if (adj < 0)
- adj *= 2;
- else
- adj = -1;
- /*
- * This means, C0 residency is less than down threshold over
- * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
- */
- if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit)
- new_delay = dev_priv->rps.cur_freq + adj;
+ if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
+ if (vlv_c0_above(dev_priv,
+ &dev_priv->rps.up_ei, &now,
+ VLV_RP_UP_EI_THRESHOLD))
+ events |= GEN6_PM_RP_UP_THRESHOLD;
+ dev_priv->rps.up_ei = now;
}
- return new_delay;
+ return events;
}
static void gen6_pm_rps_work(struct work_struct *work)
@@ -1149,6 +1104,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
mutex_lock(&dev_priv->rps.hw_lock);
+ pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
+
adj = dev_priv->rps.last_adj;
new_delay = dev_priv->rps.cur_freq;
if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
@@ -1170,8 +1127,6 @@ static void gen6_pm_rps_work(struct work_struct *work)
else
new_delay = dev_priv->rps.min_freq_softlimit;
adj = 0;
- } else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
- new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
if (adj < 0)
adj *= 2;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 56b97c41bf17..324922d8f8a1 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -6190,8 +6190,8 @@ enum skl_disp_power_wells {
#define GEN6_GT_GFX_RC6p 0x13810C
#define GEN6_GT_GFX_RC6pp 0x138110
-#define VLV_RENDER_C0_COUNT_REG 0x138118
-#define VLV_MEDIA_C0_COUNT_REG 0x13811C
+#define VLV_RENDER_C0_COUNT 0x138118
+#define VLV_MEDIA_C0_COUNT 0x13811C
#define GEN6_PCODE_MAILBOX 0x138124
#define GEN6_PCODE_READY (1<<31)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 77113d95fb71..2412002d510b 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9167,6 +9167,8 @@ void intel_mark_busy(struct drm_device *dev)
intel_runtime_pm_get(dev_priv);
i915_update_gfx_val(dev_priv);
+ if (INTEL_INFO(dev)->gen >= 6)
+ gen6_rps_busy(dev_priv);
dev_priv->mm.busy = true;
}
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index ff79dca2ff8e..1e564da2fd38 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1238,6 +1238,8 @@ void intel_disable_gt_powersave(struct drm_device *dev);
void intel_suspend_gt_powersave(struct drm_device *dev);
void intel_reset_gt_powersave(struct drm_device *dev);
void gen6_update_ring_freq(struct drm_device *dev);
+void gen6_rps_busy(struct drm_i915_private *dev_priv);
+void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
void gen6_rps_idle(struct drm_i915_private *dev_priv);
void gen6_rps_boost(struct drm_i915_private *dev_priv);
void ilk_wm_get_hw_state(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index e53724516852..b37d634bea99 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3898,6 +3898,18 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
}
+void gen6_rps_busy(struct drm_i915_private *dev_priv)
+{
+ mutex_lock(&dev_priv->rps.hw_lock);
+ if (dev_priv->rps.enabled) {
+ if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED))
+ gen6_rps_reset_ei(dev_priv);
+ I915_WRITE(GEN6_PMINTRMSK,
+ gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
+ }
+ mutex_unlock(&dev_priv->rps.hw_lock);
+}
+
void gen6_rps_idle(struct drm_i915_private *dev_priv)
{
struct drm_device *dev = dev_priv->dev;
@@ -3909,15 +3921,21 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
else
gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
dev_priv->rps.last_adj = 0;
+ I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
}
mutex_unlock(&dev_priv->rps.hw_lock);
}
void gen6_rps_boost(struct drm_i915_private *dev_priv)
{
+ u32 val;
+
mutex_lock(&dev_priv->rps.hw_lock);
- if (dev_priv->rps.enabled) {
- intel_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit);
+ val = dev_priv->rps.max_freq_softlimit;
+ if (dev_priv->rps.enabled &&
+ dev_priv->mm.busy &&
+ dev_priv->rps.cur_freq < val) {
+ intel_set_rps(dev_priv->dev, val);
dev_priv->rps.last_adj = 0;
}
mutex_unlock(&dev_priv->rps.hw_lock);
--
2.1.4
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH 4/7] drm/i915: Use down ei for manual Baytrail RPS calculations
2015-03-06 15:06 [PATCH 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle Chris Wilson
2015-03-06 15:06 ` [PATCH 2/7] drm/i915: Fix computation of last_adjustment for RPS autotuning Chris Wilson
2015-03-06 15:06 ` [PATCH 3/7] drm/i915: Improved w/a for rps on Baytrail Chris Wilson
@ 2015-03-06 15:06 ` Chris Wilson
2015-03-18 8:04 ` Deepak S
2015-03-06 15:06 ` [PATCH 5/7] drm/i915: Agressive downclocking on Baytrail Chris Wilson
` (5 subsequent siblings)
8 siblings, 1 reply; 27+ messages in thread
From: Chris Wilson @ 2015-03-06 15:06 UTC (permalink / raw)
To: intel-gfx
Use both up/down manual ei calcuations for symmetry and greater
flexibility for reclocking, instead of faking the down interrupt based
on a fixed integer number of up interrupts.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_drv.h | 2 --
drivers/gpu/drm/i915/i915_irq.c | 15 ++-------------
drivers/gpu/drm/i915/i915_reg.h | 1 -
drivers/gpu/drm/i915/intel_pm.c | 5 ++---
4 files changed, 4 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 73292a183492..efa98c9e5777 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1028,8 +1028,6 @@ struct intel_gen6_power_mgmt {
u8 rp0_freq; /* Non-overclocked max frequency. */
u32 cz_freq;
- u32 ei_interrupt_count;
-
int last_adj;
enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 4b7b86298b37..8892dbdfb629 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1033,7 +1033,6 @@ void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
{
vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
dev_priv->rps.up_ei = dev_priv->rps.down_ei;
- dev_priv->rps.ei_interrupt_count = 0;
}
static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
@@ -1041,23 +1040,13 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
struct intel_rps_ei now;
u32 events = 0;
- if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
+ if ((pm_iir & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) == 0)
return 0;
vlv_c0_read(dev_priv, &now);
if (now.cz_clock == 0)
return 0;
- /*
- * To down throttle, C0 residency should be less than down threshold
- * for continous EI intervals. So calculate down EI counters
- * once in VLV_INT_COUNT_FOR_DOWN_EI
- */
- if (++dev_priv->rps.ei_interrupt_count >= VLV_INT_COUNT_FOR_DOWN_EI) {
- pm_iir |= GEN6_PM_RP_DOWN_EI_EXPIRED;
- dev_priv->rps.ei_interrupt_count = 0;
- }
-
if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
if (!vlv_c0_above(dev_priv,
&dev_priv->rps.down_ei, &now,
@@ -4247,7 +4236,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
/* Let's track the enabled rps events */
if (IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv))
/* WaGsvRC0ResidencyMethod:vlv */
- dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
+ dev_priv->pm_rps_events = GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED;
else
dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 324922d8f8a1..ca24a2d4a823 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -665,7 +665,6 @@ enum skl_disp_power_wells {
#define VLV_CZ_CLOCK_TO_MILLI_SEC 100000
#define VLV_RP_UP_EI_THRESHOLD 90
#define VLV_RP_DOWN_EI_THRESHOLD 70
-#define VLV_INT_COUNT_FOR_DOWN_EI 5
/* vlv2 north clock has */
#define CCK_FUSE_REG 0x8
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index b37d634bea99..55dc406cd195 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3784,11 +3784,10 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
u32 mask = 0;
if (val > dev_priv->rps.min_freq_softlimit)
- mask |= GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
+ mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
if (val < dev_priv->rps.max_freq_softlimit)
- mask |= GEN6_PM_RP_UP_THRESHOLD;
+ mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
- mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED);
mask &= dev_priv->pm_rps_events;
return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
--
2.1.4
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH 5/7] drm/i915: Agressive downclocking on Baytrail
2015-03-06 15:06 [PATCH 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle Chris Wilson
` (2 preceding siblings ...)
2015-03-06 15:06 ` [PATCH 4/7] drm/i915: Use down ei for manual Baytrail RPS calculations Chris Wilson
@ 2015-03-06 15:06 ` Chris Wilson
2015-03-18 8:12 ` Deepak S
2015-03-06 15:06 ` [PATCH 6/7] drm/i915: Boost GPU frequency if we detect outstanding pageflips Chris Wilson
` (4 subsequent siblings)
8 siblings, 1 reply; 27+ messages in thread
From: Chris Wilson @ 2015-03-06 15:06 UTC (permalink / raw)
To: intel-gfx; +Cc: Daniel Vetter, Rodrigo Vivi
Reuse the same reclocking strategy for Baytail as on its bigger brethren,
Sandybridge and Ivybridge. In particular, this makes the device quicker
to reclock (both up and down) though the tendency now is to downclock
more aggressively to compensate for the RPS boosts.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Deepak S <deepak.s@linux.intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Conflicts:
drivers/gpu/drm/i915/intel_pm.c
---
drivers/gpu/drm/i915/i915_drv.h | 3 +++
drivers/gpu/drm/i915/i915_irq.c | 4 ++--
drivers/gpu/drm/i915/i915_reg.h | 2 --
drivers/gpu/drm/i915/intel_pm.c | 11 ++++++++++-
4 files changed, 15 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index efa98c9e5777..bfa6e11f802e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1028,6 +1028,9 @@ struct intel_gen6_power_mgmt {
u8 rp0_freq; /* Non-overclocked max frequency. */
u32 cz_freq;
+ u8 up_threshold; /* Current %busy required to uplock */
+ u8 down_threshold; /* Current %busy required to downclock */
+
int last_adj;
enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 8892dbdfb629..483079d96957 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1050,7 +1050,7 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
if (!vlv_c0_above(dev_priv,
&dev_priv->rps.down_ei, &now,
- VLV_RP_DOWN_EI_THRESHOLD))
+ dev_priv->rps.down_threshold))
events |= GEN6_PM_RP_DOWN_THRESHOLD;
dev_priv->rps.down_ei = now;
}
@@ -1058,7 +1058,7 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
if (vlv_c0_above(dev_priv,
&dev_priv->rps.up_ei, &now,
- VLV_RP_UP_EI_THRESHOLD))
+ dev_priv->rps.up_threshold))
events |= GEN6_PM_RP_UP_THRESHOLD;
dev_priv->rps.up_ei = now;
}
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index ca24a2d4a823..13ec6c2b1fcf 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -663,8 +663,6 @@ enum skl_disp_power_wells {
#define FB_FMAX_VMIN_FREQ_LO_MASK 0xf8000000
#define VLV_CZ_CLOCK_TO_MILLI_SEC 100000
-#define VLV_RP_UP_EI_THRESHOLD 90
-#define VLV_RP_DOWN_EI_THRESHOLD 70
/* vlv2 north clock has */
#define CCK_FUSE_REG 0x8
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 55dc406cd195..972333d2211d 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3722,10 +3722,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
switch (new_power) {
case LOW_POWER:
/* Upclock if more than 95% busy over 16ms */
+ dev_priv->rps.up_threshold = 95;
I915_WRITE(GEN6_RP_UP_EI, 12500);
I915_WRITE(GEN6_RP_UP_THRESHOLD, 11800);
/* Downclock if less than 85% busy over 32ms */
+ dev_priv->rps.down_threshold = 85;
I915_WRITE(GEN6_RP_DOWN_EI, 25000);
I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 21250);
@@ -3740,10 +3742,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
case BETWEEN:
/* Upclock if more than 90% busy over 13ms */
+ dev_priv->rps.up_threshold = 90;
I915_WRITE(GEN6_RP_UP_EI, 10250);
I915_WRITE(GEN6_RP_UP_THRESHOLD, 9225);
/* Downclock if less than 75% busy over 32ms */
+ dev_priv->rps.down_threshold = 75;
I915_WRITE(GEN6_RP_DOWN_EI, 25000);
I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 18750);
@@ -3758,10 +3762,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
case HIGH_POWER:
/* Upclock if more than 85% busy over 10ms */
+ dev_priv->rps.up_threshold = 85;
I915_WRITE(GEN6_RP_UP_EI, 8000);
I915_WRITE(GEN6_RP_UP_THRESHOLD, 6800);
/* Downclock if less than 60% busy over 32ms */
+ dev_priv->rps.down_threshold = 60;
I915_WRITE(GEN6_RP_DOWN_EI, 25000);
I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 15000);
@@ -3842,8 +3848,10 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
"Odd GPU freq value\n"))
val &= ~1;
- if (val != dev_priv->rps.cur_freq)
+ if (val != dev_priv->rps.cur_freq) {
vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
+ gen6_set_rps_thresholds(dev_priv, val);
+ }
I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
@@ -3892,6 +3900,7 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
& GENFREQSTATUS) == 0, 100))
DRM_ERROR("timed out waiting for Punit\n");
+ gen6_set_rps_thresholds(dev_priv, val);
vlv_force_gfx_clock(dev_priv, false);
I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
--
2.1.4
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH 6/7] drm/i915: Boost GPU frequency if we detect outstanding pageflips
2015-03-06 15:06 [PATCH 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle Chris Wilson
` (3 preceding siblings ...)
2015-03-06 15:06 ` [PATCH 5/7] drm/i915: Agressive downclocking on Baytrail Chris Wilson
@ 2015-03-06 15:06 ` Chris Wilson
2015-03-18 8:18 ` Deepak S
2015-03-06 15:06 ` [PATCH 7/7] drm/i915: Deminish contribution of wait-boosting from clients Chris Wilson
` (3 subsequent siblings)
8 siblings, 1 reply; 27+ messages in thread
From: Chris Wilson @ 2015-03-06 15:06 UTC (permalink / raw)
To: intel-gfx; +Cc: Daniel Vetter
If we hit a vblank and see that have a pageflip queue but not yet
processed, ensure that the GPU is running at maximum in order to clear
the backlog. Pageflips are only queued for the following vblank, if we
miss it, there will be a visible stutter. Boosting the GPU frequency
doesn't prevent us from missing the target vblank, but it should help
the subsequent frames hitting theirs.
v2: Reorder vblank vs flip-complete so that we only check for a missed
flip after processing the completion events, and avoid spurious boosts.
v3: Rename missed_vblank
v4: Rebase
v5: Cancel the outstanding work in runtime suspend
v6: Rebase
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
drivers/gpu/drm/i915/intel_display.c | 11 ++++++++---
drivers/gpu/drm/i915/intel_drv.h | 2 ++
drivers/gpu/drm/i915/intel_pm.c | 34 ++++++++++++++++++++++++++++++++++
3 files changed, 44 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 2412002d510b..7735f0a4184c 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9818,6 +9818,7 @@ void intel_check_page_flip(struct drm_device *dev, int pipe)
struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+ struct intel_unpin_work *work;
WARN_ON(!in_irq());
@@ -9825,12 +9826,16 @@ void intel_check_page_flip(struct drm_device *dev, int pipe)
return;
spin_lock(&dev->event_lock);
- if (intel_crtc->unpin_work && __intel_pageflip_stall_check(dev, crtc)) {
+ work = intel_crtc->unpin_work;
+ if (work != NULL && __intel_pageflip_stall_check(dev, crtc)) {
WARN_ONCE(1, "Kicking stuck page flip: queued at %d, now %d\n",
- intel_crtc->unpin_work->flip_queued_vblank,
- drm_vblank_count(dev, pipe));
+ work->flip_queued_vblank, drm_vblank_count(dev, pipe));
page_flip_completed(intel_crtc);
+ work = NULL;
}
+ if (work != NULL &&
+ drm_vblank_count(dev, pipe) - work->flip_queued_vblank > 1)
+ intel_queue_rps_boost_for_request(dev, work->flip_queued_req);
spin_unlock(&dev->event_lock);
}
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 1e564da2fd38..87e09a58191a 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1242,6 +1242,8 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv);
void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
void gen6_rps_idle(struct drm_i915_private *dev_priv);
void gen6_rps_boost(struct drm_i915_private *dev_priv);
+void intel_queue_rps_boost_for_request(struct drm_device *dev,
+ struct drm_i915_gem_request *rq);
void ilk_wm_get_hw_state(struct drm_device *dev);
void skl_wm_get_hw_state(struct drm_device *dev);
void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 972333d2211d..120b8af3c60c 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6598,6 +6598,40 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
return val / GT_FREQUENCY_MULTIPLIER;
}
+struct request_boost {
+ struct work_struct work;
+ struct drm_i915_gem_request *rq;
+};
+
+static void __intel_rps_boost_work(struct work_struct *work)
+{
+ struct request_boost *boost = container_of(work, struct request_boost, work);
+
+ if (!i915_gem_request_completed(boost->rq, true))
+ gen6_rps_boost(to_i915(boost->rq->ring->dev));
+
+ i915_gem_request_unreference(boost->rq);
+ kfree(boost);
+}
+
+void intel_queue_rps_boost_for_request(struct drm_device *dev,
+ struct drm_i915_gem_request *rq)
+{
+ struct request_boost *boost;
+
+ if (rq == NULL || INTEL_INFO(dev)->gen < 6)
+ return;
+
+ boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
+ if (boost == NULL)
+ return;
+
+ INIT_WORK(&boost->work, __intel_rps_boost_work);
+ i915_gem_request_assign(&boost->rq, rq);
+
+ queue_work(to_i915(dev)->wq, &boost->work);
+}
+
void intel_pm_setup(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
--
2.1.4
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH 7/7] drm/i915: Deminish contribution of wait-boosting from clients
2015-03-06 15:06 [PATCH 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle Chris Wilson
` (4 preceding siblings ...)
2015-03-06 15:06 ` [PATCH 6/7] drm/i915: Boost GPU frequency if we detect outstanding pageflips Chris Wilson
@ 2015-03-06 15:06 ` Chris Wilson
2015-03-06 17:58 ` shuang.he
2015-03-18 9:00 ` Deepak S
2015-03-09 15:38 ` [PATCH 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle Jesse Barnes
` (2 subsequent siblings)
8 siblings, 2 replies; 27+ messages in thread
From: Chris Wilson @ 2015-03-06 15:06 UTC (permalink / raw)
To: intel-gfx
With boosting for missed pageflips, we have a much stronger indication
of when we need to (temporarily) boost GPU frequency to ensure smooth
delivery of frames. So now only allow each client to perform one RPS boost
in each period of GPU activity due to stalling on results.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_debugfs.c | 39 +++++++++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_drv.h | 9 ++++++---
drivers/gpu/drm/i915/i915_gem.c | 35 ++++++++-------------------------
drivers/gpu/drm/i915/intel_drv.h | 3 ++-
drivers/gpu/drm/i915/intel_pm.c | 18 ++++++++++++++---
5 files changed, 70 insertions(+), 34 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index cc83cc0ff823..9366eaa50dba 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2245,6 +2245,44 @@ static int i915_ppgtt_info(struct seq_file *m, void *data)
return 0;
}
+static int i915_rps_boost_info(struct seq_file *m, void *data)
+{
+ struct drm_info_node *node = m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct drm_file *file;
+ int ret;
+
+ ret = mutex_lock_interruptible(&dev->struct_mutex);
+ if (ret)
+ return ret;
+
+ ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
+ if (ret)
+ goto unlock;
+
+ list_for_each_entry_reverse(file, &dev->filelist, lhead) {
+ struct drm_i915_file_private *file_priv = file->driver_priv;
+ struct task_struct *task;
+
+ rcu_read_lock();
+ task = pid_task(file->pid, PIDTYPE_PID);
+ seq_printf(m, "%s [%d]: %d boosts%s\n",
+ task ? task->comm : "<unknown>",
+ task ? task->pid : -1,
+ file_priv->rps_boosts,
+ list_empty(&file_priv->rps_boost) ? "" : ", active");
+ rcu_read_unlock();
+ }
+ seq_printf(m, "Kernel boosts: %d\n", dev_priv->rps.boosts);
+
+ mutex_unlock(&dev_priv->rps.hw_lock);
+unlock:
+ mutex_unlock(&dev->struct_mutex);
+
+ return ret;
+}
+
static int i915_llc(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
@@ -4680,6 +4718,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
{"i915_ddb_info", i915_ddb_info, 0},
{"i915_sseu_status", i915_sseu_status, 0},
{"i915_drrs_status", i915_drrs_status, 0},
+ {"i915_rps_boost_info", i915_rps_boost_info, 0},
};
#define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index bfa6e11f802e..b207d2cec9dc 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1036,6 +1036,8 @@ struct intel_gen6_power_mgmt {
bool enabled;
struct delayed_work delayed_resume_work;
+ struct list_head clients;
+ unsigned boosts;
/* manual wa residency calculations */
struct intel_rps_ei up_ei, down_ei;
@@ -2137,12 +2139,13 @@ struct drm_i915_file_private {
struct {
spinlock_t lock;
struct list_head request_list;
- struct delayed_work idle_work;
} mm;
struct idr context_idr;
- atomic_t rps_wait_boost;
- struct intel_engine_cs *bsd_ring;
+ struct list_head rps_boost;
+ struct intel_engine_cs *bsd_ring;
+
+ unsigned rps_boosts;
};
/*
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b266b31690e4..a0c35f80836c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1191,14 +1191,6 @@ static bool missed_irq(struct drm_i915_private *dev_priv,
return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
}
-static bool can_wait_boost(struct drm_i915_file_private *file_priv)
-{
- if (file_priv == NULL)
- return true;
-
- return !atomic_xchg(&file_priv->rps_wait_boost, true);
-}
-
/**
* __i915_wait_request - wait until execution of request has finished
* @req: duh!
@@ -1240,13 +1232,8 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
timeout_expire = timeout ?
jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0;
- if (INTEL_INFO(dev)->gen >= 6 && ring->id == RCS && can_wait_boost(file_priv)) {
- gen6_rps_boost(dev_priv);
- if (file_priv)
- mod_delayed_work(dev_priv->wq,
- &file_priv->mm.idle_work,
- msecs_to_jiffies(100));
- }
+ if (ring->id == RCS && INTEL_INFO(dev)->gen >= 6)
+ gen6_rps_boost(dev_priv, file_priv);
if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring)))
return -ENODEV;
@@ -5070,8 +5057,6 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
struct drm_i915_file_private *file_priv = file->driver_priv;
- cancel_delayed_work_sync(&file_priv->mm.idle_work);
-
/* Clean up our request list when the client is going away, so that
* later retire_requests won't dereference our soon-to-be-gone
* file_priv.
@@ -5087,15 +5072,12 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
request->file_priv = NULL;
}
spin_unlock(&file_priv->mm.lock);
-}
-
-static void
-i915_gem_file_idle_work_handler(struct work_struct *work)
-{
- struct drm_i915_file_private *file_priv =
- container_of(work, typeof(*file_priv), mm.idle_work.work);
- atomic_set(&file_priv->rps_wait_boost, false);
+ if (!list_empty(&file_priv->rps_boost)) {
+ mutex_lock(&to_i915(dev)->rps.hw_lock);
+ list_del(&file_priv->rps_boost);
+ mutex_unlock(&to_i915(dev)->rps.hw_lock);
+ }
}
int i915_gem_open(struct drm_device *dev, struct drm_file *file)
@@ -5112,11 +5094,10 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file)
file->driver_priv = file_priv;
file_priv->dev_priv = dev->dev_private;
file_priv->file = file;
+ INIT_LIST_HEAD(&file_priv->rps_boost);
spin_lock_init(&file_priv->mm.lock);
INIT_LIST_HEAD(&file_priv->mm.request_list);
- INIT_DELAYED_WORK(&file_priv->mm.idle_work,
- i915_gem_file_idle_work_handler);
ret = i915_gem_context_open(dev, file);
if (ret)
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 87e09a58191a..299d0c68f0dd 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1241,7 +1241,8 @@ void gen6_update_ring_freq(struct drm_device *dev);
void gen6_rps_busy(struct drm_i915_private *dev_priv);
void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
void gen6_rps_idle(struct drm_i915_private *dev_priv);
-void gen6_rps_boost(struct drm_i915_private *dev_priv);
+void gen6_rps_boost(struct drm_i915_private *dev_priv,
+ struct drm_i915_file_private *file_priv);
void intel_queue_rps_boost_for_request(struct drm_device *dev,
struct drm_i915_gem_request *rq);
void ilk_wm_get_hw_state(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 120b8af3c60c..307edc035af1 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3931,10 +3931,14 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
dev_priv->rps.last_adj = 0;
I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
}
+
+ while (!list_empty(&dev_priv->rps.clients))
+ list_del_init(dev_priv->rps.clients.next);
mutex_unlock(&dev_priv->rps.hw_lock);
}
-void gen6_rps_boost(struct drm_i915_private *dev_priv)
+void gen6_rps_boost(struct drm_i915_private *dev_priv,
+ struct drm_i915_file_private *file_priv)
{
u32 val;
@@ -3942,9 +3946,16 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv)
val = dev_priv->rps.max_freq_softlimit;
if (dev_priv->rps.enabled &&
dev_priv->mm.busy &&
- dev_priv->rps.cur_freq < val) {
+ dev_priv->rps.cur_freq < val &&
+ (file_priv == NULL || list_empty(&file_priv->rps_boost))) {
intel_set_rps(dev_priv->dev, val);
dev_priv->rps.last_adj = 0;
+
+ if (file_priv != NULL) {
+ list_add(&file_priv->rps_boost, &dev_priv->rps.clients);
+ file_priv->rps_boosts++;
+ } else
+ dev_priv->rps.boosts++;
}
mutex_unlock(&dev_priv->rps.hw_lock);
}
@@ -6608,7 +6619,7 @@ static void __intel_rps_boost_work(struct work_struct *work)
struct request_boost *boost = container_of(work, struct request_boost, work);
if (!i915_gem_request_completed(boost->rq, true))
- gen6_rps_boost(to_i915(boost->rq->ring->dev));
+ gen6_rps_boost(to_i915(boost->rq->ring->dev), NULL);
i915_gem_request_unreference(boost->rq);
kfree(boost);
@@ -6640,6 +6651,7 @@ void intel_pm_setup(struct drm_device *dev)
INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
intel_gen6_powersave_work);
+ INIT_LIST_HEAD(&dev_priv->rps.clients);
dev_priv->pm.suspended = false;
}
--
2.1.4
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 27+ messages in thread
* Re: [PATCH 2/7] drm/i915: Fix computation of last_adjustment for RPS autotuning
2015-03-06 15:06 ` [PATCH 2/7] drm/i915: Fix computation of last_adjustment for RPS autotuning Chris Wilson
@ 2015-03-06 17:32 ` Daniel Vetter
2015-03-06 21:44 ` Chris Wilson
2015-03-18 6:56 ` Deepak S
1 sibling, 1 reply; 27+ messages in thread
From: Daniel Vetter @ 2015-03-06 17:32 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Fri, Mar 06, 2015 at 03:06:26PM +0000, Chris Wilson wrote:
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
I might have impending w/e syndrome, but I can't see what you're fixing
any more. Can you please augment the commit message a bit?
Thanks, Daniel
> ---
> drivers/gpu/drm/i915/i915_irq.c | 27 ++++++++++++---------------
> 1 file changed, 12 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 9baecb79de8c..1296ce37e435 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1150,21 +1150,20 @@ static void gen6_pm_rps_work(struct work_struct *work)
> mutex_lock(&dev_priv->rps.hw_lock);
>
> adj = dev_priv->rps.last_adj;
> + new_delay = dev_priv->rps.cur_freq;
> if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
> if (adj > 0)
> adj *= 2;
> - else {
> - /* CHV needs even encode values */
> - adj = IS_CHERRYVIEW(dev_priv->dev) ? 2 : 1;
> - }
> - new_delay = dev_priv->rps.cur_freq + adj;
> -
> + else /* CHV needs even encode values */
> + adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1;
> /*
> * For better performance, jump directly
> * to RPe if we're below it.
> */
> - if (new_delay < dev_priv->rps.efficient_freq)
> + if (new_delay < dev_priv->rps.efficient_freq - adj) {
> new_delay = dev_priv->rps.efficient_freq;
> + adj = 0;
> + }
> } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
> if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq)
> new_delay = dev_priv->rps.efficient_freq;
> @@ -1176,24 +1175,22 @@ static void gen6_pm_rps_work(struct work_struct *work)
> } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
> if (adj < 0)
> adj *= 2;
> - else {
> - /* CHV needs even encode values */
> - adj = IS_CHERRYVIEW(dev_priv->dev) ? -2 : -1;
> - }
> - new_delay = dev_priv->rps.cur_freq + adj;
> + else /* CHV needs even encode values */
> + adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1;
> } else { /* unknown event */
> - new_delay = dev_priv->rps.cur_freq;
> + adj = 0;
> }
>
> + dev_priv->rps.last_adj = adj;
> +
> /* sysfs frequency interfaces may have snuck in while servicing the
> * interrupt
> */
> + new_delay += adj;
> new_delay = clamp_t(int, new_delay,
> dev_priv->rps.min_freq_softlimit,
> dev_priv->rps.max_freq_softlimit);
>
> - dev_priv->rps.last_adj = new_delay - dev_priv->rps.cur_freq;
> -
> intel_set_rps(dev_priv->dev, new_delay);
>
> mutex_unlock(&dev_priv->rps.hw_lock);
> --
> 2.1.4
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 7/7] drm/i915: Deminish contribution of wait-boosting from clients
2015-03-06 15:06 ` [PATCH 7/7] drm/i915: Deminish contribution of wait-boosting from clients Chris Wilson
@ 2015-03-06 17:58 ` shuang.he
2015-03-18 9:00 ` Deepak S
1 sibling, 0 replies; 27+ messages in thread
From: shuang.he @ 2015-03-06 17:58 UTC (permalink / raw)
To: shuang.he, ethan.gao, intel-gfx, chris
Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
Task id: 5906
-------------------------------------Summary-------------------------------------
Platform Delta drm-intel-nightly Series Applied
PNV -6 275/275 269/275
ILK 307/307 307/307
SNB 284/284 284/284
IVB 375/375 375/375
BYT -92 294/294 202/294
HSW -2 385/385 383/385
BDW -1 314/314 313/314
-------------------------------------Detailed-------------------------------------
Platform Test drm-intel-nightly Series Applied
*PNV igt_gem_fence_thrash_bo-write-verify-none PASS(2) FAIL(1)PASS(1)
*PNV igt_gem_fence_thrash_bo-write-verify-x PASS(2) FAIL(1)PASS(1)
*PNV igt_gem_fence_thrash_bo-write-verify-y PASS(2) FAIL(1)PASS(1)
PNV igt_gem_userptr_blits_minor-normal-sync DMESG_WARN(2)PASS(1) DMESG_WARN(1)PASS(1)
PNV igt_gen3_render_tiledx_blits FAIL(1)PASS(4) FAIL(1)PASS(1)
PNV igt_gen3_render_tiledy_blits FAIL(1)PASS(2) FAIL(1)PASS(1)
*BYT igt_drm_read_empty-block PASS(2) DMESG_WARN(1)
*BYT igt_drm_vma_limiter PASS(2) DMESG_WARN(1)
*BYT igt_drm_vma_limiter_cpu PASS(2) DMESG_WARN(1)
*BYT igt_drm_vma_limiter_gtt PASS(2) DMESG_WARN(1)
*BYT igt_gem_ctx_basic PASS(2) DMESG_WARN(1)
*BYT igt_gem_double_irq_loop PASS(2) DMESG_WARN(1)
*BYT igt_gem_dummy_reloc_loop_blt PASS(2) DMESG_WARN(1)
*BYT igt_gem_dummy_reloc_loop_bsd PASS(2) DMESG_WARN(1)
*BYT igt_gem_dummy_reloc_loop_mixed PASS(2) DMESG_WARN(1)
*BYT igt_gem_dummy_reloc_loop_mixed_multi_fd PASS(2) DMESG_WARN(1)
*BYT igt_gem_fd_exhaustion PASS(2) DMESG_WARN(1)
*BYT igt_gem_fenced_exec_thrash_2-spare-fences PASS(2) DMESG_WARN(1)
*BYT igt_gem_fenced_exec_thrash_no-spare-fences PASS(2) DMESG_WARN(1)
*BYT igt_gem_fenced_exec_thrash_no-spare-fences-interruptible PASS(2) DMESG_WARN(1)
*BYT igt_gem_fenced_exec_thrash_too-many-fences PASS(2) DMESG_WARN(1)
*BYT igt_gem_fence_thrash_bo-copy PASS(2) DMESG_WARN(1)
*BYT igt_gem_fence_thrash_bo-write-verify-none PASS(2) DMESG_WARN(1)
*BYT igt_gem_fence_thrash_bo-write-verify-x PASS(2) DMESG_WARN(1)
*BYT igt_gem_fence_thrash_bo-write-verify-y PASS(2) DMESG_WARN(1)
*BYT igt_gem_flink_race_flink_close PASS(2) DMESG_WARN(1)
*BYT igt_gem_gtt_speed PASS(2) DMESG_WARN(1)
*BYT igt_gem_largeobject PASS(2) DMESG_WARN(1)
*BYT igt_gem_mmap_gtt_copy PASS(2) DMESG_WARN(1)
*BYT igt_gem_mmap_gtt_fault-concurrent PASS(2) DMESG_WARN(1)
*BYT igt_gem_mmap_gtt_short PASS(2) DMESG_WARN(1)
*BYT igt_gem_mmap_gtt_write PASS(2) DMESG_WARN(1)
*BYT igt_gem_mmap_gtt_write-gtt PASS(2) DMESG_WARN(1)
*BYT igt_gem_mmap_gtt_write-gtt-no-prefault PASS(2) DMESG_WARN(1)
*BYT igt_gem_mmap_gtt_write-no-prefault PASS(2) DMESG_WARN(1)
*BYT igt_gem_mmap_offset_exhaustion PASS(2) DMESG_WARN(1)
*BYT igt_gem_persistent_relocs_forked PASS(2) DMESG_WARN(1)
*BYT igt_gem_persistent_relocs_forked-faulting-reloc PASS(2) DMESG_WARN(1)
*BYT igt_gem_persistent_relocs_forked-interruptible PASS(2) DMESG_WARN(1)
*BYT igt_gem_persistent_relocs_forked-interruptible-faulting-reloc PASS(2) DMESG_WARN(1)
*BYT igt_gem_pread_display PASS(2) DMESG_WARN(1)
*BYT igt_gem_pread_normal PASS(2) DMESG_WARN(1)
*BYT igt_gem_pread_snoop PASS(2) DMESG_WARN(1)
*BYT igt_gem_pread_uncached PASS(2) DMESG_WARN(1)
*BYT igt_gem_pwrite_display PASS(2) DMESG_WARN(1)
*BYT igt_gem_pwrite_normal PASS(2) DMESG_WARN(1)
*BYT igt_gem_pwrite_snoop PASS(2) DMESG_WARN(1)
*BYT igt_gem_pwrite_uncached PASS(2) DMESG_WARN(1)
*BYT igt_gem_reg_read PASS(2) DMESG_WARN(1)
*BYT igt_gem_reloc_vs_gpu_forked-faulting-reloc-thrashing PASS(2) DMESG_WARN(1)
*BYT igt_gem_reloc_vs_gpu_forked-interruptible-faulting-reloc-thrash-inactive PASS(2) DMESG_WARN(1)
*BYT igt_gem_reloc_vs_gpu_forked-interruptible-faulting-reloc-thrashing PASS(2) DMESG_WARN(1)
*BYT igt_gem_set_tiling_vs_gtt PASS(2) DMESG_WARN(1)
*BYT igt_gem_threaded_access_tiled PASS(2) DMESG_WARN(1)
*BYT igt_gem_tiled_partial_pwrite_pread_reads PASS(2) DMESG_WARN(1)
*BYT igt_gem_tiled_partial_pwrite_pread_writes PASS(2) DMESG_WARN(1)
*BYT igt_gem_tiled_partial_pwrite_pread_writes-after-reads PASS(2) DMESG_WARN(1)
*BYT igt_gem_tiled_pread PASS(2) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_create-destroy-unsync PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-sync-interruptible PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-sync-multifd-interruptible PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-sync-multifd-normal PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-sync-normal PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-sync-swapping-interruptible PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-sync-swapping-mempressure-interruptible PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-sync-swapping-mempressure-normal PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-sync-swapping-multifd-interruptible PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-sync-swapping-multifd-mempressure-interruptible PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-sync-swapping-multifd-mempressure-normal PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-sync-swapping-multifd-normal PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-sync-swapping-normal PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-unsync-interruptible PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-unsync-multifd-interruptible PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-unsync-multifd-normal PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-unsync-normal PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-unsync-swapping-interruptible PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-unsync-swapping-mempressure-interruptible PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-unsync-swapping-mempressure-normal PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-unsync-swapping-multifd-interruptible PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-unsync-swapping-multifd-mempressure-interruptible PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-unsync-swapping-multifd-mempressure-normal PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-unsync-swapping-multifd-normal PASS(1) DMESG_WARN(1)
*BYT igt_gem_userptr_blits_forked-unsync-swapping-normal PASS(1) DMESG_WARN(1)
*BYT igt_kms_setmode_clone-exclusive-crtc PASS(1) DMESG_WARN(1)
*BYT igt_kms_setmode_clone-single-crtc PASS(1) DMESG_WARN(1)
*BYT igt_kms_setmode_invalid-clone-exclusive-crtc PASS(1) DMESG_WARN(1)
*BYT igt_prime_self_import_reimport-vs-gem_close-race PASS(1) DMESG_WARN(1)
*BYT igt_gem_fence_thrash_bo-write-verify-threaded-none PASS(1) DMESG_WARN(1)
*BYT igt_gem_partial_pwrite_pread_reads PASS(1) DMESG_WARN(1)
*BYT igt_gem_caching_reads PASS(1) DMESG_WARN(1)
*BYT igt_gem_gtt_hog PASS(1) DMESG_WARN(1)
*BYT igt_gem_partial_pwrite_pread_reads-display PASS(1) DMESG_WARN(1)
*BYT igt_gem_partial_pwrite_pread_reads-snoop PASS(1) DMESG_WARN(1)
*BYT igt_gem_partial_pwrite_pread_reads-uncached PASS(1) DMESG_WARN(1)
*BYT igt_gem_partial_pwrite_pread_write PASS(1) DMESG_WARN(1)
*BYT igt_gem_partial_pwrite_pread_write-display PASS(1) DMESG_WARN(1)
*BYT igt_gem_partial_pwrite_pread_write-snoop PASS(1) DMESG_WARN(1)
*BYT igt_gem_tiled_pread_pwrite PASS(1) DMESG_WARN(1)
*HSW igt_pm_rpm_debugfs-read PASS(2) TIMEOUT(1)PASS(1)
*HSW igt_pm_rps_blocking PASS(2) FAIL(2)
*BDW igt_gem_gtt_hog PASS(3) DMESG_WARN(1)PASS(1)
Note: You need to pay more attention to line start with '*'
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 2/7] drm/i915: Fix computation of last_adjustment for RPS autotuning
2015-03-06 17:32 ` Daniel Vetter
@ 2015-03-06 21:44 ` Chris Wilson
0 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2015-03-06 21:44 UTC (permalink / raw)
To: Daniel Vetter; +Cc: intel-gfx
On Fri, Mar 06, 2015 at 06:32:27PM +0100, Daniel Vetter wrote:
> On Fri, Mar 06, 2015 at 03:06:26PM +0000, Chris Wilson wrote:
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>
> I might have impending w/e syndrome, but I can't see what you're fixing
> any more. Can you please augment the commit message a bit?
There are a couple of issues. First is when we jump to RPe, we then set
last_adj to a large value and then promptly double it. So we overshoot
lightish workloads (often landing in the high power domain and so
trapped). The second is that clamping could equally perturbs the last_adj
value.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle
2015-03-06 15:06 [PATCH 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle Chris Wilson
` (5 preceding siblings ...)
2015-03-06 15:06 ` [PATCH 7/7] drm/i915: Deminish contribution of wait-boosting from clients Chris Wilson
@ 2015-03-09 15:38 ` Jesse Barnes
2015-03-18 5:44 ` Deepak S
2015-03-18 5:52 ` Deepak S
8 siblings, 0 replies; 27+ messages in thread
From: Jesse Barnes @ 2015-03-09 15:38 UTC (permalink / raw)
To: Chris Wilson, intel-gfx, Wang, Wendy
On 03/06/2015 07:06 AM, Chris Wilson wrote:
> When we idle, we set the GPU frequency to the hardware minimum (not user
> minimum). We introduce a new variable to distinguish between the
> different roles, and to allow easy tuning of the idle frequency without
> impacting over aspects of RPS. Setting the minimum frequency should be a
> safety blanket as the pcu on the GPU should be power gating itself
> anyway. However, in order for us to do set the absolute minimum
> frequency, we need to relax a few of our assertions that we do not
> exceed the user limits.
>
> v2: Add idle_freq
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Arg, we still need some way of getting numbers (both perf and power) for
these kinds of changes across a variety of workloads.
Wendy, is that something you can do manually? Comparing BYT with and
without these patches across your power/perf tests?
Thanks,
Jesse
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle
2015-03-06 15:06 [PATCH 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle Chris Wilson
` (6 preceding siblings ...)
2015-03-09 15:38 ` [PATCH 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle Jesse Barnes
@ 2015-03-18 5:44 ` Deepak S
2015-03-18 9:07 ` Chris Wilson
2015-03-18 5:52 ` Deepak S
8 siblings, 1 reply; 27+ messages in thread
From: Deepak S @ 2015-03-18 5:44 UTC (permalink / raw)
To: intel-gfx
On Friday 06 March 2015 08:36 PM, Chris Wilson wrote:
> When we idle, we set the GPU frequency to the hardware minimum (not user
> minimum). We introduce a new variable to distinguish between the
> different roles, and to allow easy tuning of the idle frequency without
> impacting over aspects of RPS. Setting the minimum frequency should be a
> safety blanket as the pcu on the GPU should be power gating itself
> anyway. However, in order for us to do set the absolute minimum
> frequency, we need to relax a few of our assertions that we do not
> exceed the user limits.
>
> v2: Add idle_freq
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 6 ++++++
> drivers/gpu/drm/i915/i915_drv.h | 1 +
> drivers/gpu/drm/i915/intel_pm.c | 35 +++++++++++++++++++----------------
> 3 files changed, 26 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index c3a5322bc7f4..cc83cc0ff823 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1219,6 +1219,9 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
>
> seq_printf(m, "Max overclocked frequency: %dMHz\n",
> intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));
> +
> + seq_printf(m, "Idle freq: %d MHz\n",
> + intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq));
> } else if (IS_VALLEYVIEW(dev)) {
> u32 freq_sts;
>
> @@ -1233,6 +1236,9 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
> seq_printf(m, "min GPU freq: %d MHz\n",
> intel_gpu_freq(dev_priv, dev_priv->rps.min_freq));
>
> + seq_printf(m, "idle GPU freq: %d MHz\n",
> + intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq));
> +
> seq_printf(m,
> "efficient (RPe) frequency: %d MHz\n",
> intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq));
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 18a6e56c7c94..73292a183492 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1022,6 +1022,7 @@ struct intel_gen6_power_mgmt {
> u8 max_freq_softlimit; /* Max frequency permitted by the driver */
> u8 max_freq; /* Maximum frequency, RP0 if not overclocking */
> u8 min_freq; /* AKA RPn. Minimum frequency */
> + u8 idle_freq; /* Frequency to request when we are idle */
> u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */
> u8 rp1_freq; /* "less than" RP0 power/freqency */
> u8 rp0_freq; /* Non-overclocked max frequency. */
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 6b277f9f7ffa..e53724516852 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3711,9 +3711,9 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
> break;
> }
> /* Max/min bins are special */
> - if (val == dev_priv->rps.min_freq_softlimit)
> + if (val <= dev_priv->rps.min_freq_softlimit)
> new_power = LOW_POWER;
> - if (val == dev_priv->rps.max_freq_softlimit)
> + if (val >= dev_priv->rps.max_freq_softlimit)
> new_power = HIGH_POWER;
> if (new_power == dev_priv->rps.power)
> return;
> @@ -3802,8 +3802,8 @@ static void gen6_set_rps(struct drm_device *dev, u8 val)
> struct drm_i915_private *dev_priv = dev->dev_private;
>
> WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
> - WARN_ON(val > dev_priv->rps.max_freq_softlimit);
> - WARN_ON(val < dev_priv->rps.min_freq_softlimit);
> + WARN_ON(val > dev_priv->rps.max_freq);
> + WARN_ON(val < dev_priv->rps.min_freq);
>
> /* min/max delay may still have been modified so be sure to
> * write the limits value.
> @@ -3836,8 +3836,8 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
> struct drm_i915_private *dev_priv = dev->dev_private;
>
> WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
> - WARN_ON(val > dev_priv->rps.max_freq_softlimit);
> - WARN_ON(val < dev_priv->rps.min_freq_softlimit);
> + WARN_ON(val > dev_priv->rps.max_freq);
> + WARN_ON(val < dev_priv->rps.min_freq);
>
> if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1),
> "Odd GPU freq value\n"))
> @@ -3864,10 +3864,11 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
> static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
> {
> struct drm_device *dev = dev_priv->dev;
> + u32 val = dev_priv->rps.idle_freq;
>
> /* CHV and latest VLV don't need to force the gfx clock */
> if (IS_CHERRYVIEW(dev) || dev->pdev->revision >= 0xd) {
> - valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
> + valleyview_set_rps(dev_priv->dev, val);
> return;
> }
>
> @@ -3875,7 +3876,7 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
> * When we are idle. Drop to min voltage state.
> */
>
> - if (dev_priv->rps.cur_freq <= dev_priv->rps.min_freq_softlimit)
> + if (dev_priv->rps.cur_freq <= val)
> return;
>
> /* Mask turbo interrupt so that they will not come in between */
> @@ -3884,10 +3885,9 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>
> vlv_force_gfx_clock(dev_priv, true);
>
> - dev_priv->rps.cur_freq = dev_priv->rps.min_freq_softlimit;
> + dev_priv->rps.cur_freq = val;
>
> - vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ,
> - dev_priv->rps.min_freq_softlimit);
> + vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
>
> if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS))
> & GENFREQSTATUS) == 0, 100))
> @@ -3895,8 +3895,7 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>
> vlv_force_gfx_clock(dev_priv, false);
>
> - I915_WRITE(GEN6_PMINTRMSK,
> - gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
> + I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
> }
>
> void gen6_rps_idle(struct drm_i915_private *dev_priv)
> @@ -3908,7 +3907,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
> if (IS_VALLEYVIEW(dev))
> vlv_set_rps_idle(dev_priv);
> else
> - gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
> + gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
> dev_priv->rps.last_adj = 0;
> }
> mutex_unlock(&dev_priv->rps.hw_lock);
> @@ -4059,6 +4058,8 @@ static void gen6_init_rps_frequencies(struct drm_device *dev)
> dev_priv->rps.max_freq);
> }
>
> + dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
> +
> /* Preserve min/max settings in case of re-init */
> if (dev_priv->rps.max_freq_softlimit == 0)
> dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
> @@ -4227,7 +4228,7 @@ static void gen8_enable_rps(struct drm_device *dev)
> /* 6: Ring frequency + overclocking (our driver does this later */
>
> dev_priv->rps.power = HIGH_POWER; /* force a reset */
> - gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
> + gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
>
> intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> }
> @@ -4321,7 +4322,7 @@ static void gen6_enable_rps(struct drm_device *dev)
> }
>
> dev_priv->rps.power = HIGH_POWER; /* force a reset */
> - gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
> + gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
>
> rc6vids = 0;
> ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
> @@ -4761,6 +4762,8 @@ static void cherryview_init_gt_powersave(struct drm_device *dev)
I think we missed initializing idle_freq in valleyview init platform?
> dev_priv->rps.min_freq) & 1,
> "Odd GPU freq values\n");
>
> + dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
> +
> /* Preserve min/max settings in case of re-init */
> if (dev_priv->rps.max_freq_softlimit == 0)
> dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle
2015-03-06 15:06 [PATCH 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle Chris Wilson
` (7 preceding siblings ...)
2015-03-18 5:44 ` Deepak S
@ 2015-03-18 5:52 ` Deepak S
8 siblings, 0 replies; 27+ messages in thread
From: Deepak S @ 2015-03-18 5:52 UTC (permalink / raw)
To: intel-gfx
On Friday 06 March 2015 08:36 PM, Chris Wilson wrote:
> When we idle, we set the GPU frequency to the hardware minimum (not user
> minimum). We introduce a new variable to distinguish between the
> different roles, and to allow easy tuning of the idle frequency without
> impacting over aspects of RPS. Setting the minimum frequency should be a
> safety blanket as the pcu on the GPU should be power gating itself
> anyway. However, in order for us to do set the absolute minimum
> frequency, we need to relax a few of our assertions that we do not
> exceed the user limits.
>
> v2: Add idle_freq
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 6 ++++++
> drivers/gpu/drm/i915/i915_drv.h | 1 +
> drivers/gpu/drm/i915/intel_pm.c | 35 +++++++++++++++++++----------------
> 3 files changed, 26 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index c3a5322bc7f4..cc83cc0ff823 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1219,6 +1219,9 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
>
> seq_printf(m, "Max overclocked frequency: %dMHz\n",
> intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));
> +
> + seq_printf(m, "Idle freq: %d MHz\n",
> + intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq));
> } else if (IS_VALLEYVIEW(dev)) {
> u32 freq_sts;
>
> @@ -1233,6 +1236,9 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
> seq_printf(m, "min GPU freq: %d MHz\n",
> intel_gpu_freq(dev_priv, dev_priv->rps.min_freq));
>
> + seq_printf(m, "idle GPU freq: %d MHz\n",
> + intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq));
> +
> seq_printf(m,
> "efficient (RPe) frequency: %d MHz\n",
> intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq));
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 18a6e56c7c94..73292a183492 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1022,6 +1022,7 @@ struct intel_gen6_power_mgmt {
> u8 max_freq_softlimit; /* Max frequency permitted by the driver */
> u8 max_freq; /* Maximum frequency, RP0 if not overclocking */
> u8 min_freq; /* AKA RPn. Minimum frequency */
> + u8 idle_freq; /* Frequency to request when we are idle */
> u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */
> u8 rp1_freq; /* "less than" RP0 power/freqency */
> u8 rp0_freq; /* Non-overclocked max frequency. */
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 6b277f9f7ffa..e53724516852 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3711,9 +3711,9 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
> break;
> }
> /* Max/min bins are special */
> - if (val == dev_priv->rps.min_freq_softlimit)
> + if (val <= dev_priv->rps.min_freq_softlimit)
> new_power = LOW_POWER;
> - if (val == dev_priv->rps.max_freq_softlimit)
> + if (val >= dev_priv->rps.max_freq_softlimit)
> new_power = HIGH_POWER;
> if (new_power == dev_priv->rps.power)
> return;
> @@ -3802,8 +3802,8 @@ static void gen6_set_rps(struct drm_device *dev, u8 val)
> struct drm_i915_private *dev_priv = dev->dev_private;
>
> WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
> - WARN_ON(val > dev_priv->rps.max_freq_softlimit);
> - WARN_ON(val < dev_priv->rps.min_freq_softlimit);
> + WARN_ON(val > dev_priv->rps.max_freq);
> + WARN_ON(val < dev_priv->rps.min_freq);
>
> /* min/max delay may still have been modified so be sure to
> * write the limits value.
> @@ -3836,8 +3836,8 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
> struct drm_i915_private *dev_priv = dev->dev_private;
>
> WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
> - WARN_ON(val > dev_priv->rps.max_freq_softlimit);
> - WARN_ON(val < dev_priv->rps.min_freq_softlimit);
> + WARN_ON(val > dev_priv->rps.max_freq);
> + WARN_ON(val < dev_priv->rps.min_freq);
>
> if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1),
> "Odd GPU freq value\n"))
> @@ -3864,10 +3864,11 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
> static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
> {
> struct drm_device *dev = dev_priv->dev;
> + u32 val = dev_priv->rps.idle_freq;
>
> /* CHV and latest VLV don't need to force the gfx clock */
> if (IS_CHERRYVIEW(dev) || dev->pdev->revision >= 0xd) {
> - valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
> + valleyview_set_rps(dev_priv->dev, val);
> return;
> }
>
> @@ -3875,7 +3876,7 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
> * When we are idle. Drop to min voltage state.
> */
>
> - if (dev_priv->rps.cur_freq <= dev_priv->rps.min_freq_softlimit)
> + if (dev_priv->rps.cur_freq <= val)
> return;
>
> /* Mask turbo interrupt so that they will not come in between */
> @@ -3884,10 +3885,9 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>
> vlv_force_gfx_clock(dev_priv, true);
>
> - dev_priv->rps.cur_freq = dev_priv->rps.min_freq_softlimit;
> + dev_priv->rps.cur_freq = val;
>
> - vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ,
> - dev_priv->rps.min_freq_softlimit);
> + vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
>
> if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS))
> & GENFREQSTATUS) == 0, 100))
> @@ -3895,8 +3895,7 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>
> vlv_force_gfx_clock(dev_priv, false);
>
> - I915_WRITE(GEN6_PMINTRMSK,
> - gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
> + I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
> }
>
> void gen6_rps_idle(struct drm_i915_private *dev_priv)
> @@ -3908,7 +3907,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
> if (IS_VALLEYVIEW(dev))
> vlv_set_rps_idle(dev_priv);
> else
> - gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
> + gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
> dev_priv->rps.last_adj = 0;
> }
> mutex_unlock(&dev_priv->rps.hw_lock);
> @@ -4059,6 +4058,8 @@ static void gen6_init_rps_frequencies(struct drm_device *dev)
> dev_priv->rps.max_freq);
> }
>
> + dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
> +
> /* Preserve min/max settings in case of re-init */
> if (dev_priv->rps.max_freq_softlimit == 0)
> dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
> @@ -4227,7 +4228,7 @@ static void gen8_enable_rps(struct drm_device *dev)
> /* 6: Ring frequency + overclocking (our driver does this later */
>
> dev_priv->rps.power = HIGH_POWER; /* force a reset */
> - gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
> + gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
>
> intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> }
> @@ -4321,7 +4322,7 @@ static void gen6_enable_rps(struct drm_device *dev)
> }
>
> dev_priv->rps.power = HIGH_POWER; /* force a reset */
> - gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
> + gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
>
> rc6vids = 0;
> ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
> @@ -4761,6 +4762,8 @@ static void cherryview_init_gt_powersave(struct drm_device *dev)
I think we missed initializing idle_freq for valleyview platform.
> dev_priv->rps.min_freq) & 1,
> "Odd GPU freq values\n");
>
> + dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
> +
> /* Preserve min/max settings in case of re-init */
> if (dev_priv->rps.max_freq_softlimit == 0)
> dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 2/7] drm/i915: Fix computation of last_adjustment for RPS autotuning
2015-03-06 15:06 ` [PATCH 2/7] drm/i915: Fix computation of last_adjustment for RPS autotuning Chris Wilson
2015-03-06 17:32 ` Daniel Vetter
@ 2015-03-18 6:56 ` Deepak S
2015-03-18 9:05 ` Chris Wilson
2015-03-18 9:20 ` Chris Wilson
1 sibling, 2 replies; 27+ messages in thread
From: Deepak S @ 2015-03-18 6:56 UTC (permalink / raw)
To: intel-gfx
On Friday 06 March 2015 08:36 PM, Chris Wilson wrote:
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_irq.c | 27 ++++++++++++---------------
> 1 file changed, 12 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 9baecb79de8c..1296ce37e435 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1150,21 +1150,20 @@ static void gen6_pm_rps_work(struct work_struct *work)
> mutex_lock(&dev_priv->rps.hw_lock);
>
> adj = dev_priv->rps.last_adj;
> + new_delay = dev_priv->rps.cur_freq;
> if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
> if (adj > 0)
> adj *= 2;
> - else {
> - /* CHV needs even encode values */
> - adj = IS_CHERRYVIEW(dev_priv->dev) ? 2 : 1;
> - }
> - new_delay = dev_priv->rps.cur_freq + adj;
> -
> + else /* CHV needs even encode values */
> + adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1;
> /*
> * For better performance, jump directly
> * to RPe if we're below it.
> */
> - if (new_delay < dev_priv->rps.efficient_freq)
> + if (new_delay < dev_priv->rps.efficient_freq - adj) {
> new_delay = dev_priv->rps.efficient_freq;
> + adj = 0;
> + }
> } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
> if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq)
> new_delay = dev_priv->rps.efficient_freq;
> @@ -1176,24 +1175,22 @@ static void gen6_pm_rps_work(struct work_struct *work)
I think we should modify adj in GEN6_PM_RP_UP_EI_EXPIRED?
if not not we might request higher freq since we add adj to new_delay before request freq.
Other than this. Patch looks fine
Reviewed-by: Deepak S <deepak.s@linux.intel.com>
> } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
> if (adj < 0)
> adj *= 2;
> - else {
> - /* CHV needs even encode values */
> - adj = IS_CHERRYVIEW(dev_priv->dev) ? -2 : -1;
> - }
> - new_delay = dev_priv->rps.cur_freq + adj;
> + else /* CHV needs even encode values */
> + adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1;
> } else { /* unknown event */
> - new_delay = dev_priv->rps.cur_freq;
> + adj = 0;
> }
>
> + dev_priv->rps.last_adj = adj;
> +
> /* sysfs frequency interfaces may have snuck in while servicing the
> * interrupt
> */
> + new_delay += adj;
> new_delay = clamp_t(int, new_delay,
> dev_priv->rps.min_freq_softlimit,
> dev_priv->rps.max_freq_softlimit);
>
> - dev_priv->rps.last_adj = new_delay - dev_priv->rps.cur_freq;
> -
> intel_set_rps(dev_priv->dev, new_delay);
>
> mutex_unlock(&dev_priv->rps.hw_lock);
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 3/7] drm/i915: Improved w/a for rps on Baytrail
2015-03-06 15:06 ` [PATCH 3/7] drm/i915: Improved w/a for rps on Baytrail Chris Wilson
@ 2015-03-18 7:56 ` Deepak S
0 siblings, 0 replies; 27+ messages in thread
From: Deepak S @ 2015-03-18 7:56 UTC (permalink / raw)
To: Chris Wilson, intel-gfx; +Cc: Daniel Vetter, Rodrigo Vivi
On Friday 06 March 2015 08:36 PM, Chris Wilson wrote:
> Rewrite commit 31685c258e0b0ad6aa486c5ec001382cf8a64212
> Author: Deepak S <deepak.s@linux.intel.com>
> Date: Thu Jul 3 17:33:01 2014 -0400
>
> drm/i915/vlv: WA for Turbo and RC6 to work together.
>
> Other than code clarity, the major improvement is to disable the extra
> interrupts generated when idle. However, the reclocking remains rather
> slow under the new manual regime, in particular it fails to downclock as
> quickly as desired. The second major improvement is that for certain
> workloads, like games, we need to combine render+media activity counters
> as the work of displaying the frame is split across the engines and both
> need to be taken into account when deciding the global GPU frequency as
> memory cycles are shared.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Deepak S <deepak.s@linux.intel.com>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>
> Conflicts:
> drivers/gpu/drm/i915/intel_pm.c
> ---
> drivers/gpu/drm/i915/i915_irq.c | 155 +++++++++++++----------------------
> drivers/gpu/drm/i915/i915_reg.h | 4 +-
> drivers/gpu/drm/i915/intel_display.c | 2 +
> drivers/gpu/drm/i915/intel_drv.h | 2 +
> drivers/gpu/drm/i915/intel_pm.c | 22 ++++-
> 5 files changed, 81 insertions(+), 104 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 1296ce37e435..4b7b86298b37 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -997,129 +997,84 @@ static void notify_ring(struct drm_device *dev,
> wake_up_all(&ring->irq_queue);
> }
>
> -static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
> - struct intel_rps_ei *rps_ei)
> +static void vlv_c0_read(struct drm_i915_private *dev_priv,
> + struct intel_rps_ei *ei)
> {
> - u32 cz_ts, cz_freq_khz;
> - u32 render_count, media_count;
> - u32 elapsed_render, elapsed_media, elapsed_time;
> - u32 residency = 0;
> -
> - cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
> - cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
> -
> - render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
> - media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
> -
> - if (rps_ei->cz_clock == 0) {
> - rps_ei->cz_clock = cz_ts;
> - rps_ei->render_c0 = render_count;
> - rps_ei->media_c0 = media_count;
> -
> - return dev_priv->rps.cur_freq;
> - }
> -
> - elapsed_time = cz_ts - rps_ei->cz_clock;
> - rps_ei->cz_clock = cz_ts;
> + ei->cz_clock = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
> + ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT);
> + ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
> +}
>
> - elapsed_render = render_count - rps_ei->render_c0;
> - rps_ei->render_c0 = render_count;
> +static bool vlv_c0_above(struct drm_i915_private *dev_priv,
> + const struct intel_rps_ei *old,
> + const struct intel_rps_ei *now,
> + int threshold)
> +{
> + u64 time, c0;
>
> - elapsed_media = media_count - rps_ei->media_c0;
> - rps_ei->media_c0 = media_count;
> + if (old->cz_clock == 0)
> + return false;
>
> - /* Convert all the counters into common unit of milli sec */
> - elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
> - elapsed_render /= cz_freq_khz;
> - elapsed_media /= cz_freq_khz;
> + time = now->cz_clock - old->cz_clock;
> + time *= threshold * dev_priv->mem_freq;
>
> - /*
> - * Calculate overall C0 residency percentage
> - * only if elapsed time is non zero
> + /* Workload can be split between render + media, e.g. SwapBuffers
> + * being blitted in X after being rendered in mesa. To account for
> + * this we need to combine both engines into our activity counter.
> */
> - if (elapsed_time) {
> - residency =
> - ((max(elapsed_render, elapsed_media) * 100)
> - / elapsed_time);
> - }
> + c0 = now->render_c0 - old->render_c0;
> + c0 += now->media_c0 - old->media_c0;
> + c0 *= 100 * VLV_CZ_CLOCK_TO_MILLI_SEC * 4 / 1000;
>
> - return residency;
> + return c0 >= time;
> }
>
> -/**
> - * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
> - * busy-ness calculated from C0 counters of render & media power wells
> - * @dev_priv: DRM device private
> - *
> - */
> -static int vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
> +void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
> {
> - u32 residency_C0_up = 0, residency_C0_down = 0;
> - int new_delay, adj;
> -
> - dev_priv->rps.ei_interrupt_count++;
> -
> - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
> + vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
> + dev_priv->rps.up_ei = dev_priv->rps.down_ei;
> + dev_priv->rps.ei_interrupt_count = 0;
> +}
>
> +static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
> +{
> + struct intel_rps_ei now;
> + u32 events = 0;
>
> - if (dev_priv->rps.up_ei.cz_clock == 0) {
> - vlv_c0_residency(dev_priv, &dev_priv->rps.up_ei);
> - vlv_c0_residency(dev_priv, &dev_priv->rps.down_ei);
> - return dev_priv->rps.cur_freq;
> - }
> + if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
> + return 0;
>
> + vlv_c0_read(dev_priv, &now);
> + if (now.cz_clock == 0)
> + return 0;
>
> /*
> * To down throttle, C0 residency should be less than down threshold
> * for continous EI intervals. So calculate down EI counters
> * once in VLV_INT_COUNT_FOR_DOWN_EI
> */
> - if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
> -
> + if (++dev_priv->rps.ei_interrupt_count >= VLV_INT_COUNT_FOR_DOWN_EI) {
> + pm_iir |= GEN6_PM_RP_DOWN_EI_EXPIRED;
> dev_priv->rps.ei_interrupt_count = 0;
> -
> - residency_C0_down = vlv_c0_residency(dev_priv,
> - &dev_priv->rps.down_ei);
> - } else {
> - residency_C0_up = vlv_c0_residency(dev_priv,
> - &dev_priv->rps.up_ei);
> }
>
> - new_delay = dev_priv->rps.cur_freq;
> -
> - adj = dev_priv->rps.last_adj;
> - /* C0 residency is greater than UP threshold. Increase Frequency */
> - if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
> - if (adj > 0)
> - adj *= 2;
> - else
> - adj = 1;
> -
> - if (dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)
> - new_delay = dev_priv->rps.cur_freq + adj;
> -
> - /*
> - * For better performance, jump directly
> - * to RPe if we're below it.
> - */
> - if (new_delay < dev_priv->rps.efficient_freq)
> - new_delay = dev_priv->rps.efficient_freq;
> + if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
> + if (!vlv_c0_above(dev_priv,
> + &dev_priv->rps.down_ei, &now,
> + VLV_RP_DOWN_EI_THRESHOLD))
> + events |= GEN6_PM_RP_DOWN_THRESHOLD;
> + dev_priv->rps.down_ei = now;
> + }
>
> - } else if (!dev_priv->rps.ei_interrupt_count &&
> - (residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
> - if (adj < 0)
> - adj *= 2;
> - else
> - adj = -1;
> - /*
> - * This means, C0 residency is less than down threshold over
> - * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
> - */
> - if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit)
> - new_delay = dev_priv->rps.cur_freq + adj;
> + if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> + if (vlv_c0_above(dev_priv,
> + &dev_priv->rps.up_ei, &now,
> + VLV_RP_UP_EI_THRESHOLD))
> + events |= GEN6_PM_RP_UP_THRESHOLD;
> + dev_priv->rps.up_ei = now;
> }
>
> - return new_delay;
> + return events;
> }
>
> static void gen6_pm_rps_work(struct work_struct *work)
> @@ -1149,6 +1104,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
>
> mutex_lock(&dev_priv->rps.hw_lock);
>
> + pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
> +
> adj = dev_priv->rps.last_adj;
> new_delay = dev_priv->rps.cur_freq;
> if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
> @@ -1170,8 +1127,6 @@ static void gen6_pm_rps_work(struct work_struct *work)
> else
> new_delay = dev_priv->rps.min_freq_softlimit;
> adj = 0;
> - } else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> - new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
> } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
> if (adj < 0)
> adj *= 2;
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 56b97c41bf17..324922d8f8a1 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -6190,8 +6190,8 @@ enum skl_disp_power_wells {
>
> #define GEN6_GT_GFX_RC6p 0x13810C
> #define GEN6_GT_GFX_RC6pp 0x138110
> -#define VLV_RENDER_C0_COUNT_REG 0x138118
> -#define VLV_MEDIA_C0_COUNT_REG 0x13811C
> +#define VLV_RENDER_C0_COUNT 0x138118
> +#define VLV_MEDIA_C0_COUNT 0x13811C
>
> #define GEN6_PCODE_MAILBOX 0x138124
> #define GEN6_PCODE_READY (1<<31)
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 77113d95fb71..2412002d510b 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -9167,6 +9167,8 @@ void intel_mark_busy(struct drm_device *dev)
>
> intel_runtime_pm_get(dev_priv);
> i915_update_gfx_val(dev_priv);
> + if (INTEL_INFO(dev)->gen >= 6)
> + gen6_rps_busy(dev_priv);
> dev_priv->mm.busy = true;
> }
>
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index ff79dca2ff8e..1e564da2fd38 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1238,6 +1238,8 @@ void intel_disable_gt_powersave(struct drm_device *dev);
> void intel_suspend_gt_powersave(struct drm_device *dev);
> void intel_reset_gt_powersave(struct drm_device *dev);
> void gen6_update_ring_freq(struct drm_device *dev);
> +void gen6_rps_busy(struct drm_i915_private *dev_priv);
> +void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
> void gen6_rps_idle(struct drm_i915_private *dev_priv);
> void gen6_rps_boost(struct drm_i915_private *dev_priv);
> void ilk_wm_get_hw_state(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index e53724516852..b37d634bea99 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3898,6 +3898,18 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
> I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
> }
>
> +void gen6_rps_busy(struct drm_i915_private *dev_priv)
> +{
> + mutex_lock(&dev_priv->rps.hw_lock);
> + if (dev_priv->rps.enabled) {
> + if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED))
> + gen6_rps_reset_ei(dev_priv);
> + I915_WRITE(GEN6_PMINTRMSK,
> + gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
> + }
> + mutex_unlock(&dev_priv->rps.hw_lock);
> +}
> +
> void gen6_rps_idle(struct drm_i915_private *dev_priv)
> {
> struct drm_device *dev = dev_priv->dev;
> @@ -3909,15 +3921,21 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
> else
> gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
> dev_priv->rps.last_adj = 0;
> + I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
> }
> mutex_unlock(&dev_priv->rps.hw_lock);
> }
>
> void gen6_rps_boost(struct drm_i915_private *dev_priv)
> {
> + u32 val;
> +
> mutex_lock(&dev_priv->rps.hw_lock);
> - if (dev_priv->rps.enabled) {
> - intel_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit);
> + val = dev_priv->rps.max_freq_softlimit;
> + if (dev_priv->rps.enabled &&
> + dev_priv->mm.busy &&
> + dev_priv->rps.cur_freq < val) {
> + intel_set_rps(dev_priv->dev, val);
> dev_priv->rps.last_adj = 0;
> }
> mutex_unlock(&dev_priv->rps.hw_lock);
EI Calculation and changes looks fine to me
Reviewed-by: Deepak S<deepak.s@linux.intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/7] drm/i915: Use down ei for manual Baytrail RPS calculations
2015-03-06 15:06 ` [PATCH 4/7] drm/i915: Use down ei for manual Baytrail RPS calculations Chris Wilson
@ 2015-03-18 8:04 ` Deepak S
0 siblings, 0 replies; 27+ messages in thread
From: Deepak S @ 2015-03-18 8:04 UTC (permalink / raw)
To: intel-gfx
On Friday 06 March 2015 08:36 PM, Chris Wilson wrote:
> Use both up/down manual ei calcuations for symmetry and greater
> flexibility for reclocking, instead of faking the down interrupt based
> on a fixed integer number of up interrupts.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 2 --
> drivers/gpu/drm/i915/i915_irq.c | 15 ++-------------
> drivers/gpu/drm/i915/i915_reg.h | 1 -
> drivers/gpu/drm/i915/intel_pm.c | 5 ++---
> 4 files changed, 4 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 73292a183492..efa98c9e5777 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1028,8 +1028,6 @@ struct intel_gen6_power_mgmt {
> u8 rp0_freq; /* Non-overclocked max frequency. */
> u32 cz_freq;
>
> - u32 ei_interrupt_count;
> -
> int last_adj;
> enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 4b7b86298b37..8892dbdfb629 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1033,7 +1033,6 @@ void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
> {
> vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
> dev_priv->rps.up_ei = dev_priv->rps.down_ei;
> - dev_priv->rps.ei_interrupt_count = 0;
> }
>
> static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
> @@ -1041,23 +1040,13 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
> struct intel_rps_ei now;
> u32 events = 0;
>
> - if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
> + if ((pm_iir & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) == 0)
> return 0;
>
> vlv_c0_read(dev_priv, &now);
> if (now.cz_clock == 0)
> return 0;
>
> - /*
> - * To down throttle, C0 residency should be less than down threshold
> - * for continous EI intervals. So calculate down EI counters
> - * once in VLV_INT_COUNT_FOR_DOWN_EI
> - */
> - if (++dev_priv->rps.ei_interrupt_count >= VLV_INT_COUNT_FOR_DOWN_EI) {
> - pm_iir |= GEN6_PM_RP_DOWN_EI_EXPIRED;
> - dev_priv->rps.ei_interrupt_count = 0;
> - }
> -
> if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
> if (!vlv_c0_above(dev_priv,
> &dev_priv->rps.down_ei, &now,
> @@ -4247,7 +4236,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
> /* Let's track the enabled rps events */
> if (IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv))
> /* WaGsvRC0ResidencyMethod:vlv */
> - dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
> + dev_priv->pm_rps_events = GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED;
> else
> dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
>
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 324922d8f8a1..ca24a2d4a823 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -665,7 +665,6 @@ enum skl_disp_power_wells {
> #define VLV_CZ_CLOCK_TO_MILLI_SEC 100000
> #define VLV_RP_UP_EI_THRESHOLD 90
> #define VLV_RP_DOWN_EI_THRESHOLD 70
> -#define VLV_INT_COUNT_FOR_DOWN_EI 5
>
> /* vlv2 north clock has */
> #define CCK_FUSE_REG 0x8
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index b37d634bea99..55dc406cd195 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3784,11 +3784,10 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
> u32 mask = 0;
>
> if (val > dev_priv->rps.min_freq_softlimit)
> - mask |= GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
> + mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
> if (val < dev_priv->rps.max_freq_softlimit)
> - mask |= GEN6_PM_RP_UP_THRESHOLD;
> + mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
>
> - mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED);
> mask &= dev_priv->pm_rps_events;
>
> return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
>
Patch looks fine
Reviewed-by: Deepak S<deepak.s@linux.intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 5/7] drm/i915: Agressive downclocking on Baytrail
2015-03-06 15:06 ` [PATCH 5/7] drm/i915: Agressive downclocking on Baytrail Chris Wilson
@ 2015-03-18 8:12 ` Deepak S
2015-03-18 9:48 ` Daniel Vetter
0 siblings, 1 reply; 27+ messages in thread
From: Deepak S @ 2015-03-18 8:12 UTC (permalink / raw)
To: Chris Wilson, intel-gfx; +Cc: Daniel Vetter, Rodrigo Vivi
On Friday 06 March 2015 08:36 PM, Chris Wilson wrote:
> Reuse the same reclocking strategy for Baytail as on its bigger brethren,
> Sandybridge and Ivybridge. In particular, this makes the device quicker
> to reclock (both up and down) though the tendency now is to downclock
> more aggressively to compensate for the RPS boosts.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Deepak S <deepak.s@linux.intel.com>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>
> Conflicts:
> drivers/gpu/drm/i915/intel_pm.c
> ---
> drivers/gpu/drm/i915/i915_drv.h | 3 +++
> drivers/gpu/drm/i915/i915_irq.c | 4 ++--
> drivers/gpu/drm/i915/i915_reg.h | 2 --
> drivers/gpu/drm/i915/intel_pm.c | 11 ++++++++++-
> 4 files changed, 15 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index efa98c9e5777..bfa6e11f802e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1028,6 +1028,9 @@ struct intel_gen6_power_mgmt {
> u8 rp0_freq; /* Non-overclocked max frequency. */
> u32 cz_freq;
>
> + u8 up_threshold; /* Current %busy required to uplock */
> + u8 down_threshold; /* Current %busy required to downclock */
> +
> int last_adj;
> enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 8892dbdfb629..483079d96957 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1050,7 +1050,7 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
> if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
> if (!vlv_c0_above(dev_priv,
> &dev_priv->rps.down_ei, &now,
> - VLV_RP_DOWN_EI_THRESHOLD))
> + dev_priv->rps.down_threshold))
> events |= GEN6_PM_RP_DOWN_THRESHOLD;
> dev_priv->rps.down_ei = now;
> }
> @@ -1058,7 +1058,7 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
> if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> if (vlv_c0_above(dev_priv,
> &dev_priv->rps.up_ei, &now,
> - VLV_RP_UP_EI_THRESHOLD))
> + dev_priv->rps.up_threshold))
> events |= GEN6_PM_RP_UP_THRESHOLD;
> dev_priv->rps.up_ei = now;
> }
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index ca24a2d4a823..13ec6c2b1fcf 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -663,8 +663,6 @@ enum skl_disp_power_wells {
> #define FB_FMAX_VMIN_FREQ_LO_MASK 0xf8000000
>
> #define VLV_CZ_CLOCK_TO_MILLI_SEC 100000
> -#define VLV_RP_UP_EI_THRESHOLD 90
> -#define VLV_RP_DOWN_EI_THRESHOLD 70
>
> /* vlv2 north clock has */
> #define CCK_FUSE_REG 0x8
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 55dc406cd195..972333d2211d 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3722,10 +3722,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
> switch (new_power) {
> case LOW_POWER:
> /* Upclock if more than 95% busy over 16ms */
> + dev_priv->rps.up_threshold = 95;
> I915_WRITE(GEN6_RP_UP_EI, 12500);
> I915_WRITE(GEN6_RP_UP_THRESHOLD, 11800);
>
> /* Downclock if less than 85% busy over 32ms */
> + dev_priv->rps.down_threshold = 85;
> I915_WRITE(GEN6_RP_DOWN_EI, 25000);
> I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 21250);
>
> @@ -3740,10 +3742,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
>
> case BETWEEN:
> /* Upclock if more than 90% busy over 13ms */
> + dev_priv->rps.up_threshold = 90;
> I915_WRITE(GEN6_RP_UP_EI, 10250);
> I915_WRITE(GEN6_RP_UP_THRESHOLD, 9225);
>
> /* Downclock if less than 75% busy over 32ms */
> + dev_priv->rps.down_threshold = 75;
> I915_WRITE(GEN6_RP_DOWN_EI, 25000);
> I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 18750);
>
> @@ -3758,10 +3762,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
>
> case HIGH_POWER:
> /* Upclock if more than 85% busy over 10ms */
> + dev_priv->rps.up_threshold = 85;
> I915_WRITE(GEN6_RP_UP_EI, 8000);
> I915_WRITE(GEN6_RP_UP_THRESHOLD, 6800);
>
> /* Downclock if less than 60% busy over 32ms */
> + dev_priv->rps.down_threshold = 60;
> I915_WRITE(GEN6_RP_DOWN_EI, 25000);
> I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 15000);
>
> @@ -3842,8 +3848,10 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
> "Odd GPU freq value\n"))
> val &= ~1;
>
> - if (val != dev_priv->rps.cur_freq)
> + if (val != dev_priv->rps.cur_freq) {
> vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
> + gen6_set_rps_thresholds(dev_priv, val);
> If only for BYT, we might want to add a platform check before "gen6_set_rps_thresholds"
> + }
>
> I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
>
> @@ -3892,6 +3900,7 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
> & GENFREQSTATUS) == 0, 100))
> DRM_ERROR("timed out waiting for Punit\n");
>
> + gen6_set_rps_thresholds(dev_priv, val);
> vlv_force_gfx_clock(dev_priv, false);
>
> I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 6/7] drm/i915: Boost GPU frequency if we detect outstanding pageflips
2015-03-06 15:06 ` [PATCH 6/7] drm/i915: Boost GPU frequency if we detect outstanding pageflips Chris Wilson
@ 2015-03-18 8:18 ` Deepak S
2015-03-18 8:20 ` Deepak S
0 siblings, 1 reply; 27+ messages in thread
From: Deepak S @ 2015-03-18 8:18 UTC (permalink / raw)
To: intel-gfx
On Friday 06 March 2015 08:36 PM, Chris Wilson wrote:
> If we hit a vblank and see that have a pageflip queue but not yet
> processed, ensure that the GPU is running at maximum in order to clear
> the backlog. Pageflips are only queued for the following vblank, if we
> miss it, there will be a visible stutter. Boosting the GPU frequency
> doesn't prevent us from missing the target vblank, but it should help
> the subsequent frames hitting theirs.
>
> v2: Reorder vblank vs flip-complete so that we only check for a missed
> flip after processing the completion events, and avoid spurious boosts.
>
> v3: Rename missed_vblank
> v4: Rebase
> v5: Cancel the outstanding work in runtime suspend
> v6: Rebase
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
> drivers/gpu/drm/i915/intel_display.c | 11 ++++++++---
> drivers/gpu/drm/i915/intel_drv.h | 2 ++
> drivers/gpu/drm/i915/intel_pm.c | 34 ++++++++++++++++++++++++++++++++++
> 3 files changed, 44 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 2412002d510b..7735f0a4184c 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -9818,6 +9818,7 @@ void intel_check_page_flip(struct drm_device *dev, int pipe)
> struct drm_i915_private *dev_priv = dev->dev_private;
> struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
> struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> + struct intel_unpin_work *work;
>
> WARN_ON(!in_irq());
>
> @@ -9825,12 +9826,16 @@ void intel_check_page_flip(struct drm_device *dev, int pipe)
> return;
>
> spin_lock(&dev->event_lock);
> - if (intel_crtc->unpin_work && __intel_pageflip_stall_check(dev, crtc)) {
> + work = intel_crtc->unpin_work;
> + if (work != NULL && __intel_pageflip_stall_check(dev, crtc)) {
> WARN_ONCE(1, "Kicking stuck page flip: queued at %d, now %d\n",
> - intel_crtc->unpin_work->flip_queued_vblank,
> - drm_vblank_count(dev, pipe));
> + work->flip_queued_vblank, drm_vblank_count(dev, pipe));
> page_flip_completed(intel_crtc);
> + work = NULL;
> }
> + if (work != NULL &&
> + drm_vblank_count(dev, pipe) - work->flip_queued_vblank > 1)
> + intel_queue_rps_boost_for_request(dev, work->flip_queued_req);
> spin_unlock(&dev->event_lock);
> }
>
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 1e564da2fd38..87e09a58191a 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1242,6 +1242,8 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv);
> void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
> void gen6_rps_idle(struct drm_i915_private *dev_priv);
> void gen6_rps_boost(struct drm_i915_private *dev_priv);
> +void intel_queue_rps_boost_for_request(struct drm_device *dev,
> + struct drm_i915_gem_request *rq);
> void ilk_wm_get_hw_state(struct drm_device *dev);
> void skl_wm_get_hw_state(struct drm_device *dev);
> void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 972333d2211d..120b8af3c60c 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -6598,6 +6598,40 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
> return val / GT_FREQUENCY_MULTIPLIER;
> }
>
> +struct request_boost {
> + struct work_struct work;
> + struct drm_i915_gem_request *rq;
> +};
> +
> +static void __intel_rps_boost_work(struct work_struct *work)
> +{
> + struct request_boost *boost = container_of(work, struct request_boost, work);
> +
> + if (!i915_gem_request_completed(boost->rq, true))
> + gen6_rps_boost(to_i915(boost->rq->ring->dev));
> +
> + i915_gem_request_unreference(boost->rq);
> + kfree(boost);
> +}
> +
> +void intel_queue_rps_boost_for_request(struct drm_device *dev,
> + struct drm_i915_gem_request *rq)
> +{
> + struct request_boost *boost;
> +
> + if (rq == NULL || INTEL_INFO(dev)->gen < 6)
> + return;
> +
> + boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
> + if (boost == NULL)
> + return;
> +
> + INIT_WORK(&boost->work, __intel_rps_boost_work);
> + i915_gem_request_assign(&boost->rq, rq);
> +
> + queue_work(to_i915(dev)->wq, &boost->work);
> +}
> +
> void intel_pm_setup(struct drm_device *dev)
> {
> struct drm_i915_private *dev_priv = dev->dev_private;
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 6/7] drm/i915: Boost GPU frequency if we detect outstanding pageflips
2015-03-18 8:18 ` Deepak S
@ 2015-03-18 8:20 ` Deepak S
0 siblings, 0 replies; 27+ messages in thread
From: Deepak S @ 2015-03-18 8:20 UTC (permalink / raw)
To: intel-gfx
On Wednesday 18 March 2015 01:48 PM, Deepak S wrote:
>
>
> On Friday 06 March 2015 08:36 PM, Chris Wilson wrote:
>> If we hit a vblank and see that have a pageflip queue but not yet
>> processed, ensure that the GPU is running at maximum in order to clear
>> the backlog. Pageflips are only queued for the following vblank, if we
>> miss it, there will be a visible stutter. Boosting the GPU frequency
>> doesn't prevent us from missing the target vblank, but it should help
>> the subsequent frames hitting theirs.
>>
>> v2: Reorder vblank vs flip-complete so that we only check for a missed
>> flip after processing the completion events, and avoid spurious boosts.
>>
>> v3: Rename missed_vblank
>> v4: Rebase
>> v5: Cancel the outstanding work in runtime suspend
>> v6: Rebase
>>
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
>> ---
>> drivers/gpu/drm/i915/intel_display.c | 11 ++++++++---
>> drivers/gpu/drm/i915/intel_drv.h | 2 ++
>> drivers/gpu/drm/i915/intel_pm.c | 34
>> ++++++++++++++++++++++++++++++++++
>> 3 files changed, 44 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_display.c
>> b/drivers/gpu/drm/i915/intel_display.c
>> index 2412002d510b..7735f0a4184c 100644
>> --- a/drivers/gpu/drm/i915/intel_display.c
>> +++ b/drivers/gpu/drm/i915/intel_display.c
>> @@ -9818,6 +9818,7 @@ void intel_check_page_flip(struct drm_device
>> *dev, int pipe)
>> struct drm_i915_private *dev_priv = dev->dev_private;
>> struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
>> struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>> + struct intel_unpin_work *work;
>> WARN_ON(!in_irq());
>> @@ -9825,12 +9826,16 @@ void intel_check_page_flip(struct
>> drm_device *dev, int pipe)
>> return;
>> spin_lock(&dev->event_lock);
>> - if (intel_crtc->unpin_work && __intel_pageflip_stall_check(dev,
>> crtc)) {
>> + work = intel_crtc->unpin_work;
>> + if (work != NULL && __intel_pageflip_stall_check(dev, crtc)) {
>> WARN_ONCE(1, "Kicking stuck page flip: queued at %d, now
>> %d\n",
>> - intel_crtc->unpin_work->flip_queued_vblank,
>> - drm_vblank_count(dev, pipe));
>> + work->flip_queued_vblank, drm_vblank_count(dev, pipe));
>> page_flip_completed(intel_crtc);
>> + work = NULL;
>> }
>> + if (work != NULL &&
>> + drm_vblank_count(dev, pipe) - work->flip_queued_vblank > 1)
>> + intel_queue_rps_boost_for_request(dev, work->flip_queued_req);
>> spin_unlock(&dev->event_lock);
>> }
>> diff --git a/drivers/gpu/drm/i915/intel_drv.h
>> b/drivers/gpu/drm/i915/intel_drv.h
>> index 1e564da2fd38..87e09a58191a 100644
>> --- a/drivers/gpu/drm/i915/intel_drv.h
>> +++ b/drivers/gpu/drm/i915/intel_drv.h
>> @@ -1242,6 +1242,8 @@ void gen6_rps_busy(struct drm_i915_private
>> *dev_priv);
>> void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
>> void gen6_rps_idle(struct drm_i915_private *dev_priv);
>> void gen6_rps_boost(struct drm_i915_private *dev_priv);
>> +void intel_queue_rps_boost_for_request(struct drm_device *dev,
>> + struct drm_i915_gem_request *rq);
>> void ilk_wm_get_hw_state(struct drm_device *dev);
>> void skl_wm_get_hw_state(struct drm_device *dev);
>> void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
>> diff --git a/drivers/gpu/drm/i915/intel_pm.c
>> b/drivers/gpu/drm/i915/intel_pm.c
>> index 972333d2211d..120b8af3c60c 100644
>> --- a/drivers/gpu/drm/i915/intel_pm.c
>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>> @@ -6598,6 +6598,40 @@ int intel_freq_opcode(struct drm_i915_private
>> *dev_priv, int val)
>> return val / GT_FREQUENCY_MULTIPLIER;
>> }
>> +struct request_boost {
>> + struct work_struct work;
>> + struct drm_i915_gem_request *rq;
>> +};
>> +
>> +static void __intel_rps_boost_work(struct work_struct *work)
>> +{
>> + struct request_boost *boost = container_of(work, struct
>> request_boost, work);
>> +
>> + if (!i915_gem_request_completed(boost->rq, true))
>> + gen6_rps_boost(to_i915(boost->rq->ring->dev));
>> +
>> + i915_gem_request_unreference(boost->rq);
>> + kfree(boost);
>> +}
>> +
>> +void intel_queue_rps_boost_for_request(struct drm_device *dev,
>> + struct drm_i915_gem_request *rq)
>> +{
>> + struct request_boost *boost;
>> +
>> + if (rq == NULL || INTEL_INFO(dev)->gen < 6)
>> + return;
>> +
>> + boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
>> + if (boost == NULL)
>> + return;
>> +
>> + INIT_WORK(&boost->work, __intel_rps_boost_work);
>> + i915_gem_request_assign(&boost->rq, rq);
>> +
>> + queue_work(to_i915(dev)->wq, &boost->work);
>> +}
>> +
>> void intel_pm_setup(struct drm_device *dev)
>> {
>> struct drm_i915_private *dev_priv = dev->dev_private;
Patch looks fine
Reviewed-by: Deepak S<deepak.s@linux.intel.com>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 7/7] drm/i915: Deminish contribution of wait-boosting from clients
2015-03-06 15:06 ` [PATCH 7/7] drm/i915: Deminish contribution of wait-boosting from clients Chris Wilson
2015-03-06 17:58 ` shuang.he
@ 2015-03-18 9:00 ` Deepak S
1 sibling, 0 replies; 27+ messages in thread
From: Deepak S @ 2015-03-18 9:00 UTC (permalink / raw)
To: intel-gfx
On Friday 06 March 2015 08:36 PM, Chris Wilson wrote:
> With boosting for missed pageflips, we have a much stronger indication
> of when we need to (temporarily) boost GPU frequency to ensure smooth
> delivery of frames. So now only allow each client to perform one RPS boost
> in each period of GPU activity due to stalling on results.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 39 +++++++++++++++++++++++++++++++++++++
> drivers/gpu/drm/i915/i915_drv.h | 9 ++++++---
> drivers/gpu/drm/i915/i915_gem.c | 35 ++++++++-------------------------
> drivers/gpu/drm/i915/intel_drv.h | 3 ++-
> drivers/gpu/drm/i915/intel_pm.c | 18 ++++++++++++++---
> 5 files changed, 70 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index cc83cc0ff823..9366eaa50dba 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2245,6 +2245,44 @@ static int i915_ppgtt_info(struct seq_file *m, void *data)
> return 0;
> }
>
> +static int i915_rps_boost_info(struct seq_file *m, void *data)
> +{
> + struct drm_info_node *node = m->private;
> + struct drm_device *dev = node->minor->dev;
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + struct drm_file *file;
> + int ret;
> +
> + ret = mutex_lock_interruptible(&dev->struct_mutex);
> + if (ret)
> + return ret;
> +
> + ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
> + if (ret)
> + goto unlock;
> +
> + list_for_each_entry_reverse(file, &dev->filelist, lhead) {
> + struct drm_i915_file_private *file_priv = file->driver_priv;
> + struct task_struct *task;
> +
> + rcu_read_lock();
> + task = pid_task(file->pid, PIDTYPE_PID);
> + seq_printf(m, "%s [%d]: %d boosts%s\n",
> + task ? task->comm : "<unknown>",
> + task ? task->pid : -1,
> + file_priv->rps_boosts,
> + list_empty(&file_priv->rps_boost) ? "" : ", active");
> + rcu_read_unlock();
> + }
> + seq_printf(m, "Kernel boosts: %d\n", dev_priv->rps.boosts);
> +
> + mutex_unlock(&dev_priv->rps.hw_lock);
> +unlock:
> + mutex_unlock(&dev->struct_mutex);
> +
> + return ret;
> +}
> +
> static int i915_llc(struct seq_file *m, void *data)
> {
> struct drm_info_node *node = m->private;
> @@ -4680,6 +4718,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
> {"i915_ddb_info", i915_ddb_info, 0},
> {"i915_sseu_status", i915_sseu_status, 0},
> {"i915_drrs_status", i915_drrs_status, 0},
> + {"i915_rps_boost_info", i915_rps_boost_info, 0},
> };
> #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index bfa6e11f802e..b207d2cec9dc 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1036,6 +1036,8 @@ struct intel_gen6_power_mgmt {
>
> bool enabled;
> struct delayed_work delayed_resume_work;
> + struct list_head clients;
> + unsigned boosts;
>
> /* manual wa residency calculations */
> struct intel_rps_ei up_ei, down_ei;
> @@ -2137,12 +2139,13 @@ struct drm_i915_file_private {
> struct {
> spinlock_t lock;
> struct list_head request_list;
> - struct delayed_work idle_work;
> } mm;
> struct idr context_idr;
>
> - atomic_t rps_wait_boost;
> - struct intel_engine_cs *bsd_ring;
> + struct list_head rps_boost;
> + struct intel_engine_cs *bsd_ring;
> +
> + unsigned rps_boosts;
> };
>
> /*
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index b266b31690e4..a0c35f80836c 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1191,14 +1191,6 @@ static bool missed_irq(struct drm_i915_private *dev_priv,
> return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
> }
>
> -static bool can_wait_boost(struct drm_i915_file_private *file_priv)
> -{
> - if (file_priv == NULL)
> - return true;
> -
> - return !atomic_xchg(&file_priv->rps_wait_boost, true);
> -}
> -
> /**
> * __i915_wait_request - wait until execution of request has finished
> * @req: duh!
> @@ -1240,13 +1232,8 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
> timeout_expire = timeout ?
> jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0;
>
> - if (INTEL_INFO(dev)->gen >= 6 && ring->id == RCS && can_wait_boost(file_priv)) {
> - gen6_rps_boost(dev_priv);
> - if (file_priv)
> - mod_delayed_work(dev_priv->wq,
> - &file_priv->mm.idle_work,
> - msecs_to_jiffies(100));
> - }
> + if (ring->id == RCS && INTEL_INFO(dev)->gen >= 6)
> + gen6_rps_boost(dev_priv, file_priv);
>
> if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring)))
> return -ENODEV;
> @@ -5070,8 +5057,6 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
> {
> struct drm_i915_file_private *file_priv = file->driver_priv;
>
> - cancel_delayed_work_sync(&file_priv->mm.idle_work);
> -
> /* Clean up our request list when the client is going away, so that
> * later retire_requests won't dereference our soon-to-be-gone
> * file_priv.
> @@ -5087,15 +5072,12 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
> request->file_priv = NULL;
> }
> spin_unlock(&file_priv->mm.lock);
> -}
> -
> -static void
> -i915_gem_file_idle_work_handler(struct work_struct *work)
> -{
> - struct drm_i915_file_private *file_priv =
> - container_of(work, typeof(*file_priv), mm.idle_work.work);
>
> - atomic_set(&file_priv->rps_wait_boost, false);
> + if (!list_empty(&file_priv->rps_boost)) {
> + mutex_lock(&to_i915(dev)->rps.hw_lock);
> + list_del(&file_priv->rps_boost);
> + mutex_unlock(&to_i915(dev)->rps.hw_lock);
> + }
> }
>
> int i915_gem_open(struct drm_device *dev, struct drm_file *file)
> @@ -5112,11 +5094,10 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file)
> file->driver_priv = file_priv;
> file_priv->dev_priv = dev->dev_private;
> file_priv->file = file;
> + INIT_LIST_HEAD(&file_priv->rps_boost);
>
> spin_lock_init(&file_priv->mm.lock);
> INIT_LIST_HEAD(&file_priv->mm.request_list);
> - INIT_DELAYED_WORK(&file_priv->mm.idle_work,
> - i915_gem_file_idle_work_handler);
>
> ret = i915_gem_context_open(dev, file);
> if (ret)
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 87e09a58191a..299d0c68f0dd 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1241,7 +1241,8 @@ void gen6_update_ring_freq(struct drm_device *dev);
> void gen6_rps_busy(struct drm_i915_private *dev_priv);
> void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
> void gen6_rps_idle(struct drm_i915_private *dev_priv);
> -void gen6_rps_boost(struct drm_i915_private *dev_priv);
> +void gen6_rps_boost(struct drm_i915_private *dev_priv,
> + struct drm_i915_file_private *file_priv);
> void intel_queue_rps_boost_for_request(struct drm_device *dev,
> struct drm_i915_gem_request *rq);
> void ilk_wm_get_hw_state(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 120b8af3c60c..307edc035af1 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3931,10 +3931,14 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
> dev_priv->rps.last_adj = 0;
> I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
> }
> +
> + while (!list_empty(&dev_priv->rps.clients))
> + list_del_init(dev_priv->rps.clients.next);
> mutex_unlock(&dev_priv->rps.hw_lock);
> }
>
> -void gen6_rps_boost(struct drm_i915_private *dev_priv)
> +void gen6_rps_boost(struct drm_i915_private *dev_priv,
> + struct drm_i915_file_private *file_priv)
> {
> u32 val;
>
> @@ -3942,9 +3946,16 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv)
> val = dev_priv->rps.max_freq_softlimit;
> if (dev_priv->rps.enabled &&
> dev_priv->mm.busy &&
> - dev_priv->rps.cur_freq < val) {
> + dev_priv->rps.cur_freq < val &&
> + (file_priv == NULL || list_empty(&file_priv->rps_boost))) {
> intel_set_rps(dev_priv->dev, val);
> dev_priv->rps.last_adj = 0;
> +
> + if (file_priv != NULL) {
> + list_add(&file_priv->rps_boost, &dev_priv->rps.clients);
> + file_priv->rps_boosts++;
> + } else
> + dev_priv->rps.boosts++;
> }
> mutex_unlock(&dev_priv->rps.hw_lock);
> }
> @@ -6608,7 +6619,7 @@ static void __intel_rps_boost_work(struct work_struct *work)
> struct request_boost *boost = container_of(work, struct request_boost, work);
>
> if (!i915_gem_request_completed(boost->rq, true))
> - gen6_rps_boost(to_i915(boost->rq->ring->dev));
> + gen6_rps_boost(to_i915(boost->rq->ring->dev), NULL);
>
> i915_gem_request_unreference(boost->rq);
> kfree(boost);
> @@ -6640,6 +6651,7 @@ void intel_pm_setup(struct drm_device *dev)
>
> INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
> intel_gen6_powersave_work);
> + INIT_LIST_HEAD(&dev_priv->rps.clients);
>
> dev_priv->pm.suspended = false;
> }
Cool. Patch looks fine
Reviewed-by: Deepak S<deepak.s@linux.intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 2/7] drm/i915: Fix computation of last_adjustment for RPS autotuning
2015-03-18 6:56 ` Deepak S
@ 2015-03-18 9:05 ` Chris Wilson
2015-03-18 9:20 ` Chris Wilson
1 sibling, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2015-03-18 9:05 UTC (permalink / raw)
To: Deepak S; +Cc: intel-gfx
On Wed, Mar 18, 2015 at 12:26:49PM +0530, Deepak S wrote:
> On Friday 06 March 2015 08:36 PM, Chris Wilson wrote:
> I think we should modify adj in GEN6_PM_RP_UP_EI_EXPIRED?
> if not not we might request higher freq since we add adj to new_delay before request freq.
Oh yeah. That branch didn't exist when I first wrote the patch, and
didn't exist at the end of the series so I never saw it!
For this point in the patch series, yes, we do need to set adj=0.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle
2015-03-18 5:44 ` Deepak S
@ 2015-03-18 9:07 ` Chris Wilson
0 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2015-03-18 9:07 UTC (permalink / raw)
To: Deepak S; +Cc: intel-gfx
On Wed, Mar 18, 2015 at 11:14:15AM +0530, Deepak S wrote:
> >@@ -4761,6 +4762,8 @@ static void cherryview_init_gt_powersave(struct drm_device *dev)
>
> I think we missed initializing idle_freq in valleyview init platform?
Indeed, I did.
Thanks,
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 2/7] drm/i915: Fix computation of last_adjustment for RPS autotuning
2015-03-18 6:56 ` Deepak S
2015-03-18 9:05 ` Chris Wilson
@ 2015-03-18 9:20 ` Chris Wilson
2015-03-18 11:09 ` Deepak S
1 sibling, 1 reply; 27+ messages in thread
From: Chris Wilson @ 2015-03-18 9:20 UTC (permalink / raw)
To: Deepak S; +Cc: intel-gfx
On Wed, Mar 18, 2015 at 12:26:49PM +0530, Deepak S wrote:
>
>
> On Friday 06 March 2015 08:36 PM, Chris Wilson wrote:
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >---
> > drivers/gpu/drm/i915/i915_irq.c | 27 ++++++++++++---------------
> > 1 file changed, 12 insertions(+), 15 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> >index 9baecb79de8c..1296ce37e435 100644
> >--- a/drivers/gpu/drm/i915/i915_irq.c
> >+++ b/drivers/gpu/drm/i915/i915_irq.c
> >@@ -1150,21 +1150,20 @@ static void gen6_pm_rps_work(struct work_struct *work)
> > mutex_lock(&dev_priv->rps.hw_lock);
> > adj = dev_priv->rps.last_adj;
> >+ new_delay = dev_priv->rps.cur_freq;
> > if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
> > if (adj > 0)
> > adj *= 2;
> >- else {
> >- /* CHV needs even encode values */
> >- adj = IS_CHERRYVIEW(dev_priv->dev) ? 2 : 1;
> >- }
> >- new_delay = dev_priv->rps.cur_freq + adj;
> >-
> >+ else /* CHV needs even encode values */
> >+ adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1;
> > /*
> > * For better performance, jump directly
> > * to RPe if we're below it.
> > */
> >- if (new_delay < dev_priv->rps.efficient_freq)
> >+ if (new_delay < dev_priv->rps.efficient_freq - adj) {
> > new_delay = dev_priv->rps.efficient_freq;
> >+ adj = 0;
> >+ }
> > } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
> > if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq)
> > new_delay = dev_priv->rps.efficient_freq;
> >@@ -1176,24 +1175,22 @@ static void gen6_pm_rps_work(struct work_struct *work)
>
> I think we should modify adj in GEN6_PM_RP_UP_EI_EXPIRED?
> if not not we might request higher freq since we add adj to new_delay before request freq.
The best way to resolve the conflict appears to be just to reorder this
patch later after the removal of the vlv specific adj paths
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 5/7] drm/i915: Agressive downclocking on Baytrail
2015-03-18 8:12 ` Deepak S
@ 2015-03-18 9:48 ` Daniel Vetter
2015-03-18 9:49 ` Chris Wilson
2015-03-18 11:06 ` Deepak S
0 siblings, 2 replies; 27+ messages in thread
From: Daniel Vetter @ 2015-03-18 9:48 UTC (permalink / raw)
To: Deepak S; +Cc: Daniel Vetter, intel-gfx, Rodrigo Vivi
On Wed, Mar 18, 2015 at 01:42:58PM +0530, Deepak S wrote:
>
>
I guess your empty reply wasn't intentional?
-Daniel
> On Friday 06 March 2015 08:36 PM, Chris Wilson wrote:
> >Reuse the same reclocking strategy for Baytail as on its bigger brethren,
> >Sandybridge and Ivybridge. In particular, this makes the device quicker
> >to reclock (both up and down) though the tendency now is to downclock
> >more aggressively to compensate for the RPS boosts.
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >Cc: Deepak S <deepak.s@linux.intel.com>
> >Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> >Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> >Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> >
> >Conflicts:
> > drivers/gpu/drm/i915/intel_pm.c
> >---
> > drivers/gpu/drm/i915/i915_drv.h | 3 +++
> > drivers/gpu/drm/i915/i915_irq.c | 4 ++--
> > drivers/gpu/drm/i915/i915_reg.h | 2 --
> > drivers/gpu/drm/i915/intel_pm.c | 11 ++++++++++-
> > 4 files changed, 15 insertions(+), 5 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >index efa98c9e5777..bfa6e11f802e 100644
> >--- a/drivers/gpu/drm/i915/i915_drv.h
> >+++ b/drivers/gpu/drm/i915/i915_drv.h
> >@@ -1028,6 +1028,9 @@ struct intel_gen6_power_mgmt {
> > u8 rp0_freq; /* Non-overclocked max frequency. */
> > u32 cz_freq;
> >+ u8 up_threshold; /* Current %busy required to uplock */
> >+ u8 down_threshold; /* Current %busy required to downclock */
> >+
> > int last_adj;
> > enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
> >diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> >index 8892dbdfb629..483079d96957 100644
> >--- a/drivers/gpu/drm/i915/i915_irq.c
> >+++ b/drivers/gpu/drm/i915/i915_irq.c
> >@@ -1050,7 +1050,7 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
> > if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
> > if (!vlv_c0_above(dev_priv,
> > &dev_priv->rps.down_ei, &now,
> >- VLV_RP_DOWN_EI_THRESHOLD))
> >+ dev_priv->rps.down_threshold))
> > events |= GEN6_PM_RP_DOWN_THRESHOLD;
> > dev_priv->rps.down_ei = now;
> > }
> >@@ -1058,7 +1058,7 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
> > if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> > if (vlv_c0_above(dev_priv,
> > &dev_priv->rps.up_ei, &now,
> >- VLV_RP_UP_EI_THRESHOLD))
> >+ dev_priv->rps.up_threshold))
> > events |= GEN6_PM_RP_UP_THRESHOLD;
> > dev_priv->rps.up_ei = now;
> > }
> >diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> >index ca24a2d4a823..13ec6c2b1fcf 100644
> >--- a/drivers/gpu/drm/i915/i915_reg.h
> >+++ b/drivers/gpu/drm/i915/i915_reg.h
> >@@ -663,8 +663,6 @@ enum skl_disp_power_wells {
> > #define FB_FMAX_VMIN_FREQ_LO_MASK 0xf8000000
> > #define VLV_CZ_CLOCK_TO_MILLI_SEC 100000
> >-#define VLV_RP_UP_EI_THRESHOLD 90
> >-#define VLV_RP_DOWN_EI_THRESHOLD 70
> > /* vlv2 north clock has */
> > #define CCK_FUSE_REG 0x8
> >diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> >index 55dc406cd195..972333d2211d 100644
> >--- a/drivers/gpu/drm/i915/intel_pm.c
> >+++ b/drivers/gpu/drm/i915/intel_pm.c
> >@@ -3722,10 +3722,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
> > switch (new_power) {
> > case LOW_POWER:
> > /* Upclock if more than 95% busy over 16ms */
> >+ dev_priv->rps.up_threshold = 95;
> > I915_WRITE(GEN6_RP_UP_EI, 12500);
> > I915_WRITE(GEN6_RP_UP_THRESHOLD, 11800);
> > /* Downclock if less than 85% busy over 32ms */
> >+ dev_priv->rps.down_threshold = 85;
> > I915_WRITE(GEN6_RP_DOWN_EI, 25000);
> > I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 21250);
> >@@ -3740,10 +3742,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
> > case BETWEEN:
> > /* Upclock if more than 90% busy over 13ms */
> >+ dev_priv->rps.up_threshold = 90;
> > I915_WRITE(GEN6_RP_UP_EI, 10250);
> > I915_WRITE(GEN6_RP_UP_THRESHOLD, 9225);
> > /* Downclock if less than 75% busy over 32ms */
> >+ dev_priv->rps.down_threshold = 75;
> > I915_WRITE(GEN6_RP_DOWN_EI, 25000);
> > I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 18750);
> >@@ -3758,10 +3762,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
> > case HIGH_POWER:
> > /* Upclock if more than 85% busy over 10ms */
> >+ dev_priv->rps.up_threshold = 85;
> > I915_WRITE(GEN6_RP_UP_EI, 8000);
> > I915_WRITE(GEN6_RP_UP_THRESHOLD, 6800);
> > /* Downclock if less than 60% busy over 32ms */
> >+ dev_priv->rps.down_threshold = 60;
> > I915_WRITE(GEN6_RP_DOWN_EI, 25000);
> > I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 15000);
> >@@ -3842,8 +3848,10 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
> > "Odd GPU freq value\n"))
> > val &= ~1;
> >- if (val != dev_priv->rps.cur_freq)
> >+ if (val != dev_priv->rps.cur_freq) {
> > vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
> >+ gen6_set_rps_thresholds(dev_priv, val);
>
> >If only for BYT, we might want to add a platform check before "gen6_set_rps_thresholds"
> >+ }
> > I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
> >@@ -3892,6 +3900,7 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
> > & GENFREQSTATUS) == 0, 100))
> > DRM_ERROR("timed out waiting for Punit\n");
> >+ gen6_set_rps_thresholds(dev_priv, val);
> > vlv_force_gfx_clock(dev_priv, false);
> > I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
>
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 5/7] drm/i915: Agressive downclocking on Baytrail
2015-03-18 9:48 ` Daniel Vetter
@ 2015-03-18 9:49 ` Chris Wilson
2015-03-18 11:06 ` Deepak S
1 sibling, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2015-03-18 9:49 UTC (permalink / raw)
To: Daniel Vetter; +Cc: Daniel Vetter, intel-gfx, Rodrigo Vivi
On Wed, Mar 18, 2015 at 10:48:36AM +0100, Daniel Vetter wrote:
> On Wed, Mar 18, 2015 at 01:42:58PM +0530, Deepak S wrote:
> >
> >
>
> I guess your empty reply wasn't intentional?
I heard "This needs to be rebased against -nightly".
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 5/7] drm/i915: Agressive downclocking on Baytrail
2015-03-18 9:48 ` Daniel Vetter
2015-03-18 9:49 ` Chris Wilson
@ 2015-03-18 11:06 ` Deepak S
1 sibling, 0 replies; 27+ messages in thread
From: Deepak S @ 2015-03-18 11:06 UTC (permalink / raw)
To: Daniel Vetter; +Cc: Daniel Vetter, intel-gfx, Rodrigo Vivi
On Wednesday 18 March 2015 03:18 PM, Daniel Vetter wrote:
> On Wed, Mar 18, 2015 at 01:42:58PM +0530, Deepak S wrote:
>>
> I guess your empty reply wasn't intentional?
> -Daniel
Sorry, that was not intentional :)
>> On Friday 06 March 2015 08:36 PM, Chris Wilson wrote:
>>> Reuse the same reclocking strategy for Baytail as on its bigger brethren,
>>> Sandybridge and Ivybridge. In particular, this makes the device quicker
>>> to reclock (both up and down) though the tendency now is to downclock
>>> more aggressively to compensate for the RPS boosts.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Deepak S <deepak.s@linux.intel.com>
>>> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
>>> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
>>> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>>>
>>> Conflicts:
>>> drivers/gpu/drm/i915/intel_pm.c
>>> ---
>>> drivers/gpu/drm/i915/i915_drv.h | 3 +++
>>> drivers/gpu/drm/i915/i915_irq.c | 4 ++--
>>> drivers/gpu/drm/i915/i915_reg.h | 2 --
>>> drivers/gpu/drm/i915/intel_pm.c | 11 ++++++++++-
>>> 4 files changed, 15 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>> index efa98c9e5777..bfa6e11f802e 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -1028,6 +1028,9 @@ struct intel_gen6_power_mgmt {
>>> u8 rp0_freq; /* Non-overclocked max frequency. */
>>> u32 cz_freq;
>>> + u8 up_threshold; /* Current %busy required to uplock */
>>> + u8 down_threshold; /* Current %busy required to downclock */
>>> +
>>> int last_adj;
>>> enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
>>> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
>>> index 8892dbdfb629..483079d96957 100644
>>> --- a/drivers/gpu/drm/i915/i915_irq.c
>>> +++ b/drivers/gpu/drm/i915/i915_irq.c
>>> @@ -1050,7 +1050,7 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
>>> if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
>>> if (!vlv_c0_above(dev_priv,
>>> &dev_priv->rps.down_ei, &now,
>>> - VLV_RP_DOWN_EI_THRESHOLD))
>>> + dev_priv->rps.down_threshold))
>>> events |= GEN6_PM_RP_DOWN_THRESHOLD;
>>> dev_priv->rps.down_ei = now;
>>> }
>>> @@ -1058,7 +1058,7 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
>>> if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
>>> if (vlv_c0_above(dev_priv,
>>> &dev_priv->rps.up_ei, &now,
>>> - VLV_RP_UP_EI_THRESHOLD))
>>> + dev_priv->rps.up_threshold))
>>> events |= GEN6_PM_RP_UP_THRESHOLD;
>>> dev_priv->rps.up_ei = now;
>>> }
>>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>>> index ca24a2d4a823..13ec6c2b1fcf 100644
>>> --- a/drivers/gpu/drm/i915/i915_reg.h
>>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>>> @@ -663,8 +663,6 @@ enum skl_disp_power_wells {
>>> #define FB_FMAX_VMIN_FREQ_LO_MASK 0xf8000000
>>> #define VLV_CZ_CLOCK_TO_MILLI_SEC 100000
>>> -#define VLV_RP_UP_EI_THRESHOLD 90
>>> -#define VLV_RP_DOWN_EI_THRESHOLD 70
>>> /* vlv2 north clock has */
>>> #define CCK_FUSE_REG 0x8
>>> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
>>> index 55dc406cd195..972333d2211d 100644
>>> --- a/drivers/gpu/drm/i915/intel_pm.c
>>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>>> @@ -3722,10 +3722,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
>>> switch (new_power) {
>>> case LOW_POWER:
>>> /* Upclock if more than 95% busy over 16ms */
>>> + dev_priv->rps.up_threshold = 95;
>>> I915_WRITE(GEN6_RP_UP_EI, 12500);
>>> I915_WRITE(GEN6_RP_UP_THRESHOLD, 11800);
>>> /* Downclock if less than 85% busy over 32ms */
>>> + dev_priv->rps.down_threshold = 85;
>>> I915_WRITE(GEN6_RP_DOWN_EI, 25000);
>>> I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 21250);
>>> @@ -3740,10 +3742,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
>>> case BETWEEN:
>>> /* Upclock if more than 90% busy over 13ms */
>>> + dev_priv->rps.up_threshold = 90;
>>> I915_WRITE(GEN6_RP_UP_EI, 10250);
>>> I915_WRITE(GEN6_RP_UP_THRESHOLD, 9225);
>>> /* Downclock if less than 75% busy over 32ms */
>>> + dev_priv->rps.down_threshold = 75;
>>> I915_WRITE(GEN6_RP_DOWN_EI, 25000);
>>> I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 18750);
>>> @@ -3758,10 +3762,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
>>> case HIGH_POWER:
>>> /* Upclock if more than 85% busy over 10ms */
>>> + dev_priv->rps.up_threshold = 85;
>>> I915_WRITE(GEN6_RP_UP_EI, 8000);
>>> I915_WRITE(GEN6_RP_UP_THRESHOLD, 6800);
>>> /* Downclock if less than 60% busy over 32ms */
>>> + dev_priv->rps.down_threshold = 60;
>>> I915_WRITE(GEN6_RP_DOWN_EI, 25000);
>>> I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 15000);
>>> @@ -3842,8 +3848,10 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
>>> "Odd GPU freq value\n"))
>>> val &= ~1;
>>> - if (val != dev_priv->rps.cur_freq)
>>> + if (val != dev_priv->rps.cur_freq) {
>>> vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
>>> + gen6_set_rps_thresholds(dev_priv, val);
>>> If only for BYT, we might want to add a platform check before "gen6_set_rps_thresholds"
>>> + }
>>> I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
>>> @@ -3892,6 +3900,7 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>>> & GENFREQSTATUS) == 0, 100))
>>> DRM_ERROR("timed out waiting for Punit\n");
>>> + gen6_set_rps_thresholds(dev_priv, val);
>>> vlv_force_gfx_clock(dev_priv, false);
>>> I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 2/7] drm/i915: Fix computation of last_adjustment for RPS autotuning
2015-03-18 9:20 ` Chris Wilson
@ 2015-03-18 11:09 ` Deepak S
0 siblings, 0 replies; 27+ messages in thread
From: Deepak S @ 2015-03-18 11:09 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
On Wednesday 18 March 2015 02:50 PM, Chris Wilson wrote:
> On Wed, Mar 18, 2015 at 12:26:49PM +0530, Deepak S wrote:
>>
>> On Friday 06 March 2015 08:36 PM, Chris Wilson wrote:
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>> drivers/gpu/drm/i915/i915_irq.c | 27 ++++++++++++---------------
>>> 1 file changed, 12 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
>>> index 9baecb79de8c..1296ce37e435 100644
>>> --- a/drivers/gpu/drm/i915/i915_irq.c
>>> +++ b/drivers/gpu/drm/i915/i915_irq.c
>>> @@ -1150,21 +1150,20 @@ static void gen6_pm_rps_work(struct work_struct *work)
>>> mutex_lock(&dev_priv->rps.hw_lock);
>>> adj = dev_priv->rps.last_adj;
>>> + new_delay = dev_priv->rps.cur_freq;
>>> if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
>>> if (adj > 0)
>>> adj *= 2;
>>> - else {
>>> - /* CHV needs even encode values */
>>> - adj = IS_CHERRYVIEW(dev_priv->dev) ? 2 : 1;
>>> - }
>>> - new_delay = dev_priv->rps.cur_freq + adj;
>>> -
>>> + else /* CHV needs even encode values */
>>> + adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1;
>>> /*
>>> * For better performance, jump directly
>>> * to RPe if we're below it.
>>> */
>>> - if (new_delay < dev_priv->rps.efficient_freq)
>>> + if (new_delay < dev_priv->rps.efficient_freq - adj) {
>>> new_delay = dev_priv->rps.efficient_freq;
>>> + adj = 0;
>>> + }
>>> } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
>>> if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq)
>>> new_delay = dev_priv->rps.efficient_freq;
>>> @@ -1176,24 +1175,22 @@ static void gen6_pm_rps_work(struct work_struct *work)
>> I think we should modify adj in GEN6_PM_RP_UP_EI_EXPIRED?
>> if not not we might request higher freq since we add adj to new_delay before request freq.
> The best way to resolve the conflict appears to be just to reorder this
> patch later after the removal of the vlv specific adj paths
> -Chris
Yes, I saw the reorder patch. looks fine.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
end of thread, other threads:[~2015-03-18 11:12 UTC | newest]
Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-06 15:06 [PATCH 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle Chris Wilson
2015-03-06 15:06 ` [PATCH 2/7] drm/i915: Fix computation of last_adjustment for RPS autotuning Chris Wilson
2015-03-06 17:32 ` Daniel Vetter
2015-03-06 21:44 ` Chris Wilson
2015-03-18 6:56 ` Deepak S
2015-03-18 9:05 ` Chris Wilson
2015-03-18 9:20 ` Chris Wilson
2015-03-18 11:09 ` Deepak S
2015-03-06 15:06 ` [PATCH 3/7] drm/i915: Improved w/a for rps on Baytrail Chris Wilson
2015-03-18 7:56 ` Deepak S
2015-03-06 15:06 ` [PATCH 4/7] drm/i915: Use down ei for manual Baytrail RPS calculations Chris Wilson
2015-03-18 8:04 ` Deepak S
2015-03-06 15:06 ` [PATCH 5/7] drm/i915: Agressive downclocking on Baytrail Chris Wilson
2015-03-18 8:12 ` Deepak S
2015-03-18 9:48 ` Daniel Vetter
2015-03-18 9:49 ` Chris Wilson
2015-03-18 11:06 ` Deepak S
2015-03-06 15:06 ` [PATCH 6/7] drm/i915: Boost GPU frequency if we detect outstanding pageflips Chris Wilson
2015-03-18 8:18 ` Deepak S
2015-03-18 8:20 ` Deepak S
2015-03-06 15:06 ` [PATCH 7/7] drm/i915: Deminish contribution of wait-boosting from clients Chris Wilson
2015-03-06 17:58 ` shuang.he
2015-03-18 9:00 ` Deepak S
2015-03-09 15:38 ` [PATCH 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle Jesse Barnes
2015-03-18 5:44 ` Deepak S
2015-03-18 9:07 ` Chris Wilson
2015-03-18 5:52 ` Deepak S
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.